unsigned long src_addr,
struct page **pagep)
{
+ int vm_shared = dst_vma->vm_flags & VM_SHARED;
struct hstate *h = hstate_vma(dst_vma);
pte_t _dst_pte;
spinlock_t *ptl;
__SetPageUptodate(page);
set_page_huge_active(page);
+ /*
+ * If shared, add to page cache
+ */
+ if (vm_shared) {
+ struct address_space *mapping = dst_vma->vm_file->f_mapping;
+ pgoff_t idx = vma_hugecache_offset(h, dst_vma, dst_addr);
+
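+ /*
+ * On success huge_add_to_page_cache() returns with the page
+ * locked; the page stays locked until after the new pte is
+ * installed below.
+ */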
+ ret = huge_add_to_page_cache(page, mapping, idx);
+ if (ret)
+ goto out_release_nounlock;
+ }
+
ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
spin_lock(ptl);
if (!huge_pte_none(huge_ptep_get(dst_pte)))
goto out_release_unlock;
- ClearPagePrivate(page);
- hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
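+ /*
+ * Shared pages already live in the page cache, so only a file
+ * rmap reference is taken. Private pages become anonymous:
+ * clear the PagePrivate reservation marker and add a new anon
+ * rmap.
+ */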
+ if (vm_shared) {
+ page_dup_rmap(page, true);
+ } else {
+ ClearPagePrivate(page);
+ hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
+ }
_dst_pte = make_huge_pte(dst_vma, page, dst_vma->vm_flags & VM_WRITE);
if (dst_vma->vm_flags & VM_WRITE)
_dst_pte = huge_pte_mkdirty(_dst_pte);
set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
update_mmu_cache(dst_vma, dst_addr, dst_pte);
spin_unlock(ptl);
+ if (vm_shared)
+ unlock_page(page);
ret = 0;
out:
return ret;
out_release_unlock:
spin_unlock(ptl);
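+ /*
+ * For shared mappings the page is still locked from
+ * huge_add_to_page_cache(); unlock before the final put.
+ */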
+ if (vm_shared)
+ unlock_page(page);
+out_release_nounlock:
put_page(page);
goto out;
}
unsigned long len,
bool zeropage)
{
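+ /*
+ * vm_shared tracks whether the current dst_vma is shared and is
+ * recomputed after each retry. vm_alloc_shared snapshots it at
+ * page allocation time for the error cleanup path, where dst_vma
+ * may have changed or be NULL.
+ */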
+ int vm_alloc_shared = 0;
+ int vm_shared = dst_vma->vm_flags & VM_SHARED;
ssize_t err;
pte_t *dst_pte;
unsigned long src_addr, dst_addr;
goto out_unlock;
/*
- * Make sure the vma is not shared, that the remaining dst
- * range is both valid and fully within a single existing vma.
+ * Make sure the remaining dst range is both valid and
+ * fully within a single existing vma.
*/
- if (dst_vma->vm_flags & VM_SHARED)
- goto out_unlock;
if (dst_start < dst_vma->vm_start ||
dst_start + len > dst_vma->vm_end)
goto out_unlock;
+
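+ /* dst_vma was looked up again, so recheck whether it is shared */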
+ vm_shared = dst_vma->vm_flags & VM_SHARED;
}
if (WARN_ON(dst_addr & (vma_hpagesize - 1) ||
(len - copied) & (vma_hpagesize - 1)))
goto out_unlock;
/*
- * Ensure the dst_vma has a anon_vma.
+ * If not shared, ensure the dst_vma has an anon_vma.
*/
err = -ENOMEM;
- if (unlikely(anon_vma_prepare(dst_vma)))
- goto out_unlock;
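+ /*
+ * Shared mappings are file backed and never enter the anon rmap,
+ * so an anon_vma is only needed for private mappings.
+ */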
+ if (!vm_shared) {
+ if (unlikely(anon_vma_prepare(dst_vma)))
+ goto out_unlock;
+ }
h = hstate_vma(dst_vma);
dst_addr, src_addr, &page);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
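+ /* remember whether the vma was shared when this page was allocated */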
+ vm_alloc_shared = vm_shared;
cond_resched();
if (page) {
/*
* We encountered an error and are about to free a newly
- * allocated huge page. It is possible that there was a
- * reservation associated with the page that has been
- * consumed. See the routine restore_reserve_on_error
- * for details. Unfortunately, we can not call
- * restore_reserve_on_error now as it would require holding
- * mmap_sem. Clear the PagePrivate flag so that the global
+ * allocated huge page.
+ *
+ * Reservation handling is very subtle, and is different for
+ * private and shared mappings. See the routine
+ * restore_reserve_on_error for details. Unfortunately, we
+ * cannot call restore_reserve_on_error now as it would
+ * require holding mmap_sem.
+ *
+ * If a reservation for the page existed in the reservation
+ * map of a private mapping, the map was modified to indicate
+ * the reservation was consumed when the page was allocated.
+ * We clear the PagePrivate flag now so that the global
* reserve count will not be incremented in free_huge_page.
* The reservation map will still indicate the reservation
* was consumed and possibly prevent later page allocation.
- * This is better than leaking a global reservation.
+ * This is better than leaking a global reservation. If no
+ * reservation existed, it is still safe to clear PagePrivate
+ * as no adjustments to reservation counts were made during
+ * allocation.
+ *
+ * The reservation map for shared mappings indicates which
+ * pages have reservations. When a huge page is allocated
+ * for an address with a reservation, no change is made to
+ * the reserve map. In this case PagePrivate will be set
+ * to indicate that the global reservation count should be
+ * incremented when the page is freed. This is the desired
+ * behavior. However, when a huge page is allocated for an
+ * address without a reservation, a reservation entry is added
+ * to the reservation map, and PagePrivate will not be set.
+ * When the page is freed, the global reserve count will NOT
+ * be incremented and it will appear as though we have leaked
+ * a reserved page. In this case, set PagePrivate so that the
+ * global reserve count will be incremented to match the
+ * reservation map entry which was created.
+ *
+ * Note that vm_alloc_shared is based on the flags of the vma
+ * for which the page was originally allocated. dst_vma could
+ * be different or NULL on error.
*/
- ClearPagePrivate(page);
+ if (vm_alloc_shared)
+ SetPagePrivate(page);
+ else
+ ClearPagePrivate(page);
put_page(page);
}
BUG_ON(copied < 0);
dst_vma = find_vma(dst_mm, dst_start);
if (!dst_vma)
goto out_unlock;
- if (!vma_is_shmem(dst_vma) && dst_vma->vm_flags & VM_SHARED)
+ /*
+ * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but
+ * it will overwrite vm_ops, so vma_is_anonymous must return false.
+ */
+ if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) &&
+ dst_vma->vm_flags & VM_SHARED))
goto out_unlock;
+
if (dst_start < dst_vma->vm_start ||
dst_start + len > dst_vma->vm_end)
goto out_unlock;
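
For illustration only (not part of the patch): a minimal userspace sketch of the flow this change enables, registering a MAP_SHARED hugetlbfs range with userfaultfd and resolving a missing fault with UFFDIO_COPY. The /dev/hugepages mount point, file name, and 2MB huge page size are assumptions; error checking is omitted.

#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#define HPAGE_SIZE	(2UL << 20)	/* assumed 2MB huge page size */

int main(void)
{
	/* hypothetical hugetlbfs file; any MAP_SHARED hugetlb mapping works */
	int fd = open("/dev/hugepages/uffd-test", O_CREAT | O_RDWR, 0600);
	long uffd = syscall(__NR_userfaultfd, O_CLOEXEC);
	struct uffdio_api api = { .api = UFFD_API };
	char *dst, *src;

	ftruncate(fd, HPAGE_SIZE);
	dst = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE,
		   MAP_SHARED, fd, 0);

	/* the UFFDIO_COPY source may be ordinary anonymous memory */
	src = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	memset(src, 0x5a, HPAGE_SIZE);

	ioctl(uffd, UFFDIO_API, &api);

	struct uffdio_register reg = {
		.range = { .start = (unsigned long)dst, .len = HPAGE_SIZE },
		.mode = UFFDIO_REGISTER_MODE_MISSING,
	};
	ioctl(uffd, UFFDIO_REGISTER, &reg);

	/*
	 * A thread touching *dst would now block; the fault handler
	 * resolves it by copying a full huge page in one shot.  dst and
	 * len must be multiples of the huge page size.
	 */
	struct uffdio_copy copy = {
		.dst = (unsigned long)dst,
		.src = (unsigned long)src,
		.len = HPAGE_SIZE,
		.mode = 0,
	};
	ioctl(uffd, UFFDIO_COPY, &copy);	/* copy.copy returns bytes copied */

	return 0;
}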