thp: pte alloc trans splitting
author Andrea Arcangeli <aarcange@redhat.com>
Thu, 13 Jan 2011 23:46:43 +0000 (15:46 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 14 Jan 2011 01:32:40 +0000 (17:32 -0800)
pte alloc routines must wait for split_huge_page if the pmd is not none but
does not point to a pte table, i.e. when pmd_trans_splitting() is true.  The
additional branches are optimized away at compile time by pmd_trans_splitting()
when CONFIG_TRANSPARENT_HUGEPAGE is off.  However, we must pass the vma down
in order to know which anon_vma lock to wait on.
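
Condensed, the new __pte_alloc() logic in the mm/memory.c hunk below boils
down to the following (allocation and error handling trimmed):

	spin_lock(&mm->page_table_lock);
	wait_split_huge_page = 0;
	if (likely(pmd_none(*pmd))) {
		/* Nobody installed a pte table yet: publish ours. */
		mm->nr_ptes++;
		pmd_populate(mm, pmd, new);
		new = NULL;
	} else if (unlikely(pmd_trans_splitting(*pmd)))
		/* A huge pmd is being split under us: remember to wait. */
		wait_split_huge_page = 1;
	spin_unlock(&mm->page_table_lock);
	if (new)
		pte_free(mm, new);
	if (wait_split_huge_page)
		/* Needs the anon_vma, hence the new vma parameter. */
		wait_split_huge_page(vma->anon_vma, pmd);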

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
arch/arm/mm/pgd.c
arch/ia64/mm/hugetlbpage.c
arch/sh/mm/hugetlbpage.c
arch/sparc/mm/generic_32.c
arch/sparc/mm/generic_64.c
arch/sparc/mm/hugetlbpage.c
arch/um/kernel/skas/mmu.c
arch/x86/kernel/tboot.c
include/linux/mm.h
mm/memory.c
mm/mremap.c

diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c
index 93292a18cf77f35bd9369ad84f3f6f56e948ddc8..709244c66fa3148b3f144310216240fdadfac9a6 100644
@@ -50,7 +50,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
                if (!new_pmd)
                        goto no_pmd;
 
-               new_pte = pte_alloc_map(mm, new_pmd, 0);
+               new_pte = pte_alloc_map(mm, NULL, new_pmd, 0);
                if (!new_pte)
                        goto no_pte;
 
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index 1841ee7e65f9b2a11f800e1fc3c965d1c9172c10..5ca674b74737bd7bc4b3d3e03d58d534b8d44633 100644
@@ -38,7 +38,7 @@ huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
        if (pud) {
                pmd = pmd_alloc(mm, pud, taddr);
                if (pmd)
-                       pte = pte_alloc_map(mm, pmd, taddr);
+                       pte = pte_alloc_map(mm, NULL, pmd, taddr);
        }
        return pte;
 }
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index 9163db3e8d15c521d75a8ca2a505428cc3409072..d7762349ea4869be1b40898a5ae7e6ff8e79c6aa 100644
@@ -35,7 +35,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
                if (pud) {
                        pmd = pmd_alloc(mm, pud, addr);
                        if (pmd)
-                               pte = pte_alloc_map(mm, pmd, addr);
+                               pte = pte_alloc_map(mm, NULL, pmd, addr);
                }
        }
 
diff --git a/arch/sparc/mm/generic_32.c b/arch/sparc/mm/generic_32.c
index 5edcac184eafc720c169fe02f12763fb8ed56ba1..e6067b75f11cbdf946f5e9cbd38e6b306e313ef6 100644
@@ -50,7 +50,7 @@ static inline int io_remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned
                end = PGDIR_SIZE;
        offset -= address;
        do {
-               pte_t * pte = pte_alloc_map(mm, pmd, address);
+               pte_t *pte = pte_alloc_map(mm, NULL, pmd, address);
                if (!pte)
                        return -ENOMEM;
                io_remap_pte_range(mm, pte, address, end - address, address + offset, prot, space);
diff --git a/arch/sparc/mm/generic_64.c b/arch/sparc/mm/generic_64.c
index 04f2bf4cd57105dca5765cb31c0730b2c75358d7..3cb00dfd4bd67397cba2831fa6671a60300763de 100644
@@ -92,7 +92,7 @@ static inline int io_remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned
                end = PGDIR_SIZE;
        offset -= address;
        do {
-               pte_t * pte = pte_alloc_map(mm, pmd, address);
+               pte_t *pte = pte_alloc_map(mm, NULL, pmd, address);
                if (!pte)
                        return -ENOMEM;
                io_remap_pte_range(mm, pte, address, end - address, address + offset, prot, space);
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 5fdddf134caa6992afb6c4fdf2056a93e7ffdb2d..f4e97646ce23140781a143f47a66c7ebca578bcf 100644
@@ -214,7 +214,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
        if (pud) {
                pmd = pmd_alloc(mm, pud, addr);
                if (pmd)
-                       pte = pte_alloc_map(mm, pmd, addr);
+                       pte = pte_alloc_map(mm, NULL, pmd, addr);
        }
        return pte;
 }
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 3d099f97478595be72b5cd15b5eebb9877ea32c6..1aee587e9c5d915110969428d39662367799e0a3 100644
@@ -31,7 +31,7 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
        if (!pmd)
                goto out_pmd;
 
-       pte = pte_alloc_map(mm, pmd, proc);
+       pte = pte_alloc_map(mm, NULL, pmd, proc);
        if (!pte)
                goto out_pte;
 
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index c2f1b26141e2c4ab3888315b4754cbf1a324be40..998e972f3b1a93638c7456650927db96704e68c6 100644
@@ -133,7 +133,7 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
        pmd = pmd_alloc(&tboot_mm, pud, vaddr);
        if (!pmd)
                return -1;
-       pte = pte_alloc_map(&tboot_mm, pmd, vaddr);
+       pte = pte_alloc_map(&tboot_mm, NULL, pmd, vaddr);
        if (!pte)
                return -1;
        set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6bef67d74adf3366190749bb6ec9eefef0510071..14ddd98b063f72998bac252dea9400ef3b1d5ab4 100644
@@ -1131,7 +1131,8 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,
 int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
 #endif
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+               pmd_t *pmd, unsigned long address);
 int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
 
 /*
@@ -1200,16 +1201,18 @@ static inline void pgtable_page_dtor(struct page *page)
        pte_unmap(pte);                                 \
 } while (0)
 
-#define pte_alloc_map(mm, pmd, address)                        \
-       ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
-               NULL: pte_offset_map(pmd, address))
+#define pte_alloc_map(mm, vma, pmd, address)                           \
+       ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, vma,    \
+                                                       pmd, address))? \
+        NULL: pte_offset_map(pmd, address))
 
 #define pte_alloc_map_lock(mm, pmd, address, ptlp)     \
-       ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+       ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, NULL,   \
+                                                       pmd, address))? \
                NULL: pte_offset_map_lock(mm, pmd, address, ptlp))
 
 #define pte_alloc_kernel(pmd, address)                 \
-       ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+       ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
                NULL: pte_offset_kernel(pmd, address))
 
 extern void free_area_init(unsigned long * zones_size);
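
Call sites that can never race with a huge pmd split (no anon_vma involved,
as in the arch and tboot hunks above) pass NULL for the new vma argument,
and pte_alloc_map_lock() hardcodes NULL for the same reason; only the page
fault path hands in a real vma.  Illustrative uses of the new signature:

	pte = pte_alloc_map(mm, NULL, pmd, addr);	/* no vma: never waits */
	pte = pte_alloc_map(mm, vma, pmd, address);	/* fault path: may wait */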
diff --git a/mm/memory.c b/mm/memory.c
index bdf19366b70505fb09fef21d54bc0ade334b147b..567bca80ea539e410e271527264d2bdc014f1e7e 100644
@@ -394,9 +394,11 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
        }
 }
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+               pmd_t *pmd, unsigned long address)
 {
        pgtable_t new = pte_alloc_one(mm, address);
+       int wait_split_huge_page;
        if (!new)
                return -ENOMEM;
 
@@ -416,14 +418,18 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
        smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
 
        spin_lock(&mm->page_table_lock);
-       if (!pmd_present(*pmd)) {       /* Has another populated it ? */
+       wait_split_huge_page = 0;
+       if (likely(pmd_none(*pmd))) {   /* Has another populated it ? */
                mm->nr_ptes++;
                pmd_populate(mm, pmd, new);
                new = NULL;
-       }
+       } else if (unlikely(pmd_trans_splitting(*pmd)))
+               wait_split_huge_page = 1;
        spin_unlock(&mm->page_table_lock);
        if (new)
                pte_free(mm, new);
+       if (wait_split_huge_page)
+               wait_split_huge_page(vma->anon_vma, pmd);
        return 0;
 }
 
@@ -436,10 +442,11 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
        smp_wmb(); /* See comment in __pte_alloc */
 
        spin_lock(&init_mm.page_table_lock);
-       if (!pmd_present(*pmd)) {       /* Has another populated it ? */
+       if (likely(pmd_none(*pmd))) {   /* Has another populated it ? */
                pmd_populate_kernel(&init_mm, pmd, new);
                new = NULL;
-       }
+       } else
+               VM_BUG_ON(pmd_trans_splitting(*pmd));
        spin_unlock(&init_mm.page_table_lock);
        if (new)
                pte_free_kernel(&init_mm, new);
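
The user/kernel asymmetry above is deliberate: kernel page tables never
contain transparent huge pmds, so __pte_alloc_kernel() can VM_BUG_ON()
instead of waiting.  On the user side the wait runs only after
page_table_lock has been dropped, because wait_split_huge_page() blocks on
the anon_vma lock that split_huge_page() holds for the duration of the
split.  Roughly (a sketch for illustration; the real macro lives in
include/linux/huge_mm.h and is not part of this diff):

	/*
	 * Sketch only, assuming the anon_vma_lock()/anon_vma_unlock()
	 * helpers of this era: acquiring and releasing the lock cannot
	 * succeed until any in-flight split under this anon_vma completes.
	 */
	anon_vma_lock(vma->anon_vma);
	anon_vma_unlock(vma->anon_vma);
	/* By now *pmd points at a regular pte table again. */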
@@ -3253,7 +3260,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        pmd = pmd_alloc(mm, pud, address);
        if (!pmd)
                return VM_FAULT_OOM;
-       pte = pte_alloc_map(mm, pmd, address);
+       pte = pte_alloc_map(mm, vma, pmd, address);
        if (!pte)
                return VM_FAULT_OOM;
 
diff --git a/mm/mremap.c b/mm/mremap.c
index 563fbdd6293ae3f0139450d6b7d848f862c1f8dc..b09eefaea0b8c6ea0d126e959453bfc59874525f 100644
@@ -47,7 +47,8 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
        return pmd;
 }
 
-static pmd_t *alloc_new_pmd(struct mm_struct *mm, unsigned long addr)
+static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
+                           unsigned long addr)
 {
        pgd_t *pgd;
        pud_t *pud;
@@ -62,7 +63,8 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, unsigned long addr)
        if (!pmd)
                return NULL;
 
-       if (!pmd_present(*pmd) && __pte_alloc(mm, pmd, addr))
+       VM_BUG_ON(pmd_trans_huge(*pmd));
+       if (pmd_none(*pmd) && __pte_alloc(mm, vma, pmd, addr))
                return NULL;
 
        return pmd;
@@ -147,7 +149,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
                old_pmd = get_old_pmd(vma->vm_mm, old_addr);
                if (!old_pmd)
                        continue;
-               new_pmd = alloc_new_pmd(vma->vm_mm, new_addr);
+               new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr);
                if (!new_pmd)
                        break;
                next = (new_addr + PMD_SIZE) & PMD_MASK;