x86/xen: Change __xen_pgd_walk() and xen_cleanmfnmap() to support p4d
Author:     Xiong Zhang <xiong.y.zhang@intel.com>
AuthorDate: Fri, 17 Mar 2017 18:55:14 +0000 (21:55 +0300)
Commit:     Ingo Molnar <mingo@kernel.org>
CommitDate: Mon, 27 Mar 2017 06:56:49 +0000 (08:56 +0200)
Split these helpers into per-level functions and add support for the
additional p4d page table level.
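
For illustration, here is a minimal userspace model of the new walk
pattern (not kernel code; the table size, types and names below are
made up): propagating "last && i == nr - 1" down the levels confines
the partial walk to the single subtree containing the limit, while all
earlier subtrees are walked in full.

	#include <stdbool.h>
	#include <stdio.h>

	#define ENTRIES 4		/* stand-in for PTRS_PER_* */

	/* Toy two-level "page table": each slot is just a present flag. */
	static bool level1[ENTRIES][ENTRIES];

	/* Lower level: walked only up to limit_idx in the last subtree. */
	static int walk_l1(bool *tbl, bool last, int limit_idx)
	{
		int nr = last ? limit_idx + 1 : ENTRIES;
		int i, visited = 0;

		for (i = 0; i < nr; i++)
			if (tbl[i])
				visited++;
		return visited;
	}

	/* Top level: always bounded by its own limit index. */
	static int walk_l0(bool (*tbl)[ENTRIES], int l0_limit, int l1_limit)
	{
		int nr = l0_limit + 1;
		int i, visited = 0;

		for (i = 0; i < nr; i++)
			visited += walk_l1(tbl[i], i == nr - 1, l1_limit);
		return visited;
	}

	int main(void)
	{
		int i, j;

		for (i = 0; i < ENTRIES; i++)
			for (j = 0; j < ENTRIES; j++)
				level1[i][j] = true;

		/* Rows 0 and 1 walked fully, row 2 only to index 1: 4+4+2. */
		printf("visited %d entries\n", walk_l0(level1, 2, 1));
		return 0;
	}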

Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.com>
[ Split off into separate patch ]
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-arch@vger.kernel.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20170317185515.8636-6-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/xen/mmu.c
arch/x86/xen/mmu.h

index ebbfe00133f7c5e691b833fecfa5d749a53e9dbe..e6adebbb5f8d3b94f82a4218b292bf2a8f5cb475 100644
@@ -593,6 +593,64 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val)
 }
 #endif /* CONFIG_PGTABLE_LEVELS == 4 */
 
+static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
+               int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
+               bool last, unsigned long limit)
+{
+       int i, nr, flush = 0;
+
+       nr = last ? pmd_index(limit) + 1 : PTRS_PER_PMD;
+       for (i = 0; i < nr; i++) {
+               if (!pmd_none(pmd[i]))
+                       flush |= (*func)(mm, pmd_page(pmd[i]), PT_PTE);
+       }
+       return flush;
+}
+
+static int xen_pud_walk(struct mm_struct *mm, pud_t *pud,
+               int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
+               bool last, unsigned long limit)
+{
+       int i, nr, flush = 0;
+
+       nr = last ? pud_index(limit) + 1 : PTRS_PER_PUD;
+       for (i = 0; i < nr; i++) {
+               pmd_t *pmd;
+
+               if (pud_none(pud[i]))
+                       continue;
+
+               pmd = pmd_offset(&pud[i], 0);
+               if (PTRS_PER_PMD > 1)
+                       flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
+               flush |= xen_pmd_walk(mm, pmd, func,
+                               last && i == nr - 1, limit);
+       }
+       return flush;
+}
+
+static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
+               int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
+               bool last, unsigned long limit)
+{
+       int i, nr, flush = 0;
+
+       nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D;
+       for (i = 0; i < nr; i++) {
+               pud_t *pud;
+
+               if (p4d_none(p4d[i]))
+                       continue;
+
+               pud = pud_offset(&p4d[i], 0);
+               if (PTRS_PER_PUD > 1)
+                       flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
+               flush |= xen_pud_walk(mm, pud, func,
+                               last && i == nr - 1, limit);
+       }
+       return flush;
+}
+
 /*
  * (Yet another) pagetable walker.  This one is intended for pinning a
  * pagetable.  This means that it walks a pagetable and calls the
@@ -613,10 +671,8 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
                                      enum pt_level),
                          unsigned long limit)
 {
-       int flush = 0;
+       int i, nr, flush = 0;
        unsigned hole_low, hole_high;
-       unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
-       unsigned pgdidx, pudidx, pmdidx;
 
        /* The limit is the last byte to be touched */
        limit--;
@@ -633,65 +689,22 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
        hole_low = pgd_index(USER_LIMIT);
        hole_high = pgd_index(PAGE_OFFSET);
 
-       pgdidx_limit = pgd_index(limit);
-#if PTRS_PER_PUD > 1
-       pudidx_limit = pud_index(limit);
-#else
-       pudidx_limit = 0;
-#endif
-#if PTRS_PER_PMD > 1
-       pmdidx_limit = pmd_index(limit);
-#else
-       pmdidx_limit = 0;
-#endif
-
-       for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
-               pud_t *pud;
+       nr = pgd_index(limit) + 1;
+       for (i = 0; i < nr; i++) {
+               p4d_t *p4d;
 
-               if (pgdidx >= hole_low && pgdidx < hole_high)
+               if (i >= hole_low && i < hole_high)
                        continue;
 
-               if (!pgd_val(pgd[pgdidx]))
+               if (pgd_none(pgd[i]))
                        continue;
 
-               pud = pud_offset(&pgd[pgdidx], 0);
-
-               if (PTRS_PER_PUD > 1) /* not folded */
-                       flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
-
-               for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
-                       pmd_t *pmd;
-
-                       if (pgdidx == pgdidx_limit &&
-                           pudidx > pudidx_limit)
-                               goto out;
-
-                       if (pud_none(pud[pudidx]))
-                               continue;
-
-                       pmd = pmd_offset(&pud[pudidx], 0);
-
-                       if (PTRS_PER_PMD > 1) /* not folded */
-                               flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
-
-                       for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
-                               struct page *pte;
-
-                               if (pgdidx == pgdidx_limit &&
-                                   pudidx == pudidx_limit &&
-                                   pmdidx > pmdidx_limit)
-                                       goto out;
-
-                               if (pmd_none(pmd[pmdidx]))
-                                       continue;
-
-                               pte = pmd_page(pmd[pmdidx]);
-                               flush |= (*func)(mm, pte, PT_PTE);
-                       }
-               }
+               p4d = p4d_offset(&pgd[i], 0);
+               if (PTRS_PER_P4D > 1)
+                       flush |= (*func)(mm, virt_to_page(p4d), PT_P4D);
+               flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
        }
 
-out:
        /* Do the top level last, so that the callbacks can use it as
           a cue to do final things like tlb flushes. */
        flush |= (*func)(mm, virt_to_page(pgd), PT_PGD);
@@ -1150,57 +1163,97 @@ static void __init xen_cleanmfnmap_free_pgtbl(void *pgtbl, bool unpin)
        xen_free_ro_pages(pa, PAGE_SIZE);
 }
 
+static void __init xen_cleanmfnmap_pmd(pmd_t *pmd, bool unpin)
+{
+       unsigned long pa;
+       pte_t *pte_tbl;
+       int i;
+
+       if (pmd_large(*pmd)) {
+               pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
+               xen_free_ro_pages(pa, PMD_SIZE);
+               return;
+       }
+
+       pte_tbl = pte_offset_kernel(pmd, 0);
+       for (i = 0; i < PTRS_PER_PTE; i++) {
+               if (pte_none(pte_tbl[i]))
+                       continue;
+               pa = pte_pfn(pte_tbl[i]) << PAGE_SHIFT;
+               xen_free_ro_pages(pa, PAGE_SIZE);
+       }
+       set_pmd(pmd, __pmd(0));
+       xen_cleanmfnmap_free_pgtbl(pte_tbl, unpin);
+}
+
+static void __init xen_cleanmfnmap_pud(pud_t *pud, bool unpin)
+{
+       unsigned long pa;
+       pmd_t *pmd_tbl;
+       int i;
+
+       if (pud_large(*pud)) {
+               pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
+               xen_free_ro_pages(pa, PUD_SIZE);
+               return;
+       }
+
+       pmd_tbl = pmd_offset(pud, 0);
+       for (i = 0; i < PTRS_PER_PMD; i++) {
+               if (pmd_none(pmd_tbl[i]))
+                       continue;
+               xen_cleanmfnmap_pmd(pmd_tbl + i, unpin);
+       }
+       set_pud(pud, __pud(0));
+       xen_cleanmfnmap_free_pgtbl(pmd_tbl, unpin);
+}
+
+static void __init xen_cleanmfnmap_p4d(p4d_t *p4d, bool unpin)
+{
+       unsigned long pa;
+       pud_t *pud_tbl;
+       int i;
+
+       if (p4d_large(*p4d)) {
+               pa = p4d_val(*p4d) & PHYSICAL_PAGE_MASK;
+               xen_free_ro_pages(pa, P4D_SIZE);
+               return;
+       }
+
+       pud_tbl = pud_offset(p4d, 0);
+       for (i = 0; i < PTRS_PER_PUD; i++) {
+               if (pud_none(pud_tbl[i]))
+                       continue;
+               xen_cleanmfnmap_pud(pud_tbl + i, unpin);
+       }
+       set_p4d(p4d, __p4d(0));
+       xen_cleanmfnmap_free_pgtbl(pud_tbl, unpin);
+}
+
 /*
  * Since it is well isolated we can (and since it is perhaps large we should)
  * also free the page tables mapping the initial P->M table.
  */
 static void __init xen_cleanmfnmap(unsigned long vaddr)
 {
-       unsigned long va = vaddr & PMD_MASK;
-       unsigned long pa;
-       pgd_t *pgd = pgd_offset_k(va);
-       pud_t *pud_page = pud_offset(pgd, 0);
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
+       pgd_t *pgd;
+       p4d_t *p4d;
        unsigned int i;
        bool unpin;
 
        unpin = (vaddr == 2 * PGDIR_SIZE);
-       set_pgd(pgd, __pgd(0));
-       do {
-               pud = pud_page + pud_index(va);
-               if (pud_none(*pud)) {
-                       va += PUD_SIZE;
-               } else if (pud_large(*pud)) {
-                       pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
-                       xen_free_ro_pages(pa, PUD_SIZE);
-                       va += PUD_SIZE;
-               } else {
-                       pmd = pmd_offset(pud, va);
-                       if (pmd_large(*pmd)) {
-                               pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
-                               xen_free_ro_pages(pa, PMD_SIZE);
-                       } else if (!pmd_none(*pmd)) {
-                               pte = pte_offset_kernel(pmd, va);
-                               set_pmd(pmd, __pmd(0));
-                               for (i = 0; i < PTRS_PER_PTE; ++i) {
-                                       if (pte_none(pte[i]))
-                                               break;
-                                       pa = pte_pfn(pte[i]) << PAGE_SHIFT;
-                                       xen_free_ro_pages(pa, PAGE_SIZE);
-                               }
-                               xen_cleanmfnmap_free_pgtbl(pte, unpin);
-                       }
-                       va += PMD_SIZE;
-                       if (pmd_index(va))
-                               continue;
-                       set_pud(pud, __pud(0));
-                       xen_cleanmfnmap_free_pgtbl(pmd, unpin);
-               }
-
-       } while (pud_index(va) || pmd_index(va));
-       xen_cleanmfnmap_free_pgtbl(pud_page, unpin);
+       vaddr &= PMD_MASK;
+       pgd = pgd_offset_k(vaddr);
+       p4d = p4d_offset(pgd, 0);
+       for (i = 0; i < PTRS_PER_P4D; i++) {
+               if (p4d_none(p4d[i]))
+                       continue;
+               xen_cleanmfnmap_p4d(p4d + i, unpin);
+       }
+       if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+               set_pgd(pgd, __pgd(0));
+               xen_cleanmfnmap_free_pgtbl(p4d, unpin);
+       }
 }
 
 static void __init xen_pagetable_p2m_free(void)
index 73809bb951b40a7b8baf73773260276ed575c623..3fe2b3292915847b8a74b3a58bdfd9464db35935 100644
@@ -5,6 +5,7 @@
 
 enum pt_level {
        PT_PGD,
+       PT_P4D,
        PT_PUD,
        PT_PMD,
        PT_PTE