x86/mm: Convert trivial cases of page table walk to 5-level paging
author: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Mon, 13 Mar 2017 14:33:05 +0000 (17:33 +0300)
committer: Ingo Molnar <mingo@kernel.org>
Tue, 14 Mar 2017 07:45:08 +0000 (08:45 +0100)
This patch only covers simple cases. Less trivial cases will be
converted with separate patches.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20170313143309.16020-3-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/kernel/tboot.c
arch/x86/kernel/vm86_32.c
arch/x86/mm/fault.c
arch/x86/mm/init_32.c
arch/x86/mm/ioremap.c
arch/x86/mm/pgtable.c
arch/x86/mm/pgtable_32.c
arch/x86/platform/efi/efi_64.c
arch/x86/power/hibernate_32.c

index b868fa1b812b3a82713e0556c39900bced73f338..5db0f33cbf2c55d4feb1b0f1c3097d7163a52f25 100644 (file)
@@ -118,12 +118,16 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
                          pgprot_t prot)
 {
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
 
        pgd = pgd_offset(&tboot_mm, vaddr);
-       pud = pud_alloc(&tboot_mm, pgd, vaddr);
+       p4d = p4d_alloc(&tboot_mm, pgd, vaddr);
+       if (!p4d)
+               return -1;
+       pud = pud_alloc(&tboot_mm, p4d, vaddr);
        if (!pud)
                return -1;
        pmd = pmd_alloc(&tboot_mm, pud, vaddr);
index 23ee89ce59a940712a0b9d91fc78edde94fce8fb..62597c300d94bcfea908c1cdde8e4728db614091 100644 (file)
@@ -164,6 +164,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
        struct vm_area_struct *vma;
        spinlock_t *ptl;
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
@@ -173,7 +174,10 @@ static void mark_screen_rdonly(struct mm_struct *mm)
        pgd = pgd_offset(mm, 0xA0000);
        if (pgd_none_or_clear_bad(pgd))
                goto out;
-       pud = pud_offset(pgd, 0xA0000);
+       p4d = p4d_offset(pgd, 0xA0000);
+       if (p4d_none_or_clear_bad(p4d))
+               goto out;
+       pud = pud_offset(p4d, 0xA0000);
        if (pud_none_or_clear_bad(pud))
                goto out;
        pmd = pmd_offset(pud, 0xA0000);
index 428e31763cb93e593f261a9f443c3999cb8c473d..605fd5e8e048d012a7ea507a5baff83f5bf96350 100644 (file)
@@ -253,6 +253,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 {
        unsigned index = pgd_index(address);
        pgd_t *pgd_k;
+       p4d_t *p4d, *p4d_k;
        pud_t *pud, *pud_k;
        pmd_t *pmd, *pmd_k;
 
@@ -265,10 +266,15 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
        /*
         * set_pgd(pgd, *pgd_k); here would be useless on PAE
         * and redundant with the set_pmd() on non-PAE. As would
-        * set_pud.
+        * set_p4d/set_pud.
         */
-       pud = pud_offset(pgd, address);
-       pud_k = pud_offset(pgd_k, address);
+       p4d = p4d_offset(pgd, address);
+       p4d_k = p4d_offset(pgd_k, address);
+       if (!p4d_present(*p4d_k))
+               return NULL;
+
+       pud = pud_offset(p4d, address);
+       pud_k = pud_offset(p4d_k, address);
        if (!pud_present(*pud_k))
                return NULL;
 
@@ -384,6 +390,8 @@ static void dump_pagetable(unsigned long address)
 {
        pgd_t *base = __va(read_cr3());
        pgd_t *pgd = &base[pgd_index(address)];
+       p4d_t *p4d;
+       pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
 
@@ -392,7 +400,9 @@ static void dump_pagetable(unsigned long address)
        if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
                goto out;
 #endif
-       pmd = pmd_offset(pud_offset(pgd, address), address);
+       p4d = p4d_offset(pgd, address);
+       pud = pud_offset(p4d, address);
+       pmd = pmd_offset(pud, address);
        printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
 
        /*
@@ -526,6 +536,7 @@ static void dump_pagetable(unsigned long address)
 {
        pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
        pgd_t *pgd = base + pgd_index(address);
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
@@ -538,7 +549,15 @@ static void dump_pagetable(unsigned long address)
        if (!pgd_present(*pgd))
                goto out;
 
-       pud = pud_offset(pgd, address);
+       p4d = p4d_offset(pgd, address);
+       if (bad_address(p4d))
+               goto bad;
+
+       printk("P4D %lx ", p4d_val(*p4d));
+       if (!p4d_present(*p4d) || p4d_large(*p4d))
+               goto out;
+
+       pud = pud_offset(p4d, address);
        if (bad_address(pud))
                goto bad;
 
@@ -1082,6 +1101,7 @@ static noinline int
 spurious_fault(unsigned long error_code, unsigned long address)
 {
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
@@ -1104,7 +1124,14 @@ spurious_fault(unsigned long error_code, unsigned long address)
        if (!pgd_present(*pgd))
                return 0;
 
-       pud = pud_offset(pgd, address);
+       p4d = p4d_offset(pgd, address);
+       if (!p4d_present(*p4d))
+               return 0;
+
+       if (p4d_large(*p4d))
+               return spurious_fault_check(error_code, (pte_t *) p4d);
+
+       pud = pud_offset(p4d, address);
        if (!pud_present(*pud))
                return 0;
 
index 2b4b53e6793f16b24d2e96199166c724d068f339..5ed3c141bbd5cf43dc1725f24ad1560622621ea2 100644 (file)
@@ -67,6 +67,7 @@ bool __read_mostly __vmalloc_start_set = false;
  */
 static pmd_t * __init one_md_table_init(pgd_t *pgd)
 {
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd_table;
 
@@ -75,13 +76,15 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
                pmd_table = (pmd_t *)alloc_low_page();
                paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
                set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
-               pud = pud_offset(pgd, 0);
+               p4d = p4d_offset(pgd, 0);
+               pud = pud_offset(p4d, 0);
                BUG_ON(pmd_table != pmd_offset(pud, 0));
 
                return pmd_table;
        }
 #endif
-       pud = pud_offset(pgd, 0);
+       p4d = p4d_offset(pgd, 0);
+       pud = pud_offset(p4d, 0);
        pmd_table = pmd_offset(pud, 0);
 
        return pmd_table;
@@ -390,8 +393,11 @@ pte_t *kmap_pte;
 
 static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr)
 {
-       return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr),
-                       vaddr), vaddr), vaddr);
+       pgd_t *pgd = pgd_offset_k(vaddr);
+       p4d_t *p4d = p4d_offset(pgd, vaddr);
+       pud_t *pud = pud_offset(p4d, vaddr);
+       pmd_t *pmd = pmd_offset(pud, vaddr);
+       return pte_offset_kernel(pmd, vaddr);
 }
 
 static void __init kmap_init(void)
@@ -410,6 +416,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
 {
        unsigned long vaddr;
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
@@ -418,7 +425,8 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
        page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
 
        pgd = swapper_pg_dir + pgd_index(vaddr);
-       pud = pud_offset(pgd, vaddr);
+       p4d = p4d_offset(pgd, vaddr);
+       pud = pud_offset(p4d, vaddr);
        pmd = pmd_offset(pud, vaddr);
        pte = pte_offset_kernel(pmd, vaddr);
        pkmap_page_table = pte;
@@ -450,6 +458,7 @@ void __init native_pagetable_init(void)
 {
        unsigned long pfn, va;
        pgd_t *pgd, *base = swapper_pg_dir;
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
@@ -469,7 +478,8 @@ void __init native_pagetable_init(void)
                if (!pgd_present(*pgd))
                        break;
 
-               pud = pud_offset(pgd, va);
+               p4d = p4d_offset(pgd, va);
+               pud = pud_offset(p4d, va);
                pmd = pmd_offset(pud, va);
                if (!pmd_present(*pmd))
                        break;
index 7aaa2635862d7771797b38cfcc553bcf955e0531..a5e1cda859742626a0f71ae1857d653cee860fd4 100644 (file)
@@ -425,7 +425,8 @@ static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
        /* Don't assume we're using swapper_pg_dir at this point */
        pgd_t *base = __va(read_cr3());
        pgd_t *pgd = &base[pgd_index(addr)];
-       pud_t *pud = pud_offset(pgd, addr);
+       p4d_t *p4d = p4d_offset(pgd, addr);
+       pud_t *pud = pud_offset(p4d, addr);
        pmd_t *pmd = pmd_offset(pud, addr);
 
        return pmd;
index 6cbdff26bb96a25939a0e818d4895b42b1a5a130..38b6daf72debc6396347de943762b2e1cda30f54 100644 (file)
@@ -261,13 +261,15 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
 
 static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
 {
+       p4d_t *p4d;
        pud_t *pud;
        int i;
 
        if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
                return;
 
-       pud = pud_offset(pgd, 0);
+       p4d = p4d_offset(pgd, 0);
+       pud = pud_offset(p4d, 0);
 
        for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) {
                pmd_t *pmd = pmds[i];
index 9adce776852be885cff56c59b07cec98b64f4dde..3d275a791c76cff93990c81520b85453b4865c77 100644 (file)
@@ -26,6 +26,7 @@ unsigned int __VMALLOC_RESERVE = 128 << 20;
 void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
 {
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
@@ -35,7 +36,12 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
                BUG();
                return;
        }
-       pud = pud_offset(pgd, vaddr);
+       p4d = p4d_offset(pgd, vaddr);
+       if (p4d_none(*p4d)) {
+               BUG();
+               return;
+       }
+       pud = pud_offset(p4d, vaddr);
        if (pud_none(*pud)) {
                BUG();
                return;
index a4695da42d77b39cec3b65083c7b7115b1faa32d..8544dae3d1b4d29607bdf536984684e066c3d0d3 100644 (file)
@@ -166,6 +166,7 @@ void efi_sync_low_kernel_mappings(void)
 {
        unsigned num_entries;
        pgd_t *pgd_k, *pgd_efi;
+       p4d_t *p4d_k, *p4d_efi;
        pud_t *pud_k, *pud_efi;
 
        if (efi_enabled(EFI_OLD_MEMMAP))
@@ -197,16 +198,20 @@ void efi_sync_low_kernel_mappings(void)
        BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0);
 
        pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
-       pud_efi = pud_offset(pgd_efi, 0);
+       p4d_efi = p4d_offset(pgd_efi, 0);
+       pud_efi = pud_offset(p4d_efi, 0);
 
        pgd_k = pgd_offset_k(EFI_VA_END);
-       pud_k = pud_offset(pgd_k, 0);
+       p4d_k = p4d_offset(pgd_k, 0);
+       pud_k = pud_offset(p4d_k, 0);
 
        num_entries = pud_index(EFI_VA_END);
        memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
 
-       pud_efi = pud_offset(pgd_efi, EFI_VA_START);
-       pud_k = pud_offset(pgd_k, EFI_VA_START);
+       p4d_efi = p4d_offset(pgd_efi, EFI_VA_START);
+       pud_efi = pud_offset(p4d_efi, EFI_VA_START);
+       p4d_k = p4d_offset(pgd_k, EFI_VA_START);
+       pud_k = pud_offset(p4d_k, EFI_VA_START);
 
        num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START);
        memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
index 9f14bd34581d663a22cb326d0ad3f98b7c2822d6..c35fdb585c6853a1f281b043d54ca3b59c801d06 100644 (file)
@@ -32,6 +32,7 @@ pgd_t *resume_pg_dir;
  */
 static pmd_t *resume_one_md_table_init(pgd_t *pgd)
 {
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd_table;
 
@@ -41,11 +42,13 @@ static pmd_t *resume_one_md_table_init(pgd_t *pgd)
                return NULL;
 
        set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
-       pud = pud_offset(pgd, 0);
+       p4d = p4d_offset(pgd, 0);
+       pud = pud_offset(p4d, 0);
 
        BUG_ON(pmd_table != pmd_offset(pud, 0));
 #else
-       pud = pud_offset(pgd, 0);
+       p4d = p4d_offset(pgd, 0);
+       pud = pud_offset(p4d, 0);
        pmd_table = pmd_offset(pud, 0);
 #endif