From d30e45eeabefadc6039d7f876a59e5f5f6cb11c6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 16 Nov 2010 00:16:01 +0000 Subject: [PATCH] ARM: pgtable: switch order of Linux vs hardware page tables This switches the ordering of the Linux vs hardware page tables in each page, thereby eliminating some of the arithmetic in the page table walks. As we now place the Linux page table at the beginning of the page, we can deal with the offset in the pgt by simply masking it away, along with the other control bits. This also makes the arithmetic all be positive, rather than a mixture. Signed-off-by: Russell King --- arch/arm/include/asm/pgalloc.h | 39 +++++++++++++++------------------- arch/arm/include/asm/pgtable.h | 31 +++++++++++++-------------- arch/arm/mm/fault.c | 2 +- arch/arm/mm/proc-macros.S | 10 ++++----- arch/arm/mm/proc-v7.S | 8 +++---- 5 files changed, 41 insertions(+), 49 deletions(-) diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index 1f1064b519c0..9763be04f77e 100644 --- a/arch/arm/include/asm/pgalloc.h +++ b/arch/arm/include/asm/pgalloc.h @@ -35,6 +35,11 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); #define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +static inline void clean_pte_table(pte_t *pte) +{ + clean_dcache_area(pte + PTE_HWTABLE_PTRS, PTE_HWTABLE_SIZE); +} + /* * Allocate one PTE table. * @@ -42,14 +47,14 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); * into one table thus: * * +------------+ - * | h/w pt 0 | - * +------------+ - * | h/w pt 1 | - * +------------+ * | Linux pt 0 | * +------------+ * | Linux pt 1 | * +------------+ + * | h/w pt 0 | + * +------------+ + * | h/w pt 1 | + * +------------+ */ static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) @@ -57,10 +62,8 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) pte_t *pte; pte = (pte_t *)__get_free_page(PGALLOC_GFP); - if (pte) { - clean_dcache_area(pte, sizeof(pte_t) * PTRS_PER_PTE); - pte += PTRS_PER_PTE; - } + if (pte) + clean_pte_table(pte); return pte; } @@ -76,10 +79,8 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr) pte = alloc_pages(PGALLOC_GFP, 0); #endif if (pte) { - if (!PageHighMem(pte)) { - void *page = page_address(pte); - clean_dcache_area(page, sizeof(pte_t) * PTRS_PER_PTE); - } + if (!PageHighMem(pte)) + clean_pte_table(page_address(pte)); pgtable_page_ctor(pte); } @@ -91,10 +92,8 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr) */ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { - if (pte) { - pte -= PTRS_PER_PTE; + if (pte) free_page((unsigned long)pte); - } } static inline void pte_free(struct mm_struct *mm, pgtable_t pte) @@ -106,7 +105,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte) static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte, unsigned long prot) { - unsigned long pmdval = pte | prot; + unsigned long pmdval = (pte + PTE_HWTABLE_OFF) | prot; pmdp[0] = __pmd(pmdval); pmdp[1] = __pmd(pmdval + 256 * sizeof(pte_t)); flush_pmd_entry(pmdp); @@ -121,14 +120,10 @@ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte, static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) { - unsigned long pte_ptr = (unsigned long)ptep; - /* - * The pmd must be loaded with the physical - * address of the PTE table + * The pmd must be loaded with the physical address of the PTE table */ - pte_ptr -= PTRS_PER_PTE * sizeof(void *); - __pmd_populate(pmdp, __pa(pte_ptr), _PAGE_KERNEL_TABLE); + __pmd_populate(pmdp, __pa(ptep), _PAGE_KERNEL_TABLE); } static inline void diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 50eb0b4278ec..e582214b00df 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -55,7 +55,7 @@ * Therefore, we tweak the implementation slightly - we tell Linux that we * have 2048 entries in the first level, each of which is 8 bytes (iow, two * hardware pointers to the second level.) The second level contains two - * hardware PTE tables arranged contiguously, followed by Linux versions + * hardware PTE tables arranged contiguously, preceded by Linux versions * which contain the state information Linux needs. We, therefore, end up * with 512 entries in the "PTE" level. * @@ -63,15 +63,15 @@ * * pgd pte * | | - * +--------+ +0 - * | |-----> +------------+ +0 + * +--------+ + * | | +------------+ +0 + * +- - - - + | Linux pt 0 | + * | | +------------+ +1024 + * +--------+ +0 | Linux pt 1 | + * | |-----> +------------+ +2048 * +- - - - + +4 | h/w pt 0 | - * | |-----> +------------+ +1024 + * | |-----> +------------+ +3072 * +--------+ +8 | h/w pt 1 | - * | | +------------+ +2048 - * +- - - - + | Linux pt 0 | - * | | +------------+ +3072 - * +--------+ | Linux pt 1 | * | | +------------+ +4096 * * See L_PTE_xxx below for definitions of bits in the "Linux pt", and @@ -103,6 +103,10 @@ #define PTRS_PER_PMD 1 #define PTRS_PER_PGD 2048 +#define PTE_HWTABLE_PTRS (PTRS_PER_PTE) +#define PTE_HWTABLE_OFF (PTE_HWTABLE_PTRS * sizeof(pte_t)) +#define PTE_HWTABLE_SIZE (PTRS_PER_PTE * sizeof(u32)) + /* * PMD_SHIFT determines the size of the area a second-level page table can map * PGDIR_SHIFT determines what a third-level page table entry can map @@ -323,12 +327,7 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; static inline pte_t *pmd_page_vaddr(pmd_t pmd) { - phys_addr_t ptr; - - ptr = pmd_val(pmd) & ~(PTRS_PER_PTE * sizeof(void *) - 1); - ptr += PTRS_PER_PTE * sizeof(void *); - - return __va(ptr); + return __va(pmd_val(pmd) & PAGE_MASK); } #define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd))) @@ -341,8 +340,8 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) #define __pte_map(pmd) pmd_page_vaddr(*(pmd)) #define __pte_unmap(pte) do { } while (0) #else -#define __pte_map(pmd) ((pte_t *)kmap_atomic(pmd_page(*(pmd))) + PTRS_PER_PTE) -#define __pte_unmap(pte) kunmap_atomic((pte - PTRS_PER_PTE)) +#define __pte_map(pmd) (pte_t *)kmap_atomic(pmd_page(*(pmd))) +#define __pte_unmap(pte) kunmap_atomic(pte) #endif #define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 1e21e125fe3a..f10f9bac2206 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -108,7 +108,7 @@ void show_pte(struct mm_struct *mm, unsigned long addr) pte = pte_offset_map(pmd, addr); printk(", *pte=%08lx", pte_val(*pte)); - printk(", *ppte=%08lx", pte_val(pte[-PTRS_PER_PTE])); + printk(", *ppte=%08lx", pte_val(pte[PTE_HWTABLE_PTRS])); pte_unmap(pte); } while(0); diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index 7d63beaf9745..cbedf9c46b9d 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -121,7 +121,7 @@ .endm .macro armv6_set_pte_ext pfx - str r1, [r0], #-2048 @ linux version + str r1, [r0], #2048 @ linux version bic r3, r1, #0x000003fc bic r3, r3, #PTE_TYPE_MASK @@ -170,7 +170,7 @@ * 1111 0xff r/w r/w */ .macro armv3_set_pte_ext wc_disable=1 - str r1, [r0], #-2048 @ linux version + str r1, [r0], #2048 @ linux version eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY @@ -193,7 +193,7 @@ bicne r2, r2, #PTE_BUFFERABLE #endif .endif - str r2, [r0] @ hardware version + str r2, [r0] @ hardware version .endm @@ -213,7 +213,7 @@ * 1111 11 r/w r/w */ .macro xscale_set_pte_ext_prologue - str r1, [r0], #-2048 @ linux version + str r1, [r0] @ linux version eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY @@ -232,7 +232,7 @@ tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young? movne r2, #0 @ no -> fault - str r2, [r0] @ hardware version + str r2, [r0, #2048]! @ hardware version mov ip, #0 mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line mcr p15, 0, ip, c7, c10, 4 @ data write barrier diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 53cbe2225153..89c31a6dae5c 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -124,15 +124,13 @@ ENDPROC(cpu_v7_switch_mm) * Set a level 2 translation table entry. * * - ptep - pointer to level 2 translation table entry - * (hardware version is stored at -1024 bytes) + * (hardware version is stored at +2048 bytes) * - pte - PTE value to store * - ext - value for extended PTE bits */ ENTRY(cpu_v7_set_pte_ext) #ifdef CONFIG_MMU - ARM( str r1, [r0], #-2048 ) @ linux version - THUMB( str r1, [r0] ) @ linux version - THUMB( sub r0, r0, #2048 ) + str r1, [r0] @ linux version bic r3, r1, #0x000003f0 bic r3, r3, #PTE_TYPE_MASK @@ -158,7 +156,7 @@ ENTRY(cpu_v7_set_pte_ext) tstne r1, #L_PTE_PRESENT moveq r3, #0 - str r3, [r0] + str r3, [r0, #2048]! mcr p15, 0, r0, c7, c10, 1 @ flush_pte #endif mov pc, lr -- 2.20.1