sparc64: Fix physical memory management regressions with large max_phys_bits.
authorDavid S. Miller <davem@davemloft.net>
Thu, 25 Sep 2014 03:56:11 +0000 (20:56 -0700)
committerDavid S. Miller <davem@davemloft.net>
Sun, 5 Oct 2014 23:53:39 +0000 (16:53 -0700)
If max_phys_bits needs to be > 43 (e.g. for T4 chips), things like
DEBUG_PAGEALLOC stop working because the 3-level page tables can only
cover up to 43 bits.

Another problem is that when we increased MAX_PHYS_ADDRESS_BITS up to
47, several statically allocated tables became enormous.

Compounding this is that we will need to support up to 49 bits of
physical addressing for M7 chips.

The two tables in question are sparc64_valid_addr_bitmap and
kpte_linear_bitmap.

The first holds a bitmap, with 1 bit for each 4MB chunk of physical
memory, indicating whether that chunk actually exists in the machine
and is valid.
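
For reference, the old lookup was a plain shift-and-test against that
bitmap, as in the __kern_addr_valid() helper being removed in the diff
below:

	/* One bit per 4MB (1 << ILOG2_4MB) chunk of physical memory. */
	static inline bool __kern_addr_valid(unsigned long paddr)
	{
		if ((paddr >> MAX_PHYS_ADDRESS_BITS) != 0UL)
			return false;
		return test_bit(paddr >> ILOG2_4MB, sparc64_valid_addr_bitmap);
	}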

The second table is a set of 2-bit values, one for each 256MB chunk of
ram in the system, which tell how large a mapping (4MB, 256MB, 2GB, or
16GB, for values 0 through 3 respectively) we can use for that chunk.
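
The 2-bit packing can be read off the kpte_set_val() helper being
removed below; a hypothetical C reader (the real lookup was done in
assembly in the TLB miss handler) would be roughly:

	/* Two bits per 256MB chunk; the value indexes kern_linear_pte_xor[]
	 * (0 = 4MB, 1 = 256MB, 2 = 2GB, 3 = 16GB mappings).
	 */
	static unsigned long kpte_get_val(unsigned long paddr)
	{
		unsigned long index = paddr >> ILOG2_256MB;
		unsigned long *ptr = kpte_linear_bitmap;

		ptr += (index / (BITS_PER_LONG / 2));
		return (*ptr >> ((index % (BITS_PER_LONG / 2)) * 2)) & 3UL;
	}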

These tables are huge and take up an enormous amount of the BSS
section of the sparc64 kernel image.  Specifically, the
sparc64_valid_addr_bitmap is 4MB, and the kpte_linear_bitmap is 128K.
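
Those sizes fall directly out of the sizing macros being removed from
arch/sparc/mm/init_64.h: with MAX_PHYS_ADDRESS_BITS at 47, the
valid-address bitmap needs (2^47 / 2^22) bits / 8 = 4MB, and
kpte_linear_bitmap needs (2^47 / 2^28) * 2 bits / 8 = 128K.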

So let's solve the space wastage and the DEBUG_PAGEALLOC problem
at the same time, by using the kernel page tables (as designed) to
manage this information.

We have to keep using large mappings when DEBUG_PAGEALLOC is disabled,
and we do this by encoding huge PMDs and PUDs.
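
Concretely, a huge kernel mapping is just a PTE-like value with the
huge bit set, written straight into the PMD or PUD slot.  A minimal
sketch of the plain 8GB PUD case (mirroring the kernel_map_hugepud()
helper added below, which uses the 2GB kern_linear_pte_xor[2]
encoding when a full 16GB mapping doesn't fit):

	/* Cover one 8GB-aligned, PUD-sized piece of the linear mapping. */
	pud_val(*pud) = (vstart ^ kern_linear_pte_xor[2]) | _PAGE_PUD_HUGE;
	vstart += PUD_SIZE;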

On a T4-2 with 256GB of ram the kernel page tables take up 16K with
DEBUG_PAGEALLOC disabled and 256MB with it enabled.  Furthermore, this
memory is dynamically allocated at run time rather than statically
reserved in the kernel image.
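
That 256MB figure matches the page geometry: with 8K base pages,
mapping 256GB one page at a time takes 256GB / 8K = 32M last-level
PTEs at 8 bytes each, i.e. roughly 256MB of PTE pages, whereas the
huge PMD/PUD encoding covers the same range with a handful of 8K
table pages.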

Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Bob Picco <bob.picco@oracle.com>
arch/sparc/include/asm/page_64.h
arch/sparc/include/asm/pgtable_64.h
arch/sparc/include/asm/tsb.h
arch/sparc/kernel/ktlb.S
arch/sparc/kernel/vmlinux.lds.S
arch/sparc/mm/init_64.c
arch/sparc/mm/init_64.h

index 2211a8036bfaaa0581162447a4e4a2aee613b24b..732ba178a2891b0da3ba6be5cec8ed261c041852 100644 (file)
@@ -128,9 +128,6 @@ extern unsigned long PAGE_OFFSET;
  */
 #define MAX_PHYS_ADDRESS_BITS  47
 
-/* These two shift counts are used when indexing sparc64_valid_addr_bitmap
- * and kpte_linear_bitmap.
- */
 #define ILOG2_4MB              22
 #define ILOG2_256MB            28
 
index 31ac919920a903aa9bf684f329582e0464f266fa..a305b22ab5818fef02efe3d7962fc229c7625cea 100644 (file)
 
 #include <linux/sched.h>
 
-extern unsigned long sparc64_valid_addr_bitmap[];
-
-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
-static inline bool __kern_addr_valid(unsigned long paddr)
-{
-       if ((paddr >> MAX_PHYS_ADDRESS_BITS) != 0UL)
-               return false;
-       return test_bit(paddr >> ILOG2_4MB, sparc64_valid_addr_bitmap);
-}
-
-static inline bool kern_addr_valid(unsigned long addr)
-{
-       unsigned long paddr = __pa(addr);
-
-       return __kern_addr_valid(paddr);
-}
+bool kern_addr_valid(unsigned long addr);
 
 /* Entries per page directory level. */
 #define PTRS_PER_PTE   (1UL << (PAGE_SHIFT-3))
@@ -122,6 +107,7 @@ static inline bool kern_addr_valid(unsigned long addr)
 #define _PAGE_R                  _AC(0x8000000000000000,UL) /* Keep ref bit uptodate*/
 #define _PAGE_SPECIAL     _AC(0x0200000000000000,UL) /* Special page         */
 #define _PAGE_PMD_HUGE    _AC(0x0100000000000000,UL) /* Huge page            */
+#define _PAGE_PUD_HUGE    _PAGE_PMD_HUGE
 
 /* Advertise support for _PAGE_SPECIAL */
 #define __HAVE_ARCH_PTE_SPECIAL
@@ -668,26 +654,26 @@ static inline unsigned long pmd_large(pmd_t pmd)
        return pte_val(pte) & _PAGE_PMD_HUGE;
 }
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline unsigned long pmd_young(pmd_t pmd)
+static inline unsigned long pmd_pfn(pmd_t pmd)
 {
        pte_t pte = __pte(pmd_val(pmd));
 
-       return pte_young(pte);
+       return pte_pfn(pte);
 }
 
-static inline unsigned long pmd_write(pmd_t pmd)
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline unsigned long pmd_young(pmd_t pmd)
 {
        pte_t pte = __pte(pmd_val(pmd));
 
-       return pte_write(pte);
+       return pte_young(pte);
 }
 
-static inline unsigned long pmd_pfn(pmd_t pmd)
+static inline unsigned long pmd_write(pmd_t pmd)
 {
        pte_t pte = __pte(pmd_val(pmd));
 
-       return pte_pfn(pte);
+       return pte_write(pte);
 }
 
 static inline unsigned long pmd_trans_huge(pmd_t pmd)
@@ -781,18 +767,15 @@ static inline int pmd_present(pmd_t pmd)
  * the top bits outside of the range of any physical address size we
  * support are clear as well.  We also validate the physical itself.
  */
-#define pmd_bad(pmd)                   ((pmd_val(pmd) & ~PAGE_MASK) || \
-                                        !__kern_addr_valid(pmd_val(pmd)))
+#define pmd_bad(pmd)                   (pmd_val(pmd) & ~PAGE_MASK)
 
 #define pud_none(pud)                  (!pud_val(pud))
 
-#define pud_bad(pud)                   ((pud_val(pud) & ~PAGE_MASK) || \
-                                        !__kern_addr_valid(pud_val(pud)))
+#define pud_bad(pud)                   (pud_val(pud) & ~PAGE_MASK)
 
 #define pgd_none(pgd)                  (!pgd_val(pgd))
 
-#define pgd_bad(pgd)                   ((pgd_val(pgd) & ~PAGE_MASK) || \
-                                        !__kern_addr_valid(pgd_val(pgd)))
+#define pgd_bad(pgd)                   (pgd_val(pgd) & ~PAGE_MASK)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void set_pmd_at(struct mm_struct *mm, unsigned long addr,
@@ -835,6 +818,20 @@ static inline unsigned long __pmd_page(pmd_t pmd)
 #define pgd_present(pgd)               (pgd_val(pgd) != 0U)
 #define pgd_clear(pgdp)                        (pgd_val(*(pgd)) = 0UL)
 
+static inline unsigned long pud_large(pud_t pud)
+{
+       pte_t pte = __pte(pud_val(pud));
+
+       return pte_val(pte) & _PAGE_PMD_HUGE;
+}
+
+static inline unsigned long pud_pfn(pud_t pud)
+{
+       pte_t pte = __pte(pud_val(pud));
+
+       return pte_pfn(pte);
+}
+
 /* Same in both SUN4V and SUN4U.  */
 #define pte_none(pte)                  (!pte_val(pte))
 
index a2f5419057154ca94addf02e0ba0135b23f42073..ecb49cfa3be9fa274053fb15bc2d5f3c38c76e08 100644 (file)
@@ -133,9 +133,24 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
        sub     TSB, 0x8, TSB;   \
        TSB_STORE(TSB, TAG);
 
-       /* Do a kernel page table walk.  Leaves physical PTE pointer in
-        * REG1.  Jumps to FAIL_LABEL on early page table walk termination.
-        * VADDR will not be clobbered, but REG2 will.
+       /* Do a kernel page table walk.  Leaves valid PTE value in
+        * REG1.  Jumps to FAIL_LABEL on early page table walk
+        * termination.  VADDR will not be clobbered, but REG2 will.
+        *
+        * There are two masks we must apply to propagate bits from
+        * the virtual address into the PTE physical address field
+        * when dealing with huge pages.  This is because the page
+        * table boundaries do not match the huge page size(s) the
+        * hardware supports.
+        *
+        * In these cases we propagate the bits that are below the
+        * page table level where we saw the huge page mapping, but
+        * are still within the relevant physical bits for the huge
+        * page size in question.  So for PMD mappings (which fall on
+        * bit 23, for 8MB per PMD) we must propagate bit 22 for a
+        * 4MB huge page.  For huge PUDs (which fall on bit 33, for
+        * 8GB per PUD), we have to accommodate 256MB and 2GB huge
+        * pages.  So for those we propagate bits 32 to 28.
         */
 #define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL)       \
        sethi           %hi(swapper_pg_dir), REG1; \
@@ -150,15 +165,35 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
        andn            REG2, 0x7, REG2; \
        ldxa            [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
        brz,pn          REG1, FAIL_LABEL; \
-        sllx           VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
+       sethi           %uhi(_PAGE_PUD_HUGE), REG2; \
+       brz,pn          REG1, FAIL_LABEL; \
+        sllx           REG2, 32, REG2; \
+       andcc           REG1, REG2, %g0; \
+       sethi           %hi(0xf8000000), REG2; \
+       bne,pt          %xcc, 697f; \
+        sllx           REG2, 1, REG2; \
+       sllx            VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
        srlx            REG2, 64 - PAGE_SHIFT, REG2; \
        andn            REG2, 0x7, REG2; \
        ldxa            [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+       sethi           %uhi(_PAGE_PMD_HUGE), REG2; \
        brz,pn          REG1, FAIL_LABEL; \
-        sllx           VADDR, 64 - PMD_SHIFT, REG2; \
+        sllx           REG2, 32, REG2; \
+       andcc           REG1, REG2, %g0; \
+       be,pn           %xcc, 698f; \
+        sethi          %hi(0x400000), REG2; \
+697:   brgez,pn        REG1, FAIL_LABEL; \
+        andn           REG1, REG2, REG1; \
+       and             VADDR, REG2, REG2; \
+       ba,pt           %xcc, 699f; \
+        or             REG1, REG2, REG1; \
+698:   sllx            VADDR, 64 - PMD_SHIFT, REG2; \
        srlx            REG2, 64 - PAGE_SHIFT, REG2; \
        andn            REG2, 0x7, REG2; \
-       add             REG1, REG2, REG1;
+       ldxa            [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+       brgez,pn        REG1, FAIL_LABEL; \
+        nop; \
+699:
 
        /* PMD has been loaded into REG1, interpret the value, seeing
         * if it is a HUGE PMD or a normal one.  If it is not valid
index 605d49204580585356a7fda6dede8657641fb7e1..94a1e6648bd08af6a9e884bba7730f9b67b298c4 100644 (file)
@@ -47,14 +47,6 @@ kvmap_itlb_vmalloc_addr:
        KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath)
 
        TSB_LOCK_TAG(%g1, %g2, %g7)
-
-       /* Load and check PTE.  */
-       ldxa            [%g5] ASI_PHYS_USE_EC, %g5
-       mov             1, %g7
-       sllx            %g7, TSB_TAG_INVALID_BIT, %g7
-       brgez,a,pn      %g5, kvmap_itlb_longpath
-        TSB_STORE(%g1, %g7)
-
        TSB_WRITE(%g1, %g5, %g6)
 
        /* fallthrough to TLB load */
@@ -118,6 +110,12 @@ kvmap_dtlb_obp:
        ba,pt           %xcc, kvmap_dtlb_load
         nop
 
+kvmap_linear_early:
+       sethi           %hi(kern_linear_pte_xor), %g7
+       ldx             [%g7 + %lo(kern_linear_pte_xor)], %g2
+       ba,pt           %xcc, kvmap_dtlb_tsb4m_load
+        xor            %g2, %g4, %g5
+
        .align          32
 kvmap_dtlb_tsb4m_load:
        TSB_LOCK_TAG(%g1, %g2, %g7)
@@ -146,105 +144,17 @@ kvmap_dtlb_4v:
        /* Correct TAG_TARGET is already in %g6, check 4mb TSB.  */
        KERN_TSB4M_LOOKUP_TL1(%g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load)
 #endif
-       /* TSB entry address left in %g1, lookup linear PTE.
-        * Must preserve %g1 and %g6 (TAG).
-        */
-kvmap_dtlb_tsb4m_miss:
-       /* Clear the PAGE_OFFSET top virtual bits, shift
-        * down to get PFN, and make sure PFN is in range.
-        */
-661:   sllx            %g4, 0, %g5
-       .section        .page_offset_shift_patch, "ax"
-       .word           661b
-       .previous
-
-       /* Check to see if we know about valid memory at the 4MB
-        * chunk this physical address will reside within.
+       /* Linear mapping TSB lookup failed.  Fallthrough to kernel
+        * page table based lookup.
         */
-661:   srlx            %g5, MAX_PHYS_ADDRESS_BITS, %g2
-       .section        .page_offset_shift_patch, "ax"
-       .word           661b
-       .previous
-
-       brnz,pn         %g2, kvmap_dtlb_longpath
-        nop
-
-       /* This unconditional branch and delay-slot nop gets patched
-        * by the sethi sequence once the bitmap is properly setup.
-        */
-       .globl          valid_addr_bitmap_insn
-valid_addr_bitmap_insn:
-       ba,pt           %xcc, 2f
-        nop
-       .subsection     2
-       .globl          valid_addr_bitmap_patch
-valid_addr_bitmap_patch:
-       sethi           %hi(sparc64_valid_addr_bitmap), %g7
-       or              %g7, %lo(sparc64_valid_addr_bitmap), %g7
-       .previous
-
-661:   srlx            %g5, ILOG2_4MB, %g2
-       .section        .page_offset_shift_patch, "ax"
-       .word           661b
-       .previous
-
-       srlx            %g2, 6, %g5
-       and             %g2, 63, %g2
-       sllx            %g5, 3, %g5
-       ldx             [%g7 + %g5], %g5
-       mov             1, %g7
-       sllx            %g7, %g2, %g7
-       andcc           %g5, %g7, %g0
-       be,pn           %xcc, kvmap_dtlb_longpath
-
-2:      sethi          %hi(kpte_linear_bitmap), %g2
-
-       /* Get the 256MB physical address index. */
-661:   sllx            %g4, 0, %g5
-       .section        .page_offset_shift_patch, "ax"
-       .word           661b
-       .previous
-
-       or              %g2, %lo(kpte_linear_bitmap), %g2
-
-661:   srlx            %g5, ILOG2_256MB, %g5
-       .section        .page_offset_shift_patch, "ax"
-       .word           661b
-       .previous
-
-       and             %g5, (32 - 1), %g7
-
-       /* Divide by 32 to get the offset into the bitmask.  */
-       srlx            %g5, 5, %g5
-       add             %g7, %g7, %g7
-       sllx            %g5, 3, %g5
-
-       /* kern_linear_pte_xor[(mask >> shift) & 3)] */
-       ldx             [%g2 + %g5], %g2
-       srlx            %g2, %g7, %g7
-       sethi           %hi(kern_linear_pte_xor), %g5
-       and             %g7, 3, %g7
-       or              %g5, %lo(kern_linear_pte_xor), %g5
-       sllx            %g7, 3, %g7
-       ldx             [%g5 + %g7], %g2
-
        .globl          kvmap_linear_patch
 kvmap_linear_patch:
-       ba,pt           %xcc, kvmap_dtlb_tsb4m_load
-        xor            %g2, %g4, %g5
+       ba,a,pt         %xcc, kvmap_linear_early
 
 kvmap_dtlb_vmalloc_addr:
        KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)
 
        TSB_LOCK_TAG(%g1, %g2, %g7)
-
-       /* Load and check PTE.  */
-       ldxa            [%g5] ASI_PHYS_USE_EC, %g5
-       mov             1, %g7
-       sllx            %g7, TSB_TAG_INVALID_BIT, %g7
-       brgez,a,pn      %g5, kvmap_dtlb_longpath
-        TSB_STORE(%g1, %g7)
-
        TSB_WRITE(%g1, %g5, %g6)
 
        /* fallthrough to TLB load */
index 932ff90fd7602b3f44aeac291f3e56936a197af2..0bacceb19150ebff3a9e9088b11b66d8f5d4490b 100644 (file)
@@ -122,11 +122,6 @@ SECTIONS
                *(.swapper_4m_tsb_phys_patch)
                __swapper_4m_tsb_phys_patch_end = .;
        }
-       .page_offset_shift_patch : {
-               __page_offset_shift_patch = .;
-               *(.page_offset_shift_patch)
-               __page_offset_shift_patch_end = .;
-       }
        .popc_3insn_patch : {
                __popc_3insn_patch = .;
                *(.popc_3insn_patch)
index 35fcc9cb960d9e8caf25145742bc41e410afadb4..848440a331258e4da089a6924d75866e174e4f26 100644 (file)
@@ -75,7 +75,6 @@ unsigned long kern_linear_pte_xor[4] __read_mostly;
  * 'cpu' properties, but we need to have this table setup before the
  * MDESC is initialized.
  */
-unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
 
 #ifndef CONFIG_DEBUG_PAGEALLOC
 /* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings.
@@ -84,6 +83,7 @@ unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
  */
 extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
 #endif
+extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
 
 static unsigned long cpu_pgsz_mask;
 
@@ -165,10 +165,6 @@ static void __init read_obp_memory(const char *property,
             cmp_p64, NULL);
 }
 
-unsigned long sparc64_valid_addr_bitmap[VALID_ADDR_BITMAP_BYTES /
-                                       sizeof(unsigned long)];
-EXPORT_SYMBOL(sparc64_valid_addr_bitmap);
-
 /* Kernel physical address base and size in bytes.  */
 unsigned long kern_base __read_mostly;
 unsigned long kern_size __read_mostly;
@@ -1369,9 +1365,145 @@ static unsigned long __init bootmem_init(unsigned long phys_base)
 static struct linux_prom64_registers pall[MAX_BANKS] __initdata;
 static int pall_ents __initdata;
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
+static unsigned long max_phys_bits = 40;
+
+bool kern_addr_valid(unsigned long addr)
+{
+       unsigned long above = ((long)addr) >> max_phys_bits;
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+
+       if (above != 0 && above != -1UL)
+               return false;
+
+       if (addr >= (unsigned long) KERNBASE &&
+           addr < (unsigned long)&_end)
+               return true;
+
+       if (addr >= PAGE_OFFSET) {
+               unsigned long pa = __pa(addr);
+
+               return pfn_valid(pa >> PAGE_SHIFT);
+       }
+
+       pgd = pgd_offset_k(addr);
+       if (pgd_none(*pgd))
+               return 0;
+
+       pud = pud_offset(pgd, addr);
+       if (pud_none(*pud))
+               return 0;
+
+       if (pud_large(*pud))
+               return pfn_valid(pud_pfn(*pud));
+
+       pmd = pmd_offset(pud, addr);
+       if (pmd_none(*pmd))
+               return 0;
+
+       if (pmd_large(*pmd))
+               return pfn_valid(pmd_pfn(*pmd));
+
+       pte = pte_offset_kernel(pmd, addr);
+       if (pte_none(*pte))
+               return 0;
+
+       return pfn_valid(pte_pfn(*pte));
+}
+EXPORT_SYMBOL(kern_addr_valid);
+
+static unsigned long __ref kernel_map_hugepud(unsigned long vstart,
+                                             unsigned long vend,
+                                             pud_t *pud)
+{
+       const unsigned long mask16gb = (1UL << 34) - 1UL;
+       u64 pte_val = vstart;
+
+       /* Each PUD is 8GB */
+       if ((vstart & mask16gb) ||
+           (vend - vstart <= mask16gb)) {
+               pte_val ^= kern_linear_pte_xor[2];
+               pud_val(*pud) = pte_val | _PAGE_PUD_HUGE;
+
+               return vstart + PUD_SIZE;
+       }
+
+       pte_val ^= kern_linear_pte_xor[3];
+       pte_val |= _PAGE_PUD_HUGE;
+
+       vend = vstart + mask16gb + 1UL;
+       while (vstart < vend) {
+               pud_val(*pud) = pte_val;
+
+               pte_val += PUD_SIZE;
+               vstart += PUD_SIZE;
+               pud++;
+       }
+       return vstart;
+}
+
+static bool kernel_can_map_hugepud(unsigned long vstart, unsigned long vend,
+                                  bool guard)
+{
+       if (guard && !(vstart & ~PUD_MASK) && (vend - vstart) >= PUD_SIZE)
+               return true;
+
+       return false;
+}
+
+static unsigned long __ref kernel_map_hugepmd(unsigned long vstart,
+                                             unsigned long vend,
+                                             pmd_t *pmd)
+{
+       const unsigned long mask256mb = (1UL << 28) - 1UL;
+       const unsigned long mask2gb = (1UL << 31) - 1UL;
+       u64 pte_val = vstart;
+
+       /* Each PMD is 8MB */
+       if ((vstart & mask256mb) ||
+           (vend - vstart <= mask256mb)) {
+               pte_val ^= kern_linear_pte_xor[0];
+               pmd_val(*pmd) = pte_val | _PAGE_PMD_HUGE;
+
+               return vstart + PMD_SIZE;
+       }
+
+       if ((vstart & mask2gb) ||
+           (vend - vstart <= mask2gb)) {
+               pte_val ^= kern_linear_pte_xor[1];
+               pte_val |= _PAGE_PMD_HUGE;
+               vend = vstart + mask256mb + 1UL;
+       } else {
+               pte_val ^= kern_linear_pte_xor[2];
+               pte_val |= _PAGE_PMD_HUGE;
+               vend = vstart + mask2gb + 1UL;
+       }
+
+       while (vstart < vend) {
+               pmd_val(*pmd) = pte_val;
+
+               pte_val += PMD_SIZE;
+               vstart += PMD_SIZE;
+               pmd++;
+       }
+
+       return vstart;
+}
+
+static bool kernel_can_map_hugepmd(unsigned long vstart, unsigned long vend,
+                                  bool guard)
+{
+       if (guard && !(vstart & ~PMD_MASK) && (vend - vstart) >= PMD_SIZE)
+               return true;
+
+       return false;
+}
+
 static unsigned long __ref kernel_map_range(unsigned long pstart,
-                                           unsigned long pend, pgprot_t prot)
+                                           unsigned long pend, pgprot_t prot,
+                                           bool use_huge)
 {
        unsigned long vstart = PAGE_OFFSET + pstart;
        unsigned long vend = PAGE_OFFSET + pend;
@@ -1401,15 +1533,23 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
                if (pud_none(*pud)) {
                        pmd_t *new;
 
+                       if (kernel_can_map_hugepud(vstart, vend, use_huge)) {
+                               vstart = kernel_map_hugepud(vstart, vend, pud);
+                               continue;
+                       }
                        new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
                        alloc_bytes += PAGE_SIZE;
                        pud_populate(&init_mm, pud, new);
                }
 
                pmd = pmd_offset(pud, vstart);
-               if (!pmd_present(*pmd)) {
+               if (pmd_none(*pmd)) {
                        pte_t *new;
 
+                       if (kernel_can_map_hugepmd(vstart, vend, use_huge)) {
+                               vstart = kernel_map_hugepmd(vstart, vend, pmd);
+                               continue;
+                       }
                        new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
                        alloc_bytes += PAGE_SIZE;
                        pmd_populate_kernel(&init_mm, pmd, new);
@@ -1432,100 +1572,34 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
        return alloc_bytes;
 }
 
-extern unsigned int kvmap_linear_patch[1];
-#endif /* CONFIG_DEBUG_PAGEALLOC */
-
-static void __init kpte_set_val(unsigned long index, unsigned long val)
-{
-       unsigned long *ptr = kpte_linear_bitmap;
-
-       val <<= ((index % (BITS_PER_LONG / 2)) * 2);
-       ptr += (index / (BITS_PER_LONG / 2));
-
-       *ptr |= val;
-}
-
-static const unsigned long kpte_shift_min = 28; /* 256MB */
-static const unsigned long kpte_shift_max = 34; /* 16GB */
-static const unsigned long kpte_shift_incr = 3;
-
-static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end,
-                                          unsigned long shift)
+static void __init flush_all_kernel_tsbs(void)
 {
-       unsigned long size = (1UL << shift);
-       unsigned long mask = (size - 1UL);
-       unsigned long remains = end - start;
-       unsigned long val;
-
-       if (remains < size || (start & mask))
-               return start;
-
-       /* VAL maps:
-        *
-        *      shift 28 --> kern_linear_pte_xor index 1
-        *      shift 31 --> kern_linear_pte_xor index 2
-        *      shift 34 --> kern_linear_pte_xor index 3
-        */
-       val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1;
-
-       remains &= ~mask;
-       if (shift != kpte_shift_max)
-               remains = size;
-
-       while (remains) {
-               unsigned long index = start >> kpte_shift_min;
+       int i;
 
-               kpte_set_val(index, val);
+       for (i = 0; i < KERNEL_TSB_NENTRIES; i++) {
+               struct tsb *ent = &swapper_tsb[i];
 
-               start += 1UL << kpte_shift_min;
-               remains -= 1UL << kpte_shift_min;
+               ent->tag = (1UL << TSB_TAG_INVALID_BIT);
        }
+#ifndef CONFIG_DEBUG_PAGEALLOC
+       for (i = 0; i < KERNEL_TSB4M_NENTRIES; i++) {
+               struct tsb *ent = &swapper_4m_tsb[i];
 
-       return start;
-}
-
-static void __init mark_kpte_bitmap(unsigned long start, unsigned long end)
-{
-       unsigned long smallest_size, smallest_mask;
-       unsigned long s;
-
-       smallest_size = (1UL << kpte_shift_min);
-       smallest_mask = (smallest_size - 1UL);
-
-       while (start < end) {
-               unsigned long orig_start = start;
-
-               for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) {
-                       start = kpte_mark_using_shift(start, end, s);
-
-                       if (start != orig_start)
-                               break;
-               }
-
-               if (start == orig_start)
-                       start = (start + smallest_size) & ~smallest_mask;
+               ent->tag = (1UL << TSB_TAG_INVALID_BIT);
        }
+#endif
 }
 
-static void __init init_kpte_bitmap(void)
-{
-       unsigned long i;
-
-       for (i = 0; i < pall_ents; i++) {
-               unsigned long phys_start, phys_end;
-
-               phys_start = pall[i].phys_addr;
-               phys_end = phys_start + pall[i].reg_size;
-
-               mark_kpte_bitmap(phys_start, phys_end);
-       }
-}
+extern unsigned int kvmap_linear_patch[1];
 
 static void __init kernel_physical_mapping_init(void)
 {
-#ifdef CONFIG_DEBUG_PAGEALLOC
        unsigned long i, mem_alloced = 0UL;
+       bool use_huge = true;
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+       use_huge = false;
+#endif
        for (i = 0; i < pall_ents; i++) {
                unsigned long phys_start, phys_end;
 
@@ -1533,7 +1607,7 @@ static void __init kernel_physical_mapping_init(void)
                phys_end = phys_start + pall[i].reg_size;
 
                mem_alloced += kernel_map_range(phys_start, phys_end,
-                                               PAGE_KERNEL);
+                                               PAGE_KERNEL, use_huge);
        }
 
        printk("Allocated %ld bytes for kernel page tables.\n",
@@ -1542,8 +1616,9 @@ static void __init kernel_physical_mapping_init(void)
        kvmap_linear_patch[0] = 0x01000000; /* nop */
        flushi(&kvmap_linear_patch[0]);
 
+       flush_all_kernel_tsbs();
+
        __flush_tlb_all();
-#endif
 }
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
@@ -1553,7 +1628,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
        unsigned long phys_end = phys_start + (numpages * PAGE_SIZE);
 
        kernel_map_range(phys_start, phys_end,
-                        (enable ? PAGE_KERNEL : __pgprot(0)));
+                        (enable ? PAGE_KERNEL : __pgprot(0)), false);
 
        flush_tsb_kernel_range(PAGE_OFFSET + phys_start,
                               PAGE_OFFSET + phys_end);
@@ -1581,62 +1656,11 @@ unsigned long __init find_ecache_flush_span(unsigned long size)
 unsigned long PAGE_OFFSET;
 EXPORT_SYMBOL(PAGE_OFFSET);
 
-static void __init page_offset_shift_patch_one(unsigned int *insn, unsigned long phys_bits)
-{
-       unsigned long final_shift;
-       unsigned int val = *insn;
-       unsigned int cnt;
-
-       /* We are patching in ilog2(max_supported_phys_address), and
-        * we are doing so in a manner similar to a relocation addend.
-        * That is, we are adding the shift value to whatever value
-        * is in the shift instruction count field already.
-        */
-       cnt = (val & 0x3f);
-       val &= ~0x3f;
-
-       /* If we are trying to shift >= 64 bits, clear the destination
-        * register.  This can happen when phys_bits ends up being equal
-        * to MAX_PHYS_ADDRESS_BITS.
-        */
-       final_shift = (cnt + (64 - phys_bits));
-       if (final_shift >= 64) {
-               unsigned int rd = (val >> 25) & 0x1f;
-
-               val = 0x80100000 | (rd << 25);
-       } else {
-               val |= final_shift;
-       }
-       *insn = val;
-
-       __asm__ __volatile__("flush     %0"
-                            : /* no outputs */
-                            : "r" (insn));
-}
-
-static void __init page_offset_shift_patch(unsigned long phys_bits)
-{
-       extern unsigned int __page_offset_shift_patch;
-       extern unsigned int __page_offset_shift_patch_end;
-       unsigned int *p;
-
-       p = &__page_offset_shift_patch;
-       while (p < &__page_offset_shift_patch_end) {
-               unsigned int *insn = (unsigned int *)(unsigned long)*p;
-
-               page_offset_shift_patch_one(insn, phys_bits);
-
-               p++;
-       }
-}
-
 unsigned long sparc64_va_hole_top =    0xfffff80000000000UL;
 unsigned long sparc64_va_hole_bottom = 0x0000080000000000UL;
 
 static void __init setup_page_offset(void)
 {
-       unsigned long max_phys_bits = 40;
-
        if (tlb_type == cheetah || tlb_type == cheetah_plus) {
                /* Cheetah/Panther support a full 64-bit virtual
                 * address, so we can use all that our page tables
@@ -1685,8 +1709,6 @@ static void __init setup_page_offset(void)
 
        pr_info("PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n",
                PAGE_OFFSET, max_phys_bits);
-
-       page_offset_shift_patch(max_phys_bits);
 }
 
 static void __init tsb_phys_patch(void)
@@ -1731,7 +1753,6 @@ static void __init tsb_phys_patch(void)
 #define NUM_KTSB_DESCR 1
 #endif
 static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR];
-extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
 
 /* The swapper TSBs are loaded with a base sequence of:
  *
@@ -2077,11 +2098,9 @@ void __init paging_init(void)
 
        pmd = swapper_low_pmd_dir + (shift / sizeof(pmd_t));
        pud_set(&swapper_pud_dir[0], pmd);
-       
+
        inherit_prom_mappings();
        
-       init_kpte_bitmap();
-
        /* Ok, we can use our TLB miss and window trap handlers safely.  */
        setup_tba();
 
@@ -2188,70 +2207,6 @@ int page_in_phys_avail(unsigned long paddr)
        return 0;
 }
 
-static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata;
-static int pavail_rescan_ents __initdata;
-
-/* Certain OBP calls, such as fetching "available" properties, can
- * claim physical memory.  So, along with initializing the valid
- * address bitmap, what we do here is refetch the physical available
- * memory list again, and make sure it provides at least as much
- * memory as 'pavail' does.
- */
-static void __init setup_valid_addr_bitmap_from_pavail(unsigned long *bitmap)
-{
-       int i;
-
-       read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents);
-
-       for (i = 0; i < pavail_ents; i++) {
-               unsigned long old_start, old_end;
-
-               old_start = pavail[i].phys_addr;
-               old_end = old_start + pavail[i].reg_size;
-               while (old_start < old_end) {
-                       int n;
-
-                       for (n = 0; n < pavail_rescan_ents; n++) {
-                               unsigned long new_start, new_end;
-
-                               new_start = pavail_rescan[n].phys_addr;
-                               new_end = new_start +
-                                       pavail_rescan[n].reg_size;
-
-                               if (new_start <= old_start &&
-                                   new_end >= (old_start + PAGE_SIZE)) {
-                                       set_bit(old_start >> ILOG2_4MB, bitmap);
-                                       goto do_next_page;
-                               }
-                       }
-
-                       prom_printf("mem_init: Lost memory in pavail\n");
-                       prom_printf("mem_init: OLD start[%lx] size[%lx]\n",
-                                   pavail[i].phys_addr,
-                                   pavail[i].reg_size);
-                       prom_printf("mem_init: NEW start[%lx] size[%lx]\n",
-                                   pavail_rescan[i].phys_addr,
-                                   pavail_rescan[i].reg_size);
-                       prom_printf("mem_init: Cannot continue, aborting.\n");
-                       prom_halt();
-
-               do_next_page:
-                       old_start += PAGE_SIZE;
-               }
-       }
-}
-
-static void __init patch_tlb_miss_handler_bitmap(void)
-{
-       extern unsigned int valid_addr_bitmap_insn[];
-       extern unsigned int valid_addr_bitmap_patch[];
-
-       valid_addr_bitmap_insn[1] = valid_addr_bitmap_patch[1];
-       mb();
-       valid_addr_bitmap_insn[0] = valid_addr_bitmap_patch[0];
-       flushi(&valid_addr_bitmap_insn[0]);
-}
-
 static void __init register_page_bootmem_info(void)
 {
 #ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -2264,18 +2219,6 @@ static void __init register_page_bootmem_info(void)
 }
 void __init mem_init(void)
 {
-       unsigned long addr, last;
-
-       addr = PAGE_OFFSET + kern_base;
-       last = PAGE_ALIGN(kern_size) + addr;
-       while (addr < last) {
-               set_bit(__pa(addr) >> ILOG2_4MB, sparc64_valid_addr_bitmap);
-               addr += PAGE_SIZE;
-       }
-
-       setup_valid_addr_bitmap_from_pavail(sparc64_valid_addr_bitmap);
-       patch_tlb_miss_handler_bitmap();
-
        high_memory = __va(last_valid_pfn << PAGE_SHIFT);
 
        register_page_bootmem_info();
index 0668b364f44ddb93ccac1849677529b206d5500f..29ff73fc96b410dda1f55daa9f68d7666d81d900 100644 (file)
@@ -8,15 +8,8 @@
  */
 
 #define MAX_PHYS_ADDRESS       (1UL << MAX_PHYS_ADDRESS_BITS)
-#define KPTE_BITMAP_CHUNK_SZ           (256UL * 1024UL * 1024UL)
-#define KPTE_BITMAP_BYTES      \
-       ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4)
-#define VALID_ADDR_BITMAP_CHUNK_SZ     (4UL * 1024UL * 1024UL)
-#define VALID_ADDR_BITMAP_BYTES        \
-       ((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8)
 
 extern unsigned long kern_linear_pte_xor[4];
-extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
 extern unsigned int sparc64_highest_unlocked_tlb_ent;
 extern unsigned long sparc64_kern_pri_context;
 extern unsigned long sparc64_kern_pri_nuc_bits;