x86: convert to the new dynamic percpu allocator
author    Tejun Heo <tj@kernel.org>
          Fri, 20 Feb 2009 07:29:09 +0000 (16:29 +0900)
committer Tejun Heo <tj@kernel.org>
          Fri, 20 Feb 2009 07:29:09 +0000 (16:29 +0900)
Impact: use new dynamic allocator, unified access to static/dynamic
        percpu memory

Convert to the new dynamic percpu allocator.

* implement populate_extra_pte() for both 32 and 64 bit
* update setup_per_cpu_areas() to use pcpu_setup_static()
* define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr()
* define config HAVE_DYNAMIC_PER_CPU_AREA

Signed-off-by: Tejun Heo <tj@kernel.org>
arch/x86/Kconfig
arch/x86/include/asm/percpu.h
arch/x86/include/asm/pgtable.h
arch/x86/kernel/setup_percpu.c
arch/x86/mm/init_32.c
arch/x86/mm/init_64.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f760a22f95dcc9cfa24d1d84b1fb5bd626da9884..d3f6eadfd4ba23546c7ddf8684f691084df4f259 100644
@@ -135,6 +135,9 @@ config ARCH_HAS_CACHE_LINE_SIZE
 config HAVE_SETUP_PER_CPU_AREA
        def_bool y
 
+config HAVE_DYNAMIC_PER_CPU_AREA
+       def_bool y
+
 config HAVE_CPUMASK_OF_CPU_MAP
        def_bool X86_64_SMP
 
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index aee103b26d01778c987e7f828876b876473a3bfd..8f1d2fbec1d4ecfebdd3053f79f1157b80c799e9 100644
 #else /* ...!ASSEMBLY */
 
 #include <linux/stringify.h>
+#include <asm/sections.h>
+
+#define __addr_to_pcpu_ptr(addr)                                       \
+       (void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr  \
+                + (unsigned long)__per_cpu_start)
+#define __pcpu_ptr_to_addr(ptr)                                                \
+       (void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr   \
+                - (unsigned long)__per_cpu_start)
 
 #ifdef CONFIG_SMP
 #define __percpu_arg(x)                "%%"__stringify(__percpu_seg)":%P" #x
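
The two conversion macros above are a pure offset shift between the
allocator's address space (based at pcpu_base_addr) and the static percpu
symbol space (based at __per_cpu_start), which is what lets dynamic percpu
pointers use the same per_cpu_offset arithmetic as static variables. A
minimal userspace sketch with stand-in base values (the real ones come from
the linker script and the percpu allocator) shows that the conversion
round-trips:

	#include <stdio.h>

	/* Stand-in bases for illustration only. */
	static const unsigned long per_cpu_start = 0x1000;
	static const unsigned long base_addr     = 0x200000;

	#define addr_to_pcpu_ptr(addr)	((addr) - base_addr + per_cpu_start)
	#define pcpu_ptr_to_addr(ptr)	((ptr) + base_addr - per_cpu_start)

	int main(void)
	{
		unsigned long addr = base_addr + 0x40;	/* object in unit 0 */
		unsigned long ptr  = addr_to_pcpu_ptr(addr);

		printf("ptr  = %#lx\n", ptr);			/* 0x1040 */
		printf("addr = %#lx\n", pcpu_ptr_to_addr(ptr));	/* 0x200040 */
		return 0;
	}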
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 6f7c102018bf3812595546b01e333caf0e87ef06..dd91c2515c645418d4839d67741ac9c415c50b72 100644
@@ -402,6 +402,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 
 /* Install a pte for a particular vaddr in kernel space. */
 void set_pte_vaddr(unsigned long vaddr, pte_t pte);
+void populate_extra_pte(unsigned long vaddr);
 
 #ifdef CONFIG_X86_32
 extern void native_pagetable_setup_start(pgd_t *base);
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index d992e6cff73023f43a4c2a3d8ed7d95a7d28f6c1..2dce43558217ac90d1254bec19dfdeccf0fd62fc 100644
@@ -61,38 +61,56 @@ static inline void setup_percpu_segment(int cpu)
  */
 void __init setup_per_cpu_areas(void)
 {
-       ssize_t size;
-       char *ptr;
-       int cpu;
-
-       /* Copy section for each CPU (we discard the original) */
-       size = roundup(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
+       ssize_t size = __per_cpu_end - __per_cpu_start;
+       unsigned int nr_cpu_pages = DIV_ROUND_UP(size, PAGE_SIZE);
+       static struct page **pages;
+       size_t pages_size;
+       unsigned int cpu, i, j;
+       unsigned long delta;
+       size_t pcpu_unit_size;
 
        pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
                NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
+       pr_info("PERCPU: Allocating %zd bytes for static per cpu data\n", size);
 
-       pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size);
+       pages_size = nr_cpu_pages * num_possible_cpus() * sizeof(pages[0]);
+       pages = alloc_bootmem(pages_size);
 
+       j = 0;
        for_each_possible_cpu(cpu) {
+               void *ptr;
+
+               for (i = 0; i < nr_cpu_pages; i++) {
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-               ptr = alloc_bootmem_pages(size);
+                       ptr = alloc_bootmem_pages(PAGE_SIZE);
 #else
-               int node = early_cpu_to_node(cpu);
-               if (!node_online(node) || !NODE_DATA(node)) {
-                       ptr = alloc_bootmem_pages(size);
-                       pr_info("cpu %d has no node %d or node-local memory\n",
-                               cpu, node);
-                       pr_debug("per cpu data for cpu%d at %016lx\n",
-                                cpu, __pa(ptr));
-               } else {
-                       ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
-                       pr_debug("per cpu data for cpu%d on node%d at %016lx\n",
-                               cpu, node, __pa(ptr));
-               }
+                       int node = early_cpu_to_node(cpu);
+
+                       if (!node_online(node) || !NODE_DATA(node)) {
+                               ptr = alloc_bootmem_pages(PAGE_SIZE);
+                               pr_info("cpu %d has no node %d or node-local "
+                                       "memory\n", cpu, node);
+                               pr_debug("per cpu data for cpu%d at %016lx\n",
+                                        cpu, __pa(ptr));
+                       } else {
+                               ptr = alloc_bootmem_pages_node(NODE_DATA(node),
+                                                              PAGE_SIZE);
+                               pr_debug("per cpu data for cpu%d on node%d "
+                                        "at %016lx\n", cpu, node, __pa(ptr));
+                       }
 #endif
+                       memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE);
+                       pages[j++] = virt_to_page(ptr);
+               }
+       }
+
+       pcpu_unit_size = pcpu_setup_static(populate_extra_pte, pages, size);
 
-               memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
-               per_cpu_offset(cpu) = ptr - __per_cpu_start;
+       free_bootmem(__pa(pages), pages_size);
+
+       delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+       for_each_possible_cpu(cpu) {
+               per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size;
                per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
                per_cpu(cpu_number, cpu) = cpu;
                setup_percpu_segment(cpu);
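
The rewritten function allocates the static copies page by page (so each
CPU's pages can come from its own node), hands them to pcpu_setup_static(),
and then derives every CPU's offset from a single delta: the first chunk is
mapped contiguously at pcpu_base_addr with one unit per CPU, so
per_cpu_offset(cpu) collapses to delta + cpu * pcpu_unit_size. A userspace
sketch of that arithmetic, using made-up bases and unit size:

	#include <stdio.h>

	#define PAGE_SIZE 4096UL

	/* Made-up stand-ins for __per_cpu_start, pcpu_base_addr and the
	 * unit size returned by pcpu_setup_static(). */
	static const unsigned long per_cpu_start  = 0x8000;
	static const unsigned long base_addr      = 0x100000;
	static const unsigned long pcpu_unit_size = 8 * PAGE_SIZE;

	int main(void)
	{
		unsigned long delta = base_addr - per_cpu_start;
		unsigned long var   = per_cpu_start + 0x10; /* a static percpu var */
		unsigned int cpu;

		for (cpu = 0; cpu < 4; cpu++) {
			unsigned long off = delta + cpu * pcpu_unit_size;

			/* per_cpu(var, cpu) is var's link address plus the
			 * offset, landing in cpu's unit of the first chunk. */
			printf("cpu%u: offset=%#lx &var=%#lx\n",
			       cpu, off, var + off);
		}
		return 0;
	}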
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 00263bf07a88749f3ae92838465d3783fcdd6dd3..8b1a0ef7f87441ac8f324f74656a73b717a9f922 100644
@@ -137,6 +137,16 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
        return pte_offset_kernel(pmd, 0);
 }
 
+void __init populate_extra_pte(unsigned long vaddr)
+{
+       int pgd_idx = pgd_index(vaddr);
+       int pmd_idx = pmd_index(vaddr);
+       pmd_t *pmd;
+
+       pmd = one_md_table_init(swapper_pg_dir + pgd_idx);
+       one_page_table_init(pmd + pmd_idx);
+}
+
 static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
                                           unsigned long vaddr, pte_t *lastpte)
 {
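
On 32 bit the helper only needs to make sure a pmd and a pte page back the
given address; one_md_table_init() and one_page_table_init() already handle
the allocate-if-missing logic. A sketch of the index math it starts from,
assuming the PAE layout (4 pgd entries of 1 GB, 512 pmd entries of 2 MB):

	#include <stdio.h>

	#define PMD_SHIFT	21
	#define PGDIR_SHIFT	30
	#define PTRS_PER_PMD	512UL

	int main(void)
	{
		unsigned long vaddr = 0xf7a01000UL; /* arbitrary kernel address */

		printf("pgd_idx=%lu pmd_idx=%lu\n",
		       vaddr >> PGDIR_SHIFT,
		       (vaddr >> PMD_SHIFT) & (PTRS_PER_PMD - 1));
		return 0;
	}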
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e6d36b490250bed6952f2d93de3d1f337c3cf7f4..7f91e2cdc4ced891bf4a2bbbea66a0c1138f4022 100644
@@ -223,6 +223,25 @@ set_pte_vaddr(unsigned long vaddr, pte_t pteval)
        set_pte_vaddr_pud(pud_page, vaddr, pteval);
 }
 
+void __init populate_extra_pte(unsigned long vaddr)
+{
+       pgd_t *pgd;
+       pud_t *pud;
+
+       pgd = pgd_offset_k(vaddr);
+       if (pgd_none(*pgd)) {
+               pud = (pud_t *)spp_getpage();
+               pgd_populate(&init_mm, pgd, pud);
+               if (pud != pud_offset(pgd, 0)) {
+                       printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n",
+                              pud, pud_offset(pgd, 0));
+                       return;
+               }
+       }
+
+       set_pte_vaddr_pud((pud_t *)pgd_page_vaddr(*pgd), vaddr, __pte(0));
+}
+
 /*
  * Create large page table mappings for a range of physical addresses.
  */
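
On 64 bit the helper walks down from the pgd, allocating a pud through
spp_getpage() if the entry is empty, and then installs __pte(0) via
set_pte_vaddr_pud(): the not-present pte maps nothing, but forces any
missing pmd and pte pages into existence so the percpu allocator can map
pages there later without having to allocate page tables itself. The
four-level index split this walk relies on (9 bits per level, assuming the
classic 48-bit layout), in a userspace sketch:

	#include <stdio.h>

	#define PAGE_SHIFT	12
	#define PMD_SHIFT	21
	#define PUD_SHIFT	30
	#define PGDIR_SHIFT	39
	#define PTRS		512UL	/* entries per table at every level */

	int main(void)
	{
		unsigned long vaddr = 0xffffe20000001000UL; /* sample address */

		printf("pgd=%lu pud=%lu pmd=%lu pte=%lu\n",
		       (vaddr >> PGDIR_SHIFT) & (PTRS - 1),
		       (vaddr >> PUD_SHIFT) & (PTRS - 1),
		       (vaddr >> PMD_SHIFT) & (PTRS - 1),
		       (vaddr >> PAGE_SHIFT) & (PTRS - 1));
		return 0;
	}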