memory-hotplug: implement register_page_bootmem_info_section of sparse-vmemmap
author Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Sat, 23 Feb 2013 00:33:00 +0000 (16:33 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 24 Feb 2013 01:50:12 +0000 (17:50 -0800)
To remove a memmap region of sparse-vmemmap that was allocated from bootmem,
the memmap region needs to be registered by get_page_bootmem().  So the
patch walks the pages of the virtual mapping and registers them by
get_page_bootmem().

NOTE: register_page_bootmem_memmap() is not implemented for ia64,
      ppc, s390, and sparc.  So introduce CONFIG_HAVE_BOOTMEM_INFO_NODE
      and revert register_page_bootmem_info_node() when platform doesn't
      support it.

      It's implemented by adding a new Kconfig option named
      CONFIG_HAVE_BOOTMEM_INFO_NODE, which will be automatically selected
      by archs that fully support the memory-hotplug feature (currently
      only x86_64).

      Since we have 2 config options called MEMORY_HOTPLUG and
      MEMORY_HOTREMOVE used for memory hot-add and hot-remove separately,
      and the code in function register_page_bootmem_info_node() is only
      used for collecting information for hot-remove, place it under
      MEMORY_HOTREMOVE.

      Besides, page_isolation.c, which is selected by MEMORY_ISOLATION under
      MEMORY_HOTPLUG, is a similar case, so move it too.

[mhocko@suse.cz: put register_page_bootmem_memmap inside CONFIG_MEMORY_HOTPLUG_SPARSE]
[linfeng@cn.fujitsu.com: introduce CONFIG_HAVE_BOOTMEM_INFO_NODE and revert register_page_bootmem_info_node()]
[mhocko@suse.cz: remove the arch specific functions without any implementation]
[linfeng@cn.fujitsu.com: mm/Kconfig: move auto selects from MEMORY_HOTPLUG to MEMORY_HOTREMOVE as needed]
[rientjes@google.com: fix defined but not used warning]
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Reviewed-by: Wu Jianguo <wujianguo@huawei.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: Jianguo Wu <wujianguo@huawei.com>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Lin Feng <linfeng@cn.fujitsu.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
arch/ia64/mm/discontig.c
arch/powerpc/mm/init_64.c
arch/sparc/mm/init_64.c
arch/x86/mm/init_64.c
include/linux/memory_hotplug.h
include/linux/mm.h
mm/Kconfig
mm/memory_hotplug.c

index c641333cd997635f1c885d12d895459111168e4a..731bf84094b64f9caa4d70e4458fd3229a77461e 100644 (file)
@@ -822,4 +822,5 @@ int __meminit vmemmap_populate(struct page *start_page,
 {
        return vmemmap_populate_basepages(start_page, size, node);
 }
+
 #endif
index 95a45293e5ac0dd2c6b343f1aa7a81ed8b69b666..42bf082f012489673593f3b0111b960d9e040df3 100644 (file)
@@ -297,5 +297,6 @@ int __meminit vmemmap_populate(struct page *start_page,
 
        return 0;
 }
+
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
index 5c2c6e61facb95dd50f2d37ba65d0fbb22eabfed..59c6fcfdc782bb796bd943dae43416844189f14b 100644 (file)
@@ -2235,6 +2235,7 @@ void __meminit vmemmap_populate_print_last(void)
                node_start = 0;
        }
 }
+
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
 static void prot_init_common(unsigned long page_none,
index b6dd1c480b30255c5ae911a34e40308c65628b96..f17aa76dc1ae2d435b3e889522250f6c771bc1b3 100644 (file)
@@ -1034,6 +1034,66 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
        return 0;
 }
 
+#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
+void register_page_bootmem_memmap(unsigned long section_nr,
+                                 struct page *start_page, unsigned long size)
+{ /* Register, via get_page_bootmem(), the pages mapping the given memmap. */
+       unsigned long addr = (unsigned long)start_page;
+       unsigned long end = (unsigned long)(start_page + size); /* exclusive */
+       unsigned long next;
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       unsigned int nr_pages;
+       struct page *page;
+
+       for (; addr < end; addr = next) { /* every path below sets next */
+               pte_t *pte = NULL;
+
+               pgd = pgd_offset_k(addr);
+               if (pgd_none(*pgd)) {
+                       next = (addr + PAGE_SIZE) & PAGE_MASK; /* next page */
+                       continue;
+               }
+               get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
+
+               pud = pud_offset(pgd, addr);
+               if (pud_none(*pud)) {
+                       next = (addr + PAGE_SIZE) & PAGE_MASK; /* next page */
+                       continue;
+               }
+               get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
+
+               if (!cpu_has_pse) { /* walk down to pte level, page by page */
+                       next = (addr + PAGE_SIZE) & PAGE_MASK;
+                       pmd = pmd_offset(pud, addr);
+                       if (pmd_none(*pmd))
+                               continue;
+                       get_page_bootmem(section_nr, pmd_page(*pmd),
+                                        MIX_SECTION_INFO); /* table page */
+
+                       pte = pte_offset_kernel(pmd, addr);
+                       if (pte_none(*pte))
+                               continue;
+                       get_page_bootmem(section_nr, pte_page(*pte),
+                                        SECTION_INFO); /* mapped page */
+               } else { /* treat the pmd entry as one PMD_SIZE mapping */
+                       next = pmd_addr_end(addr, end);
+
+                       pmd = pmd_offset(pud, addr);
+                       if (pmd_none(*pmd))
+                               continue;
+
+                       nr_pages = 1 << (get_order(PMD_SIZE)); /* pages/pmd */
+                       page = pmd_page(*pmd);
+                       while (nr_pages--) /* register each page in turn */
+                               get_page_bootmem(section_nr, page++,
+                                                SECTION_INFO);
+               }
+       }
+}
+#endif
+
 void __meminit vmemmap_populate_print_last(void)
 {
        if (p_start) {
index 31a563bbd93600cf61c4ba409fcbe78ac55bb023..4d523fe75ba19355048c614bfeb442c445261f0c 100644 (file)
@@ -174,17 +174,16 @@ static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
 #endif /* CONFIG_NUMA */
 #endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
 
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
+#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
+extern void register_page_bootmem_info_node(struct pglist_data *pgdat);
+#else
 static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
 {
 }
-static inline void put_page_bootmem(struct page *page)
-{
-}
-#else
-extern void register_page_bootmem_info_node(struct pglist_data *pgdat);
-extern void put_page_bootmem(struct page *page);
 #endif
+extern void put_page_bootmem(struct page *page);
+extern void get_page_bootmem(unsigned long info, struct page *page,
+                            unsigned long type); /* see mm/memory_hotplug.c */
 
 /*
  * Lock for memory hotplug guarantees 1) all callbacks for memory hotplug
index 95db68e34b18c424c4ab804a3883a1fdf08b5b27..060557b9764f87304896ce7a914c430354306ef4 100644 (file)
@@ -1718,7 +1718,8 @@ int vmemmap_populate_basepages(struct page *start_page,
                                                unsigned long pages, int node);
 int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
 void vmemmap_populate_print_last(void);
-
+void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
+                                 unsigned long size);
 
 enum mf_flags {
        MF_COUNT_INCREASED = 1 << 0,
index 0b23db9a879177decb8c6f96f9d8c5266ba62a2c..2c7aea7106f9816dc41184f01a2d367c81749a1d 100644 (file)
@@ -162,10 +162,16 @@ config MOVABLE_NODE
          Say Y here if you want to hotplug a whole node.
          Say N here if you want kernel to use memory on all nodes evenly.
 
+#
+# Set only on architectures that have completely implemented the memory
+# hotplug feature. If you are not sure, don't touch it.
+#
+config HAVE_BOOTMEM_INFO_NODE
+       def_bool n
+
 # eventually, we can have this option just 'select SPARSEMEM'
 config MEMORY_HOTPLUG
        bool "Allow for memory hot-add"
-       select MEMORY_ISOLATION
        depends on SPARSEMEM || X86_64_ACPI_NUMA
        depends on HOTPLUG && ARCH_ENABLE_MEMORY_HOTPLUG
        depends on (IA64 || X86 || PPC_BOOK3S_64 || SUPERH || S390)
@@ -176,6 +182,8 @@ config MEMORY_HOTPLUG_SPARSE
 
 config MEMORY_HOTREMOVE
        bool "Allow for memory hot remove"
+       select MEMORY_ISOLATION
+       select HAVE_BOOTMEM_INFO_NODE if X86_64
        depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE
        depends on MIGRATION
 
index 942b43f6d7361aca8f8dd793f25d6604ed35e4b0..6c90d222ec0ac4529598f7aae3b2d2e26eaad289 100644 (file)
@@ -91,9 +91,8 @@ static void release_memory_resource(struct resource *res)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
-#ifndef CONFIG_SPARSEMEM_VMEMMAP
-static void get_page_bootmem(unsigned long info,  struct page *page,
-                            unsigned long type)
+void get_page_bootmem(unsigned long info,  struct page *page,
+                     unsigned long type)
 {
        page->lru.next = (struct list_head *) type;
        SetPagePrivate(page);
@@ -128,6 +127,8 @@ void __ref put_page_bootmem(struct page *page)
 
 }
 
+#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
+#ifndef CONFIG_SPARSEMEM_VMEMMAP
 static void register_page_bootmem_info_section(unsigned long start_pfn)
 {
        unsigned long *usemap, mapsize, section_nr, i;
@@ -161,6 +162,32 @@ static void register_page_bootmem_info_section(unsigned long start_pfn)
                get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
 
 }
+#else /* CONFIG_SPARSEMEM_VMEMMAP */
+static void register_page_bootmem_info_section(unsigned long start_pfn)
+{ /* vmemmap variant: registers a section's memmap and usemap pages. */
+       unsigned long *usemap, mapsize, section_nr, i;
+       struct mem_section *ms;
+       struct page *page, *memmap;
+
+       if (!pfn_valid(start_pfn))
+               return; /* nothing to register for an invalid pfn */
+
+       section_nr = pfn_to_section_nr(start_pfn);
+       ms = __nr_to_section(section_nr);
+
+       memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
+
+       register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION);
+
+       usemap = __nr_to_section(section_nr)->pageblock_flags;
+       page = virt_to_page(usemap); /* first page holding the usemap */
+
+       mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT; /* in pages */
+
+       for (i = 0; i < mapsize; i++, page++)
+               get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
+}
+#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
 
 void register_page_bootmem_info_node(struct pglist_data *pgdat)
 {
@@ -203,7 +230,7 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat)
                        register_page_bootmem_info_section(pfn);
        }
 }
-#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
+#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
 
 static void grow_zone_span(struct zone *zone, unsigned long start_pfn,
                           unsigned long end_pfn)