mm/page_alloc.c: introduce kernelcore=mirror option
authorTaku Izumi <izumi.taku@jp.fujitsu.com>
Tue, 15 Mar 2016 21:55:22 +0000 (14:55 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 15 Mar 2016 23:55:16 +0000 (16:55 -0700)
This patch extends existing "kernelcore" option and introduces
kernelcore=mirror option.  By specifying "mirror" instead of specifying
the amount of memory, non-mirrored (non-reliable) region will be
arranged into ZONE_MOVABLE.

[akpm@linux-foundation.org: fix build with CONFIG_HAVE_MEMBLOCK_NODE_MAP=n]
Signed-off-by: Taku Izumi <izumi.taku@jp.fujitsu.com>
Tested-by: Sudeep Holla <sudeep.holla@arm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Steve Capper <steve.capper@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Documentation/kernel-parameters.txt
mm/page_alloc.c

index 8ae47a7b4923b87ed4f5d7ea722f60fbc88f8cba..208ae7287659a999010437ba566ae62787bfad4d 100644 (file)
@@ -1750,7 +1750,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
        keepinitrd      [HW,ARM]
 
-       kernelcore=nn[KMG]      [KNL,X86,IA-64,PPC] This parameter
+       kernelcore=     [KNL,X86,IA-64,PPC]
+                       Format: nn[KMGTPE] | "mirror"
+                       This parameter
                        specifies the amount of memory usable by the kernel
                        for non-movable allocations.  The requested amount is
                        spread evenly throughout all nodes in the system. The
@@ -1766,6 +1768,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        use the HighMem zone if it exists, and the Normal
                        zone if it does not.
 
+                       Instead of specifying the amount of memory (nn[KMGTPE]),
+                       you can specify "mirror" option. In case "mirror"
+                       option is specified, mirrored (reliable) memory is used
+                       for non-movable allocations and remaining memory is used
+                       for Movable pages. nn[KMGTPE] and "mirror" are exclusive,
+                       so you can NOT specify nn[KMGTPE] and "mirror" at the same
+                       time.
+
        kgdbdbgp=       [KGDB,HW] kgdb over EHCI usb debug port.
                        Format: <Controller#>[,poll interval]
                        The controller # is the number of the ehci usb debug
index 0d20a19151a4382a59f1d61476159b184164b636..b8160b9d5e7255b2a40773cf3b72bf2de8da7317 100644 (file)
@@ -247,6 +247,7 @@ static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
 static unsigned long __initdata required_kernelcore;
 static unsigned long __initdata required_movablecore;
 static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
+static bool mirrored_kernelcore;
 
 /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
 int movable_zone;
@@ -4491,6 +4492,9 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
        pg_data_t *pgdat = NODE_DATA(nid);
        unsigned long pfn;
        unsigned long nr_initialised = 0;
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+       struct memblock_region *r = NULL, *tmp;
+#endif
 
        if (highest_memmap_pfn < end_pfn - 1)
                highest_memmap_pfn = end_pfn - 1;
@@ -4516,6 +4520,40 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                        if (!update_defer_init(pgdat, pfn, end_pfn,
                                                &nr_initialised))
                                break;
+
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+                       /*
+                        * if not mirrored_kernelcore and ZONE_MOVABLE exists,
+                        * range from zone_movable_pfn[nid] to end of each node
+                        * should be ZONE_MOVABLE not ZONE_NORMAL. skip it.
+                        */
+                       if (!mirrored_kernelcore && zone_movable_pfn[nid])
+                               if (zone == ZONE_NORMAL &&
+                                   pfn >= zone_movable_pfn[nid])
+                                       continue;
+
+                       /*
+                        * check given memblock attribute by firmware which
+                        * can affect kernel memory layout.
+                        * if zone==ZONE_MOVABLE but memory is mirrored,
+                        * it's an overlapped memmap init. skip it.
+                        */
+                       if (mirrored_kernelcore && zone == ZONE_MOVABLE) {
+                               if (!r ||
+                                   pfn >= memblock_region_memory_end_pfn(r)) {
+                                       for_each_memblock(memory, tmp)
+                                               if (pfn < memblock_region_memory_end_pfn(tmp))
+                                                       break;
+                                       r = tmp;
+                               }
+                               if (pfn >= memblock_region_memory_base_pfn(r) &&
+                                   memblock_is_mirror(r)) {
+                                       /* already initialized as NORMAL */
+                                       pfn = memblock_region_memory_end_pfn(r);
+                                       continue;
+                               }
+                       }
+#endif
                }
 
                /*
@@ -4934,11 +4972,6 @@ static void __meminit adjust_zone_range_for_zone_movable(int nid,
                        *zone_end_pfn = min(node_end_pfn,
                                arch_zone_highest_possible_pfn[movable_zone]);
 
-               /* Adjust for ZONE_MOVABLE starting within this range */
-               } else if (*zone_start_pfn < zone_movable_pfn[nid] &&
-                               *zone_end_pfn > zone_movable_pfn[nid]) {
-                       *zone_end_pfn = zone_movable_pfn[nid];
-
                /* Check if this whole range is within ZONE_MOVABLE */
                } else if (*zone_start_pfn >= zone_movable_pfn[nid])
                        *zone_start_pfn = *zone_end_pfn;
@@ -5023,6 +5056,7 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
        unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
        unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
        unsigned long zone_start_pfn, zone_end_pfn;
+       unsigned long nr_absent;
 
        /* When hotadd a new node from cpu_up(), the node should be empty */
        if (!node_start_pfn && !node_end_pfn)
@@ -5034,7 +5068,39 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
        adjust_zone_range_for_zone_movable(nid, zone_type,
                        node_start_pfn, node_end_pfn,
                        &zone_start_pfn, &zone_end_pfn);
-       return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
+       nr_absent = __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
+
+       /*
+        * ZONE_MOVABLE handling.
+        * Treat pages to be ZONE_MOVABLE in ZONE_NORMAL as absent pages
+        * and vice versa.
+        */
+       if (zone_movable_pfn[nid]) {
+               if (mirrored_kernelcore) {
+                       unsigned long start_pfn, end_pfn;
+                       struct memblock_region *r;
+
+                       for_each_memblock(memory, r) {
+                               start_pfn = clamp(memblock_region_memory_base_pfn(r),
+                                                 zone_start_pfn, zone_end_pfn);
+                               end_pfn = clamp(memblock_region_memory_end_pfn(r),
+                                               zone_start_pfn, zone_end_pfn);
+
+                               if (zone_type == ZONE_MOVABLE &&
+                                   memblock_is_mirror(r))
+                                       nr_absent += end_pfn - start_pfn;
+
+                               if (zone_type == ZONE_NORMAL &&
+                                   !memblock_is_mirror(r))
+                                       nr_absent += end_pfn - start_pfn;
+                       }
+               } else {
+                       if (zone_type == ZONE_NORMAL)
+                               nr_absent += node_end_pfn - zone_movable_pfn[nid];
+               }
+       }
+
+       return nr_absent;
 }
 
 #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
@@ -5546,6 +5612,36 @@ static void __init find_zone_movable_pfns_for_nodes(void)
                goto out2;
        }
 
+       /*
+        * If kernelcore=mirror is specified, ignore movablecore option
+        */
+       if (mirrored_kernelcore) {
+               bool mem_below_4gb_not_mirrored = false;
+
+               for_each_memblock(memory, r) {
+                       if (memblock_is_mirror(r))
+                               continue;
+
+                       nid = r->nid;
+
+                       usable_startpfn = memblock_region_memory_base_pfn(r);
+
+                       if (usable_startpfn < 0x100000) {
+                               mem_below_4gb_not_mirrored = true;
+                               continue;
+                       }
+
+                       zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
+                               min(usable_startpfn, zone_movable_pfn[nid]) :
+                               usable_startpfn;
+               }
+
+               if (mem_below_4gb_not_mirrored)
+                       pr_warn("This configuration results in unmirrored kernel memory.");
+
+               goto out2;
+       }
+
        /*
         * If movablecore=nn[KMG] was specified, calculate what size of
         * kernelcore that corresponds so that memory usable for
@@ -5806,6 +5902,12 @@ static int __init cmdline_parse_core(char *p, unsigned long *core)
  */
 static int __init cmdline_parse_kernelcore(char *p)
 {
+       /* parse kernelcore=mirror */
+       if (parse_option_str(p, "mirror")) {
+               mirrored_kernelcore = true;
+               return 0;
+       }
+
        return cmdline_parse_core(p, &required_kernelcore);
 }