mm: meminit: make __early_pfn_to_nid SMP-safe and introduce meminit_pfn_in_nid
author Mel Gorman <mgorman@suse.de>
Tue, 30 Jun 2015 21:56:55 +0000 (14:56 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 1 Jul 2015 02:44:56 +0000 (19:44 -0700)
__early_pfn_to_nid() uses static variables to cache recent lookups because
memblock lookups are very expensive, but this assumes that memory
initialisation is single-threaded.  Parallel initialisation of struct
pages will break that assumption, so this patch makes __early_pfn_to_nid()
SMP-safe by requiring the caller to cache recent search information.
early_pfn_to_nid() keeps the same interface but is only safe to use early
in boot due to the use of a global static variable.  meminit_pfn_in_nid()
is an SMP-safe version for which callers must maintain their own state.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Tested-by: Nate Zimmer <nzimmer@sgi.com>
Tested-by: Waiman Long <waiman.long@hp.com>
Tested-by: Daniel J Blueman <daniel@numascale.com>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Nate Zimmer <nzimmer@sgi.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Waiman Long <waiman.long@hp.com>
Cc: Scott Norton <scott.norton@hp.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
arch/ia64/mm/numa.c
include/linux/mm.h
include/linux/mmzone.h
mm/page_alloc.c

index ea21d4cad540eb8207f29d19fb01dd5517843f4a..aa19b7ac8222a2fb604b90fc9edd3d3b86ef483c 100644 (file)
@@ -58,27 +58,22 @@ paddr_to_nid(unsigned long paddr)
  * SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where
  * the section resides.
  */
-int __meminit __early_pfn_to_nid(unsigned long pfn)
+int __meminit __early_pfn_to_nid(unsigned long pfn,
+                                       struct mminit_pfnnid_cache *state)
 {
        int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec;
-       /*
-        * NOTE: The following SMP-unsafe globals are only used early in boot
-        * when the kernel is running single-threaded.
-        */
-       static int __meminitdata last_ssec, last_esec;
-       static int __meminitdata last_nid;
 
-       if (section >= last_ssec && section < last_esec)
-               return last_nid;
+       if (section >= state->last_start && section < state->last_end)
+               return state->last_nid;
 
        for (i = 0; i < num_node_memblks; i++) {
                ssec = node_memblk[i].start_paddr >> PA_SECTION_SHIFT;
                esec = (node_memblk[i].start_paddr + node_memblk[i].size +
                        ((1L << PA_SECTION_SHIFT) - 1)) >> PA_SECTION_SHIFT;
                if (section >= ssec && section < esec) {
-                       last_ssec = ssec;
-                       last_esec = esec;
-                       last_nid = node_memblk[i].nid;
+                       state->last_start = ssec;
+                       state->last_end = esec;
+                       state->last_nid = node_memblk[i].nid;
                        return node_memblk[i].nid;
                }
        }
index d662af2d0d0127af1b94c5d667f2161c58d1b3a1..2e872f92dbac0cecc2c5b3a65fbe7ff8678e48d5 100644 (file)
@@ -1726,7 +1726,8 @@ extern void sparse_memory_present_with_active_regions(int nid);
 
 #if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \
     !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID)
-static inline int __early_pfn_to_nid(unsigned long pfn)
+static inline int __early_pfn_to_nid(unsigned long pfn,
+                                       struct mminit_pfnnid_cache *state)
 {
        return 0;
 }
@@ -1734,7 +1735,8 @@ static inline int __early_pfn_to_nid(unsigned long pfn)
 /* please see mm/page_alloc.c */
 extern int __meminit early_pfn_to_nid(unsigned long pfn);
 /* there is a per-arch backend function. */
-extern int __meminit __early_pfn_to_nid(unsigned long pfn);
+extern int __meminit __early_pfn_to_nid(unsigned long pfn,
+                                       struct mminit_pfnnid_cache *state);
 #endif
 
 extern void set_dma_reserve(unsigned long new_dma_reserve);
index 54d74f6eb233521d6cb84b2720a15c3cb2e6b734..b2473d8225490312ed9ec020f7c41c86b9280436 100644 (file)
@@ -1216,10 +1216,24 @@ void sparse_init(void);
 #define sparse_index_init(_sec, _nid)  do {} while (0)
 #endif /* CONFIG_SPARSEMEM */
 
+/*
+ * During memory init memblocks map pfns to nids. The search is expensive and
+ * this caches recent lookups. The implementation of __early_pfn_to_nid
+ * may treat start/end as pfns or sections.
+ */
+struct mminit_pfnnid_cache {
+       unsigned long last_start;
+       unsigned long last_end;
+       int last_nid;
+};
+
 #ifdef CONFIG_NODES_SPAN_OTHER_NODES
 bool early_pfn_in_nid(unsigned long pfn, int nid);
+bool meminit_pfn_in_nid(unsigned long pfn, int node,
+                       struct mminit_pfnnid_cache *state);
 #else
-#define early_pfn_in_nid(pfn, nid)     (1)
+#define early_pfn_in_nid(pfn, nid)             (1)
+#define meminit_pfn_in_nid(pfn, nid, state)    (1)
 #endif
 
 #ifndef early_pfn_valid
index c2ee4ecad083f897d9ce181492502f48645e0bc3..ffdb2308848d495fecaa375821f9eac92dad3e4a 100644 (file)
@@ -4551,39 +4551,41 @@ int __meminit init_currently_empty_zone(struct zone *zone,
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+
 /*
  * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
  */
-int __meminit __early_pfn_to_nid(unsigned long pfn)
+int __meminit __early_pfn_to_nid(unsigned long pfn,
+                                       struct mminit_pfnnid_cache *state)
 {
        unsigned long start_pfn, end_pfn;
        int nid;
-       /*
-        * NOTE: The following SMP-unsafe globals are only used early in boot
-        * when the kernel is running single-threaded.
-        */
-       static unsigned long __meminitdata last_start_pfn, last_end_pfn;
-       static int __meminitdata last_nid;
 
-       if (last_start_pfn <= pfn && pfn < last_end_pfn)
-               return last_nid;
+       if (state->last_start <= pfn && pfn < state->last_end)
+               return state->last_nid;
 
        nid = memblock_search_pfn_nid(pfn, &start_pfn, &end_pfn);
        if (nid != -1) {
-               last_start_pfn = start_pfn;
-               last_end_pfn = end_pfn;
-               last_nid = nid;
+               state->last_start = start_pfn;
+               state->last_end = end_pfn;
+               state->last_nid = nid;
        }
 
        return nid;
 }
 #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
 
+static struct mminit_pfnnid_cache early_pfnnid_cache __meminitdata;
+
+/* Only safe to use early in boot when initialisation is single-threaded */
 int __meminit early_pfn_to_nid(unsigned long pfn)
 {
        int nid;
 
-       nid = __early_pfn_to_nid(pfn);
+       /* The system will behave unpredictably otherwise */
+       BUG_ON(system_state != SYSTEM_BOOTING);
+
+       nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache);
        if (nid >= 0)
                return nid;
        /* just returns 0 */
@@ -4591,15 +4593,23 @@ int __meminit early_pfn_to_nid(unsigned long pfn)
 }
 
 #ifdef CONFIG_NODES_SPAN_OTHER_NODES
-bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
+bool __meminit meminit_pfn_in_nid(unsigned long pfn, int node,
+                                       struct mminit_pfnnid_cache *state)
 {
        int nid;
 
-       nid = __early_pfn_to_nid(pfn);
+       nid = __early_pfn_to_nid(pfn, state);
        if (nid >= 0 && nid != node)
                return false;
        return true;
 }
+
+/* Only safe to use early in boot when initialisation is single-threaded */
+bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
+{
+       return meminit_pfn_in_nid(pfn, node, &early_pfnnid_cache);
+}
+
 #endif
 
 /**