sh: Support for multiple nodes.
author Paul Mundt <lethal@linux-sh.org>
Wed, 6 Jun 2007 08:52:19 +0000 (17:52 +0900)
committer Paul Mundt <lethal@hera.kernel.org>
Fri, 8 Jun 2007 02:43:49 +0000 (02:43 +0000)
This adds basic support for multiple nodes on SH machines.
This is primarily useful for boards with many different
memory blocks that are otherwise unused (SH7722/SH7785 URAM
and so forth).

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
arch/sh/mm/Kconfig
arch/sh/mm/Makefile
arch/sh/mm/numa.c [new file with mode: 0644]
include/asm-sh/mmzone.h [new file with mode: 0644]
include/asm-sh/topology.h

index 955a851c0c75ec08a2c85de0a882c3f9ee8581bc..8c5b73ab47724448d6c973e8156f5b513ddcd183 100644 (file)
@@ -293,6 +293,17 @@ config VSYSCALL
          For systems with an MMU that can afford to give up a page,
          (the default value) say Y.
 
+config NUMA
+       bool "Non Uniform Memory Access (NUMA) Support"
+       depends on MMU && EXPERIMENTAL
+       default n
+       help
+         Some SH systems have many various memories scattered around
+         the address space, each with varying latencies. This enables
+         support for these blocks by binding them to nodes and allowing
+         memory policies to be used for prioritizing and controlling
+         allocation behaviour.
+
 config NODES_SHIFT
        int
        default "1"
index 3ffd7f68c0a206befbcc7ca3f1a940a73027dfb2..47c330c528db1eb90c30cf224bae8967095b48fb 100644 (file)
@@ -29,3 +29,4 @@ endif
 
 obj-$(CONFIG_SH7705_CACHE_32KB)        += cache-sh7705.o
 obj-$(CONFIG_32BIT)            += pmb.o
+obj-$(CONFIG_NUMA)             += numa.o
diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c
new file mode 100644 (file)
index 0000000..8aff065
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+ * arch/sh/mm/numa.c - Multiple node support for SH machines
+ *
+ *  Copyright (C) 2007  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <linux/numa.h>
+#include <linux/pfn.h>
+#include <asm/sections.h>
+
+/*
+ * Per-node bootmem bookkeeping; each node's pgdat->bdata is pointed at
+ * its slot in this array when the node is set up.
+ */
+static bootmem_data_t plat_node_bdata[MAX_NUMNODES];
+/*
+ * Per-node pgdat pointers, indexed by node id. Slots stay NULL (static
+ * storage) until the corresponding node has been set up.
+ */
+struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL_GPL(node_data);
+
+/*
+ * Node layout convention on SH: system RAM lives in node 0, every other
+ * memory block is assigned to node 1 and up, ordered by latency. The
+ * pgdat of a node is kept node-local at the very start of the node and
+ * is immediately followed by the node mem map.
+ *
+ * setup_memory() places the node 0 pgdat at the first page frame past
+ * the kernel image (_end), starts the bootmem allocator behind it and
+ * finally lets the platform code register its additional nodes.
+ */
+void __init setup_memory(void)
+{
+       unsigned long next_pfn = PFN_UP(__pa(_end));
+       struct pglist_data *pgdat = pfn_to_kaddr(next_pfn);
+
+       /* Start from a clean, zeroed pgdat wired to node 0's bootmem data */
+       memset(pgdat, 0, sizeof(*pgdat));
+       pgdat->bdata = &plat_node_bdata[0];
+       NODE_DATA(0) = pgdat;
+
+       /* Step over the pages now occupied by the pgdat */
+       next_pfn += PFN_UP(sizeof(*pgdat));
+
+       /* Node 0's bootmem allocator starts right behind its pgdat */
+       setup_bootmem_allocator(next_pfn);
+
+       /* Platform hook for registering any further nodes */
+       plat_mem_setup();
+}
+
+/**
+ * setup_bootmem_node - bring up the bootmem allocator for a secondary node
+ * @nid:   node id, must satisfy 0 < @nid < MAX_NUMNODES
+ * @start: start address of the node's memory range
+ * @end:   end address of the node's memory range
+ *
+ * Registers [@start, @end) as an active range of node @nid, places the
+ * node's pgdat at the first page of the range and the bootmem bitmap
+ * directly behind it, frees the range into the bootmem allocator and
+ * then reserves the pages taken by the pgdat and the bitmap again.
+ * Finally the node is marked online and handed to sparsemem.
+ *
+ * Node 0 is not handled here; it is set up by setup_memory().
+ */
+void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
+{
+       unsigned long bootmap_pages;
+       unsigned long start_pfn, free_pfn, end_pfn;
+
+       /*
+        * Don't allow bogus node assignment. node_data[] and
+        * plat_node_bdata[] only have MAX_NUMNODES slots, so
+        * MAX_NUMNODES itself is already out of bounds, as is any
+        * negative id.
+        */
+       BUG_ON(nid >= MAX_NUMNODES || nid <= 0);
+
+       /*
+        * The free pfn starts at the beginning of the range, and is
+        * advanced as necessary for pgdat and node map allocations.
+        */
+       free_pfn = start_pfn = start >> PAGE_SHIFT;
+       end_pfn = end >> PAGE_SHIFT;
+
+       add_active_range(nid, start_pfn, end_pfn);
+
+       /* Node-local pgdat, zeroed before any field is filled in */
+       NODE_DATA(nid) = pfn_to_kaddr(free_pfn);
+       free_pfn += PFN_UP(sizeof(struct pglist_data));
+       memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+
+       NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
+       NODE_DATA(nid)->node_start_pfn = start_pfn;
+       NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+
+       /* Node-local bootmap, placed at free_pfn right behind the pgdat */
+       bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
+       init_bootmem_node(NODE_DATA(nid), free_pfn, start_pfn, end_pfn);
+
+       free_bootmem_with_active_regions(nid, end_pfn);
+
+       /* Reserve the pgdat and bootmap space with the bootmem allocator */
+       reserve_bootmem_node(NODE_DATA(nid), start_pfn << PAGE_SHIFT,
+                            sizeof(struct pglist_data));
+       reserve_bootmem_node(NODE_DATA(nid), free_pfn << PAGE_SHIFT,
+                            bootmap_pages << PAGE_SHIFT);
+
+       /* It's up */
+       node_set_online(nid);
+
+       /* Kick sparsemem */
+       sparse_memory_present_with_active_regions(nid);
+}
diff --git a/include/asm-sh/mmzone.h b/include/asm-sh/mmzone.h
new file mode 100644 (file)
index 0000000..7969f38
--- /dev/null
@@ -0,0 +1,46 @@
+#ifndef __ASM_SH_MMZONE_H
+#define __ASM_SH_MMZONE_H
+
+#ifdef __KERNEL__
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+/* Per-node pgdat pointers, indexed by node id */
+extern struct pglist_data *node_data[];
+#define NODE_DATA(nid)         (node_data[nid])
+
+/*
+ * First pfn of a node, and the pfn one past its end (start plus the
+ * number of spanned pages, i.e. an exclusive bound). Both dereference
+ * NODE_DATA(nid) without checking it.
+ */
+#define node_start_pfn(nid)    (NODE_DATA(nid)->node_start_pfn)
+#define node_end_pfn(nid)      (NODE_DATA(nid)->node_start_pfn + \
+                                NODE_DATA(nid)->node_spanned_pages)
+
+/*
+ * Map a page frame number to the id of the node whose pfn range
+ * [node_start_pfn, node_end_pfn) contains it.
+ *
+ * Returns the lowest matching node id, or MAX_NUMNODES when no node
+ * covers @pfn.
+ */
+static inline int pfn_to_nid(unsigned long pfn)
+{
+       int nid;
+
+       for (nid = 0; nid < MAX_NUMNODES; nid++) {
+               /* Node slots that were never set up have no pgdat */
+               if (!NODE_DATA(nid))
+                       continue;
+
+               /*
+                * node_end_pfn() is start + spanned pages, i.e. one past
+                * the last pfn of the node, so the upper bound is
+                * exclusive.
+                */
+               if (pfn >= node_start_pfn(nid) && pfn < node_end_pfn(nid))
+                       break;
+       }
+
+       return nid;
+}
+
+/*
+ * Return the pgdat of the node that pfn_to_nid() reports for @pfn.
+ *
+ * NOTE(review): pfn_to_nid() yields MAX_NUMNODES when no node matches,
+ * which would index past the end of node_data[] here - presumably
+ * callers only pass pfns that belong to a node; confirm.
+ */
+static inline struct pglist_data *pfn_to_pgdat(unsigned long pfn)
+{
+       int nid = pfn_to_nid(pfn);
+
+       return NODE_DATA(nid);
+}
+
+/* arch/sh/mm/numa.c */
+void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end);
+#else
+/*
+ * No-op stand-in used when CONFIG_NEED_MULTIPLE_NODES is not set, so
+ * callers can invoke setup_bootmem_node() unconditionally.
+ */
+static inline void
+setup_bootmem_node(int nid, unsigned long start, unsigned long end)
+{
+}
+#endif /* CONFIG_NEED_MULTIPLE_NODES */
+
+/* Platform specific mem init */
+void __init plat_mem_setup(void);
+
+/* arch/sh/kernel/setup.c */
+void __init setup_bootmem_allocator(unsigned long start_pfn);
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_SH_MMZONE_H */
index cff001c316feb98c80782c094437a8f90e977a1a..f402a3b1cfa48fd4b0d626d80ef3837305c4f41a 100644 (file)
@@ -1,6 +1,36 @@
 #ifndef _ASM_SH_TOPOLOGY_H
 #define _ASM_SH_TOPOLOGY_H
 
+#ifdef CONFIG_NUMA
+
+/*
+ * sched_domains SD_NODE_INIT for sh machines
+ *
+ * Compound-literal initializer for a node-level struct sched_domain.
+ * The domain starts out with an empty CPU span and no parent, child or
+ * groups attached; last_balance is stamped with the value of jiffies at
+ * the point where the macro is expanded. The remaining fields are the
+ * load balancing tunables and flags chosen for sh.
+ */
+#define SD_NODE_INIT (struct sched_domain) {           \
+       .span                   = CPU_MASK_NONE,        \
+       .parent                 = NULL,                 \
+       .child                  = NULL,                 \
+       .groups                 = NULL,                 \
+       .min_interval           = 8,                    \
+       .max_interval           = 32,                   \
+       .busy_factor            = 32,                   \
+       .imbalance_pct          = 125,                  \
+       .cache_nice_tries       = 2,                    \
+       .busy_idx               = 3,                    \
+       .idle_idx               = 2,                    \
+       .newidle_idx            = 0,                    \
+       .wake_idx               = 1,                    \
+       .forkexec_idx           = 1,                    \
+       .flags                  = SD_LOAD_BALANCE       \
+                               | SD_BALANCE_FORK       \
+                               | SD_BALANCE_EXEC       \
+                               | SD_SERIALIZE          \
+                               | SD_WAKE_BALANCE,      \
+       .last_balance           = jiffies,              \
+       .balance_interval       = 1,                    \
+       .nr_balance_failed      = 0,                    \
+}
+
+#endif
+
 #include <asm-generic/topology.h>
 
 #endif /* _ASM_SH_TOPOLOGY_H */