s390/topology: add drawer scheduling domain level
author Heiko Carstens <heiko.carstens@de.ibm.com>
Wed, 25 May 2016 08:25:50 +0000 (10:25 +0200)
committer Martin Schwidefsky <schwidefsky@de.ibm.com>
Mon, 13 Jun 2016 13:58:27 +0000 (15:58 +0200)
The z13 machine added a fourth level to the cpu topology
information. The new top level is called drawer.

A drawer contains two books, which used to be the top level.

Adding this additional scheduling domain showed performance
improvements of up to 8% for some workloads, while no workloads
appear to be negatively impacted.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
arch/s390/Kconfig
arch/s390/include/asm/topology.h
arch/s390/kernel/topology.c
arch/s390/numa/mode_emu.c

index 3529a285dda816c1e93f4db28ef86e5aaee17541..ac963903d54f18126c6c9ba699942cf1431c75f7 100644 (file)
@@ -478,6 +478,9 @@ config SCHED_MC
 config SCHED_BOOK
        def_bool n
 
+config SCHED_DRAWER
+       def_bool n
+
 config SCHED_TOPOLOGY
        def_bool y
        prompt "Topology scheduler support"
@@ -485,6 +488,7 @@ config SCHED_TOPOLOGY
        select SCHED_SMT
        select SCHED_MC
        select SCHED_BOOK
+       select SCHED_DRAWER
        help
          Topology scheduler support improves the CPU scheduler's decision
          making when dealing with machines that have multi-threading,
index 6b53962e807e003bc22c41116cba1b55b96f9e8d..f15f5571ca2b5e6bf64283e6588ac1560cc7238a 100644 (file)
@@ -14,10 +14,12 @@ struct cpu_topology_s390 {
        unsigned short core_id;
        unsigned short socket_id;
        unsigned short book_id;
+       unsigned short drawer_id;
        unsigned short node_id;
        cpumask_t thread_mask;
        cpumask_t core_mask;
        cpumask_t book_mask;
+       cpumask_t drawer_mask;
 };
 
 DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology);
@@ -30,6 +32,8 @@ DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology);
 #define topology_core_cpumask(cpu)       (&per_cpu(cpu_topology, cpu).core_mask)
 #define topology_book_id(cpu)            (per_cpu(cpu_topology, cpu).book_id)
 #define topology_book_cpumask(cpu)       (&per_cpu(cpu_topology, cpu).book_mask)
+#define topology_drawer_id(cpu)                  (per_cpu(cpu_topology, cpu).drawer_id)
+#define topology_drawer_cpumask(cpu)     (&per_cpu(cpu_topology, cpu).drawer_mask)
 
 #define mc_capable() 1
 
index 64298a8675895e4529954fcb7103acbae712b1a7..44745e751c3a2921ba1cadbf846e3558600af2c6 100644 (file)
@@ -46,6 +46,7 @@ static DECLARE_WORK(topology_work, topology_work_fn);
  */
 static struct mask_info socket_info;
 static struct mask_info book_info;
+static struct mask_info drawer_info;
 
 DEFINE_PER_CPU(struct cpu_topology_s390, cpu_topology);
 EXPORT_PER_CPU_SYMBOL_GPL(cpu_topology);
@@ -80,6 +81,7 @@ static cpumask_t cpu_thread_map(unsigned int cpu)
 }
 
 static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
+                                         struct mask_info *drawer,
                                          struct mask_info *book,
                                          struct mask_info *socket,
                                          int one_socket_per_cpu)
@@ -97,9 +99,11 @@ static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
                        continue;
                for (i = 0; i <= smp_cpu_mtid; i++) {
                        topo = &per_cpu(cpu_topology, lcpu + i);
+                       topo->drawer_id = drawer->id;
                        topo->book_id = book->id;
                        topo->core_id = rcore;
                        topo->thread_id = lcpu + i;
+                       cpumask_set_cpu(lcpu + i, &drawer->mask);
                        cpumask_set_cpu(lcpu + i, &book->mask);
                        cpumask_set_cpu(lcpu + i, &socket->mask);
                        if (one_socket_per_cpu)
@@ -128,6 +132,11 @@ static void clear_masks(void)
                cpumask_clear(&info->mask);
                info = info->next;
        }
+       info = &drawer_info;
+       while (info) {
+               cpumask_clear(&info->mask);
+               info = info->next;
+       }
 }
 
 static union topology_entry *next_tle(union topology_entry *tle)
@@ -141,12 +150,17 @@ static void __tl_to_masks_generic(struct sysinfo_15_1_x *info)
 {
        struct mask_info *socket = &socket_info;
        struct mask_info *book = &book_info;
+       struct mask_info *drawer = &drawer_info;
        union topology_entry *tle, *end;
 
        tle = info->tle;
        end = (union topology_entry *)((unsigned long)info + info->length);
        while (tle < end) {
                switch (tle->nl) {
+               case 3:
+                       drawer = drawer->next;
+                       drawer->id = tle->container.id;
+                       break;
                case 2:
                        book = book->next;
                        book->id = tle->container.id;
@@ -156,7 +170,7 @@ static void __tl_to_masks_generic(struct sysinfo_15_1_x *info)
                        socket->id = tle->container.id;
                        break;
                case 0:
-                       add_cpus_to_mask(&tle->cpu, book, socket, 0);
+                       add_cpus_to_mask(&tle->cpu, drawer, book, socket, 0);
                        break;
                default:
                        clear_masks();
@@ -170,6 +184,7 @@ static void __tl_to_masks_z10(struct sysinfo_15_1_x *info)
 {
        struct mask_info *socket = &socket_info;
        struct mask_info *book = &book_info;
+       struct mask_info *drawer = &drawer_info;
        union topology_entry *tle, *end;
 
        tle = info->tle;
@@ -181,7 +196,7 @@ static void __tl_to_masks_z10(struct sysinfo_15_1_x *info)
                        book->id = tle->container.id;
                        break;
                case 0:
-                       socket = add_cpus_to_mask(&tle->cpu, book, socket, 1);
+                       socket = add_cpus_to_mask(&tle->cpu, drawer, book, socket, 1);
                        break;
                default:
                        clear_masks();
@@ -257,11 +272,13 @@ static void update_cpu_masks(void)
                topo->thread_mask = cpu_thread_map(cpu);
                topo->core_mask = cpu_group_map(&socket_info, cpu);
                topo->book_mask = cpu_group_map(&book_info, cpu);
+               topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
                if (!MACHINE_HAS_TOPOLOGY) {
                        topo->thread_id = cpu;
                        topo->core_id = cpu;
                        topo->socket_id = cpu;
                        topo->book_id = cpu;
+                       topo->drawer_id = cpu;
                }
        }
        numa_update_cpu_topology();
@@ -269,10 +286,7 @@ static void update_cpu_masks(void)
 
 void store_topology(struct sysinfo_15_1_x *info)
 {
-       if (topology_max_mnest >= 3)
-               stsi(info, 15, 1, 3);
-       else
-               stsi(info, 15, 1, 2);
+       stsi(info, 15, 1, min(topology_max_mnest, 4));
 }
 
 int arch_update_cpu_topology(void)
@@ -442,6 +456,11 @@ static const struct cpumask *cpu_book_mask(int cpu)
        return &per_cpu(cpu_topology, cpu).book_mask;
 }
 
+static const struct cpumask *cpu_drawer_mask(int cpu)
+{
+       return &per_cpu(cpu_topology, cpu).drawer_mask;
+}
+
 static int __init early_parse_topology(char *p)
 {
        return kstrtobool(p, &topology_enabled);
@@ -452,6 +471,7 @@ static struct sched_domain_topology_level s390_topology[] = {
        { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
        { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
        { cpu_book_mask, SD_INIT_NAME(BOOK) },
+       { cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
        { cpu_cpu_mask, SD_INIT_NAME(DIE) },
        { NULL, },
 };
@@ -487,6 +507,7 @@ static int __init s390_topology_init(void)
        printk(KERN_CONT " / %d\n", info->mnest);
        alloc_masks(info, &socket_info, 1);
        alloc_masks(info, &book_info, 2);
+       alloc_masks(info, &drawer_info, 3);
        set_sched_topology(s390_topology);
        return 0;
 }
index 828d0695d0d4a703e22164bc2b6576c18bc4032a..fbc394e16b2cc60f7e8a733d4788566357d55cae 100644 (file)
@@ -34,7 +34,8 @@
 #define DIST_CORE      1
 #define DIST_MC                2
 #define DIST_BOOK      3
-#define DIST_MAX       4
+#define DIST_DRAWER    4
+#define DIST_MAX       5
 
 /* Node distance reported to common code */
 #define EMU_NODE_DIST  10
@@ -43,7 +44,7 @@
 #define NODE_ID_FREE   -1
 
 /* Different levels of toptree */
-enum toptree_level {CORE, MC, BOOK, NODE, TOPOLOGY};
+enum toptree_level {CORE, MC, BOOK, DRAWER, NODE, TOPOLOGY};
 
 /* The two toptree IDs */
 enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA};
@@ -113,6 +114,14 @@ static int cores_free(struct toptree *tree)
  * Return node of core
  */
 static struct toptree *core_node(struct toptree *core)
+{
+       return core->parent->parent->parent->parent;
+}
+
+/*
+ * Return drawer of core
+ */
+static struct toptree *core_drawer(struct toptree *core)
 {
        return core->parent->parent->parent;
 }
@@ -138,6 +147,8 @@ static struct toptree *core_mc(struct toptree *core)
  */
 static int dist_core_to_core(struct toptree *core1, struct toptree *core2)
 {
+       if (core_drawer(core1)->id != core_drawer(core2)->id)
+               return DIST_DRAWER;
        if (core_book(core1)->id != core_book(core2)->id)
                return DIST_BOOK;
        if (core_mc(core1)->id != core_mc(core2)->id)
@@ -262,6 +273,8 @@ static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys)
        struct toptree *core;
 
        /* Always try to move perfectly fitting structures first */
+       move_level_to_numa(numa, phys, DRAWER, true);
+       move_level_to_numa(numa, phys, DRAWER, false);
        move_level_to_numa(numa, phys, BOOK, true);
        move_level_to_numa(numa, phys, BOOK, false);
        move_level_to_numa(numa, phys, MC, true);
@@ -335,7 +348,7 @@ static struct toptree *toptree_to_numa(struct toptree *phys)
  */
 static struct toptree *toptree_from_topology(void)
 {
-       struct toptree *phys, *node, *book, *mc, *core;
+       struct toptree *phys, *node, *drawer, *book, *mc, *core;
        struct cpu_topology_s390 *top;
        int cpu;
 
@@ -344,10 +357,11 @@ static struct toptree *toptree_from_topology(void)
        for_each_online_cpu(cpu) {
                top = &per_cpu(cpu_topology, cpu);
                node = toptree_get_child(phys, 0);
-               book = toptree_get_child(node, top->book_id);
+               drawer = toptree_get_child(node, top->drawer_id);
+               book = toptree_get_child(drawer, top->book_id);
                mc = toptree_get_child(book, top->socket_id);
                core = toptree_get_child(mc, top->core_id);
-               if (!book || !mc || !core)
+               if (!drawer || !book || !mc || !core)
                        panic("NUMA emulation could not allocate memory");
                cpumask_set_cpu(cpu, &core->mask);
                toptree_update_mask(mc);
@@ -368,6 +382,7 @@ static void topology_add_core(struct toptree *core)
                cpumask_copy(&top->thread_mask, &core->mask);
                cpumask_copy(&top->core_mask, &core_mc(core)->mask);
                cpumask_copy(&top->book_mask, &core_book(core)->mask);
+               cpumask_copy(&top->drawer_mask, &core_drawer(core)->mask);
                cpumask_set_cpu(cpu, &node_to_cpumask_map[core_node(core)->id]);
                top->node_id = core_node(core)->id;
        }