FROMLIST: sched/core: Disable SD_PREFER_SIBLING on asymmetric cpu capacity domains
Author:     Morten Rasmussen <morten.rasmussen@arm.com>
AuthorDate: Thu, 28 Jun 2018 16:31:25 +0000 (17:31 +0100)
Commit:     Chris Redpath <chris.redpath@arm.com>
CommitDate: Wed, 18 Jul 2018 09:42:47 +0000 (10:42 +0100)
The 'prefer sibling' sched_domain flag is intended to encourage
spreading tasks to sibling sched_domains to take advantage of more
caches and cores on SMT systems. It has recently been changed to be set
on all non-NUMA topology levels. However, spreading across domains with
cpu capacity asymmetry isn't desirable, e.g. spreading from high
capacity to low capacity cpus even if the high capacity cpus aren't
overutilized might give access to more cache, but the cpus will be
slower, possibly leading to worse overall throughput.

To prevent this, we need to remove SD_PREFER_SIBLING on the sched_domain
level immediately below SD_ASYM_CPUCAPACITY.
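
The effect on the hierarchy can be illustrated with a minimal
userspace sketch. The types, flag values and the helper name
drop_prefer_sibling() below are stand-ins for illustration only, not
the kernel's; the real change is in the hunks that follow:

	#include <stdio.h>

	/* Stand-in flag values for illustration only. */
	#define SD_PREFER_SIBLING   0x1
	#define SD_ASYM_CPUCAPACITY 0x2

	/* Simplified stand-in for the kernel's struct sched_domain. */
	struct sched_domain {
		unsigned long flags;
		struct sched_domain *child;
	};

	/*
	 * Sketch of the new logic in sd_init(): if this level spans
	 * cpus of different capacities, stop the level below it from
	 * spreading tasks to sibling groups.
	 */
	static void drop_prefer_sibling(struct sched_domain *sd)
	{
		if ((sd->flags & SD_ASYM_CPUCAPACITY) && sd->child)
			sd->child->flags &= ~SD_PREFER_SIBLING;
	}

	int main(void)
	{
		struct sched_domain mc  = { SD_PREFER_SIBLING, NULL };
		struct sched_domain die = { SD_ASYM_CPUCAPACITY, &mc };

		drop_prefer_sibling(&die);
		/* Prints "no": the MC level no longer prefers siblings. */
		printf("MC keeps SD_PREFER_SIBLING: %s\n",
		       (mc.flags & SD_PREFER_SIBLING) ? "yes" : "no");
		return 0;
	}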

Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com>
[from https://lore.kernel.org/lkml/1530699470-29808-13-git-send-email-morten.rasmussen@arm.com/]
Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Chris Redpath <chris.redpath@arm.com>
Change-Id: I944a003c7b685132c57a90a8aaf85509196679e6

diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 678f87b2c3cca8c5ed8ba83e62ca1a13bc26b5d4..8a9be7d4d28637a777aa1ae1b8aaaa174c8049d0 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1287,7 +1287,7 @@ sd_init(struct sched_domain_topology_level *tl,
                                        | 0*SD_SHARE_CPUCAPACITY
                                        | 0*SD_SHARE_PKG_RESOURCES
                                        | 0*SD_SERIALIZE
-                                       | 0*SD_PREFER_SIBLING
+                                       | 1*SD_PREFER_SIBLING
                                        | 0*SD_NUMA
                                        | sd_flags
                                        ,
@@ -1333,12 +1333,17 @@ sd_init(struct sched_domain_topology_level *tl,
        if (sd->flags & SD_ASYM_CPUCAPACITY) {
                struct sched_domain *t = sd;
 
+               /*
+                * Don't attempt to spread across cpus of different capacities.
+                */
+               if (sd->child)
+                       sd->child->flags &= ~SD_PREFER_SIBLING;
+
                for_each_lower_domain(t)
                        t->flags |= SD_BALANCE_WAKE;
        }
 
        if (sd->flags & SD_SHARE_CPUCAPACITY) {
-               sd->flags |= SD_PREFER_SIBLING;
                sd->imbalance_pct = 110;
                sd->smt_gain = 1178; /* ~15% */
 
@@ -1353,6 +1358,7 @@ sd_init(struct sched_domain_topology_level *tl,
                sd->busy_idx = 3;
                sd->idle_idx = 2;
 
+               sd->flags &= ~SD_PREFER_SIBLING;
                sd->flags |= SD_SERIALIZE;
                if (sched_domains_numa_distance[tl->numa_level] > RECLAIM_DISTANCE) {
                        sd->flags &= ~(SD_BALANCE_EXEC |
@@ -1362,7 +1368,6 @@ sd_init(struct sched_domain_topology_level *tl,
 
 #endif
        } else {
-               sd->flags |= SD_PREFER_SIBLING;
                sd->cache_nice_tries = 1;
                sd->busy_idx = 2;
                sd->idle_idx = 1;