sched: ems: introduce task band
author Park Bumgyu <bumgyu.park@samsung.com>
Fri, 25 May 2018 05:01:52 +0000 (14:01 +0900)
committer lakkyung.jung <lakkyung.jung@samsung.com>
Mon, 23 Jul 2018 05:59:00 +0000 (14:59 +0900)
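
Introduce the "task band", a per-process task grouping. Tasks of the
same thread group join a band; each band tracks the combined
utilization of its members and derives from it a set of "playable"
cpus. On wakeup, exynos_wakeup_balance() prefers an idle or least
utilized cpu among the band's playable cpus (priority 3, ahead of
global boosting). Band membership is driven by the schedtune cgroup
attribute "band": tasks attached to a cgroup with band=1 join the band
of their process, newly forked threads inherit the band of their group
leader, and tasks leave their band on exit. Band utilization is
refreshed from scheduler_tick() and at wakeup when a member's
utilization has decayed significantly.

Usage sketch (the schedtune mount point may differ):

  echo 1 > /dev/stune/top-app/schedtune.band
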
Change-Id: Ic3fbe3e80c8033f5c1c77f02cb0eeb6ee04d9630
Signed-off-by: Park Bumgyu <bumgyu.park@samsung.com>
include/linux/ems.h
include/linux/sched.h
include/trace/events/ems.h
kernel/exit.c
kernel/sched/core.c
kernel/sched/ems/Makefile
kernel/sched/ems/band.c [new file with mode: 0644]
kernel/sched/ems/core.c
kernel/sched/ems/ems.h
kernel/sched/tune.c

index 27f45da8ca886aa01c9111ceb0eda54e813cbc58..c38d7f206b5c35a172f5e72270fa4424e728020a 100644
@@ -31,6 +31,21 @@ struct gb_qos_request {
        bool active;
 };
 
+#define LEAVE_BAND     0
+
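+/*
+ * A band groups the tasks of one thread group (tgid) and tracks their
+ * combined utilization to derive the cpus its members may run on.
+ */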
+struct task_band {
+       int id;
+       pid_t tgid;
+       raw_spinlock_t lock;
+
+       struct list_head members;
+       int member_count;
+       struct cpumask playable_cpus;
+
+       unsigned long util;
+       unsigned long last_update_time;
+};
+
 #ifdef CONFIG_SCHED_EMS
 /* task util initialization */
 extern void exynos_init_entity_util_avg(struct sched_entity *se);
@@ -59,6 +74,13 @@ extern void gb_qos_update_request(struct gb_qos_request *req, u32 new_value);
 
 /* prefer perf */
 extern void request_kernel_prefer_perf(int grp_idx, int enable);
+
+/* task band */
+extern void sync_band(struct task_struct *p, bool join);
+extern void newbie_join_band(struct task_struct *newbie);
+extern int alloc_bands(void);
+extern void update_band(struct task_struct *p, long old_util);
+extern int band_playing(struct task_struct *p, int cpu);
 #else
 static inline void exynos_init_entity_util_avg(struct sched_entity *se) { }
 
@@ -92,6 +114,18 @@ static inline void update_lbt_overutil(int cpu, unsigned long capacity) { }
 static inline void gb_qos_update_request(struct gb_qos_request *req, u32 new_value) { }
 
 static inline void request_kernel_prefer_perf(int grp_idx, int enable) { }
+
+static inline void sync_band(struct task_struct *p, bool join) { }
+static inline void newbie_join_band(struct task_struct *newbie) { }
+static inline int alloc_bands(void)
+{
+       return 0;
+}
+static inline void update_band(struct task_struct *p, long old_util) { }
+static inline int band_playing(struct task_struct *p, int cpu)
+{
+       return 0;
+}
 #endif /* CONFIG_SCHED_EMS */
 
 #ifdef CONFIG_SIMPLIFIED_ENERGY_MODEL
index 5bc3b25c406422fd0e8be74baaf5665154cc4de7..1cbd64bd7e0b86882121c2f245b21510990cade0 100644
@@ -677,6 +677,11 @@ struct task_struct {
        int victim_flag;
 #endif
 
+#ifdef CONFIG_SCHED_EMS
+       struct task_band *band;
+       struct list_head band_members;
+#endif
+
 #ifdef CONFIG_CGROUP_SCHED
        struct task_group               *sched_task_group;
 #endif
index 7c9535bd0427df2ef07f45ac76d2300b26e6f4dd..c17b9fcbc26bc70b74828e6d9acd1269317eb554 100644
@@ -399,6 +399,54 @@ TRACE_EVENT(ems_lbt_overutilized,
                __entry->capacity, __entry->overutilized)
 );
 
+TRACE_EVENT(ems_update_band,
+
+       TP_PROTO(int band_id, unsigned long band_util, int member_count, unsigned int playable_cpus),
+
+       TP_ARGS(band_id, band_util, member_count, playable_cpus),
+
+       TP_STRUCT__entry(
+               __field( int,           band_id                 )
+               __field( unsigned long, band_util               )
+               __field( int,           member_count            )
+               __field( unsigned int,  playable_cpus           )
+       ),
+
+       TP_fast_assign(
+               __entry->band_id                = band_id;
+               __entry->band_util              = band_util;
+               __entry->member_count           = member_count;
+               __entry->playable_cpus          = playable_cpus;
+       ),
+
+       TP_printk("band_id=%d band_util=%ld member_count=%d playable_cpus=%#x",
+                       __entry->band_id, __entry->band_util, __entry->member_count,
+                       __entry->playable_cpus)
+);
+
+TRACE_EVENT(ems_manage_band,
+
+       TP_PROTO(struct task_struct *p, int band_id, char *event),
+
+       TP_ARGS(p, band_id, event),
+
+       TP_STRUCT__entry(
+               __array( char,          comm,           TASK_COMM_LEN   )
+               __field( pid_t,         pid                             )
+               __field( int,           band_id                         )
+               __array( char,          event,          64              )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+               __entry->pid                    = p->pid;
+               __entry->band_id                = band_id;
+               strlcpy(__entry->event, event, 64);
+       ),
+
+       TP_printk("comm=%s pid=%d band_id=%d event=%s",
+                       __entry->comm, __entry->pid, __entry->band_id, __entry->event)
+);
 #endif /* _TRACE_EMS_H */
 
 /* This part must be outside protection */
index e3a08761eb4074216ecd29e98a2973023c62db1b..a61276b15f0e813093e76093c6fb241f271eeac9 100644
@@ -62,6 +62,8 @@
 #include <linux/random.h>
 #include <linux/rcuwait.h>
 #include <linux/compat.h>
+#include <linux/cpufreq_times.h>
+#include <linux/ems.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -809,6 +811,8 @@ void __noreturn do_exit(long code)
        }
 
        exit_signals(tsk);  /* sets PF_EXITING */
+       sync_band(tsk, LEAVE_BAND);
+
        /*
         * Ensure that all new tsk->pi_lock acquisitions must observe
         * PF_EXITING. Serializes against futex.c:attach_to_pi_owner().
index 130be7688cfb71360f0cc3d38fb7550b3e41e03c..57c88dba204857c70e18430d71ed0418c4788d80 100644
@@ -27,6 +27,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/debug-snapshot.h>
+#include <linux/ems.h>
 
 #include <asm/switch_to.h>
 #include <asm/tlb.h>
@@ -2212,6 +2213,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 #endif
 
        INIT_LIST_HEAD(&p->se.group_node);
+#ifdef CONFIG_SCHED_EMS
+       rcu_assign_pointer(p->band, NULL);
+       INIT_LIST_HEAD(&p->band_members);
+#endif
        walt_init_new_task_load(p);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -2491,6 +2496,8 @@ void wake_up_new_task(struct task_struct *p)
 
        raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
 
+       newbie_join_band(p);
+
        walt_init_new_task_load(p);
 
        p->state = TASK_RUNNING;
@@ -3082,6 +3089,8 @@ void scheduler_tick(void)
        trigger_load_balance(rq);
 #endif
        rq_last_tick_reset(rq);
+
+       update_band(curr, -1);
 }
 
 #ifdef CONFIG_NO_HZ_FULL
@@ -6010,6 +6019,8 @@ void __init sched_init(void)
 
        set_load_weight(&init_task);
 
+       alloc_bands();
+
        /*
         * The boot idle thread does lazy MMU switching as well:
         */
index dbf8b6365d61d0d9fd0bfbe1729feeed1f1231a4..8537cfc6240f65089e5dd09b824e6fca92f60250 100644
@@ -1,4 +1,4 @@
-obj-y += core.o pcf.o global_boost.o lbt.o ontime.o energy.o init_util.o
+obj-y += core.o pcf.o global_boost.o lbt.o ontime.o energy.o init_util.o band.o
 
 obj-$(CONFIG_SCHED_TUNE) += st_addon.o
 obj-$(CONFIG_FREQVAR_TUNE) += freqvar_tune.o
diff --git a/kernel/sched/ems/band.c b/kernel/sched/ems/band.c
new file mode 100644
index 0000000..056a201
--- /dev/null
@@ -0,0 +1,267 @@
+/*
+ * thread group band
+ *
+ * Copyright (C) 2018 Samsung Electronics Co., Ltd
+ * Park Bumgyu <bumgyu.park@samsung.com>
+ */
+
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/ems.h>
+#include <linux/sched/signal.h>
+#include <trace/events/ems.h>
+
+#include "ems.h"
+#include "../sched.h"
+
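+/*
+ * Return the band @p currently belongs to, or NULL. Bands are allocated
+ * once at boot and never freed, so the pointer stays valid after
+ * rcu_read_unlock().
+ */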
+static struct task_band *lookup_band(struct task_struct *p)
+{
+       struct task_band *band;
+
+       rcu_read_lock();
+       band = rcu_dereference(p->band);
+       rcu_read_unlock();
+
+       return band;
+}
+
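+/*
+ * Pick a cpu for @p among its band's playable cpus: an idle cpu if one
+ * exists, otherwise the least utilized one. Returns -1 if @p is not in
+ * a band.
+ */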
+int band_play_cpu(struct task_struct *p)
+{
+       struct task_band *band;
+       int cpu, min_cpu = -1;
+       unsigned long min_util = ULONG_MAX;
+
+       band = lookup_band(p);
+       if (!band)
+               return -1;
+
+       for_each_cpu(cpu, &band->playable_cpus) {
+               if (!cpu_rq(cpu)->nr_running)
+                       return cpu;
+
+               if (cpu_util(cpu) < min_util) {
+                       min_cpu = cpu;
+                       min_util = cpu_util(cpu);
+               }
+       }
+
+       return min_cpu;
+}
+
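+/*
+ * Map the band utilization to a cluster: the LITTLE coregroup (cpu 0)
+ * for small bands, the medium (cpu 4) and big (cpu 6) coregroups as
+ * utilization grows, masked by the cpus currently online.
+ */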
+static void pick_playable_cpus(struct task_band *band)
+{
+       cpumask_clear(&band->playable_cpus);
+
+       /* pick condition should be fixed */
+       if (band->util < 442) // LIT up-threshold * 2
+               cpumask_and(&band->playable_cpus, cpu_online_mask, cpu_coregroup_mask(0));
+       else if (band->util < 1260) // MED up-threshold * 2
+               cpumask_and(&band->playable_cpus, cpu_online_mask, cpu_coregroup_mask(4));
+       else
+               cpumask_and(&band->playable_cpus, cpu_online_mask, cpu_coregroup_mask(6));
+}
+
+static unsigned long out_of_time = 100000000;  /* 100ms */
+
+/* This function must be called with band->lock held */
+static void __update_band(struct task_band *band, unsigned long now)
+{
+       struct task_struct *task;
+       unsigned long util_sum = 0;
+
+       list_for_each_entry(task, &band->members, band_members) {
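+               /* skip members whose load has not been updated for a while (sleepers) */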
+               if (now - task->se.avg.last_update_time > out_of_time)
+                       continue;
+               util_sum += task_util(task);
+       }
+
+       band->util = util_sum;
+       band->last_update_time = now;
+
+       pick_playable_cpus(band);
+
+       trace_ems_update_band(band->id, band->util, band->member_count,
+               *(unsigned int *)cpumask_bits(&band->playable_cpus));
+}
+
+static int update_interval = 20000000; /* 20ms */
+
+void update_band(struct task_struct *p, long old_util)
+{
+       struct task_band *band;
+       unsigned long now = cpu_rq(0)->clock_task;
+
+       band = lookup_band(p);
+       if (!band)
+               return;
+
+       /*
+        * Update the band utilization only when enough time has passed
+        * since the last update, or when the utilization of the task
+        * has changed abruptly.
+        */
+       if (now - band->last_update_time >= update_interval ||
+           (old_util >= 0 && abs(old_util - task_util(p)) > (SCHED_CAPACITY_SCALE >> 4))) {
+               raw_spin_lock(&band->lock);
+               __update_band(band, now);
+               raw_spin_unlock(&band->lock);
+       }
+}
+
+#define MAX_NUM_BAND_ID                20
+static struct task_band *bands[MAX_NUM_BAND_ID];
+
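+/*
+ * band_rwlock serializes joining and leaving bands; each band->lock
+ * protects that band's member list and utilization.
+ */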
+DEFINE_RWLOCK(band_rwlock);
+
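+/* a band is "playing" while it is assigned to a thread group */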
+#define band_playing(band)     (band->tgid >= 0)
+static void join_band(struct task_struct *p)
+{
+       struct task_band *band;
+       int pos, empty = -1;
+       char event[30] = "join band";
+
+       if (lookup_band(p))
+               return;
+
+       write_lock(&band_rwlock);
+
+       /*
+        * Find the band assigned to the task's thread group in the
+        * band pool. If no band is assigned to the thread group, the
+        * task is the first one in the thread group to join a band.
+        * In this case, assign the first empty band in the band pool
+        * to the thread group.
+        */
+       for (pos = 0; pos < MAX_NUM_BAND_ID; pos++) {
+               band = bands[pos];
+
+               if (!band_playing(band)) {
+                       if (empty < 0)
+                               empty = pos;
+                       continue;
+               }
+
+               if (p->tgid == band->tgid)
+                       break;
+       }
+
+       /* no matching band; organize a new one in the first empty slot */
+       if (pos == MAX_NUM_BAND_ID) {
+               /* no empty slot left: the band pool is exhausted */
+               if (empty < 0) {
+                       write_unlock(&band_rwlock);
+                       return;
+               }
+               band = bands[empty];
+       }
+
+       raw_spin_lock(&band->lock);
+       if (!band_playing(band))
+               band->tgid = p->tgid;
+       list_add(&p->band_members, &band->members);
+       rcu_assign_pointer(p->band, band);
+       band->member_count++;
+       trace_ems_manage_band(p, band->id, event);
+
+       __update_band(band, cpu_rq(0)->clock_task);
+       raw_spin_unlock(&band->lock);
+
+       write_unlock(&band_rwlock);
+}
+
+static void leave_band(struct task_struct *p)
+{
+       struct task_band *band;
+       char event[30] = "leave band";
+
+       if (!lookup_band(p))
+               return;
+
+       write_lock(&band_rwlock);
+       band = p->band;
+
+       raw_spin_lock(&band->lock);
+       list_del_init(&p->band_members);
+       rcu_assign_pointer(p->band, NULL);
+       band->member_count--;
+       trace_ems_manage_band(p, band->id, event);
+
+       /* the last member has left, so the band breaks up */
+       if (list_empty(&band->members)) {
+               band->tgid = -1;
+               cpumask_clear(&band->playable_cpus);
+       }
+
+       __update_band(band, cpu_rq(0)->clock_task);
+       raw_spin_unlock(&band->lock);
+
+       write_unlock(&band_rwlock);
+}
+
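+/*
+ * Make @p's band membership match @join: join the band of @p's thread
+ * group, or leave the current one (LEAVE_BAND). Called from schedtune
+ * attach and from task exit.
+ */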
+void sync_band(struct task_struct *p, bool join)
+{
+       if (join)
+               join_band(p);
+       else
+               leave_band(p);
+}
+
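+/*
+ * Called from wake_up_new_task(): a newly forked thread inherits the
+ * band of its group leader, if the leader is in one.
+ */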
+void newbie_join_band(struct task_struct *newbie)
+{
+       unsigned long flags;
+       struct task_band *band;
+       struct task_struct *leader = newbie->group_leader;
+       char event[30] = "newbie join band";
+
+       if (thread_group_leader(newbie))
+               return;
+
+       write_lock_irqsave(&band_rwlock, flags);
+
+       band = lookup_band(leader);
+       if (!band || newbie->band) {
+               write_unlock_irqrestore(&band_rwlock, flags);
+               return;
+       }
+
+       raw_spin_lock(&band->lock);
+       list_add(&newbie->band_members, &band->members);
+       rcu_assign_pointer(newbie->band, band);
+       band->member_count++;
+       trace_ems_manage_band(newbie, band->id, event);
+       raw_spin_unlock(&band->lock);
+
+       write_unlock_irqrestore(&band_rwlock, flags);
+}
+
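+/* Pre-allocate the fixed pool of bands; called once from sched_init(). */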
+int alloc_bands(void)
+{
+       struct task_band *band;
+       int pos, ret, i;
+
+       for (pos = 0; pos < MAX_NUM_BAND_ID; pos++) {
+               band = kzalloc(sizeof(*band), GFP_KERNEL);
+               if (!band) {
+                       ret = -ENOMEM;
+                       goto fail;
+               }
+
+               band->id = pos;
+               band->tgid = -1;
+               raw_spin_lock_init(&band->lock);
+               INIT_LIST_HEAD(&band->members);
+               band->member_count = 0;
+               cpumask_clear(&band->playable_cpus);
+
+               bands[pos] = band;
+       }
+
+       return 0;
+
+fail:
+       for (i = pos - 1; i >= 0; i--) {
+               kfree(bands[i]);
+               bands[i] = NULL;
+       }
+
+       return ret;
+}
index 0366fc59a2f4d06d3501a0df31cfb380f9a00b7d..fa23b26716f7211b509053ec581f2cd33a8b274e 100644
@@ -198,8 +198,13 @@ int exynos_wakeup_balance(struct task_struct *p, int prev_cpu, int sd_flag, int
         * the utilization to determine which cpu the task will be assigned to.
         * Exclude new task.
         */
-       if (!(sd_flag & SD_BALANCE_FORK))
+       if (!(sd_flag & SD_BALANCE_FORK)) {
+               unsigned long old_util = task_util(p);
+
                sync_entity_load_avg(&p->se);
+               /* update the band if a large amount of task util has decayed */
+               update_band(p, old_util);
+       }
 
        /*
         * Priority 1 : ontime task
@@ -236,7 +241,26 @@ int exynos_wakeup_balance(struct task_struct *p, int prev_cpu, int sd_flag, int
        }
 
        /*
-        * Priority 3 : global boosting
+        * Priority 3 : task band
+        *
+        * The tasks in a process are likely to interact with each other and
+        * to share resources, and their operations tend to be sequential. If
+        * these tasks are packed onto a specific cpu or cluster, the latency
+        * of their interaction decreases and the reusability of the cache
+        * increases, thereby improving performance.
+        *
+        * The "task band" is a function that groups tasks on a per-process
+        * basis and assigns them to a specific cpu or cluster. If the "band"
+        * attribute of a schedtune cgroup is set to '1', task band operates
+        * on that cgroup.
+        */
+       target_cpu = band_play_cpu(p);
+       if (cpu_selected(target_cpu)) {
+               strcpy(state, "task band");
+               goto out;
+       }
+
+       /*
+        * Priority 4 : global boosting
         *
         * Global boost is a function that preferentially assigns all tasks in the
         * system to the performance cpu. Unlike prefer-perf, which targets only
@@ -256,7 +280,7 @@ int exynos_wakeup_balance(struct task_struct *p, int prev_cpu, int sd_flag, int
        }
 
        /*
-        * Priority 4 : group balancing
+        * Priority 5 : group balancing
         */
        target_cpu = group_balancing(p);
        if (cpu_selected(target_cpu)) {
@@ -265,7 +289,7 @@ int exynos_wakeup_balance(struct task_struct *p, int prev_cpu, int sd_flag, int
        }
 
        /*
-        * Priority 5 : prefer-idle
+        * Priority 6 : prefer-idle
         *
         * Prefer-idle is a function that operates on cgroup basis managed by
         * schedtune. When prefer-idle is set to 1, the tasks in the group are
@@ -281,7 +305,7 @@ int exynos_wakeup_balance(struct task_struct *p, int prev_cpu, int sd_flag, int
        }
 
        /*
-        * Priority 6 : energy cpu
+        * Priority 7 : energy cpu
         *
         * A scheduling scheme based on cpu energy, find the least power consumption
         * cpu referring energy table when assigning task.
@@ -293,7 +317,7 @@ int exynos_wakeup_balance(struct task_struct *p, int prev_cpu, int sd_flag, int
        }
 
        /*
-        * Priority 7 : proper cpu
+        * Priority 8 : proper cpu
         */
        target_cpu = select_proper_cpu(p, prev_cpu);
        if (cpu_selected(target_cpu))
index 728d68c0adec6a26705276cd1642d68b5f5d6d3c..1ca906233d5ff99b3402d2f1c015361ecffb0a1b 100644
@@ -23,6 +23,7 @@ extern int global_boosting(struct task_struct *p);
 extern int global_boosted(void);
 extern bool lbt_bring_overutilize(int cpu, struct task_struct *p);
 extern int select_energy_cpu(struct task_struct *p, int prev_cpu, int sd_flag, int sync);
+extern int band_play_cpu(struct task_struct *p);
 
 #ifdef CONFIG_SCHED_TUNE
 extern int prefer_perf_cpu(struct task_struct *p);
index d388f71fa1dbee0b1b390839398218db875d9c6a..d341e70e01e634dbd5a72d3082668c323524e339 100644
@@ -82,6 +82,9 @@ struct schedtune {
 
        /* SchedTune util-est */
        int util_est_en;
+
+       /* Hint to group tasks by process */
+       int band;
 };
 
 static inline struct schedtune *css_st(struct cgroup_subsys_state *css)
@@ -113,6 +116,7 @@ root_schedtune = {
        .boost  = 0,
        .prefer_idle = 0,
        .prefer_perf = 0,
+       .band = 0,
 };
 
 /*
@@ -360,6 +364,15 @@ void schedtune_cancel_attach(struct cgroup_taskset *tset)
        WARN(1, "SchedTune cancel attach not implemented");
 }
 
+static void schedtune_attach(struct cgroup_taskset *tset)
+{
+       struct task_struct *task;
+       struct cgroup_subsys_state *css;
+
+       cgroup_taskset_for_each(task, css, tset)
+               sync_band(task, css_st(css)->band);
+}
+
 /*
  * NOTE: This function must be called while holding the lock on the CPU RQ
  */
@@ -797,6 +810,24 @@ util_est_en_write(struct cgroup_subsys_state *css, struct cftype *cft,
        return 0;
 }
 
+static u64
+band_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+       struct schedtune *st = css_st(css);
+
+       return st->band;
+}
+
+static int
+band_write(struct cgroup_subsys_state *css, struct cftype *cft,
+           u64 band)
+{
+       struct schedtune *st = css_st(css);
+       st->band = band;
+
+       return 0;
+}
+
 static u64
 prefer_idle_read(struct cgroup_subsys_state *css, struct cftype *cft)
 {
@@ -874,6 +905,11 @@ static struct cftype files[] = {
                .read_u64 = prefer_perf_read,
                .write_u64 = prefer_perf_write,
        },
+       {
+               .name = "band",
+               .read_u64 = band_read,
+               .write_u64 = band_write,
+       },
        {
                .name = "gb_util",
                .read_u64 = gb_util_read,
@@ -1023,6 +1059,7 @@ struct cgroup_subsys schedtune_cgrp_subsys = {
        .css_free       = schedtune_css_free,
        .can_attach     = schedtune_can_attach,
        .cancel_attach  = schedtune_cancel_attach,
+       .attach         = schedtune_attach,
        .legacy_cftypes = files,
        .early_init     = 1,
 };