From 0a548a53bc72be0f4269c1f819f844ba8d23e4e6 Mon Sep 17 00:00:00 2001 From: Park Bumgyu Date: Fri, 25 May 2018 14:01:52 +0900 Subject: [PATCH] sched: ems: introduce task band Change-Id: Ic3fbe3e80c8033f5c1c77f02cb0eeb6ee04d9630 Signed-off-by: Park Bumgyu --- include/linux/ems.h | 34 +++++ include/linux/sched.h | 5 + include/trace/events/ems.h | 48 +++++++ kernel/exit.c | 4 + kernel/sched/core.c | 11 ++ kernel/sched/ems/Makefile | 2 +- kernel/sched/ems/band.c | 267 +++++++++++++++++++++++++++++++++++++ kernel/sched/ems/core.c | 36 ++++- kernel/sched/ems/ems.h | 1 + kernel/sched/tune.c | 37 +++++ 10 files changed, 438 insertions(+), 7 deletions(-) create mode 100644 kernel/sched/ems/band.c diff --git a/include/linux/ems.h b/include/linux/ems.h index 27f45da8ca88..c38d7f206b5c 100644 --- a/include/linux/ems.h +++ b/include/linux/ems.h @@ -31,6 +31,21 @@ struct gb_qos_request { bool active; }; +#define LEAVE_BAND 0 + +struct task_band { + int id; + pid_t tgid; + raw_spinlock_t lock; + + struct list_head members; + int member_count; + struct cpumask playable_cpus; + + unsigned long util; + unsigned long last_update_time; +}; + #ifdef CONFIG_SCHED_EMS /* task util initialization */ extern void exynos_init_entity_util_avg(struct sched_entity *se); @@ -59,6 +74,13 @@ extern void gb_qos_update_request(struct gb_qos_request *req, u32 new_value); /* prefer perf */ extern void request_kernel_prefer_perf(int grp_idx, int enable); + +/* task band */ +extern void sync_band(struct task_struct *p, bool join); +extern void newbie_join_band(struct task_struct *newbie); +extern int alloc_bands(void); +extern void update_band(struct task_struct *p, long old_util); +extern int band_playing(struct task_struct *p, int cpu); #else static inline void exynos_init_entity_util_avg(struct sched_entity *se) { } @@ -92,6 +114,18 @@ static inline void update_lbt_overutil(int cpu, unsigned long capacity) { } static inline void gb_qos_update_request(struct gb_qos_request *req, u32 new_value) { } static inline void request_kernel_prefer_perf(int grp_idx, int enable) { } + +static inline void sync_band(struct task_struct *p, bool join) { } +static inline void newbie_join_band(struct task_struct *newbie) { } +static inline int alloc_bands(void) +{ + return 0; +} +static inline void update_band(struct task_struct *p, long old_util) { } +static inline int band_playing(struct task_struct *p, int cpu) +{ + return 0; +} #endif /* CONFIG_SCHED_EMS */ #ifdef CONFIG_SIMPLIFIED_ENERGY_MODEL diff --git a/include/linux/sched.h b/include/linux/sched.h index 5bc3b25c4064..1cbd64bd7e0b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -677,6 +677,11 @@ struct task_struct { int victim_flag; #endif +#ifdef CONFIG_SCHED_EMS + struct task_band *band; + struct list_head band_members; +#endif + #ifdef CONFIG_CGROUP_SCHED struct task_group *sched_task_group; #endif diff --git a/include/trace/events/ems.h b/include/trace/events/ems.h index 7c9535bd0427..c17b9fcbc26b 100644 --- a/include/trace/events/ems.h +++ b/include/trace/events/ems.h @@ -399,6 +399,54 @@ TRACE_EVENT(ems_lbt_overutilized, __entry->capacity, __entry->overutilized) ); +TRACE_EVENT(ems_update_band, + + TP_PROTO(int band_id, unsigned long band_util, int member_count, unsigned int playable_cpus), + + TP_ARGS(band_id, band_util, member_count, playable_cpus), + + TP_STRUCT__entry( + __field( int, band_id ) + __field( unsigned long, band_util ) + __field( int, member_count ) + __field( unsigned int, playable_cpus ) + ), + + TP_fast_assign( + __entry->band_id = band_id; 
+ __entry->band_util = band_util; + __entry->member_count = member_count; + __entry->playable_cpus = playable_cpus; + ), + + TP_printk("band_id=%d band_util=%ld member_count=%d playable_cpus=%#x", + __entry->band_id, __entry->band_util, __entry->member_count, + __entry->playable_cpus) +); + +TRACE_EVENT(ems_manage_band, + + TP_PROTO(struct task_struct *p, int band_id, char *event), + + TP_ARGS(p, band_id, event), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, band_id ) + __array( char, event, 64 ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->band_id = band_id; + strncpy(__entry->event, event, 64); + ), + + TP_printk("comm=%s pid=%d band_id=%d event=%s", + __entry->comm, __entry->pid, __entry->band_id, __entry->event) +); #endif /* _TRACE_EMS_H */ /* This part must be outside protection */ diff --git a/kernel/exit.c b/kernel/exit.c index e3a08761eb40..a61276b15f0e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -62,6 +62,8 @@ #include #include #include +#include +#include #include #include @@ -809,6 +811,8 @@ void __noreturn do_exit(long code) } exit_signals(tsk); /* sets PF_EXITING */ + sync_band(tsk, LEAVE_BAND); + /* * Ensure that all new tsk->pi_lock acquisitions must observe * PF_EXITING. Serializes against futex.c:attach_to_pi_owner(). diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 130be7688cfb..57c88dba2048 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -2212,6 +2213,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) #endif INIT_LIST_HEAD(&p->se.group_node); +#ifdef CONFIG_SCHED_EMS + rcu_assign_pointer(p->band, NULL); + INIT_LIST_HEAD(&p->band_members); +#endif walt_init_new_task_load(p); #ifdef CONFIG_FAIR_GROUP_SCHED @@ -2491,6 +2496,8 @@ void wake_up_new_task(struct task_struct *p) raw_spin_lock_irqsave(&p->pi_lock, rf.flags); + newbie_join_band(p); + walt_init_new_task_load(p); p->state = TASK_RUNNING; @@ -3082,6 +3089,8 @@ void scheduler_tick(void) trigger_load_balance(rq); #endif rq_last_tick_reset(rq); + + update_band(curr, -1); } #ifdef CONFIG_NO_HZ_FULL @@ -6010,6 +6019,8 @@ void __init sched_init(void) set_load_weight(&init_task); + alloc_bands(); + /* * The boot idle thread does lazy MMU switching as well: */ diff --git a/kernel/sched/ems/Makefile b/kernel/sched/ems/Makefile index dbf8b6365d61..8537cfc6240f 100644 --- a/kernel/sched/ems/Makefile +++ b/kernel/sched/ems/Makefile @@ -1,4 +1,4 @@ -obj-y += core.o pcf.o global_boost.o lbt.o ontime.o energy.o init_util.o +obj-y += core.o pcf.o global_boost.o lbt.o ontime.o energy.o init_util.o band.o obj-$(CONFIG_SCHED_TUNE) += st_addon.o obj-$(CONFIG_FREQVAR_TUNE) += freqvar_tune.o diff --git a/kernel/sched/ems/band.c b/kernel/sched/ems/band.c new file mode 100644 index 000000000000..056a20156a6f --- /dev/null +++ b/kernel/sched/ems/band.c @@ -0,0 +1,267 @@ +/* + * thread group band + * + * Copyright (C) 2018 Samsung Electronics Co., Ltd + * Park Bumgyu + */ + +#include +#include +#include +#include +#include +#include + +#include "ems.h" +#include "../sched.h" + +static struct task_band *lookup_band(struct task_struct *p) +{ + struct task_band *band; + + rcu_read_lock(); + band = rcu_dereference(p->band); + rcu_read_unlock(); + + if (!band) + return NULL; + + return band; +} + +int band_play_cpu(struct task_struct *p) +{ + struct task_band *band; + int cpu, min_cpu = -1; + 
unsigned long min_util = ULONG_MAX;
+
+ band = lookup_band(p);
+ if (!band)
+ return -1;
+
+ for_each_cpu(cpu, &band->playable_cpus) {
+ if (!cpu_rq(cpu)->nr_running)
+ return cpu;
+
+ if (cpu_util(cpu) < min_util) {
+ min_cpu = cpu;
+ min_util = cpu_util(cpu);
+ }
+ }
+
+ return min_cpu;
+}
+
+static void pick_playable_cpus(struct task_band *band)
+{
+ cpumask_clear(&band->playable_cpus);
+
+ /* pick condition should be fixed */
+ if (band->util < 442) // LIT up-threshold * 2
+ cpumask_and(&band->playable_cpus, cpu_online_mask, cpu_coregroup_mask(0));
+ else if (band->util < 1260) // MED up-threshold * 2
+ cpumask_and(&band->playable_cpus, cpu_online_mask, cpu_coregroup_mask(4));
+ else
+ cpumask_and(&band->playable_cpus, cpu_online_mask, cpu_coregroup_mask(6));
+}
+
+static unsigned long out_of_time = 100000000; /* 100ms */
+
+/* This function must be called with band->lock held */
+static void __update_band(struct task_band *band, unsigned long now)
+{
+ struct task_struct *task;
+ unsigned long util_sum = 0;
+
+ list_for_each_entry(task, &band->members, band_members) {
+ if (now - task->se.avg.last_update_time > out_of_time)
+ continue;
+ util_sum += task_util(task);
+ }
+
+ band->util = util_sum;
+ band->last_update_time = now;
+
+ pick_playable_cpus(band);
+
+ task = list_first_entry(&band->members, struct task_struct, band_members);
+ trace_ems_update_band(band->id, band->util, band->member_count,
+ *(unsigned int *)cpumask_bits(&band->playable_cpus));
+}
+
+static int update_interval = 20000000; /* 20ms */
+
+void update_band(struct task_struct *p, long old_util)
+{
+ struct task_band *band;
+ unsigned long now = cpu_rq(0)->clock_task;
+
+ band = lookup_band(p);
+ if (!band)
+ return;
+
+ /*
+ * Update the band utilization only when enough time has passed since
+ * the last update, or when the utilization of the task has changed
+ * abruptly.
+ */
+ if (now - band->last_update_time >= update_interval ||
+ (old_util >= 0 && abs(old_util - task_util(p)) > (SCHED_CAPACITY_SCALE >> 4))) {
+ raw_spin_lock(&band->lock);
+ __update_band(band, now);
+ raw_spin_unlock(&band->lock);
+ }
+}
+
+#define MAX_NUM_BAND_ID 20
+static struct task_band *bands[MAX_NUM_BAND_ID];
+
+DEFINE_RWLOCK(band_rwlock);
+
+#define band_playing(band) (band->tgid >= 0)
+static void join_band(struct task_struct *p)
+{
+ struct task_band *band;
+ int pos, empty = -1;
+ char event[30] = "join band";
+
+ if (lookup_band(p))
+ return;
+
+ write_lock(&band_rwlock);
+
+ /*
+ * Find the band assigned to the task's thread group in the
+ * band pool. If no band is assigned to the thread group, the
+ * task is the first one in the thread group to join a band.
+ * In this case, assign the first empty band in the band pool
+ * to the thread group.
+ */ + for (pos = 0; pos < MAX_NUM_BAND_ID; pos++) { + band = bands[pos]; + + if (!band_playing(band)) { + if (empty < 0) + empty = pos; + continue; + } + + if (p->tgid == band->tgid) + break; + } + + /* failed to find band, organize the new band */ + if (pos == MAX_NUM_BAND_ID) + band = bands[empty]; + + raw_spin_lock(&band->lock); + if (!band_playing(band)) + band->tgid = p->tgid; + list_add(&p->band_members, &band->members); + rcu_assign_pointer(p->band, band); + band->member_count++; + trace_ems_manage_band(p, band->id, event); + + __update_band(band, cpu_rq(0)->clock_task); + raw_spin_unlock(&band->lock); + + write_unlock(&band_rwlock); +} + +static void leave_band(struct task_struct *p) +{ + struct task_band *band; + char event[30] = "leave band"; + + if (!lookup_band(p)) + return; + + write_lock(&band_rwlock); + band = p->band; + + raw_spin_lock(&band->lock); + list_del_init(&p->band_members); + rcu_assign_pointer(p->band, NULL); + band->member_count--; + trace_ems_manage_band(p, band->id, event); + + /* last member of band, band split up */ + if (list_empty(&band->members)) { + band->tgid = -1; + cpumask_clear(&band->playable_cpus); + } + + __update_band(band, cpu_rq(0)->clock_task); + raw_spin_unlock(&band->lock); + + write_unlock(&band_rwlock); +} + +void sync_band(struct task_struct *p, bool join) +{ + if (join) + join_band(p); + else + leave_band(p); +} + +void newbie_join_band(struct task_struct *newbie) +{ + unsigned long flags; + struct task_band *band; + struct task_struct *leader = newbie->group_leader; + char event[30] = "newbie join band"; + + if (thread_group_leader(newbie)) + return; + + write_lock_irqsave(&band_rwlock, flags); + + band = lookup_band(leader); + if (!band || newbie->band) { + write_unlock_irqrestore(&band_rwlock, flags); + return; + } + + raw_spin_lock(&band->lock); + list_add(&newbie->band_members, &band->members); + rcu_assign_pointer(newbie->band, band); + band->member_count++; + trace_ems_manage_band(newbie, band->id, event); + raw_spin_unlock(&band->lock); + + write_unlock_irqrestore(&band_rwlock, flags); +} + +int alloc_bands(void) +{ + struct task_band *band; + int pos, ret, i; + + for (pos = 0; pos < MAX_NUM_BAND_ID; pos++) { + band = kzalloc(sizeof(*band), GFP_KERNEL); + if (!band) { + ret = -ENOMEM; + goto fail; + } + + band->id = pos; + band->tgid = -1; + raw_spin_lock_init(&band->lock); + INIT_LIST_HEAD(&band->members); + band->member_count = 0; + cpumask_clear(&band->playable_cpus); + + bands[pos] = band; + } + + return 0; + +fail: + for (i = pos - 1; i >= 0; i--) { + kfree(bands[i]); + bands[i] = NULL; + } + + return ret; +} diff --git a/kernel/sched/ems/core.c b/kernel/sched/ems/core.c index 0366fc59a2f4..fa23b26716f7 100644 --- a/kernel/sched/ems/core.c +++ b/kernel/sched/ems/core.c @@ -198,8 +198,13 @@ int exynos_wakeup_balance(struct task_struct *p, int prev_cpu, int sd_flag, int * the utilization to determine which cpu the task will be assigned to. * Exclude new task. */ - if (!(sd_flag & SD_BALANCE_FORK)) + if (!(sd_flag & SD_BALANCE_FORK)) { + unsigned long old_util = task_util(p); + sync_entity_load_avg(&p->se); + /* update the band if a large amount of task util is decayed */ + update_band(p, old_util); + } /* * Priority 1 : ontime task @@ -236,7 +241,26 @@ int exynos_wakeup_balance(struct task_struct *p, int prev_cpu, int sd_flag, int } /* - * Priority 3 : global boosting + * Priority 3 : task band + * + * The tasks in a process are likely to interact, and its operations are + * sequential and share resources. 
+ * Therefore, if these tasks are packed and assigned to a specific cpu or
+ * cluster, the latency for interaction decreases and the reusability of
+ * the cache increases, thereby improving performance.
+ *
+ * The "task band" is a function that groups tasks on a per-process basis
+ * and assigns them to a specific cpu or cluster. If the attribute "band"
+ * of the schedtune cgroup is set to '1', task band operates on this cgroup.
+ */
+ target_cpu = band_play_cpu(p);
+ if (cpu_selected(target_cpu)) {
+ strcpy(state, "task band");
+ goto out;
+ }
+
+ /*
+ * Priority 4 : global boosting
 *
 * Global boost is a function that preferentially assigns all tasks in the
 * system to the performance cpu. Unlike prefer-perf, which targets only
@@ -256,7 +280,7 @@ int exynos_wakeup_balance(struct task_struct *p, int prev_cpu, int sd_flag, int
 }
 /*
- * Priority 4 : group balancing
+ * Priority 5 : group balancing
 */
 target_cpu = group_balancing(p);
 if (cpu_selected(target_cpu)) {
@@ -265,7 +289,7 @@ int exynos_wakeup_balance(struct task_struct *p, int prev_cpu, int sd_flag, int
 }
 /*
- * Priority 5 : prefer-idle
+ * Priority 6 : prefer-idle
 *
 * Prefer-idle is a function that operates on cgroup basis managed by
 * schedtune. When perfer-idle is set to 1, the tasks in the group are
@@ -281,7 +305,7 @@ int exynos_wakeup_balance(struct task_struct *p, int prev_cpu, int sd_flag, int
 }
 /*
- * Priority 6 : energy cpu
+ * Priority 7 : energy cpu
 *
 * A scheduling scheme based on cpu energy, find the least power consumption
 * cpu referring energy table when assigning task.
@@ -293,7 +317,7 @@ int exynos_wakeup_balance(struct task_struct *p, int prev_cpu, int sd_flag, int
 }
 /*
- * Priority 7 : proper cpu
+ * Priority 8 : proper cpu
 */
 target_cpu = select_proper_cpu(p, prev_cpu);
 if (cpu_selected(target_cpu))
diff --git a/kernel/sched/ems/ems.h b/kernel/sched/ems/ems.h
index 728d68c0adec..1ca906233d5f 100644
--- a/kernel/sched/ems/ems.h
+++ b/kernel/sched/ems/ems.h
@@ -23,6 +23,7 @@ extern int global_boosting(struct task_struct *p);
 extern int global_boosted(void);
 extern bool lbt_bring_overutilize(int cpu, struct task_struct *p);
 extern int select_energy_cpu(struct task_struct *p, int prev_cpu, int sd_flag, int sync);
+extern int band_play_cpu(struct task_struct *p);
 #ifdef CONFIG_SCHED_TUNE
 extern int prefer_perf_cpu(struct task_struct *p);
diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c
index d388f71fa1db..d341e70e01e6 100644
--- a/kernel/sched/tune.c
+++ b/kernel/sched/tune.c
@@ -82,6 +82,9 @@ struct schedtune {
 /* SchedTune util-est */
 int util_est_en;
+
+ /* Hint to group tasks by process */
+ int band;
 };
 static inline struct schedtune *css_st(struct cgroup_subsys_state *css)
@@ -113,6 +116,7 @@ root_schedtune = {
 .boost = 0,
 .prefer_idle = 0,
 .prefer_perf = 0,
+ .band = 0,
 };
 /*
@@ -360,6 +364,15 @@ void schedtune_cancel_attach(struct cgroup_taskset *tset)
 WARN(1, "SchedTune cancel attach not implemented");
 }
+static void schedtune_attach(struct cgroup_taskset *tset)
+{
+ struct task_struct *task;
+ struct cgroup_subsys_state *css;
+
+ cgroup_taskset_for_each(task, css, tset)
+ sync_band(task, css_st(css)->band);
+}
+
 /*
 * NOTE: This function must be called while holding the lock on the CPU RQ
 */
@@ -797,6 +810,24 @@ util_est_en_write(struct cgroup_subsys_state *css, struct cftype *cft,
 return 0;
 }
+static u64
+band_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+ struct schedtune *st = css_st(css);
+
+ return st->band;
+}
+
+static int
+band_write(struct cgroup_subsys_state *css, struct 
cftype *cft, + u64 band) +{ + struct schedtune *st = css_st(css); + st->band = band; + + return 0; +} + static u64 prefer_idle_read(struct cgroup_subsys_state *css, struct cftype *cft) { @@ -874,6 +905,11 @@ static struct cftype files[] = { .read_u64 = prefer_perf_read, .write_u64 = prefer_perf_write, }, + { + .name = "band", + .read_u64 = band_read, + .write_u64 = band_write, + }, { .name = "gb_util", .read_u64 = gb_util_read, @@ -1023,6 +1059,7 @@ struct cgroup_subsys schedtune_cgrp_subsys = { .css_free = schedtune_css_free, .can_attach = schedtune_can_attach, .cancel_attach = schedtune_cancel_attach, + .attach = schedtune_attach, .legacy_cftypes = files, .early_init = 1, }; -- 2.20.1
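
Usage sketch (illustrative only, not part of the patch): the band grouping added above is controlled from userspace through the new schedtune cgroup attribute; writing '1' to it makes schedtune_attach() call sync_band() for every task attached to that group. A minimal C example follows. The /dev/stune mount point and the "top-app" group name are assumptions about an Android-style stune hierarchy and may differ on the target device.

/* enable_band.c - illustrative sketch; the cgroup path is an assumption */
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	/* default path assumes an Android-style stune hierarchy */
	const char *path = (argc > 1) ? argv[1]
				      : "/dev/stune/top-app/schedtune.band";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return EXIT_FAILURE;
	}

	/* '1' asks EMS to group this cgroup's tasks into per-process bands */
	if (fputs("1\n", f) == EOF) {
		perror("write");
		fclose(f);
		return EXIT_FAILURE;
	}

	fclose(f);
	return EXIT_SUCCESS;
}

Once the attribute is set, tasks moved into the group join the band of their thread group, and band placement decisions can be observed through the ems_update_band and ems_manage_band trace events added by this patch.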