kernel/sched/cpufreq_schedutil.c

   1 /*
   2  * CPUFreq governor based on scheduler-provided CPU utilization data.
   3  *
   4  * Copyright (C) 2016, Intel Corporation
   5  * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
   6  *
   7  * This program is free software; you can redistribute it and/or modify
   8  * it under the terms of the GNU General Public License version 2 as
   9  * published by the Free Software Foundation.
  10  */
  11
  12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  13
  14 #include <linux/cpufreq.h>
  15 #include <linux/kthread.h>
  16 #include <uapi/linux/sched/types.h>
  17 #include <linux/slab.h>
  18 #include <trace/events/power.h>
  19
  20 #include "sched.h"
  21
/*
 * Defined outside this file; declared locally here. sugov_get_util() calls it
 * with the CPU number and that CPU's RT utilization to obtain the utilization
 * value fed into the frequency selection.
 */
unsigned long boosted_cpu_util(int cpu, unsigned long other_util);

/*
 * NOTE(review): not referenced in the visible code; sugov_kthread_create()
 * spells the priority as MAX_USER_RT_PRIO / 2 instead - confirm whether this
 * macro is still wanted.
 */
#define SUGOV_KTHREAD_PRIORITY  50
  25
/*
 * User-visible tunables, exposed through sysfs via @attr_set. One instance
 * may be attached to several sugov_policy objects (see sugov_init()).
 */
struct sugov_tunables {
        struct gov_attr_set attr_set;
        /* Minimum time (us) since the last change before the frequency may rise. */
        unsigned int up_rate_limit_us;
        /* Minimum time (us) since the last change before the frequency may drop. */
        unsigned int down_rate_limit_us;
};
  31
/* Per-cpufreq-policy governor state. */
struct sugov_policy {
        /* The cpufreq policy this instance drives (set in sugov_policy_alloc()). */
        struct cpufreq_policy *policy;

        /* Tunables in effect and the hook linking us into their policy list. */
        struct sugov_tunables *tunables;
        struct list_head tunables_hook;

        raw_spinlock_t update_lock;  /* For shared policies */
        /* Timestamp recorded by sugov_update_commit() for the last change. */
        u64 last_freq_update_time;
        /* min(up_rate_delay_ns, down_rate_delay_ns); gates re-evaluation. */
        s64 min_rate_limit_ns;
        /* The rate-limit tunables converted to nanoseconds. */
        s64 up_rate_delay_ns;
        s64 down_rate_delay_ns;
        /* Last frequency requested; UINT_MAX means "no valid value, force update". */
        unsigned int next_freq;
        /* Last raw (unresolved) frequency computed by get_next_freq(). */
        unsigned int cached_raw_freq;

        /* The next fields are only needed if fast switch cannot be used. */
        struct irq_work irq_work;
        struct kthread_work work;
        struct mutex work_lock;
        struct kthread_worker worker;
        struct task_struct *thread;
        /* Set when the irq_work is queued, cleared at the end of sugov_work(). */
        bool work_in_progress;

        /* Set by sugov_limits(); consumed by sugov_should_update_freq(). */
        bool need_freq_update;
};
  56
/* Per-CPU governor state; one instance per CPU via DEFINE_PER_CPU below. */
struct sugov_cpu {
        /* Scheduler callback hook; container_of() on it recovers this struct. */
        struct update_util_data update_util;
        struct sugov_policy *sg_policy;
        unsigned int cpu;

        /* True when an IOWAIT update arrived and has not been applied yet. */
        bool iowait_boost_pending;
        /* Current boost value (starts at policy->min, doubles up to the max). */
        unsigned int iowait_boost;
        /* Upper bound for iowait_boost (policy->cpuinfo.max_freq, see sugov_start()). */
        unsigned int iowait_boost_max;
        /* Time of the most recent utilization update for this CPU. */
        u64 last_update;

        /* The fields below are only needed when sharing a policy. */
        unsigned long util;
        unsigned long max;
        unsigned int flags;

        /* The field below is for single-CPU policies only. */
#ifdef CONFIG_NO_HZ_COMMON
        /* Idle-call counter snapshot used by sugov_cpu_is_busy(). */
        unsigned long saved_idle_calls;
#endif
};
  77
/* Per-CPU governor state; (re)initialized for each policy CPU in sugov_start(). */
static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
  79
  80 /************************ Governor internals ***********************/
  81
  82 static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
  83 {
  84         s64 delta_ns;
  85
  86         /*
  87          * Since cpufreq_update_util() is called with rq->lock held for
  88          * the @target_cpu, our per-cpu data is fully serialized.
  89          *
  90          * However, drivers cannot in general deal with cross-cpu
  91          * requests, so while get_next_freq() will work, our
  92          * sugov_update_commit() call may not for the fast switching platforms.
  93          *
  94          * Hence stop here for remote requests if they aren't supported
  95          * by the hardware, as calculating the frequency is pointless if
  96          * we cannot in fact act on it.
  97          *
  98          * For the slow switching platforms, the kthread is always scheduled on
  99          * the right set of CPUs and any CPU can find the next frequency and
 100          * schedule the kthread.
 101          */
 102         if (sg_policy->policy->fast_switch_enabled &&
 103             !cpufreq_can_do_remote_dvfs(sg_policy->policy))
 104                 return false;
 105
 106         if (sg_policy->work_in_progress)
 107                 return false;
 108
 109         if (unlikely(sg_policy->need_freq_update)) {
 110                 sg_policy->need_freq_update = false;
 111                 /*
 112                  * This happens when limits change, so forget the previous
 113                  * next_freq value and force an update.
 114                  */
 115                 sg_policy->next_freq = UINT_MAX;
 116                 return true;
 117         }
 118
 119         /* No need to recalculate next freq for min_rate_limit_us
 120          * at least. However we might still decide to further rate
 121          * limit once frequency change direction is decided, according
 122          * to the separate rate limits.
 123          */
 124
 125         delta_ns = time - sg_policy->last_freq_update_time;
 126         return delta_ns >= sg_policy->min_rate_limit_ns;
 127 }
 128
 129 static bool sugov_up_down_rate_limit(struct sugov_policy *sg_policy, u64 time,
 130                                      unsigned int next_freq)
 131 {
 132         s64 delta_ns;
 133
 134         delta_ns = time - sg_policy->last_freq_update_time;
 135
 136         if (next_freq > sg_policy->next_freq &&
 137             delta_ns < sg_policy->up_rate_delay_ns)
 138                         return true;
 139
 140         if (next_freq < sg_policy->next_freq &&
 141             delta_ns < sg_policy->down_rate_delay_ns)
 142                         return true;
 143
 144         return false;
 145 }
 146
 147 static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
 148                                 unsigned int next_freq)
 149 {
 150         struct cpufreq_policy *policy = sg_policy->policy;
 151
 152         if (sg_policy->next_freq == next_freq)
 153                 return;
 154
 155         if (sugov_up_down_rate_limit(sg_policy, time, next_freq))
 156                 return;
 157
 158         sg_policy->next_freq = next_freq;
 159         sg_policy->last_freq_update_time = time;
 160
 161         if (policy->fast_switch_enabled) {
 162                 next_freq = cpufreq_driver_fast_switch(policy, next_freq);
 163                 if (!next_freq)
 164                         return;
 165
 166                 policy->cur = next_freq;
 167                 trace_cpu_frequency(next_freq, smp_processor_id());
 168         } else {
 169                 sg_policy->work_in_progress = true;
 170                 irq_work_queue(&sg_policy->irq_work);
 171         }
 172 }
 173
 174 /**
 175  * get_next_freq - Compute a new frequency for a given cpufreq policy.
 176  * @sg_policy: schedutil policy object to compute the new frequency for.
 177  * @util: Current CPU utilization.
 178  * @max: CPU capacity.
 179  *
 180  * If the utilization is frequency-invariant, choose the new frequency to be
 181  * proportional to it, that is
 182  *
 183  * next_freq = C * max_freq * util / max
 184  *
 185  * Otherwise, approximate the would-be frequency-invariant utilization by
 186  * util_raw * (curr_freq / max_freq) which leads to
 187  *
 188  * next_freq = C * curr_freq * util_raw / max
 189  *
 190  * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
 191  *
 192  * The lowest driver-supported frequency which is equal or greater than the raw
 193  * next_freq (as calculated above) is returned, subject to policy min/max and
 194  * cpufreq driver limitations.
 195  */
 196 static unsigned int get_next_freq(struct sugov_policy *sg_policy,
 197                                   unsigned long util, unsigned long max)
 198 {
 199         struct cpufreq_policy *policy = sg_policy->policy;
 200         unsigned int freq = arch_scale_freq_invariant() ?
 201                                 policy->cpuinfo.max_freq : policy->cur;
 202
 203         freq = (freq + (freq >> 2)) * util / max;
 204
 205         if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX)
 206                 return sg_policy->next_freq;
 207         sg_policy->cached_raw_freq = freq;
 208         return cpufreq_driver_resolve_freq(policy, freq);
 209 }
 210
 211 static void sugov_get_util(unsigned long *util, unsigned long *max, int cpu)
 212 {
 213         unsigned long max_cap, rt;
 214
 215         max_cap = arch_scale_cpu_capacity(NULL, cpu);
 216
 217         rt = sched_get_rt_rq_util(cpu);
 218
 219         *util = boosted_cpu_util(cpu, rt);
 220         *util = min(*util, max_cap);
 221         *max = max_cap;
 222 }
 223
 224 static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
 225                                    unsigned int flags)
 226 {
 227         if (flags & SCHED_CPUFREQ_IOWAIT) {
 228                 if (sg_cpu->iowait_boost_pending)
 229                         return;
 230
 231                 sg_cpu->iowait_boost_pending = true;
 232
 233                 if (sg_cpu->iowait_boost) {
 234                         sg_cpu->iowait_boost <<= 1;
 235                         if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
 236                                 sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
 237                 } else {
 238                         sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
 239                 }
 240         } else if (sg_cpu->iowait_boost) {
 241                 s64 delta_ns = time - sg_cpu->last_update;
 242
 243                 /* Clear iowait_boost if the CPU apprears to have been idle. */
 244                 if (delta_ns > TICK_NSEC) {
 245                         sg_cpu->iowait_boost = 0;
 246                         sg_cpu->iowait_boost_pending = false;
 247                 }
 248         }
 249 }
 250
 251 static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
 252                                unsigned long *max)
 253 {
 254         unsigned int boost_util, boost_max;
 255
 256         if (!sg_cpu->iowait_boost)
 257                 return;
 258
 259         if (sg_cpu->iowait_boost_pending) {
 260                 sg_cpu->iowait_boost_pending = false;
 261         } else {
 262                 sg_cpu->iowait_boost >>= 1;
 263                 if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
 264                         sg_cpu->iowait_boost = 0;
 265                         return;
 266                 }
 267         }
 268
 269         boost_util = sg_cpu->iowait_boost;
 270         boost_max = sg_cpu->iowait_boost_max;
 271
 272         if (*util * boost_max < *max * boost_util) {
 273                 *util = boost_util;
 274                 *max = boost_max;
 275         }
 276 }
 277
 278 #ifdef CONFIG_NO_HZ_COMMON
 279 static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
 280 {
 281         unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
 282         bool ret = idle_calls == sg_cpu->saved_idle_calls;
 283
 284         sg_cpu->saved_idle_calls = idle_calls;
 285         return ret;
 286 }
 287 #else
 288 static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
 289 #endif /* CONFIG_NO_HZ_COMMON */
 290
 291 static void sugov_update_single(struct update_util_data *hook, u64 time,
 292                                 unsigned int flags)
 293 {
 294         struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
 295         struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 296         struct cpufreq_policy *policy = sg_policy->policy;
 297         unsigned long util, max;
 298         unsigned int next_f;
 299         bool busy;
 300
 301         sugov_set_iowait_boost(sg_cpu, time, flags);
 302         sg_cpu->last_update = time;
 303
 304         if (!sugov_should_update_freq(sg_policy, time))
 305                 return;
 306
 307         busy = sugov_cpu_is_busy(sg_cpu);
 308
 309         if (flags & SCHED_CPUFREQ_DL) {
 310                 next_f = policy->cpuinfo.max_freq;
 311         } else {
 312                 sugov_get_util(&util, &max, sg_cpu->cpu);
 313                 sugov_iowait_boost(sg_cpu, &util, &max);
 314                 next_f = get_next_freq(sg_policy, util, max);
 315                 /*
 316                  * Do not reduce the frequency if the CPU has not been idle
 317                  * recently, as the reduction is likely to be premature then.
 318                  */
 319                 if (busy && next_f < sg_policy->next_freq &&
 320                     sg_policy->next_freq != UINT_MAX) {
 321                         next_f = sg_policy->next_freq;
 322
 323                         /* Reset cached freq as next_freq has changed */
 324                         sg_policy->cached_raw_freq = 0;
 325                 }
 326         }
 327
 328         sugov_update_commit(sg_policy, time, next_f);
 329 }
 330
 331 static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 332 {
 333         struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 334         struct cpufreq_policy *policy = sg_policy->policy;
 335         unsigned long util = 0, max = 1;
 336         unsigned int j;
 337
 338         for_each_cpu(j, policy->cpus) {
 339                 struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
 340                 unsigned long j_util, j_max;
 341                 s64 delta_ns;
 342
 343                 /*
 344                  * If the CPU utilization was last updated before the previous
 345                  * frequency update and the time elapsed between the last update
 346                  * of the CPU utilization and the last frequency update is long
 347                  * enough, don't take the CPU into account as it probably is
 348                  * idle now (and clear iowait_boost for it).
 349                  */
 350                 delta_ns = time - j_sg_cpu->last_update;
 351                 if (delta_ns > TICK_NSEC) {
 352                         j_sg_cpu->iowait_boost = 0;
 353                         j_sg_cpu->iowait_boost_pending = false;
 354                         continue;
 355                 }
 356                 if (j_sg_cpu->flags & SCHED_CPUFREQ_DL)
 357                         return policy->cpuinfo.max_freq;
 358
 359                 j_util = j_sg_cpu->util;
 360                 j_max = j_sg_cpu->max;
 361                 if (j_util * max > j_max * util) {
 362                         util = j_util;
 363                         max = j_max;
 364                 }
 365
 366                 sugov_iowait_boost(j_sg_cpu, &util, &max);
 367         }
 368
 369         return get_next_freq(sg_policy, util, max);
 370 }
 371
 372 static void sugov_update_shared(struct update_util_data *hook, u64 time,
 373                                 unsigned int flags)
 374 {
 375         struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
 376         struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 377         unsigned long util, max;
 378         unsigned int next_f;
 379
 380         sugov_get_util(&util, &max, sg_cpu->cpu);
 381
 382         raw_spin_lock(&sg_policy->update_lock);
 383
 384         sg_cpu->util = util;
 385         sg_cpu->max = max;
 386         sg_cpu->flags = flags;
 387
 388         sugov_set_iowait_boost(sg_cpu, time, flags);
 389         sg_cpu->last_update = time;
 390
 391         if (sugov_should_update_freq(sg_policy, time)) {
 392                 if (flags & SCHED_CPUFREQ_DL)
 393                         next_f = sg_policy->policy->cpuinfo.max_freq;
 394                 else
 395                         next_f = sugov_next_freq_shared(sg_cpu, time);
 396
 397                 sugov_update_commit(sg_policy, time, next_f);
 398         }
 399
 400         raw_spin_unlock(&sg_policy->update_lock);
 401 }
 402
 403 static void sugov_work(struct kthread_work *work)
 404 {
 405         struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
 406
 407         mutex_lock(&sg_policy->work_lock);
 408         __cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
 409                                 CPUFREQ_RELATION_L);
 410         mutex_unlock(&sg_policy->work_lock);
 411
 412         sg_policy->work_in_progress = false;
 413 }
 414
 415 static void sugov_irq_work(struct irq_work *irq_work)
 416 {
 417         struct sugov_policy *sg_policy;
 418
 419         sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
 420
 421         /*
 422          * For RT and deadline tasks, the schedutil governor shoots the
 423          * frequency to maximum. Special care must be taken to ensure that this
 424          * kthread doesn't result in the same behavior.
 425          *
 426          * This is (mostly) guaranteed by the work_in_progress flag. The flag is
 427          * updated only at the end of the sugov_work() function and before that
 428          * the schedutil governor rejects all other frequency scaling requests.
 429          *
 430          * There is a very rare case though, where the RT thread yields right
 431          * after the work_in_progress flag is cleared. The effects of that are
 432          * neglected for now.
 433          */
 434         kthread_queue_work(&sg_policy->worker, &sg_policy->work);
 435 }
 436
 437 /************************** sysfs interface ************************/
 438
/*
 * Tunables shared by all policies when governors are not per-policy
 * (see sugov_tunables_alloc()/sugov_tunables_free()); NULL when none exist.
 * sugov_init() and sugov_exit() access it with global_tunables_lock held.
 */
static struct sugov_tunables *global_tunables;
static DEFINE_MUTEX(global_tunables_lock);
 441
 442 static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
 443 {
 444         return container_of(attr_set, struct sugov_tunables, attr_set);
 445 }
 446
 447 static DEFINE_MUTEX(min_rate_lock);
 448
 449 static void update_min_rate_limit_ns(struct sugov_policy *sg_policy)
 450 {
 451         mutex_lock(&min_rate_lock);
 452         sg_policy->min_rate_limit_ns = min(sg_policy->up_rate_delay_ns,
 453                                            sg_policy->down_rate_delay_ns);
 454         mutex_unlock(&min_rate_lock);
 455 }
 456
 457 static ssize_t up_rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
 458 {
 459         struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
 460
 461         return sprintf(buf, "%u\n", tunables->up_rate_limit_us);
 462 }
 463
 464 static ssize_t down_rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
 465 {
 466         struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
 467
 468         return sprintf(buf, "%u\n", tunables->down_rate_limit_us);
 469 }
 470
 471 static ssize_t up_rate_limit_us_store(struct gov_attr_set *attr_set,
 472                                       const char *buf, size_t count)
 473 {
 474         struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
 475         struct sugov_policy *sg_policy;
 476         unsigned int rate_limit_us;
 477
 478         if (kstrtouint(buf, 10, &rate_limit_us))
 479                 return -EINVAL;
 480
 481         tunables->up_rate_limit_us = rate_limit_us;
 482
 483         list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook) {
 484                 sg_policy->up_rate_delay_ns = rate_limit_us * NSEC_PER_USEC;
 485                 update_min_rate_limit_ns(sg_policy);
 486         }
 487
 488         return count;
 489 }
 490
 491 static ssize_t down_rate_limit_us_store(struct gov_attr_set *attr_set,
 492                                         const char *buf, size_t count)
 493 {
 494         struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
 495         struct sugov_policy *sg_policy;
 496         unsigned int rate_limit_us;
 497
 498         if (kstrtouint(buf, 10, &rate_limit_us))
 499                 return -EINVAL;
 500
 501         tunables->down_rate_limit_us = rate_limit_us;
 502
 503         list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook) {
 504                 sg_policy->down_rate_delay_ns = rate_limit_us * NSEC_PER_USEC;
 505                 update_min_rate_limit_ns(sg_policy);
 506         }
 507
 508         return count;
 509 }
 510
/* Read/write sysfs attributes backed by the *_show()/*_store() handlers above. */
static struct governor_attr up_rate_limit_us = __ATTR_RW(up_rate_limit_us);
static struct governor_attr down_rate_limit_us = __ATTR_RW(down_rate_limit_us);

/* NULL-terminated list of attributes installed by sugov_tunables_ktype. */
static struct attribute *sugov_attributes[] = {
        &up_rate_limit_us.attr,
        &down_rate_limit_us.attr,
        NULL
};

/*
 * kobject type of the tunables kobject registered in sugov_init().
 * NOTE(review): no .release callback is set; the tunables are freed manually
 * with kfree() in sugov_tunables_free().
 */
static struct kobj_type sugov_tunables_ktype = {
        .default_attrs = sugov_attributes,
        .sysfs_ops = &governor_sysfs_ops,
};
 524
 525 /********************** cpufreq governor interface *********************/
 526
/* Forward declaration: sugov_init() needs schedutil_gov.name before the definition below. */
static struct cpufreq_governor schedutil_gov;
 528
 529 static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
 530 {
 531         struct sugov_policy *sg_policy;
 532
 533         sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
 534         if (!sg_policy)
 535                 return NULL;
 536
 537         sg_policy->policy = policy;
 538         raw_spin_lock_init(&sg_policy->update_lock);
 539         return sg_policy;
 540 }
 541
 542 static void sugov_policy_free(struct sugov_policy *sg_policy)
 543 {
 544         kfree(sg_policy);
 545 }
 546
 547 static int sugov_kthread_create(struct sugov_policy *sg_policy)
 548 {
 549         struct task_struct *thread;
 550         struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 };
 551         struct cpufreq_policy *policy = sg_policy->policy;
 552         int ret;
 553
 554         /* kthread only required for slow path */
 555         if (policy->fast_switch_enabled)
 556                 return 0;
 557
 558         kthread_init_work(&sg_policy->work, sugov_work);
 559         kthread_init_worker(&sg_policy->worker);
 560         thread = kthread_create(kthread_worker_fn, &sg_policy->worker,
 561                                 "sugov:%d",
 562                                 cpumask_first(policy->related_cpus));
 563         if (IS_ERR(thread)) {
 564                 pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread));
 565                 return PTR_ERR(thread);
 566         }
 567
 568         ret = sched_setscheduler_nocheck(thread, SCHED_FIFO, &param);
 569         if (ret) {
 570                 kthread_stop(thread);
 571                 pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
 572                 return ret;
 573         }
 574
 575         sg_policy->thread = thread;
 576
 577         /* Kthread is bound to all CPUs by default */
 578         if (!policy->dvfs_possible_from_any_cpu)
 579                 kthread_bind_mask(thread, policy->related_cpus);
 580
 581         init_irq_work(&sg_policy->irq_work, sugov_irq_work);
 582         mutex_init(&sg_policy->work_lock);
 583
 584         wake_up_process(thread);
 585
 586         return 0;
 587 }
 588
 589 static void sugov_kthread_stop(struct sugov_policy *sg_policy)
 590 {
 591         /* kthread only required for slow path */
 592         if (sg_policy->policy->fast_switch_enabled)
 593                 return;
 594
 595         kthread_flush_worker(&sg_policy->worker);
 596         kthread_stop(sg_policy->thread);
 597         mutex_destroy(&sg_policy->work_lock);
 598 }
 599
 600 static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
 601 {
 602         struct sugov_tunables *tunables;
 603
 604         tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
 605         if (tunables) {
 606                 gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
 607                 if (!have_governor_per_policy())
 608                         global_tunables = tunables;
 609         }
 610         return tunables;
 611 }
 612
 613 static void sugov_tunables_free(struct sugov_tunables *tunables)
 614 {
 615         if (!have_governor_per_policy())
 616                 global_tunables = NULL;
 617
 618         kfree(tunables);
 619 }
 620
 621 static int sugov_init(struct cpufreq_policy *policy)
 622 {
 623         struct sugov_policy *sg_policy;
 624         struct sugov_tunables *tunables;
 625         int ret = 0;
 626
 627         /* State should be equivalent to EXIT */
 628         if (policy->governor_data)
 629                 return -EBUSY;
 630
 631         cpufreq_enable_fast_switch(policy);
 632
 633         sg_policy = sugov_policy_alloc(policy);
 634         if (!sg_policy) {
 635                 ret = -ENOMEM;
 636                 goto disable_fast_switch;
 637         }
 638
 639         ret = sugov_kthread_create(sg_policy);
 640         if (ret)
 641                 goto free_sg_policy;
 642
 643         mutex_lock(&global_tunables_lock);
 644
 645         if (global_tunables) {
 646                 if (WARN_ON(have_governor_per_policy())) {
 647                         ret = -EINVAL;
 648                         goto stop_kthread;
 649                 }
 650                 policy->governor_data = sg_policy;
 651                 sg_policy->tunables = global_tunables;
 652
 653                 gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
 654                 goto out;
 655         }
 656
 657         tunables = sugov_tunables_alloc(sg_policy);
 658         if (!tunables) {
 659                 ret = -ENOMEM;
 660                 goto stop_kthread;
 661         }
 662
 663         tunables->up_rate_limit_us = cpufreq_policy_transition_delay_us(policy);
 664         tunables->down_rate_limit_us = cpufreq_policy_transition_delay_us(policy);
 665
 666         policy->governor_data = sg_policy;
 667         sg_policy->tunables = tunables;
 668
 669         ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
 670                                    get_governor_parent_kobj(policy), "%s",
 671                                    schedutil_gov.name);
 672         if (ret)
 673                 goto fail;
 674
 675 out:
 676         mutex_unlock(&global_tunables_lock);
 677         return 0;
 678
 679 fail:
 680         policy->governor_data = NULL;
 681         sugov_tunables_free(tunables);
 682
 683 stop_kthread:
 684         sugov_kthread_stop(sg_policy);
 685         mutex_unlock(&global_tunables_lock);
 686
 687 free_sg_policy:
 688         sugov_policy_free(sg_policy);
 689
 690 disable_fast_switch:
 691         cpufreq_disable_fast_switch(policy);
 692
 693         pr_err("initialization failed (error %d)\n", ret);
 694         return ret;
 695 }
 696
 697 static void sugov_exit(struct cpufreq_policy *policy)
 698 {
 699         struct sugov_policy *sg_policy = policy->governor_data;
 700         struct sugov_tunables *tunables = sg_policy->tunables;
 701         unsigned int count;
 702
 703         mutex_lock(&global_tunables_lock);
 704
 705         count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
 706         policy->governor_data = NULL;
 707         if (!count)
 708                 sugov_tunables_free(tunables);
 709
 710         mutex_unlock(&global_tunables_lock);
 711
 712         sugov_kthread_stop(sg_policy);
 713         sugov_policy_free(sg_policy);
 714         cpufreq_disable_fast_switch(policy);
 715 }
 716
 717 static int sugov_start(struct cpufreq_policy *policy)
 718 {
 719         struct sugov_policy *sg_policy = policy->governor_data;
 720         unsigned int cpu;
 721
 722         sg_policy->up_rate_delay_ns =
 723                 sg_policy->tunables->up_rate_limit_us * NSEC_PER_USEC;
 724         sg_policy->down_rate_delay_ns =
 725                 sg_policy->tunables->down_rate_limit_us * NSEC_PER_USEC;
 726         update_min_rate_limit_ns(sg_policy);
 727         sg_policy->last_freq_update_time = 0;
 728         sg_policy->next_freq = UINT_MAX;
 729         sg_policy->work_in_progress = false;
 730         sg_policy->need_freq_update = false;
 731         sg_policy->cached_raw_freq = 0;
 732
 733         for_each_cpu(cpu, policy->cpus) {
 734                 struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
 735
 736                 memset(sg_cpu, 0, sizeof(*sg_cpu));
 737                 sg_cpu->cpu = cpu;
 738                 sg_cpu->sg_policy = sg_policy;
 739                 sg_cpu->flags = SCHED_CPUFREQ_DL;
 740                 sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
 741         }
 742
 743         for_each_cpu(cpu, policy->cpus) {
 744                 struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
 745
 746                 cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
 747                                              policy_is_shared(policy) ?
 748                                                         sugov_update_shared :
 749                                                         sugov_update_single);
 750         }
 751         return 0;
 752 }
 753
 754 static void sugov_stop(struct cpufreq_policy *policy)
 755 {
 756         struct sugov_policy *sg_policy = policy->governor_data;
 757         unsigned int cpu;
 758
 759         for_each_cpu(cpu, policy->cpus)
 760                 cpufreq_remove_update_util_hook(cpu);
 761
 762         synchronize_sched();
 763
 764         if (!policy->fast_switch_enabled) {
 765                 irq_work_sync(&sg_policy->irq_work);
 766                 kthread_cancel_work_sync(&sg_policy->work);
 767         }
 768 }
 769
 770 static void sugov_limits(struct cpufreq_policy *policy)
 771 {
 772         struct sugov_policy *sg_policy = policy->governor_data;
 773
 774         if (!policy->fast_switch_enabled) {
 775                 mutex_lock(&sg_policy->work_lock);
 776                 cpufreq_policy_apply_limits(policy);
 777                 mutex_unlock(&sg_policy->work_lock);
 778         }
 779
 780         sg_policy->need_freq_update = true;
 781 }
 782
/* Governor descriptor registered with cpufreq by sugov_register(). */
static struct cpufreq_governor schedutil_gov = {
        .name = "schedutil",
        .owner = THIS_MODULE,
        .dynamic_switching = true,
        .init = sugov_init,
        .exit = sugov_exit,
        .start = sugov_start,
        .stop = sugov_stop,
        .limits = sugov_limits,
};
 793
 794 #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
 795 struct cpufreq_governor *cpufreq_default_governor(void)
 796 {
 797         return &schedutil_gov;
 798 }
 799 #endif
 800
 801 static int __init sugov_register(void)
 802 {
 803         return cpufreq_register_governor(&schedutil_gov);
 804 }
 805 fs_initcall(sugov_register);