kernel/sched/rt.c (MotorolaMobilityLLC/kernel-slsi, android-4.14, merge of 4.14.20)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
4 * policies)
5 */
6
7 #include "sched.h"
8
9 #include <linux/slab.h>
10 #include <linux/irq_work.h>
11
12 #include "walt.h"
13
14 int sched_rr_timeslice = RR_TIMESLICE;
15 int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
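/*
 * Illustrative arithmetic (a sketch, assuming HZ=250 and the usual
 * RR_TIMESLICE of 100 ms worth of jiffies from <linux/sched/rt.h>):
 *   RR_TIMESLICE              = 100 * HZ / 1000 = 25 jiffies
 *   sysctl_sched_rr_timeslice = (1000 / 250) * 25 = 100 ms
 * which is the default reported via the sched_rr_timeslice_ms sysctl.
 */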
16
17 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
18
19 struct rt_bandwidth def_rt_bandwidth;
20
21 static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
22 {
23 struct rt_bandwidth *rt_b =
24 container_of(timer, struct rt_bandwidth, rt_period_timer);
25 int idle = 0;
26 int overrun;
27
28 raw_spin_lock(&rt_b->rt_runtime_lock);
29 for (;;) {
30 overrun = hrtimer_forward_now(timer, rt_b->rt_period);
31 if (!overrun)
32 break;
33
34 raw_spin_unlock(&rt_b->rt_runtime_lock);
35 idle = do_sched_rt_period_timer(rt_b, overrun);
36 raw_spin_lock(&rt_b->rt_runtime_lock);
37 }
38 if (idle)
39 rt_b->rt_period_active = 0;
40 raw_spin_unlock(&rt_b->rt_runtime_lock);
41
42 return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
43 }
44
45 void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
46 {
47 rt_b->rt_period = ns_to_ktime(period);
48 rt_b->rt_runtime = runtime;
49
50 raw_spin_lock_init(&rt_b->rt_runtime_lock);
51
52 hrtimer_init(&rt_b->rt_period_timer,
53 CLOCK_MONOTONIC, HRTIMER_MODE_REL);
54 rt_b->rt_period_timer.function = sched_rt_period_timer;
55 }
56
57 static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
58 {
59 if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
60 return;
61
62 raw_spin_lock(&rt_b->rt_runtime_lock);
63 if (!rt_b->rt_period_active) {
64 rt_b->rt_period_active = 1;
65 /*
66 * SCHED_DEADLINE updates the bandwidth, as a runaway
67 * RT task with a DL task could hog a CPU. But DL does
68 * not reset the period. If a deadline task was running
69 * without an RT task running, it can cause RT tasks to
70 * throttle when they start up. Kick the timer right away
71 * to update the period.
72 */
73 hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
74 hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
75 }
76 raw_spin_unlock(&rt_b->rt_runtime_lock);
77 }
78
79 void init_rt_rq(struct rt_rq *rt_rq)
80 {
81 struct rt_prio_array *array;
82 int i;
83
84 array = &rt_rq->active;
85 for (i = 0; i < MAX_RT_PRIO; i++) {
86 INIT_LIST_HEAD(array->queue + i);
87 __clear_bit(i, array->bitmap);
88 }
89 /* delimiter for bitsearch: */
90 __set_bit(MAX_RT_PRIO, array->bitmap);
91
92 #if defined CONFIG_SMP
93 rt_rq->highest_prio.curr = MAX_RT_PRIO;
94 rt_rq->highest_prio.next = MAX_RT_PRIO;
95 rt_rq->rt_nr_migratory = 0;
96 rt_rq->overloaded = 0;
97 plist_head_init(&rt_rq->pushable_tasks);
98 #endif /* CONFIG_SMP */
99 /* We start in dequeued state, because no RT tasks are queued */
100 rt_rq->rt_queued = 0;
101
102 rt_rq->rt_time = 0;
103 rt_rq->rt_throttled = 0;
104 rt_rq->rt_runtime = 0;
105 raw_spin_lock_init(&rt_rq->rt_runtime_lock);
106 }
107
108 #ifdef CONFIG_RT_GROUP_SCHED
109 static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
110 {
111 hrtimer_cancel(&rt_b->rt_period_timer);
112 }
113
114 #define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
115
116 static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
117 {
118 #ifdef CONFIG_SCHED_DEBUG
119 WARN_ON_ONCE(!rt_entity_is_task(rt_se));
120 #endif
121 return container_of(rt_se, struct task_struct, rt);
122 }
123
124 static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
125 {
126 return rt_rq->rq;
127 }
128
129 static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
130 {
131 return rt_se->rt_rq;
132 }
133
134 static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
135 {
136 struct rt_rq *rt_rq = rt_se->rt_rq;
137
138 return rt_rq->rq;
139 }
140
141 void free_rt_sched_group(struct task_group *tg)
142 {
143 int i;
144
145 if (tg->rt_se)
146 destroy_rt_bandwidth(&tg->rt_bandwidth);
147
148 for_each_possible_cpu(i) {
149 if (tg->rt_rq)
150 kfree(tg->rt_rq[i]);
151 if (tg->rt_se)
152 kfree(tg->rt_se[i]);
153 }
154
155 kfree(tg->rt_rq);
156 kfree(tg->rt_se);
157 }
158
159 void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
160 struct sched_rt_entity *rt_se, int cpu,
161 struct sched_rt_entity *parent)
162 {
163 struct rq *rq = cpu_rq(cpu);
164
165 rt_rq->highest_prio.curr = MAX_RT_PRIO;
166 rt_rq->rt_nr_boosted = 0;
167 rt_rq->rq = rq;
168 rt_rq->tg = tg;
169
170 tg->rt_rq[cpu] = rt_rq;
171 tg->rt_se[cpu] = rt_se;
172
173 if (!rt_se)
174 return;
175
176 if (!parent)
177 rt_se->rt_rq = &rq->rt;
178 else
179 rt_se->rt_rq = parent->my_q;
180
181 rt_se->my_q = rt_rq;
182 rt_se->parent = parent;
183 INIT_LIST_HEAD(&rt_se->run_list);
184 }
185
186 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
187 {
188 struct rt_rq *rt_rq;
189 struct sched_rt_entity *rt_se;
190 int i;
191
192 tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL);
193 if (!tg->rt_rq)
194 goto err;
195 tg->rt_se = kzalloc(sizeof(rt_se) * nr_cpu_ids, GFP_KERNEL);
196 if (!tg->rt_se)
197 goto err;
198
199 init_rt_bandwidth(&tg->rt_bandwidth,
200 ktime_to_ns(def_rt_bandwidth.rt_period), 0);
201
202 for_each_possible_cpu(i) {
203 rt_rq = kzalloc_node(sizeof(struct rt_rq),
204 GFP_KERNEL, cpu_to_node(i));
205 if (!rt_rq)
206 goto err;
207
208 rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
209 GFP_KERNEL, cpu_to_node(i));
210 if (!rt_se)
211 goto err_free_rq;
212
213 init_rt_rq(rt_rq);
214 rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
215 init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
216 }
217
218 return 1;
219
220 err_free_rq:
221 kfree(rt_rq);
222 err:
223 return 0;
224 }
225
226 #else /* CONFIG_RT_GROUP_SCHED */
227
228 #define rt_entity_is_task(rt_se) (1)
229
230 static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
231 {
232 return container_of(rt_se, struct task_struct, rt);
233 }
234
235 static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
236 {
237 return container_of(rt_rq, struct rq, rt);
238 }
239
240 static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
241 {
242 struct task_struct *p = rt_task_of(rt_se);
243
244 return task_rq(p);
245 }
246
247 static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
248 {
249 struct rq *rq = rq_of_rt_se(rt_se);
250
251 return &rq->rt;
252 }
253
254 void free_rt_sched_group(struct task_group *tg) { }
255
256 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
257 {
258 return 1;
259 }
260 #endif /* CONFIG_RT_GROUP_SCHED */
261
262 #ifdef CONFIG_SMP
263
264 static void pull_rt_task(struct rq *this_rq);
265
266 static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
267 {
268 /* Try to pull RT tasks here if we lower this rq's prio */
269 return rq->rt.highest_prio.curr > prev->prio;
270 }
271
272 static inline int rt_overloaded(struct rq *rq)
273 {
274 return atomic_read(&rq->rd->rto_count);
275 }
276
277 static inline void rt_set_overload(struct rq *rq)
278 {
279 if (!rq->online)
280 return;
281
282 cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
283 /*
284 * Make sure the mask is visible before we set
285 * the overload count. That is checked to determine
286 * if we should look at the mask. It would be a shame
287 * if we looked at the mask, but the mask was not
288 * updated yet.
289 *
290 * Matched by the barrier in pull_rt_task().
291 */
292 smp_wmb();
293 atomic_inc(&rq->rd->rto_count);
294 }
295
296 static inline void rt_clear_overload(struct rq *rq)
297 {
298 if (!rq->online)
299 return;
300
301 /* the order here really doesn't matter */
302 atomic_dec(&rq->rd->rto_count);
303 cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
304 }
305
306 static void update_rt_migration(struct rt_rq *rt_rq)
307 {
308 if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
309 if (!rt_rq->overloaded) {
310 rt_set_overload(rq_of_rt_rq(rt_rq));
311 rt_rq->overloaded = 1;
312 }
313 } else if (rt_rq->overloaded) {
314 rt_clear_overload(rq_of_rt_rq(rt_rq));
315 rt_rq->overloaded = 0;
316 }
317 }
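
/*
 * Illustrative scenario (a sketch of the overload rule above): a runqueue
 * holding two queued RT tasks (rt_nr_total == 2) where at least one of them
 * may run elsewhere (rt_nr_migratory >= 1) is marked overloaded, setting its
 * bit in rd->rto_mask so other CPUs can pull from it; once it drops back to
 * a single task, or all remaining tasks are pinned, the flag is cleared.
 */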
318
319 static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
320 {
321 struct task_struct *p;
322
323 if (!rt_entity_is_task(rt_se))
324 return;
325
326 p = rt_task_of(rt_se);
327 rt_rq = &rq_of_rt_rq(rt_rq)->rt;
328
329 rt_rq->rt_nr_total++;
330 if (p->nr_cpus_allowed > 1)
331 rt_rq->rt_nr_migratory++;
332
333 update_rt_migration(rt_rq);
334 }
335
336 static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
337 {
338 struct task_struct *p;
339
340 if (!rt_entity_is_task(rt_se))
341 return;
342
343 p = rt_task_of(rt_se);
344 rt_rq = &rq_of_rt_rq(rt_rq)->rt;
345
346 rt_rq->rt_nr_total--;
347 if (p->nr_cpus_allowed > 1)
348 rt_rq->rt_nr_migratory--;
349
350 update_rt_migration(rt_rq);
351 }
352
353 static inline int has_pushable_tasks(struct rq *rq)
354 {
355 return !plist_head_empty(&rq->rt.pushable_tasks);
356 }
357
358 static DEFINE_PER_CPU(struct callback_head, rt_push_head);
359 static DEFINE_PER_CPU(struct callback_head, rt_pull_head);
360
361 static void push_rt_tasks(struct rq *);
362 static void pull_rt_task(struct rq *);
363
364 static inline void queue_push_tasks(struct rq *rq)
365 {
366 if (!has_pushable_tasks(rq))
367 return;
368
369 queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
370 }
371
372 static inline void queue_pull_task(struct rq *rq)
373 {
374 queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
375 }
376
377 static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
378 {
379 plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
380 plist_node_init(&p->pushable_tasks, p->prio);
381 plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
382
383 /* Update the highest prio pushable task */
384 if (p->prio < rq->rt.highest_prio.next)
385 rq->rt.highest_prio.next = p->prio;
386 }
387
388 static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
389 {
390 plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
391
392 /* Update the new highest prio pushable task */
393 if (has_pushable_tasks(rq)) {
394 p = plist_first_entry(&rq->rt.pushable_tasks,
395 struct task_struct, pushable_tasks);
396 rq->rt.highest_prio.next = p->prio;
397 } else
398 rq->rt.highest_prio.next = MAX_RT_PRIO;
399 }
400
401 #else
402
403 static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
404 {
405 }
406
407 static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
408 {
409 }
410
411 static inline
412 void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
413 {
414 }
415
416 static inline
417 void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
418 {
419 }
420
421 static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
422 {
423 return false;
424 }
425
426 static inline void pull_rt_task(struct rq *this_rq)
427 {
428 }
429
430 static inline void queue_push_tasks(struct rq *rq)
431 {
432 }
433 #endif /* CONFIG_SMP */
434
435 static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
436 static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
437
438 static inline int on_rt_rq(struct sched_rt_entity *rt_se)
439 {
440 return rt_se->on_rq;
441 }
442
443 #ifdef CONFIG_RT_GROUP_SCHED
444
445 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
446 {
447 if (!rt_rq->tg)
448 return RUNTIME_INF;
449
450 return rt_rq->rt_runtime;
451 }
452
453 static inline u64 sched_rt_period(struct rt_rq *rt_rq)
454 {
455 return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
456 }
457
458 typedef struct task_group *rt_rq_iter_t;
459
460 static inline struct task_group *next_task_group(struct task_group *tg)
461 {
462 do {
463 tg = list_entry_rcu(tg->list.next,
464 typeof(struct task_group), list);
465 } while (&tg->list != &task_groups && task_group_is_autogroup(tg));
466
467 if (&tg->list == &task_groups)
468 tg = NULL;
469
470 return tg;
471 }
472
473 #define for_each_rt_rq(rt_rq, iter, rq) \
474 for (iter = container_of(&task_groups, typeof(*iter), list); \
475 (iter = next_task_group(iter)) && \
476 (rt_rq = iter->rt_rq[cpu_of(rq)]);)
477
478 #define for_each_sched_rt_entity(rt_se) \
479 for (; rt_se; rt_se = rt_se->parent)
480
481 static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
482 {
483 return rt_se->my_q;
484 }
485
486 static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
487 static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
488
489 static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
490 {
491 struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
492 struct rq *rq = rq_of_rt_rq(rt_rq);
493 struct sched_rt_entity *rt_se;
494
495 int cpu = cpu_of(rq);
496
497 rt_se = rt_rq->tg->rt_se[cpu];
498
499 if (rt_rq->rt_nr_running) {
500 if (!rt_se)
501 enqueue_top_rt_rq(rt_rq);
502 else if (!on_rt_rq(rt_se))
503 enqueue_rt_entity(rt_se, 0);
504
505 if (rt_rq->highest_prio.curr < curr->prio)
506 resched_curr(rq);
507 }
508 }
509
510 static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
511 {
512 struct sched_rt_entity *rt_se;
513 int cpu = cpu_of(rq_of_rt_rq(rt_rq));
514
515 rt_se = rt_rq->tg->rt_se[cpu];
516
517 if (!rt_se)
518 dequeue_top_rt_rq(rt_rq);
519 else if (on_rt_rq(rt_se))
520 dequeue_rt_entity(rt_se, 0);
521 }
522
523 static inline int rt_rq_throttled(struct rt_rq *rt_rq)
524 {
525 return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
526 }
527
528 static int rt_se_boosted(struct sched_rt_entity *rt_se)
529 {
530 struct rt_rq *rt_rq = group_rt_rq(rt_se);
531 struct task_struct *p;
532
533 if (rt_rq)
534 return !!rt_rq->rt_nr_boosted;
535
536 p = rt_task_of(rt_se);
537 return p->prio != p->normal_prio;
538 }
539
540 #ifdef CONFIG_SMP
541 static inline const struct cpumask *sched_rt_period_mask(void)
542 {
543 return this_rq()->rd->span;
544 }
545 #else
546 static inline const struct cpumask *sched_rt_period_mask(void)
547 {
548 return cpu_online_mask;
549 }
550 #endif
551
552 static inline
553 struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
554 {
555 return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
556 }
557
558 static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
559 {
560 return &rt_rq->tg->rt_bandwidth;
561 }
562
563 #else /* !CONFIG_RT_GROUP_SCHED */
564
565 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
566 {
567 return rt_rq->rt_runtime;
568 }
569
570 static inline u64 sched_rt_period(struct rt_rq *rt_rq)
571 {
572 return ktime_to_ns(def_rt_bandwidth.rt_period);
573 }
574
575 typedef struct rt_rq *rt_rq_iter_t;
576
577 #define for_each_rt_rq(rt_rq, iter, rq) \
578 for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
579
580 #define for_each_sched_rt_entity(rt_se) \
581 for (; rt_se; rt_se = NULL)
582
583 static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
584 {
585 return NULL;
586 }
587
588 static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
589 {
590 struct rq *rq = rq_of_rt_rq(rt_rq);
591
592 if (!rt_rq->rt_nr_running)
593 return;
594
595 enqueue_top_rt_rq(rt_rq);
596 resched_curr(rq);
597 }
598
599 static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
600 {
601 dequeue_top_rt_rq(rt_rq);
602 }
603
604 static inline int rt_rq_throttled(struct rt_rq *rt_rq)
605 {
606 return rt_rq->rt_throttled;
607 }
608
609 static inline const struct cpumask *sched_rt_period_mask(void)
610 {
611 return cpu_online_mask;
612 }
613
614 static inline
615 struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
616 {
617 return &cpu_rq(cpu)->rt;
618 }
619
620 static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
621 {
622 return &def_rt_bandwidth;
623 }
624
625 #endif /* CONFIG_RT_GROUP_SCHED */
626
627 bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
628 {
629 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
630
631 return (hrtimer_active(&rt_b->rt_period_timer) ||
632 rt_rq->rt_time < rt_b->rt_runtime);
633 }
634
635 #ifdef CONFIG_SMP
636 /*
637 * We ran out of runtime, see if we can borrow some from our neighbours.
638 */
639 static void do_balance_runtime(struct rt_rq *rt_rq)
640 {
641 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
642 struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
643 int i, weight;
644 u64 rt_period;
645
646 weight = cpumask_weight(rd->span);
647
648 raw_spin_lock(&rt_b->rt_runtime_lock);
649 rt_period = ktime_to_ns(rt_b->rt_period);
650 for_each_cpu(i, rd->span) {
651 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
652 s64 diff;
653
654 if (iter == rt_rq)
655 continue;
656
657 raw_spin_lock(&iter->rt_runtime_lock);
658 /*
659 * Either all rqs have inf runtime and there's nothing to steal
660 * or __disable_runtime() below sets a specific rq to inf to
661 * indicate it's been disabled and disallow stealing.
662 */
663 if (iter->rt_runtime == RUNTIME_INF)
664 goto next;
665
666 /*
667 * From runqueues with spare time, take 1/n part of their
668 * spare time, but no more than our period.
669 */
670 diff = iter->rt_runtime - iter->rt_time;
671 if (diff > 0) {
672 diff = div_u64((u64)diff, weight);
673 if (rt_rq->rt_runtime + diff > rt_period)
674 diff = rt_period - rt_rq->rt_runtime;
675 iter->rt_runtime -= diff;
676 rt_rq->rt_runtime += diff;
677 if (rt_rq->rt_runtime == rt_period) {
678 raw_spin_unlock(&iter->rt_runtime_lock);
679 break;
680 }
681 }
682 next:
683 raw_spin_unlock(&iter->rt_runtime_lock);
684 }
685 raw_spin_unlock(&rt_b->rt_runtime_lock);
686 }
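
/*
 * Worked example (illustrative numbers only): rd->span covers 4 CPUs,
 * rt_period is 1000 ms and this rt_rq has used up its 950 ms of runtime.
 * A neighbour with 950 ms of runtime and 150 ms of rt_time has 800 ms of
 * spare time; we would take diff / weight = 200 ms of it, but the transfer
 * is capped at rt_period - rt_rq->rt_runtime = 50 ms so that no runqueue
 * ever holds more than one full period of runtime.
 */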
687
688 /*
689 * Ensure this RQ takes back all the runtime it lent to its neighbours.
690 */
691 static void __disable_runtime(struct rq *rq)
692 {
693 struct root_domain *rd = rq->rd;
694 rt_rq_iter_t iter;
695 struct rt_rq *rt_rq;
696
697 if (unlikely(!scheduler_running))
698 return;
699
700 for_each_rt_rq(rt_rq, iter, rq) {
701 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
702 s64 want;
703 int i;
704
705 raw_spin_lock(&rt_b->rt_runtime_lock);
706 raw_spin_lock(&rt_rq->rt_runtime_lock);
707 /*
708 * Either we're all inf and nobody needs to borrow, or we're
709 * already disabled and thus have nothing to do, or we have
710 * exactly the right amount of runtime to take out.
711 */
712 if (rt_rq->rt_runtime == RUNTIME_INF ||
713 rt_rq->rt_runtime == rt_b->rt_runtime)
714 goto balanced;
715 raw_spin_unlock(&rt_rq->rt_runtime_lock);
716
717 /*
718 * Calculate the difference between what we started out with
719 * and what we currently have; that's the amount of runtime
720 * we lent out and now have to reclaim.
721 */
722 want = rt_b->rt_runtime - rt_rq->rt_runtime;
723
724 /*
725 * Greedy reclaim, take back as much as we can.
726 */
727 for_each_cpu(i, rd->span) {
728 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
729 s64 diff;
730
731 /*
732 * Can't reclaim from ourselves or disabled runqueues.
733 */
734 if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
735 continue;
736
737 raw_spin_lock(&iter->rt_runtime_lock);
738 if (want > 0) {
739 diff = min_t(s64, iter->rt_runtime, want);
740 iter->rt_runtime -= diff;
741 want -= diff;
742 } else {
743 iter->rt_runtime -= want;
744 want -= want;
745 }
746 raw_spin_unlock(&iter->rt_runtime_lock);
747
748 if (!want)
749 break;
750 }
751
752 raw_spin_lock(&rt_rq->rt_runtime_lock);
753 /*
754 * We cannot be left wanting - that would mean some runtime
755 * leaked out of the system.
756 */
757 BUG_ON(want);
758 balanced:
759 /*
760 * Disable all the borrow logic by pretending we have inf
761 * runtime - in which case borrowing doesn't make sense.
762 */
763 rt_rq->rt_runtime = RUNTIME_INF;
764 rt_rq->rt_throttled = 0;
765 raw_spin_unlock(&rt_rq->rt_runtime_lock);
766 raw_spin_unlock(&rt_b->rt_runtime_lock);
767
768 /* Make rt_rq available for pick_next_task() */
769 sched_rt_rq_enqueue(rt_rq);
770 }
771 }
772
773 static void __enable_runtime(struct rq *rq)
774 {
775 rt_rq_iter_t iter;
776 struct rt_rq *rt_rq;
777
778 if (unlikely(!scheduler_running))
779 return;
780
781 /*
782 * Reset each runqueue's bandwidth settings
783 */
784 for_each_rt_rq(rt_rq, iter, rq) {
785 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
786
787 raw_spin_lock(&rt_b->rt_runtime_lock);
788 raw_spin_lock(&rt_rq->rt_runtime_lock);
789 rt_rq->rt_runtime = rt_b->rt_runtime;
790 rt_rq->rt_time = 0;
791 rt_rq->rt_throttled = 0;
792 raw_spin_unlock(&rt_rq->rt_runtime_lock);
793 raw_spin_unlock(&rt_b->rt_runtime_lock);
794 }
795 }
796
797 static void balance_runtime(struct rt_rq *rt_rq)
798 {
799 if (!sched_feat(RT_RUNTIME_SHARE))
800 return;
801
802 if (rt_rq->rt_time > rt_rq->rt_runtime) {
803 raw_spin_unlock(&rt_rq->rt_runtime_lock);
804 do_balance_runtime(rt_rq);
805 raw_spin_lock(&rt_rq->rt_runtime_lock);
806 }
807 }
808 #else /* !CONFIG_SMP */
809 static inline void balance_runtime(struct rt_rq *rt_rq) {}
810 #endif /* CONFIG_SMP */
811
812 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
813 {
814 int i, idle = 1, throttled = 0;
815 const struct cpumask *span;
816
817 span = sched_rt_period_mask();
818 #ifdef CONFIG_RT_GROUP_SCHED
819 /*
820 * FIXME: isolated CPUs should really leave the root task group,
821 * whether they are isolcpus or were isolated via cpusets, lest
822 * the timer run on a CPU which does not service all runqueues,
823 * potentially leaving other CPUs indefinitely throttled. If
824 * isolation is really required, the user will turn the throttle
825 * off to kill the perturbations it causes anyway. Meanwhile,
826 * this maintains functionality for boot and/or troubleshooting.
827 */
828 if (rt_b == &root_task_group.rt_bandwidth)
829 span = cpu_online_mask;
830 #endif
831 for_each_cpu(i, span) {
832 int enqueue = 0;
833 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
834 struct rq *rq = rq_of_rt_rq(rt_rq);
835 int skip;
836
837 /*
838 * When span == cpu_online_mask, taking each rq->lock
839 * can be time-consuming. Try to avoid it when possible.
840 */
841 raw_spin_lock(&rt_rq->rt_runtime_lock);
842 skip = !rt_rq->rt_time && !rt_rq->rt_nr_running;
843 raw_spin_unlock(&rt_rq->rt_runtime_lock);
844 if (skip)
845 continue;
846
847 raw_spin_lock(&rq->lock);
848 if (rt_rq->rt_time) {
849 u64 runtime;
850
851 raw_spin_lock(&rt_rq->rt_runtime_lock);
852 if (rt_rq->rt_throttled)
853 balance_runtime(rt_rq);
854 runtime = rt_rq->rt_runtime;
855 rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
856 if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
857 rt_rq->rt_throttled = 0;
858 enqueue = 1;
859
860 /*
861 * When we're idle and a woken (rt) task is
862 * throttled, check_preempt_curr() will set
863 * skip_update, and the time between the wakeup
864 * and this unthrottle will get accounted as
865 * 'runtime'.
866 */
867 if (rt_rq->rt_nr_running && rq->curr == rq->idle)
868 rq_clock_skip_update(rq, false);
869 }
870 if (rt_rq->rt_time || rt_rq->rt_nr_running)
871 idle = 0;
872 raw_spin_unlock(&rt_rq->rt_runtime_lock);
873 } else if (rt_rq->rt_nr_running) {
874 idle = 0;
875 if (!rt_rq_throttled(rt_rq))
876 enqueue = 1;
877 }
878 if (rt_rq->rt_throttled)
879 throttled = 1;
880
881 if (enqueue)
882 sched_rt_rq_enqueue(rt_rq);
883 raw_spin_unlock(&rq->lock);
884 }
885
886 if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
887 return 1;
888
889 return idle;
890 }
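
/*
 * Worked example (illustrative): if the period timer fired late with
 * overrun == 2 and runtime == 950 ms, a throttled rt_rq carrying
 * rt_time == 1000 ms is decreased by min(1000 ms, 2 * 950 ms) down to 0,
 * which is below runtime, so it is unthrottled and re-enqueued.
 */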
891
892 static inline int rt_se_prio(struct sched_rt_entity *rt_se)
893 {
894 #ifdef CONFIG_RT_GROUP_SCHED
895 struct rt_rq *rt_rq = group_rt_rq(rt_se);
896
897 if (rt_rq)
898 return rt_rq->highest_prio.curr;
899 #endif
900
901 return rt_task_of(rt_se)->prio;
902 }
903
904 static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
905 {
906 u64 runtime = sched_rt_runtime(rt_rq);
907
908 if (rt_rq->rt_throttled)
909 return rt_rq_throttled(rt_rq);
910
911 if (runtime >= sched_rt_period(rt_rq))
912 return 0;
913
914 balance_runtime(rt_rq);
915 runtime = sched_rt_runtime(rt_rq);
916 if (runtime == RUNTIME_INF)
917 return 0;
918
919 if (rt_rq->rt_time > runtime) {
920 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
921
922 /*
923 * Don't actually throttle groups that have no runtime assigned
924 * but accrue some time due to boosting.
925 */
926 if (likely(rt_b->rt_runtime)) {
927 rt_rq->rt_throttled = 1;
928 printk_deferred_once("sched: RT throttling activated\n");
929 } else {
930 /*
931 * In case we did anyway, make it go away,
932 * replenishment is a joke, since it will replenish us
933 * with exactly 0 ns.
934 */
935 rt_rq->rt_time = 0;
936 }
937
938 if (rt_rq_throttled(rt_rq)) {
939 sched_rt_rq_dequeue(rt_rq);
940 return 1;
941 }
942 }
943
944 return 0;
945 }
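
/*
 * With the common defaults (a sketch, assuming sched_rt_runtime_us=950000
 * and sched_rt_period_us=1000000), a CPU-bound SCHED_FIFO task accumulates
 * rt_time past 950 ms within each 1 s period, gets rt_throttled set and is
 * dequeued until the period timer replenishes it, leaving roughly 50 ms per
 * second for non-RT work.
 */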
946
947 /*
948 * Update the current task's runtime statistics. Skip current tasks that
949 * are not in our scheduling class.
950 */
951 static void update_curr_rt(struct rq *rq)
952 {
953 struct task_struct *curr = rq->curr;
954 struct sched_rt_entity *rt_se = &curr->rt;
955 u64 delta_exec;
956
957 if (curr->sched_class != &rt_sched_class)
958 return;
959
960 delta_exec = rq_clock_task(rq) - curr->se.exec_start;
961 if (unlikely((s64)delta_exec <= 0))
962 return;
963
964 /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
965 cpufreq_update_util(rq, SCHED_CPUFREQ_RT);
966
967 schedstat_set(curr->se.statistics.exec_max,
968 max(curr->se.statistics.exec_max, delta_exec));
969
970 curr->se.sum_exec_runtime += delta_exec;
971 account_group_exec_runtime(curr, delta_exec);
972
973 curr->se.exec_start = rq_clock_task(rq);
974 cpuacct_charge(curr, delta_exec);
975
976 sched_rt_avg_update(rq, delta_exec);
977
978 if (!rt_bandwidth_enabled())
979 return;
980
981 for_each_sched_rt_entity(rt_se) {
982 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
983
984 if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
985 raw_spin_lock(&rt_rq->rt_runtime_lock);
986 rt_rq->rt_time += delta_exec;
987 if (sched_rt_runtime_exceeded(rt_rq))
988 resched_curr(rq);
989 raw_spin_unlock(&rt_rq->rt_runtime_lock);
990 }
991 }
992 }
993
994 static void
995 dequeue_top_rt_rq(struct rt_rq *rt_rq)
996 {
997 struct rq *rq = rq_of_rt_rq(rt_rq);
998
999 BUG_ON(&rq->rt != rt_rq);
1000
1001 if (!rt_rq->rt_queued)
1002 return;
1003
1004 BUG_ON(!rq->nr_running);
1005
1006 sub_nr_running(rq, rt_rq->rt_nr_running);
1007 rt_rq->rt_queued = 0;
1008 }
1009
1010 static void
1011 enqueue_top_rt_rq(struct rt_rq *rt_rq)
1012 {
1013 struct rq *rq = rq_of_rt_rq(rt_rq);
1014
1015 BUG_ON(&rq->rt != rt_rq);
1016
1017 if (rt_rq->rt_queued)
1018 return;
1019 if (rt_rq_throttled(rt_rq) || !rt_rq->rt_nr_running)
1020 return;
1021
1022 add_nr_running(rq, rt_rq->rt_nr_running);
1023 rt_rq->rt_queued = 1;
1024 }
1025
1026 #if defined CONFIG_SMP
1027
1028 static void
1029 inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
1030 {
1031 struct rq *rq = rq_of_rt_rq(rt_rq);
1032
1033 #ifdef CONFIG_RT_GROUP_SCHED
1034 /*
1035 * Change rq's cpupri only if rt_rq is the top queue.
1036 */
1037 if (&rq->rt != rt_rq)
1038 return;
1039 #endif
1040 if (rq->online && prio < prev_prio)
1041 cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
1042 }
1043
1044 static void
1045 dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
1046 {
1047 struct rq *rq = rq_of_rt_rq(rt_rq);
1048
1049 #ifdef CONFIG_RT_GROUP_SCHED
1050 /*
1051 * Change rq's cpupri only if rt_rq is the top queue.
1052 */
1053 if (&rq->rt != rt_rq)
1054 return;
1055 #endif
1056 if (rq->online && rt_rq->highest_prio.curr != prev_prio)
1057 cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
1058 }
1059
1060 #else /* CONFIG_SMP */
1061
1062 static inline
1063 void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
1064 static inline
1065 void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
1066
1067 #endif /* CONFIG_SMP */
1068
1069 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
1070 static void
1071 inc_rt_prio(struct rt_rq *rt_rq, int prio)
1072 {
1073 int prev_prio = rt_rq->highest_prio.curr;
1074
1075 if (prio < prev_prio)
1076 rt_rq->highest_prio.curr = prio;
1077
1078 inc_rt_prio_smp(rt_rq, prio, prev_prio);
1079 }
1080
1081 static void
1082 dec_rt_prio(struct rt_rq *rt_rq, int prio)
1083 {
1084 int prev_prio = rt_rq->highest_prio.curr;
1085
1086 if (rt_rq->rt_nr_running) {
1087
1088 WARN_ON(prio < prev_prio);
1089
1090 /*
1091 * This may have been our highest task, and therefore
1092 * we may have some recomputation to do
1093 */
1094 if (prio == prev_prio) {
1095 struct rt_prio_array *array = &rt_rq->active;
1096
1097 rt_rq->highest_prio.curr =
1098 sched_find_first_bit(array->bitmap);
1099 }
1100
1101 } else
1102 rt_rq->highest_prio.curr = MAX_RT_PRIO;
1103
1104 dec_rt_prio_smp(rt_rq, prio, prev_prio);
1105 }
1106
1107 #else
1108
1109 static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
1110 static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
1111
1112 #endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
1113
1114 #ifdef CONFIG_RT_GROUP_SCHED
1115
1116 static void
1117 inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1118 {
1119 if (rt_se_boosted(rt_se))
1120 rt_rq->rt_nr_boosted++;
1121
1122 if (rt_rq->tg)
1123 start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
1124 }
1125
1126 static void
1127 dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1128 {
1129 if (rt_se_boosted(rt_se))
1130 rt_rq->rt_nr_boosted--;
1131
1132 WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
1133 }
1134
1135 #else /* CONFIG_RT_GROUP_SCHED */
1136
1137 static void
1138 inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1139 {
1140 start_rt_bandwidth(&def_rt_bandwidth);
1141 }
1142
1143 static inline
1144 void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}
1145
1146 #endif /* CONFIG_RT_GROUP_SCHED */
1147
1148 static inline
1149 unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
1150 {
1151 struct rt_rq *group_rq = group_rt_rq(rt_se);
1152
1153 if (group_rq)
1154 return group_rq->rt_nr_running;
1155 else
1156 return 1;
1157 }
1158
1159 static inline
1160 unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
1161 {
1162 struct rt_rq *group_rq = group_rt_rq(rt_se);
1163 struct task_struct *tsk;
1164
1165 if (group_rq)
1166 return group_rq->rr_nr_running;
1167
1168 tsk = rt_task_of(rt_se);
1169
1170 return (tsk->policy == SCHED_RR) ? 1 : 0;
1171 }
1172
1173 static inline
1174 void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1175 {
1176 int prio = rt_se_prio(rt_se);
1177
1178 WARN_ON(!rt_prio(prio));
1179 rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
1180 rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
1181
1182 inc_rt_prio(rt_rq, prio);
1183 inc_rt_migration(rt_se, rt_rq);
1184 inc_rt_group(rt_se, rt_rq);
1185 }
1186
1187 static inline
1188 void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1189 {
1190 WARN_ON(!rt_prio(rt_se_prio(rt_se)));
1191 WARN_ON(!rt_rq->rt_nr_running);
1192 rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
1193 rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
1194
1195 dec_rt_prio(rt_rq, rt_se_prio(rt_se));
1196 dec_rt_migration(rt_se, rt_rq);
1197 dec_rt_group(rt_se, rt_rq);
1198 }
1199
1200 /*
1201 * Change rt_se->run_list location unless SAVE && !MOVE
1202 *
1203 * assumes ENQUEUE/DEQUEUE flags match
1204 */
1205 static inline bool move_entity(unsigned int flags)
1206 {
1207 if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
1208 return false;
1209
1210 return true;
1211 }
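
/*
 * Sketch of the flag combinations (my reading, stated as an assumption):
 * a plain enqueue/dequeue passes neither SAVE nor MOVE, so the entity is
 * (re)linked on its priority queue; a dequeue/enqueue pair that merely
 * saves and restores state across an attribute change passes SAVE alone,
 * so the list position is preserved; SAVE together with MOVE relinks the
 * entity, e.g. when its priority actually changes.
 */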
1212
1213 static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
1214 {
1215 list_del_init(&rt_se->run_list);
1216
1217 if (list_empty(array->queue + rt_se_prio(rt_se)))
1218 __clear_bit(rt_se_prio(rt_se), array->bitmap);
1219
1220 rt_se->on_list = 0;
1221 }
1222
1223 static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
1224 {
1225 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
1226 struct rt_prio_array *array = &rt_rq->active;
1227 struct rt_rq *group_rq = group_rt_rq(rt_se);
1228 struct list_head *queue = array->queue + rt_se_prio(rt_se);
1229
1230 /*
1231 * Don't enqueue the group if it's throttled, or when empty.
1232 * The latter is a consequence of the former when a child group
1233 * gets throttled and the current group doesn't have any other
1234 * active members.
1235 */
1236 if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
1237 if (rt_se->on_list)
1238 __delist_rt_entity(rt_se, array);
1239 return;
1240 }
1241
1242 if (move_entity(flags)) {
1243 WARN_ON_ONCE(rt_se->on_list);
1244 if (flags & ENQUEUE_HEAD)
1245 list_add(&rt_se->run_list, queue);
1246 else
1247 list_add_tail(&rt_se->run_list, queue);
1248
1249 __set_bit(rt_se_prio(rt_se), array->bitmap);
1250 rt_se->on_list = 1;
1251 }
1252 rt_se->on_rq = 1;
1253
1254 inc_rt_tasks(rt_se, rt_rq);
1255 }
1256
1257 static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
1258 {
1259 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
1260 struct rt_prio_array *array = &rt_rq->active;
1261
1262 if (move_entity(flags)) {
1263 WARN_ON_ONCE(!rt_se->on_list);
1264 __delist_rt_entity(rt_se, array);
1265 }
1266 rt_se->on_rq = 0;
1267
1268 dec_rt_tasks(rt_se, rt_rq);
1269 }
1270
1271 /*
1272 * Because the prio of an upper entry depends on the lower
1273 * entries, we must remove entries top-down.
1274 */
1275 static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
1276 {
1277 struct sched_rt_entity *back = NULL;
1278
1279 for_each_sched_rt_entity(rt_se) {
1280 rt_se->back = back;
1281 back = rt_se;
1282 }
1283
1284 dequeue_top_rt_rq(rt_rq_of_se(back));
1285
1286 for (rt_se = back; rt_se; rt_se = rt_se->back) {
1287 if (on_rt_rq(rt_se))
1288 __dequeue_rt_entity(rt_se, flags);
1289 }
1290 }
1291
1292 static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
1293 {
1294 struct rq *rq = rq_of_rt_se(rt_se);
1295
1296 dequeue_rt_stack(rt_se, flags);
1297 for_each_sched_rt_entity(rt_se)
1298 __enqueue_rt_entity(rt_se, flags);
1299 enqueue_top_rt_rq(&rq->rt);
1300 }
1301
1302 static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
1303 {
1304 struct rq *rq = rq_of_rt_se(rt_se);
1305
1306 dequeue_rt_stack(rt_se, flags);
1307
1308 for_each_sched_rt_entity(rt_se) {
1309 struct rt_rq *rt_rq = group_rt_rq(rt_se);
1310
1311 if (rt_rq && rt_rq->rt_nr_running)
1312 __enqueue_rt_entity(rt_se, flags);
1313 }
1314 enqueue_top_rt_rq(&rq->rt);
1315 }
1316
1317 /*
1318 * Adding/removing a task to/from a priority array:
1319 */
1320 static void
1321 enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
1322 {
1323 struct sched_rt_entity *rt_se = &p->rt;
1324
1325 if (flags & ENQUEUE_WAKEUP)
1326 rt_se->timeout = 0;
1327
1328 enqueue_rt_entity(rt_se, flags);
1329 walt_inc_cumulative_runnable_avg(rq, p);
1330
1331 if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
1332 enqueue_pushable_task(rq, p);
1333 }
1334
1335 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
1336 {
1337 struct sched_rt_entity *rt_se = &p->rt;
1338
1339 update_curr_rt(rq);
1340 dequeue_rt_entity(rt_se, flags);
1341 walt_dec_cumulative_runnable_avg(rq, p);
1342
1343 dequeue_pushable_task(rq, p);
1344 }
1345
1346 /*
1347 * Put the task at the head or the end of the run list without the overhead of
1348 * dequeue followed by enqueue.
1349 */
1350 static void
1351 requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
1352 {
1353 if (on_rt_rq(rt_se)) {
1354 struct rt_prio_array *array = &rt_rq->active;
1355 struct list_head *queue = array->queue + rt_se_prio(rt_se);
1356
1357 if (head)
1358 list_move(&rt_se->run_list, queue);
1359 else
1360 list_move_tail(&rt_se->run_list, queue);
1361 }
1362 }
1363
1364 static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
1365 {
1366 struct sched_rt_entity *rt_se = &p->rt;
1367 struct rt_rq *rt_rq;
1368
1369 for_each_sched_rt_entity(rt_se) {
1370 rt_rq = rt_rq_of_se(rt_se);
1371 requeue_rt_entity(rt_rq, rt_se, head);
1372 }
1373 }
1374
1375 static void yield_task_rt(struct rq *rq)
1376 {
1377 requeue_task_rt(rq, rq->curr, 0);
1378 }
1379
1380 #ifdef CONFIG_SMP
1381 static int find_lowest_rq(struct task_struct *task);
1382
1383 static int
1384 select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags,
1385 int sibling_count_hint)
1386 {
1387 struct task_struct *curr;
1388 struct rq *rq;
1389
1390 /* For anything but wake ups, just return the task_cpu */
1391 if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
1392 goto out;
1393
1394 rq = cpu_rq(cpu);
1395
1396 rcu_read_lock();
1397 curr = READ_ONCE(rq->curr); /* unlocked access */
1398
1399 /*
1400 * If the current task on @p's runqueue is an RT task, then
1401 * try to see if we can wake this RT task up on another
1402 * runqueue. Otherwise simply start this RT task
1403 * on its current runqueue.
1404 *
1405 * We want to avoid overloading runqueues. If the woken
1406 * task is a higher priority, then it will stay on this CPU
1407 * and the lower prio task should be moved to another CPU.
1408 * Even though this will probably make the lower prio task
1409 * lose its cache, we do not want to bounce a higher task
1410 * around just because it gave up its CPU, perhaps for a
1411 * lock?
1412 *
1413 * For equal prio tasks, we just let the scheduler sort it out.
1414 *
1415 * Otherwise, just let it ride on the affined RQ and the
1416 * post-schedule router will push the preempted task away
1417 *
1418 * This test is optimistic; if we get it wrong, the load-balancer
1419 * will have to sort it out.
1420 */
1421 if (curr && unlikely(rt_task(curr)) &&
1422 (curr->nr_cpus_allowed < 2 ||
1423 curr->prio <= p->prio)) {
1424 int target = find_lowest_rq(p);
1425
1426 /*
1427 * Don't bother moving it if the destination CPU is
1428 * not running a lower priority task.
1429 */
1430 if (target != -1 &&
1431 p->prio < cpu_rq(target)->rt.highest_prio.curr)
1432 cpu = target;
1433 }
1434 rcu_read_unlock();
1435
1436 out:
1437 return cpu;
1438 }
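
/*
 * Example walk-through (illustrative): a prio-80 RT task wakes on CPU 0
 * whose current task is a pinned prio-70 RT task (lower prio value means
 * higher priority). The condition above holds, so find_lowest_rq() is
 * consulted; if it reports CPU 3 running only CFS work (highest_prio.curr
 * == MAX_RT_PRIO), the waking task is placed there instead of waiting
 * behind the higher-priority task on CPU 0.
 */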
1439
1440 static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
1441 {
1442 /*
1443 * Current can't be migrated, useless to reschedule,
1444 * let's hope p can move out.
1445 */
1446 if (rq->curr->nr_cpus_allowed == 1 ||
1447 !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
1448 return;
1449
1450 /*
1451 * p is migratable, so let's not schedule it and
1452 * see if it is pushed or pulled somewhere else.
1453 */
1454 if (p->nr_cpus_allowed != 1
1455 && cpupri_find(&rq->rd->cpupri, p, NULL))
1456 return;
1457
1458 /*
1459 * There appear to be other cpus that can accept
1460 * current and none to run 'p', so let's reschedule
1461 * to try to push current away:
1462 */
1463 requeue_task_rt(rq, p, 1);
1464 resched_curr(rq);
1465 }
1466
1467 #endif /* CONFIG_SMP */
1468
1469 /*
1470 * Preempt the current task with a newly woken task if needed:
1471 */
1472 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
1473 {
1474 if (p->prio < rq->curr->prio) {
1475 resched_curr(rq);
1476 return;
1477 }
1478
1479 #ifdef CONFIG_SMP
1480 /*
1481 * If:
1482 *
1483 * - the newly woken task is of equal priority to the current task
1484 * - the newly woken task is non-migratable while current is migratable
1485 * - current will be preempted on the next reschedule
1486 *
1487 * we should check to see if current can readily move to a different
1488 * cpu. If so, we will reschedule to allow the push logic to try
1489 * to move current somewhere else, making room for our non-migratable
1490 * task.
1491 */
1492 if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
1493 check_preempt_equal_prio(rq, p);
1494 #endif
1495 }
1496
1497 static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
1498 struct rt_rq *rt_rq)
1499 {
1500 struct rt_prio_array *array = &rt_rq->active;
1501 struct sched_rt_entity *next = NULL;
1502 struct list_head *queue;
1503 int idx;
1504
1505 idx = sched_find_first_bit(array->bitmap);
1506 BUG_ON(idx >= MAX_RT_PRIO);
1507
1508 queue = array->queue + idx;
1509 next = list_entry(queue->next, struct sched_rt_entity, run_list);
1510
1511 return next;
1512 }
1513
1514 static struct task_struct *_pick_next_task_rt(struct rq *rq)
1515 {
1516 struct sched_rt_entity *rt_se;
1517 struct task_struct *p;
1518 struct rt_rq *rt_rq = &rq->rt;
1519
1520 do {
1521 rt_se = pick_next_rt_entity(rq, rt_rq);
1522 BUG_ON(!rt_se);
1523 rt_rq = group_rt_rq(rt_se);
1524 } while (rt_rq);
1525
1526 p = rt_task_of(rt_se);
1527 p->se.exec_start = rq_clock_task(rq);
1528
1529 return p;
1530 }
1531
1532 extern int update_rt_rq_load_avg(u64 now, int cpu, struct rt_rq *rt_rq, int running);
1533
1534 static struct task_struct *
1535 pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
1536 {
1537 struct task_struct *p;
1538 struct rt_rq *rt_rq = &rq->rt;
1539
1540 if (need_pull_rt_task(rq, prev)) {
1541 /*
1542 * This is OK, because current is on_cpu, which avoids it being
1543 * picked for load-balance and preemption/IRQs are still
1544 * disabled avoiding further scheduler activity on it and we're
1545 * being very careful to re-start the picking loop.
1546 */
1547 rq_unpin_lock(rq, rf);
1548 pull_rt_task(rq);
1549 rq_repin_lock(rq, rf);
1550 /*
1551 * pull_rt_task() can drop (and re-acquire) rq->lock; this
1552 * means a dl or stop task can slip in, in which case we need
1553 * to re-start task selection.
1554 */
1555 if (unlikely((rq->stop && task_on_rq_queued(rq->stop)) ||
1556 rq->dl.dl_nr_running))
1557 return RETRY_TASK;
1558 }
1559
1560 /*
1561 * We may dequeue prev's rt_rq in put_prev_task().
1562 * So, we update the time before the rt_nr_running check.
1563 */
1564 if (prev->sched_class == &rt_sched_class)
1565 update_curr_rt(rq);
1566
1567 if (!rt_rq->rt_queued)
1568 return NULL;
1569
1570 put_prev_task(rq, prev);
1571
1572 p = _pick_next_task_rt(rq);
1573
1574 /* The running task is never eligible for pushing */
1575 dequeue_pushable_task(rq, p);
1576
1577 queue_push_tasks(rq);
1578
1579 if (p)
1580 update_rt_rq_load_avg(rq_clock_task(rq), cpu_of(rq), rt_rq,
1581 rq->curr->sched_class == &rt_sched_class);
1582
1583 return p;
1584 }
1585
1586 static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
1587 {
1588 update_curr_rt(rq);
1589
1590 update_rt_rq_load_avg(rq_clock_task(rq), cpu_of(rq), &rq->rt, 1);
1591
1592 /*
1593 * The previous task needs to be made eligible for pushing
1594 * if it is still active
1595 */
1596 if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
1597 enqueue_pushable_task(rq, p);
1598 }
1599
1600 #ifdef CONFIG_SMP
1601
1602 /* Only try algorithms three times */
1603 #define RT_MAX_TRIES 3
1604
1605 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
1606 {
1607 if (!task_running(rq, p) &&
1608 cpumask_test_cpu(cpu, &p->cpus_allowed))
1609 return 1;
1610 return 0;
1611 }
1612
1613 /*
1614 * Return the rq's highest-priority pushable task that is suitable to be
1615 * executed on the given cpu, or NULL otherwise.
1616 */
1617 static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
1618 {
1619 struct plist_head *head = &rq->rt.pushable_tasks;
1620 struct task_struct *p;
1621
1622 if (!has_pushable_tasks(rq))
1623 return NULL;
1624
1625 plist_for_each_entry(p, head, pushable_tasks) {
1626 if (pick_rt_task(rq, p, cpu))
1627 return p;
1628 }
1629
1630 return NULL;
1631 }
1632
1633 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
1634
1635 static int find_lowest_rq(struct task_struct *task)
1636 {
1637 struct sched_domain *sd;
1638 struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
1639 int this_cpu = smp_processor_id();
1640 int cpu = task_cpu(task);
1641
1642 /* Make sure the mask is initialized first */
1643 if (unlikely(!lowest_mask))
1644 return -1;
1645
1646 if (task->nr_cpus_allowed == 1)
1647 return -1; /* No other targets possible */
1648
1649 if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
1650 return -1; /* No targets found */
1651
1652 /*
1653 * At this point we have built a mask of cpus representing the
1654 * lowest priority tasks in the system. Now we want to elect
1655 * the best one based on our affinity and topology.
1656 *
1657 * We prioritize the last cpu that the task executed on since
1658 * it is most likely cache-hot in that location.
1659 */
1660 if (cpumask_test_cpu(cpu, lowest_mask))
1661 return cpu;
1662
1663 /*
1664 * Otherwise, we consult the sched_domains span maps to figure
1665 * out which cpu is logically closest to our hot cache data.
1666 */
1667 if (!cpumask_test_cpu(this_cpu, lowest_mask))
1668 this_cpu = -1; /* Skip this_cpu opt if not among lowest */
1669
1670 rcu_read_lock();
1671 for_each_domain(cpu, sd) {
1672 if (sd->flags & SD_WAKE_AFFINE) {
1673 int best_cpu;
1674
1675 /*
1676 * "this_cpu" is cheaper to preempt than a
1677 * remote processor.
1678 */
1679 if (this_cpu != -1 &&
1680 cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
1681 rcu_read_unlock();
1682 return this_cpu;
1683 }
1684
1685 best_cpu = cpumask_first_and(lowest_mask,
1686 sched_domain_span(sd));
1687 if (best_cpu < nr_cpu_ids) {
1688 rcu_read_unlock();
1689 return best_cpu;
1690 }
1691 }
1692 }
1693 rcu_read_unlock();
1694
1695 /*
1696 * And finally, if there were no matches within the domains
1697 * just give the caller *something* to work with from the compatible
1698 * locations.
1699 */
1700 if (this_cpu != -1)
1701 return this_cpu;
1702
1703 cpu = cpumask_any(lowest_mask);
1704 if (cpu < nr_cpu_ids)
1705 return cpu;
1706 return -1;
1707 }
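
/*
 * Example (illustrative): the task last ran on CPU 2 and cpupri reports
 * lowest_mask = { CPU 4, CPU 5 }. CPU 2 is not in the mask, so the
 * sched_domain levels around CPU 2 are walked outward and the first mask
 * member sharing an SD_WAKE_AFFINE span with CPU 2 (say CPU 4) is chosen,
 * preferring topological closeness to the task's cached data.
 */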
1708
1709 /* Will lock the rq it finds */
1710 static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
1711 {
1712 struct rq *lowest_rq = NULL;
1713 int tries;
1714 int cpu;
1715
1716 for (tries = 0; tries < RT_MAX_TRIES; tries++) {
1717 cpu = find_lowest_rq(task);
1718
1719 if ((cpu == -1) || (cpu == rq->cpu))
1720 break;
1721
1722 lowest_rq = cpu_rq(cpu);
1723
1724 if (lowest_rq->rt.highest_prio.curr <= task->prio) {
1725 /*
1726 * Target rq has tasks of equal or higher priority,
1727 * retrying does not release any lock and is unlikely
1728 * to yield a different result.
1729 */
1730 lowest_rq = NULL;
1731 break;
1732 }
1733
1734 /* if the prio of this runqueue changed, try again */
1735 if (double_lock_balance(rq, lowest_rq)) {
1736 /*
1737 * We had to unlock the run queue. In
1738 * the meantime, the task could have
1739 * migrated already or had its affinity changed.
1740 * Also make sure that it wasn't scheduled on its rq.
1741 */
1742 if (unlikely(task_rq(task) != rq ||
1743 !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) ||
1744 task_running(rq, task) ||
1745 !rt_task(task) ||
1746 !task_on_rq_queued(task))) {
1747
1748 double_unlock_balance(rq, lowest_rq);
1749 lowest_rq = NULL;
1750 break;
1751 }
1752 }
1753
1754 /* If this rq is still suitable use it. */
1755 if (lowest_rq->rt.highest_prio.curr > task->prio)
1756 break;
1757
1758 /* try again */
1759 double_unlock_balance(rq, lowest_rq);
1760 lowest_rq = NULL;
1761 }
1762
1763 return lowest_rq;
1764 }
1765
1766 static struct task_struct *pick_next_pushable_task(struct rq *rq)
1767 {
1768 struct task_struct *p;
1769
1770 if (!has_pushable_tasks(rq))
1771 return NULL;
1772
1773 p = plist_first_entry(&rq->rt.pushable_tasks,
1774 struct task_struct, pushable_tasks);
1775
1776 BUG_ON(rq->cpu != task_cpu(p));
1777 BUG_ON(task_current(rq, p));
1778 BUG_ON(p->nr_cpus_allowed <= 1);
1779
1780 BUG_ON(!task_on_rq_queued(p));
1781 BUG_ON(!rt_task(p));
1782
1783 return p;
1784 }
1785
1786 /*
1787 * If the current CPU has more than one RT task, see if the
1788 * non-running task can migrate over to a CPU that is running a task
1789 * of lesser priority.
1790 */
1791 static int push_rt_task(struct rq *rq)
1792 {
1793 struct task_struct *next_task;
1794 struct rq *lowest_rq;
1795 int ret = 0;
1796
1797 if (!rq->rt.overloaded)
1798 return 0;
1799
1800 next_task = pick_next_pushable_task(rq);
1801 if (!next_task)
1802 return 0;
1803
1804 retry:
1805 if (unlikely(next_task == rq->curr)) {
1806 WARN_ON(1);
1807 return 0;
1808 }
1809
1810 /*
1811 * It's possible that the next_task that slipped in is of
1812 * higher priority than current. If that's the case,
1813 * just reschedule current.
1814 */
1815 if (unlikely(next_task->prio < rq->curr->prio)) {
1816 resched_curr(rq);
1817 return 0;
1818 }
1819
1820 /* We might release rq lock */
1821 get_task_struct(next_task);
1822
1823 /* find_lock_lowest_rq locks the rq if found */
1824 lowest_rq = find_lock_lowest_rq(next_task, rq);
1825 if (!lowest_rq) {
1826 struct task_struct *task;
1827 /*
1828 * find_lock_lowest_rq releases rq->lock
1829 * so it is possible that next_task has migrated.
1830 *
1831 * We need to make sure that the task is still on the same
1832 * run-queue and is also still the next task eligible for
1833 * pushing.
1834 */
1835 task = pick_next_pushable_task(rq);
1836 if (task == next_task) {
1837 /*
1838 * The task hasn't migrated, and is still the next
1839 * eligible task, but we failed to find a run-queue
1840 * to push it to. Do not retry in this case, since
1841 * other cpus will pull from us when ready.
1842 */
1843 goto out;
1844 }
1845
1846 if (!task)
1847 /* No more tasks, just exit */
1848 goto out;
1849
1850 /*
1851 * Something has shifted, try again.
1852 */
1853 put_task_struct(next_task);
1854 next_task = task;
1855 goto retry;
1856 }
1857
1858 deactivate_task(rq, next_task, 0);
1859 next_task->on_rq = TASK_ON_RQ_MIGRATING;
1860 set_task_cpu(next_task, lowest_rq->cpu);
1861 next_task->on_rq = TASK_ON_RQ_QUEUED;
1862 activate_task(lowest_rq, next_task, 0);
1863 ret = 1;
1864
1865 resched_curr(lowest_rq);
1866
1867 double_unlock_balance(rq, lowest_rq);
1868
1869 out:
1870 put_task_struct(next_task);
1871
1872 return ret;
1873 }
1874
1875 static void push_rt_tasks(struct rq *rq)
1876 {
1877 /* push_rt_task will return true if it moved an RT */
1878 while (push_rt_task(rq))
1879 ;
1880 }
1881
1882 #ifdef HAVE_RT_PUSH_IPI
1883
1884 /*
1885 * When a high priority task schedules out from a CPU and a lower priority
1886 * task is scheduled in, a check is made to see if there's any RT tasks
1887 * on other CPUs that are waiting to run because a higher priority RT task
1888 * is currently running on its CPU. In this case, the CPU with multiple RT
1889 * tasks queued on it (overloaded) needs to be notified that a CPU has opened
1890 * up that may be able to run one of its non-running queued RT tasks.
1891 *
1892 * All CPUs with overloaded RT tasks need to be notified as there is currently
1893 * no way to know which of these CPUs have the highest priority task waiting
1894 * to run. Instead of trying to take a spinlock on each of these CPUs,
1895 * which has been shown to cause large latency when done on machines with
1896 * many CPUs, an IPI is sent to the CPUs to have them push off the
1897 * overloaded RT tasks waiting to run.
1898 *
1899 * Just sending an IPI to each of the CPUs is also an issue, as on large
1900 * count CPU machines, this can cause an IPI storm on a CPU, especially
1901 * if it's the only CPU with multiple RT tasks queued, and a large number
1902 * of CPUs scheduling a lower priority task at the same time.
1903 *
1904 * Each root domain has its own irq work function that can iterate over
1905 * all CPUs with RT overloaded tasks. Since all CPUs with overloaded RT
1906 * tasks must be checked if there's one or many CPUs that are lowering
1907 * their priority, there's a single irq work iterator that will try to
1908 * push off RT tasks that are waiting to run.
1909 *
1910 * When a CPU schedules a lower priority task, it will kick off the
1911 * irq work iterator that will jump to each CPU with overloaded RT tasks.
1912 * As it only takes the first CPU that schedules a lower priority task
1913 * to start the process, the rto_loop_start variable is incremented and if
1914 * the atomic result is one, then that CPU will try to take the rto_lock.
1915 * This prevents high contention on the lock as the process handles all
1916 * CPUs scheduling lower priority tasks.
1917 *
1918 * All CPUs that are scheduling a lower priority task will increment the
1919 * rto_loop_next variable. This will make sure that the irq work iterator
1920 * checks all RT overloaded CPUs whenever a CPU schedules a new lower
1921 * priority task, even if the iterator is in the middle of a scan. Incrementing
1922 * the rto_loop_next will cause the iterator to perform another scan.
1923 *
1924 */
1925 static int rto_next_cpu(struct root_domain *rd)
1926 {
1927 int next;
1928 int cpu;
1929
1930 /*
1931 * When starting the IPI RT pushing, the rto_cpu is set to -1,
1932 * so rto_next_cpu() will simply return the first CPU found in
1933 * the rto_mask.
1934 *
1935 * If rto_next_cpu() is called while rto_cpu is a valid cpu, it
1936 * will return the next CPU found in the rto_mask.
1937 *
1938 * If there are no more CPUs left in the rto_mask, then a check is made
1939 * against rto_loop and rto_loop_next. rto_loop is only updated with
1940 * the rto_lock held, but any CPU may increment the rto_loop_next
1941 * without any locking.
1942 */
1943 for (;;) {
1944
1945 /* When rto_cpu is -1 this acts like cpumask_first() */
1946 cpu = cpumask_next(rd->rto_cpu, rd->rto_mask);
1947
1948 rd->rto_cpu = cpu;
1949
1950 if (cpu < nr_cpu_ids)
1951 return cpu;
1952
1953 rd->rto_cpu = -1;
1954
1955 /*
1956 * ACQUIRE ensures we see the @rto_mask changes
1957 * made prior to the @next value observed.
1958 *
1959 * Matches WMB in rt_set_overload().
1960 */
1961 next = atomic_read_acquire(&rd->rto_loop_next);
1962
1963 if (rd->rto_loop == next)
1964 break;
1965
1966 rd->rto_loop = next;
1967 }
1968
1969 return -1;
1970 }
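
/*
 * Sketch of one IPI chain (illustrative): CPU 0 drops to a CFS task and
 * calls tell_cpu_to_push(); rto_loop_next is bumped and, with rto_cpu == -1,
 * rto_next_cpu() hands back the first bit in rto_mask, say CPU 2. CPU 2's
 * irq work pushes what it can and forwards the IPI to the next overloaded
 * CPU; when the mask is exhausted and rto_loop has caught up with
 * rto_loop_next, the chain stops and rto_cpu is parked at -1 again.
 */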
1971
1972 static inline bool rto_start_trylock(atomic_t *v)
1973 {
1974 return !atomic_cmpxchg_acquire(v, 0, 1);
1975 }
1976
1977 static inline void rto_start_unlock(atomic_t *v)
1978 {
1979 atomic_set_release(v, 0);
1980 }
1981
1982 static void tell_cpu_to_push(struct rq *rq)
1983 {
1984 int cpu = -1;
1985
1986 /* Keep the loop going if the IPI is currently active */
1987 atomic_inc(&rq->rd->rto_loop_next);
1988
1989 /* Only one CPU can initiate a loop at a time */
1990 if (!rto_start_trylock(&rq->rd->rto_loop_start))
1991 return;
1992
1993 raw_spin_lock(&rq->rd->rto_lock);
1994
1995 /*
1996 * The rto_cpu is updated under the lock; if it has a valid cpu
1997 * then the IPI is still running and will continue due to the
1998 * update to loop_next, and nothing needs to be done here.
1999 * Otherwise it is finishing up and an IPI needs to be sent.
2000 */
2001 if (rq->rd->rto_cpu < 0)
2002 cpu = rto_next_cpu(rq->rd);
2003
2004 raw_spin_unlock(&rq->rd->rto_lock);
2005
2006 rto_start_unlock(&rq->rd->rto_loop_start);
2007
2008 if (cpu >= 0) {
2009 /* Make sure the rd does not get freed while pushing */
2010 sched_get_rd(rq->rd);
2011 irq_work_queue_on(&rq->rd->rto_push_work, cpu);
2012 }
2013 }
2014
2015 /* Called from hardirq context */
2016 void rto_push_irq_work_func(struct irq_work *work)
2017 {
2018 struct root_domain *rd =
2019 container_of(work, struct root_domain, rto_push_work);
2020 struct rq *rq;
2021 int cpu;
2022
2023 rq = this_rq();
2024
2025 /*
2026 * We do not need to grab the lock to check for has_pushable_tasks.
2027 * When it gets updated, a check is made if a push is possible.
2028 */
2029 if (has_pushable_tasks(rq)) {
2030 raw_spin_lock(&rq->lock);
2031 push_rt_tasks(rq);
2032 raw_spin_unlock(&rq->lock);
2033 }
2034
2035 raw_spin_lock(&rd->rto_lock);
2036
2037 /* Pass the IPI to the next rt overloaded queue */
2038 cpu = rto_next_cpu(rd);
2039
2040 raw_spin_unlock(&rd->rto_lock);
2041
2042 if (cpu < 0) {
2043 sched_put_rd(rd);
2044 return;
2045 }
2046
2047 /* Try the next RT overloaded CPU */
2048 irq_work_queue_on(&rd->rto_push_work, cpu);
2049 }
2050 #endif /* HAVE_RT_PUSH_IPI */
2051
2052 static void pull_rt_task(struct rq *this_rq)
2053 {
2054 int this_cpu = this_rq->cpu, cpu;
2055 bool resched = false;
2056 struct task_struct *p;
2057 struct rq *src_rq;
2058 int rt_overload_count = rt_overloaded(this_rq);
2059
2060 if (likely(!rt_overload_count))
2061 return;
2062
2063 /*
2064 * Match the barrier from rt_set_overload(); this guarantees that if we
2065 * see overloaded we must also see the rto_mask bit.
2066 */
2067 smp_rmb();
2068
2069 /* If we are the only overloaded CPU do nothing */
2070 if (rt_overload_count == 1 &&
2071 cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask))
2072 return;
2073
2074 #ifdef HAVE_RT_PUSH_IPI
2075 if (sched_feat(RT_PUSH_IPI)) {
2076 tell_cpu_to_push(this_rq);
2077 return;
2078 }
2079 #endif
2080
2081 for_each_cpu(cpu, this_rq->rd->rto_mask) {
2082 if (this_cpu == cpu)
2083 continue;
2084
2085 src_rq = cpu_rq(cpu);
2086
2087 /*
2088 * Don't bother taking the src_rq->lock if the next highest
2089 * task is known to be lower-priority than our current task.
2090 * This may look racy, but if this value is about to go
2091 * logically higher, the src_rq will push this task away.
2092 * And if it's going logically lower, we do not care.
2093 */
2094 if (src_rq->rt.highest_prio.next >=
2095 this_rq->rt.highest_prio.curr)
2096 continue;
2097
2098 /*
2099 * We can potentially drop this_rq's lock in
2100 * double_lock_balance, and another CPU could
2101 * alter this_rq
2102 */
2103 double_lock_balance(this_rq, src_rq);
2104
2105 /*
2106 * We can only pull a task that is pushable
2107 * on its rq, and no others.
2108 */
2109 p = pick_highest_pushable_task(src_rq, this_cpu);
2110
2111 /*
2112 * Do we have an RT task that preempts
2113 * the to-be-scheduled task?
2114 */
2115 if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
2116 WARN_ON(p == src_rq->curr);
2117 WARN_ON(!task_on_rq_queued(p));
2118
2119 /*
2120 * There's a chance that p is higher in priority
2121 * than what's currently running on its CPU.
2122 * This is just that p is waking up and hasn't
2123 * had a chance to schedule. We only pull
2124 * p if it is lower in priority than the
2125 * current task on the run queue.
2126 */
2127 if (p->prio < src_rq->curr->prio)
2128 goto skip;
2129
2130 resched = true;
2131
2132 deactivate_task(src_rq, p, 0);
2133 p->on_rq = TASK_ON_RQ_MIGRATING;
2134 set_task_cpu(p, this_cpu);
2135 p->on_rq = TASK_ON_RQ_QUEUED;
2136 activate_task(this_rq, p, 0);
2137 /*
2138 * We continue with the search, just in
2139 * case there's an even higher prio task
2140 * in another runqueue. (low likelihood
2141 * but possible)
2142 */
2143 }
2144 skip:
2145 double_unlock_balance(this_rq, src_rq);
2146 }
2147
2148 if (resched)
2149 resched_curr(this_rq);
2150 }
2151
2152 /*
2153 * If we are not running and we are not going to reschedule soon, we should
2154 * try to push tasks away now
2155 */
2156 static void task_woken_rt(struct rq *rq, struct task_struct *p)
2157 {
2158 if (!task_running(rq, p) &&
2159 !test_tsk_need_resched(rq->curr) &&
2160 p->nr_cpus_allowed > 1 &&
2161 (dl_task(rq->curr) || rt_task(rq->curr)) &&
2162 (rq->curr->nr_cpus_allowed < 2 ||
2163 rq->curr->prio <= p->prio))
2164 push_rt_tasks(rq);
2165 }
2166
2167 /* Assumes rq->lock is held */
2168 static void rq_online_rt(struct rq *rq)
2169 {
2170 if (rq->rt.overloaded)
2171 rt_set_overload(rq);
2172
2173 __enable_runtime(rq);
2174
2175 cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
2176 }
2177
2178 /* Assumes rq->lock is held */
2179 static void rq_offline_rt(struct rq *rq)
2180 {
2181 if (rq->rt.overloaded)
2182 rt_clear_overload(rq);
2183
2184 __disable_runtime(rq);
2185
2186 cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
2187 }
2188
2189 /*
2190 * When switching from the RT queue, we bring ourselves to a position
2191 * where we might want to pull RT tasks from other runqueues.
2192 */
2193 static void switched_from_rt(struct rq *rq, struct task_struct *p)
2194 {
2195 /*
2196 * If there are other RT tasks then we will reschedule
2197 * and the scheduling of the other RT tasks will handle
2198 * the balancing. But if we are the last RT task
2199 * we may need to handle the pulling of RT tasks
2200 * now.
2201 */
2202 if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
2203 return;
2204
2205 queue_pull_task(rq);
2206 }
2207
2208 void __init init_sched_rt_class(void)
2209 {
2210 unsigned int i;
2211
2212 for_each_possible_cpu(i) {
2213 zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
2214 GFP_KERNEL, cpu_to_node(i));
2215 }
2216 }
2217 #endif /* CONFIG_SMP */
2218
2219 /*
2220 * When switching a task to RT, we may overload the runqueue
2221 * with RT tasks. In this case we try to push them off to
2222 * other runqueues.
2223 */
2224 static void switched_to_rt(struct rq *rq, struct task_struct *p)
2225 {
2226 /*
2227 * If we are already running, then there's nothing
2228 * that needs to be done. But if we are not running,
2229 * we may need to preempt the currently running task.
2230 * If that currently running task is also an RT task,
2231 * then see if we can move to another run queue.
2232 */
2233 if (task_on_rq_queued(p) && rq->curr != p) {
2234 #ifdef CONFIG_SMP
2235 if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
2236 queue_push_tasks(rq);
2237 #endif /* CONFIG_SMP */
2238 if (p->prio < rq->curr->prio)
2239 resched_curr(rq);
2240 }
2241 }
2242
2243 /*
2244 * Priority of the task has changed. This may cause
2245 * us to initiate a push or pull.
2246 */
2247 static void
2248 prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
2249 {
2250 if (!task_on_rq_queued(p))
2251 return;
2252
2253 if (rq->curr == p) {
2254 #ifdef CONFIG_SMP
2255 /*
2256 * If our priority decreases while running, we
2257 * may need to pull tasks to this runqueue.
2258 */
2259 if (oldprio < p->prio)
2260 queue_pull_task(rq);
2261
2262 /*
2263 * If there's a higher priority task waiting to run
2264 * then reschedule.
2265 */
2266 if (p->prio > rq->rt.highest_prio.curr)
2267 resched_curr(rq);
2268 #else
2269 /* For UP simply resched on drop of prio */
2270 if (oldprio < p->prio)
2271 resched_curr(rq);
2272 #endif /* CONFIG_SMP */
2273 } else {
2274 /*
2275 * This task is not running, but if it is
2276 * higher in priority than the currently running task,
2277 * then reschedule.
2278 */
2279 if (p->prio < rq->curr->prio)
2280 resched_curr(rq);
2281 }
2282 }
2283
2284 #ifdef CONFIG_POSIX_TIMERS
2285 static void watchdog(struct rq *rq, struct task_struct *p)
2286 {
2287 unsigned long soft, hard;
2288
2289 /* max may change after cur was read; this will be fixed next tick */
2290 soft = task_rlimit(p, RLIMIT_RTTIME);
2291 hard = task_rlimit_max(p, RLIMIT_RTTIME);
2292
2293 if (soft != RLIM_INFINITY) {
2294 unsigned long next;
2295
2296 if (p->rt.watchdog_stamp != jiffies) {
2297 p->rt.timeout++;
2298 p->rt.watchdog_stamp = jiffies;
2299 }
2300
2301 next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
2302 if (p->rt.timeout > next)
2303 p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
2304 }
2305 }
2306 #else
2307 static inline void watchdog(struct rq *rq, struct task_struct *p) { }
2308 #endif
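/*
 * Illustrative (not part of this file): the timeout accounted above is
 * driven by RLIMIT_RTTIME, which userspace sets in microseconds of CPU
 * time consumed without blocking. A sketch of how a task might opt in:
 *
 *	#include <sys/resource.h>
 *
 *	struct rlimit rl = {
 *		.rlim_cur = 5000,	// soft limit: 5 ms -> SIGXCPU
 *		.rlim_max = 10000,	// hard limit: 10 ms -> SIGKILL
 *	};
 *	setrlimit(RLIMIT_RTTIME, &rl);
 */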
2309
2310 static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
2311 {
2312 struct sched_rt_entity *rt_se = &p->rt;
2313
2314 update_curr_rt(rq);
2315 update_rt_rq_load_avg(rq_clock_task(rq), cpu_of(rq), &rq->rt, 1);
2316
2317 watchdog(rq, p);
2318
2319 /*
2320 * RR tasks need a special form of timeslice management.
2321 * FIFO tasks have no timeslices.
2322 */
2323 if (p->policy != SCHED_RR)
2324 return;
2325
2326 if (--p->rt.time_slice)
2327 return;
2328
2329 p->rt.time_slice = sched_rr_timeslice;
2330
2331 /*
2332 * Requeue to the end of the queue if we (and all of our ancestors) are not
2333 * the only element on the queue.
2334 */
2335 for_each_sched_rt_entity(rt_se) {
2336 if (rt_se->run_list.prev != rt_se->run_list.next) {
2337 requeue_task_rt(rq, p, 0);
2338 resched_curr(rq);
2339 return;
2340 }
2341 }
2342 }
2343
2344 static void set_curr_task_rt(struct rq *rq)
2345 {
2346 struct task_struct *p = rq->curr;
2347
2348 p->se.exec_start = rq_clock_task(rq);
2349
2350 /* The running task is never eligible for pushing */
2351 dequeue_pushable_task(rq, p);
2352 }
2353
2354 static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
2355 {
2356 /*
2357 * Time slice is 0 for SCHED_FIFO tasks
2358 */
2359 if (task->policy == SCHED_RR)
2360 return sched_rr_timeslice;
2361 else
2362 return 0;
2363 }
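/*
 * Illustrative (not part of this file): this hook backs the
 * sched_rr_get_interval(2) system call, e.g.:
 *
 *	#include <sched.h>
 *	#include <time.h>
 *
 *	struct timespec ts;
 *	sched_rr_get_interval(0, &ts);
 *	// ts is typically ~100 ms (the default RR_TIMESLICE) for SCHED_RR
 *	// tasks, and zero for SCHED_FIFO tasks.
 */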
2364
2365 const struct sched_class rt_sched_class = {
2366 .next = &fair_sched_class,
2367 .enqueue_task = enqueue_task_rt,
2368 .dequeue_task = dequeue_task_rt,
2369 .yield_task = yield_task_rt,
2370
2371 .check_preempt_curr = check_preempt_curr_rt,
2372
2373 .pick_next_task = pick_next_task_rt,
2374 .put_prev_task = put_prev_task_rt,
2375
2376 #ifdef CONFIG_SMP
2377 .select_task_rq = select_task_rq_rt,
2378
2379 .set_cpus_allowed = set_cpus_allowed_common,
2380 .rq_online = rq_online_rt,
2381 .rq_offline = rq_offline_rt,
2382 .task_woken = task_woken_rt,
2383 .switched_from = switched_from_rt,
2384 #endif
2385
2386 .set_curr_task = set_curr_task_rt,
2387 .task_tick = task_tick_rt,
2388
2389 .get_rr_interval = get_rr_interval_rt,
2390
2391 .prio_changed = prio_changed_rt,
2392 .switched_to = switched_to_rt,
2393
2394 .update_curr = update_curr_rt,
2395 };
2396
2397 #ifdef CONFIG_RT_GROUP_SCHED
2398 /*
2399 * Ensure that the real-time constraints are schedulable.
2400 */
2401 static DEFINE_MUTEX(rt_constraints_mutex);
2402
2403 /* Must be called with tasklist_lock held */
2404 static inline int tg_has_rt_tasks(struct task_group *tg)
2405 {
2406 struct task_struct *g, *p;
2407
2408 /*
2409 * Autogroups do not have RT tasks; see autogroup_create().
2410 */
2411 if (task_group_is_autogroup(tg))
2412 return 0;
2413
2414 for_each_process_thread(g, p) {
2415 if (rt_task(p) && task_group(p) == tg)
2416 return 1;
2417 }
2418
2419 return 0;
2420 }
2421
2422 struct rt_schedulable_data {
2423 struct task_group *tg;
2424 u64 rt_period;
2425 u64 rt_runtime;
2426 };
2427
2428 static int tg_rt_schedulable(struct task_group *tg, void *data)
2429 {
2430 struct rt_schedulable_data *d = data;
2431 struct task_group *child;
2432 unsigned long total, sum = 0;
2433 u64 period, runtime;
2434
2435 period = ktime_to_ns(tg->rt_bandwidth.rt_period);
2436 runtime = tg->rt_bandwidth.rt_runtime;
2437
2438 if (tg == d->tg) {
2439 period = d->rt_period;
2440 runtime = d->rt_runtime;
2441 }
2442
2443 /*
2444 * Cannot have more runtime than the period.
2445 */
2446 if (runtime > period && runtime != RUNTIME_INF)
2447 return -EINVAL;
2448
2449 /*
2450 * Ensure we don't starve existing RT tasks.
2451 */
2452 if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
2453 return -EBUSY;
2454
2455 total = to_ratio(period, runtime);
2456
2457 /*
2458 * Nobody can have more than the global setting allows.
2459 */
2460 if (total > to_ratio(global_rt_period(), global_rt_runtime()))
2461 return -EINVAL;
2462
2463 /*
2464 * The sum of our children's runtime should not exceed our own.
2465 */
2466 list_for_each_entry_rcu(child, &tg->children, siblings) {
2467 period = ktime_to_ns(child->rt_bandwidth.rt_period);
2468 runtime = child->rt_bandwidth.rt_runtime;
2469
2470 if (child == d->tg) {
2471 period = d->rt_period;
2472 runtime = d->rt_runtime;
2473 }
2474
2475 sum += to_ratio(period, runtime);
2476 }
2477
2478 if (sum > total)
2479 return -EINVAL;
2480
2481 return 0;
2482 }
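/*
 * Worked example (assuming the default global limits of
 * sched_rt_period_us = 1000000 and sched_rt_runtime_us = 950000):
 * to_ratio() expresses runtime/period as a fixed-point fraction, so the
 * global cap is ~0.95. A group requesting runtime = 600000 with
 * period = 1000000 yields 0.6 <= 0.95 and is accepted, but two sibling
 * groups at 0.6 each would sum to 1.2 at their parent and fail with
 * -EINVAL in the sum check above.
 */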
2483
2484 static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
2485 {
2486 int ret;
2487
2488 struct rt_schedulable_data data = {
2489 .tg = tg,
2490 .rt_period = period,
2491 .rt_runtime = runtime,
2492 };
2493
2494 rcu_read_lock();
2495 ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
2496 rcu_read_unlock();
2497
2498 return ret;
2499 }
2500
2501 static int tg_set_rt_bandwidth(struct task_group *tg,
2502 u64 rt_period, u64 rt_runtime)
2503 {
2504 int i, err = 0;
2505
2506 /*
2507 * Disallowing RT runtime for the root group is BAD; it would prevent the
2508 * kernel from creating (and/or operating) RT threads.
2509 */
2510 if (tg == &root_task_group && rt_runtime == 0)
2511 return -EINVAL;
2512
2513 /* A zero period doesn't make any sense. */
2514 if (rt_period == 0)
2515 return -EINVAL;
2516
2517 mutex_lock(&rt_constraints_mutex);
2518 read_lock(&tasklist_lock);
2519 err = __rt_schedulable(tg, rt_period, rt_runtime);
2520 if (err)
2521 goto unlock;
2522
2523 raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
2524 tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
2525 tg->rt_bandwidth.rt_runtime = rt_runtime;
2526
2527 for_each_possible_cpu(i) {
2528 struct rt_rq *rt_rq = tg->rt_rq[i];
2529
2530 raw_spin_lock(&rt_rq->rt_runtime_lock);
2531 rt_rq->rt_runtime = rt_runtime;
2532 raw_spin_unlock(&rt_rq->rt_runtime_lock);
2533 }
2534 raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
2535 unlock:
2536 read_unlock(&tasklist_lock);
2537 mutex_unlock(&rt_constraints_mutex);
2538
2539 return err;
2540 }
2541
2542 int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
2543 {
2544 u64 rt_runtime, rt_period;
2545
2546 rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
2547 rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
2548 if (rt_runtime_us < 0)
2549 rt_runtime = RUNTIME_INF;
2550
2551 return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
2552 }
2553
2554 long sched_group_rt_runtime(struct task_group *tg)
2555 {
2556 u64 rt_runtime_us;
2557
2558 if (tg->rt_bandwidth.rt_runtime == RUNTIME_INF)
2559 return -1;
2560
2561 rt_runtime_us = tg->rt_bandwidth.rt_runtime;
2562 do_div(rt_runtime_us, NSEC_PER_USEC);
2563 return rt_runtime_us;
2564 }
2565
2566 int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us)
2567 {
2568 u64 rt_runtime, rt_period;
2569
2570 rt_period = rt_period_us * NSEC_PER_USEC;
2571 rt_runtime = tg->rt_bandwidth.rt_runtime;
2572
2573 return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
2574 }
2575
2576 long sched_group_rt_period(struct task_group *tg)
2577 {
2578 u64 rt_period_us;
2579
2580 rt_period_us = ktime_to_ns(tg->rt_bandwidth.rt_period);
2581 do_div(rt_period_us, NSEC_PER_USEC);
2582 return rt_period_us;
2583 }
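/*
 * Illustrative (cgroup v1 "cpu" controller): the setters above back the
 * cpu.rt_runtime_us and cpu.rt_period_us files. Writing 500000 to a
 * group's cpu.rt_runtime_us (with the default 1000000 period) allows its
 * RT tasks up to 0.5 s of CPU per 1 s period; non-root groups start at
 * zero runtime and must be given some before they can run RT tasks.
 */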
2584
2585 static int sched_rt_global_constraints(void)
2586 {
2587 int ret = 0;
2588
2589 mutex_lock(&rt_constraints_mutex);
2590 read_lock(&tasklist_lock);
2591 ret = __rt_schedulable(NULL, 0, 0);
2592 read_unlock(&tasklist_lock);
2593 mutex_unlock(&rt_constraints_mutex);
2594
2595 return ret;
2596 }
2597
2598 int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
2599 {
2600 /* Don't accept real-time tasks when there is no way for them to run */
2601 if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
2602 return 0;
2603
2604 return 1;
2605 }
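/*
 * Illustrative: this is why moving a SCHED_FIFO/SCHED_RR task into a
 * cgroup whose cpu.rt_runtime_us is still 0 is rejected by the cgroup
 * core (cpu_cgroup_can_attach() returns -EINVAL in that case).
 */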
2606
2607 #else /* !CONFIG_RT_GROUP_SCHED */
2608 static int sched_rt_global_constraints(void)
2609 {
2610 unsigned long flags;
2611 int i;
2612
2613 raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
2614 for_each_possible_cpu(i) {
2615 struct rt_rq *rt_rq = &cpu_rq(i)->rt;
2616
2617 raw_spin_lock(&rt_rq->rt_runtime_lock);
2618 rt_rq->rt_runtime = global_rt_runtime();
2619 raw_spin_unlock(&rt_rq->rt_runtime_lock);
2620 }
2621 raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
2622
2623 return 0;
2624 }
2625 #endif /* CONFIG_RT_GROUP_SCHED */
2626
2627 static int sched_rt_global_validate(void)
2628 {
2629 if (sysctl_sched_rt_period <= 0)
2630 return -EINVAL;
2631
2632 if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
2633 (sysctl_sched_rt_runtime > sysctl_sched_rt_period))
2634 return -EINVAL;
2635
2636 return 0;
2637 }
2638
2639 static void sched_rt_do_global(void)
2640 {
2641 def_rt_bandwidth.rt_runtime = global_rt_runtime();
2642 def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
2643 }
2644
2645 int sched_rt_handler(struct ctl_table *table, int write,
2646 void __user *buffer, size_t *lenp,
2647 loff_t *ppos)
2648 {
2649 int old_period, old_runtime;
2650 static DEFINE_MUTEX(mutex);
2651 int ret;
2652
2653 mutex_lock(&mutex);
2654 old_period = sysctl_sched_rt_period;
2655 old_runtime = sysctl_sched_rt_runtime;
2656
2657 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2658
2659 if (!ret && write) {
2660 ret = sched_rt_global_validate();
2661 if (ret)
2662 goto undo;
2663
2664 ret = sched_dl_global_validate();
2665 if (ret)
2666 goto undo;
2667
2668 ret = sched_rt_global_constraints();
2669 if (ret)
2670 goto undo;
2671
2672 sched_rt_do_global();
2673 sched_dl_do_global();
2674 }
2675 if (0) {
2676 undo:
2677 sysctl_sched_rt_period = old_period;
2678 sysctl_sched_rt_runtime = old_runtime;
2679 }
2680 mutex_unlock(&mutex);
2681
2682 return ret;
2683 }
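/*
 * Illustrative: this handler sits behind /proc/sys/kernel/sched_rt_period_us
 * and /proc/sys/kernel/sched_rt_runtime_us. With the defaults (1000000 and
 * 950000) RT tasks may consume at most 95% of each period; writing -1 to
 * sched_rt_runtime_us disables the throttling (RUNTIME_INF).
 */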
2684
2685 int sched_rr_handler(struct ctl_table *table, int write,
2686 void __user *buffer, size_t *lenp,
2687 loff_t *ppos)
2688 {
2689 int ret;
2690 static DEFINE_MUTEX(mutex);
2691
2692 mutex_lock(&mutex);
2693 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2694 /*
2695 * Make sure that internally we keep jiffies.
2696 * Also, writing zero resets the timeslice to default:
2697 */
2698 if (!ret && write) {
2699 sched_rr_timeslice =
2700 sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
2701 msecs_to_jiffies(sysctl_sched_rr_timeslice);
2702 }
2703 mutex_unlock(&mutex);
2704 return ret;
2705 }
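/*
 * Illustrative: this handler sits behind
 * /proc/sys/kernel/sched_rr_timeslice_ms. Writing e.g. 25 sets a 25 ms
 * SCHED_RR quantum (stored internally in jiffies); writing zero or a
 * negative value restores the default RR_TIMESLICE, as noted above.
 */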
2706
2707 #ifdef CONFIG_SCHED_DEBUG
2708 extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
2709
2710 void print_rt_stats(struct seq_file *m, int cpu)
2711 {
2712 rt_rq_iter_t iter;
2713 struct rt_rq *rt_rq;
2714
2715 rcu_read_lock();
2716 for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
2717 print_rt_rq(m, cpu, rt_rq);
2718 rcu_read_unlock();
2719 }
2720 #endif /* CONFIG_SCHED_DEBUG */