Commit | Line | Data |
---|---|---|
73fbec60 FW |
1 | #include <linux/export.h> |
2 | #include <linux/sched.h> | |
3 | #include <linux/tsacct_kern.h> | |
4 | #include <linux/kernel_stat.h> | |
5 | #include <linux/static_key.h> | |
abf917cd | 6 | #include <linux/context_tracking.h> |
32ef5517 | 7 | #include <linux/sched/cputime.h> |
42d293dc | 8 | #include <linux/cpufreq_times.h> |
73fbec60 | 9 | #include "sched.h" |
0bb35550 | 10 | #include "walt.h" |
73fbec60 FW |
11 | |
12 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | |
13 | ||
14 | /* | |
15 | * There are no locks covering percpu hardirq/softirq time. | |
bf9fae9f | 16 | * They are only modified in vtime_account, on the corresponding CPU
73fbec60 FW |
17 | * with interrupts disabled. So, writes are safe.
18 | * They are read and saved off onto struct rq in update_rq_clock().
19 | * This may result in another CPU reading this CPU's irq time and can
bf9fae9f | 20 | * race with irq/vtime_account on this CPU. We would either get the old
73fbec60 FW |
21 | * or the new value, with the side effect of accounting a slice of irq time to
22 | * the wrong task when an irq is in progress while we read rq->clock. That is a
23 | * worthy compromise in place of having locks on each irq in account_system_time.
24 | */ | |
19d23dbf | 25 | DEFINE_PER_CPU(struct irqtime, cpu_irqtime); |
73fbec60 | 26 | |
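The comment above describes the lockless scheme this relies on: the writer publishes its updates inside a u64_stats_update_begin()/end() window (see irqtime_account_delta() below), and a cross-CPU reader simply retries until it observes a consistent snapshot. A minimal sketch of such a reader, assuming the u64_stats fetch API and modelled on the irq_time_read() helper the scheduler keeps in its private headers:

```c
/* Illustrative only: retry the read until no writer raced with us. */
static inline u64 irq_time_read_sketch(int cpu)
{
	struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
	unsigned int seq;
	u64 total;

	do {
		seq = u64_stats_fetch_begin(&irqtime->sync);
		total = irqtime->total;
	} while (u64_stats_fetch_retry(&irqtime->sync, seq));

	return total;
}
```

On 64-bit kernels the seqcount inside struct u64_stats_sync compiles away and the fetch collapses to a plain load, which is part of why the writer side stays cheap enough to run on every irq entry/exit.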
73fbec60 FW |
27 | static int sched_clock_irqtime; |
28 | ||
29 | void enable_sched_clock_irqtime(void) | |
30 | { | |
31 | sched_clock_irqtime = 1; | |
32 | } | |
33 | ||
34 | void disable_sched_clock_irqtime(void) | |
35 | { | |
36 | sched_clock_irqtime = 0; | |
37 | } | |
38 | ||
25e2d8c1 FW |
39 | static void irqtime_account_delta(struct irqtime *irqtime, u64 delta, |
40 | enum cpu_usage_stat idx) | |
41 | { | |
42 | u64 *cpustat = kcpustat_this_cpu->cpustat; | |
43 | ||
44 | u64_stats_update_begin(&irqtime->sync); | |
45 | cpustat[idx] += delta; | |
46 | irqtime->total += delta; | |
47 | irqtime->tick_delta += delta; | |
48 | u64_stats_update_end(&irqtime->sync); | |
49 | } | |
50 | ||
73fbec60 FW |
51 | /* |
52 | * Called before incrementing preempt_count on {soft,}irq_enter | |
53 | * and before decrementing preempt_count on {soft,}irq_exit. | |
54 | */ | |
3e1df4f5 | 55 | void irqtime_account_irq(struct task_struct *curr) |
73fbec60 | 56 | { |
19d23dbf | 57 | struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime); |
73fbec60 FW |
58 | s64 delta; |
59 | int cpu; | |
0bb35550 SV |
60 | #ifdef CONFIG_SCHED_WALT |
61 | u64 wallclock; | |
62 | bool account = true; | |
63 | #endif | |
73fbec60 FW |
64 | |
65 | if (!sched_clock_irqtime) | |
66 | return; | |
67 | ||
73fbec60 | 68 | cpu = smp_processor_id(); |
0bb35550 SV |
69 | #ifdef CONFIG_SCHED_WALT |
70 | wallclock = sched_clock_cpu(cpu); | |
71 | #endif | |
19d23dbf FW |
72 | delta = sched_clock_cpu(cpu) - irqtime->irq_start_time; |
73 | irqtime->irq_start_time += delta; | |
73fbec60 | 74 | |
73fbec60 FW |
75 | /* |
76 | * We do not account for softirq time from ksoftirqd here. | |
77 | * We want to continue accounting softirq time to the ksoftirqd thread
78 | * in that case, so as not to confuse the scheduler with a special task
79 | * that does not consume any time but still wants to run.
80 | */ | |
25e2d8c1 FW |
81 | if (hardirq_count()) |
82 | irqtime_account_delta(irqtime, delta, CPUTIME_IRQ); | |
83 | else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) | |
84 | irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ); | |
0bb35550 SV |
85 | #ifdef CONFIG_SCHED_WALT |
86 | else | |
87 | account = false; | |
88 | ||
89 | if (account) | |
90 | walt_account_irqtime(cpu, curr, delta, wallclock); | |
91 | #endif | |
73fbec60 | 92 | } |
3e1df4f5 | 93 | EXPORT_SYMBOL_GPL(irqtime_account_irq); |
73fbec60 | 94 | |
2b1f967d | 95 | static u64 irqtime_tick_accounted(u64 maxtime) |
73fbec60 | 96 | { |
a499a5a1 | 97 | struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime); |
2b1f967d | 98 | u64 delta; |
73fbec60 | 99 | |
2b1f967d FW |
100 | delta = min(irqtime->tick_delta, maxtime); |
101 | irqtime->tick_delta -= delta; | |
2810f611 | 102 | |
a499a5a1 | 103 | return delta; |
73fbec60 FW |
104 | } |
105 | ||
106 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | |
107 | ||
108 | #define sched_clock_irqtime (0) | |
109 | ||
2b1f967d | 110 | static u64 irqtime_tick_accounted(u64 dummy) |
57430218 RR |
111 | { |
112 | return 0; | |
113 | } | |
114 | ||
73fbec60 FW |
115 | #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */ |
116 | ||
117 | static inline void task_group_account_field(struct task_struct *p, int index, | |
118 | u64 tmp) | |
119 | { | |
73fbec60 FW |
120 | /* |
121 | * Since all updates are sure to touch the root cgroup, we | |
122 | * get ourselves ahead and touch it first. If the root cgroup | |
123 | * is the only cgroup, then nothing else should be necessary. | |
124 | * | |
125 | */ | |
a4f61cc0 | 126 | __this_cpu_add(kernel_cpustat.cpustat[index], tmp); |
73fbec60 | 127 | |
1966aaf7 | 128 | cpuacct_account_field(p, index, tmp); |
73fbec60 FW |
129 | } |
130 | ||
131 | /* | |
132 | * Account user cpu time to a process. | |
133 | * @p: the process that the cpu time gets accounted to | |
134 | * @cputime: the cpu time spent in user space since the last update | |
73fbec60 | 135 | */ |
23244a5c | 136 | void account_user_time(struct task_struct *p, u64 cputime) |
73fbec60 FW |
137 | { |
138 | int index; | |
139 | ||
140 | /* Add user time to process. */ | |
23244a5c FW |
141 | p->utime += cputime; |
142 | account_group_user_time(p, cputime); | |
73fbec60 | 143 | |
d0ea0268 | 144 | index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; |
73fbec60 FW |
145 | |
146 | /* Add user time to cpustat. */ | |
23244a5c | 147 | task_group_account_field(p, index, cputime); |
73fbec60 FW |
148 | |
149 | /* Account for user time used */ | |
6fac4829 | 150 | acct_account_cputime(p); |
42d293dc | 151 | |
42d293dc CB |
152 | /* Account power usage for user time */ |
153 | cpufreq_acct_update_power(p, cputime); | |
73fbec60 FW |
154 | } |
155 | ||
156 | /* | |
157 | * Account guest cpu time to a process. | |
158 | * @p: the process that the cpu time gets accounted to | |
159 | * @cputime: the cpu time spent in virtual machine since the last update | |
73fbec60 | 160 | */ |
fb8b049c | 161 | void account_guest_time(struct task_struct *p, u64 cputime) |
73fbec60 FW |
162 | { |
163 | u64 *cpustat = kcpustat_this_cpu->cpustat; | |
164 | ||
165 | /* Add guest time to process. */ | |
fb8b049c FW |
166 | p->utime += cputime; |
167 | account_group_user_time(p, cputime); | |
168 | p->gtime += cputime; | |
73fbec60 FW |
169 | |
170 | /* Add guest time to cpustat. */ | |
d0ea0268 | 171 | if (task_nice(p) > 0) { |
fb8b049c FW |
172 | cpustat[CPUTIME_NICE] += cputime; |
173 | cpustat[CPUTIME_GUEST_NICE] += cputime; | |
73fbec60 | 174 | } else { |
fb8b049c FW |
175 | cpustat[CPUTIME_USER] += cputime; |
176 | cpustat[CPUTIME_GUEST] += cputime; | |
73fbec60 FW |
177 | } |
178 | } | |
179 | ||
180 | /* | |
181 | * Account system cpu time to a process and desired cpustat field | |
182 | * @p: the process that the cpu time gets accounted to | |
183 | * @cputime: the cpu time spent in kernel space since the last update | |
40565b5a | 184 | * @index: the cpustat field that has to be updated
73fbec60 | 185 | */ |
c31cc6a5 | 186 | void account_system_index_time(struct task_struct *p, |
fb8b049c | 187 | u64 cputime, enum cpu_usage_stat index) |
73fbec60 FW |
188 | { |
189 | /* Add system time to process. */ | |
fb8b049c FW |
190 | p->stime += cputime; |
191 | account_group_system_time(p, cputime); | |
73fbec60 FW |
192 | |
193 | /* Add system time to cpustat. */ | |
fb8b049c | 194 | task_group_account_field(p, index, cputime); |
73fbec60 FW |
195 | |
196 | /* Account for system time used */ | |
6fac4829 | 197 | acct_account_cputime(p); |
e7ae9729 | 198 | |
1302a3d8 CB |
199 | /* Account power usage for system time */ |
200 | cpufreq_acct_update_power(p, cputime); | |
73fbec60 FW |
201 | } |
202 | ||
203 | /* | |
204 | * Account system cpu time to a process. | |
205 | * @p: the process that the cpu time gets accounted to | |
206 | * @hardirq_offset: the offset to subtract from hardirq_count() | |
207 | * @cputime: the cpu time spent in kernel space since the last update | |
73fbec60 | 208 | */ |
fb8b049c | 209 | void account_system_time(struct task_struct *p, int hardirq_offset, u64 cputime) |
73fbec60 FW |
210 | { |
211 | int index; | |
212 | ||
213 | if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { | |
40565b5a | 214 | account_guest_time(p, cputime); |
73fbec60 FW |
215 | return; |
216 | } | |
217 | ||
218 | if (hardirq_count() - hardirq_offset) | |
219 | index = CPUTIME_IRQ; | |
220 | else if (in_serving_softirq()) | |
221 | index = CPUTIME_SOFTIRQ; | |
222 | else | |
223 | index = CPUTIME_SYSTEM; | |
224 | ||
c31cc6a5 | 225 | account_system_index_time(p, cputime, index); |
73fbec60 FW |
226 | } |
227 | ||
228 | /* | |
229 | * Account for involuntary wait time. | |
230 | * @cputime: the cpu time spent in involuntary wait | |
231 | */ | |
be9095ed | 232 | void account_steal_time(u64 cputime) |
73fbec60 FW |
233 | { |
234 | u64 *cpustat = kcpustat_this_cpu->cpustat; | |
235 | ||
be9095ed | 236 | cpustat[CPUTIME_STEAL] += cputime; |
73fbec60 FW |
237 | } |
238 | ||
239 | /* | |
240 | * Account for idle time. | |
241 | * @cputime: the cpu time spent in idle wait | |
242 | */ | |
18b43a9b | 243 | void account_idle_time(u64 cputime) |
73fbec60 FW |
244 | { |
245 | u64 *cpustat = kcpustat_this_cpu->cpustat; | |
246 | struct rq *rq = this_rq(); | |
247 | ||
248 | if (atomic_read(&rq->nr_iowait) > 0) | |
18b43a9b | 249 | cpustat[CPUTIME_IOWAIT] += cputime; |
73fbec60 | 250 | else |
18b43a9b | 251 | cpustat[CPUTIME_IDLE] += cputime; |
73fbec60 FW |
252 | } |
253 | ||
03cbc732 WL |
254 | /* |
255 | * When a guest is interrupted for a longer amount of time, missed clock | |
256 | * ticks are not redelivered later. Due to that, this function may on | |
257 | * occasion account more time than the calling functions think has elapsed.
258 | */ | |
2b1f967d | 259 | static __always_inline u64 steal_account_process_time(u64 maxtime) |
73fbec60 FW |
260 | { |
261 | #ifdef CONFIG_PARAVIRT | |
262 | if (static_key_false(¶virt_steal_enabled)) { | |
2b1f967d | 263 | u64 steal; |
73fbec60 FW |
264 | |
265 | steal = paravirt_steal_clock(smp_processor_id()); | |
266 | steal -= this_rq()->prev_steal_time; | |
2b1f967d FW |
267 | steal = min(steal, maxtime); |
268 | account_steal_time(steal); | |
269 | this_rq()->prev_steal_time += steal; | |
73fbec60 | 270 | |
2b1f967d | 271 | return steal; |
73fbec60 FW |
272 | } |
273 | #endif | |
807e5b80 | 274 | return 0; |
73fbec60 FW |
275 | } |
276 | ||
57430218 RR |
277 | /* |
278 | * Account how much elapsed time was spent in steal, irq, or softirq time. | |
279 | */ | |
2b1f967d | 280 | static inline u64 account_other_time(u64 max) |
57430218 | 281 | { |
2b1f967d | 282 | u64 accounted; |
57430218 | 283 | |
2810f611 FW |
284 | /* Shall be converted to a lockdep-enabled lightweight check */ |
285 | WARN_ON_ONCE(!irqs_disabled()); | |
286 | ||
57430218 RR |
287 | accounted = steal_account_process_time(max); |
288 | ||
289 | if (accounted < max) | |
a499a5a1 | 290 | accounted += irqtime_tick_accounted(max - accounted); |
57430218 RR |
291 | |
292 | return accounted; | |
293 | } | |
294 | ||
a1eb1411 SG |
295 | #ifdef CONFIG_64BIT |
296 | static inline u64 read_sum_exec_runtime(struct task_struct *t) | |
297 | { | |
298 | return t->se.sum_exec_runtime; | |
299 | } | |
300 | #else | |
301 | static u64 read_sum_exec_runtime(struct task_struct *t) | |
302 | { | |
303 | u64 ns; | |
304 | struct rq_flags rf; | |
305 | struct rq *rq; | |
306 | ||
307 | rq = task_rq_lock(t, &rf); | |
308 | ns = t->se.sum_exec_runtime; | |
309 | task_rq_unlock(rq, t, &rf); | |
310 | ||
311 | return ns; | |
312 | } | |
313 | #endif | |
314 | ||
a634f933 FW |
315 | /* |
316 | * Accumulate raw cputime values of dead tasks (sig->[us]time) and live | |
317 | * tasks (sum on group iteration) belonging to @tsk's group. | |
318 | */ | |
319 | void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) | |
320 | { | |
321 | struct signal_struct *sig = tsk->signal; | |
5613fda9 | 322 | u64 utime, stime; |
a634f933 | 323 | struct task_struct *t; |
e78c3496 | 324 | unsigned int seq, nextseq; |
9c368b5b | 325 | unsigned long flags; |
a634f933 | 326 | |
a1eb1411 SG |
327 | /* |
328 | * Update current task runtime to account pending time since last | |
329 | * scheduler action or thread_group_cputime() call. This thread group | |
330 | * might have other running tasks on different CPUs, but updating | |
331 | * their runtime can affect syscall performance, so we skip accounting
332 | * those pending times and rely only on values updated on tick or
333 | * other scheduler action. | |
334 | */ | |
335 | if (same_thread_group(current, tsk)) | |
336 | (void) task_sched_runtime(current); | |
337 | ||
a634f933 | 338 | rcu_read_lock(); |
e78c3496 RR |
339 | /* Attempt a lockless read on the first round. */ |
340 | nextseq = 0; | |
341 | do { | |
342 | seq = nextseq; | |
9c368b5b | 343 | flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); |
e78c3496 RR |
344 | times->utime = sig->utime; |
345 | times->stime = sig->stime; | |
346 | times->sum_exec_runtime = sig->sum_sched_runtime; | |
347 | ||
348 | for_each_thread(tsk, t) { | |
349 | task_cputime(t, &utime, &stime); | |
350 | times->utime += utime; | |
351 | times->stime += stime; | |
a1eb1411 | 352 | times->sum_exec_runtime += read_sum_exec_runtime(t); |
e78c3496 RR |
353 | } |
354 | /* If lockless access failed, take the lock. */ | |
355 | nextseq = 1; | |
356 | } while (need_seqretry(&sig->stats_lock, seq)); | |
9c368b5b | 357 | done_seqretry_irqrestore(&sig->stats_lock, seq, flags); |
a634f933 FW |
358 | rcu_read_unlock(); |
359 | } | |
360 | ||
73fbec60 FW |
361 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING |
362 | /* | |
363 | * Account a tick to a process and cpustat | |
364 | * @p: the process that the cpu time gets accounted to | |
365 | * @user_tick: is the tick from userspace | |
366 | * @rq: the pointer to rq | |
367 | * | |
368 | * Tick demultiplexing follows the order | |
369 | * - pending hardirq update | |
370 | * - pending softirq update | |
371 | * - user_time | |
372 | * - idle_time | |
373 | * - system time | |
374 | * - check for guest_time | |
375 | * - else account as system_time | |
376 | * | |
377 | * The check for hardirq is done for both system and user time, as there is
378 | * no timer going off while we are in a hardirq and hence we may never get an
379 | * opportunity to update it solely in system time.
380 | * p->stime and friends are only updated on system time and not on irq or
381 | * softirq, as those no longer count in task exec_runtime.
382 | */ | |
383 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | |
2d513868 | 384 | struct rq *rq, int ticks) |
73fbec60 | 385 | { |
2b1f967d | 386 | u64 other, cputime = TICK_NSEC * ticks; |
73fbec60 | 387 | |
57430218 RR |
388 | /* |
389 | * When returning from idle, many ticks can get accounted at | |
390 | * once, including some ticks of steal, irq, and softirq time. | |
391 | * Subtract those ticks from the amount of time accounted to | |
392 | * idle, or potentially user or system time. Due to rounding, | |
393 | * other time can exceed ticks occasionally. | |
394 | */ | |
03cbc732 | 395 | other = account_other_time(ULONG_MAX); |
2b1f967d | 396 | if (other >= cputime) |
73fbec60 | 397 | return; |
23244a5c | 398 | |
2b1f967d | 399 | cputime -= other; |
73fbec60 | 400 | |
57430218 | 401 | if (this_cpu_ksoftirqd() == p) { |
73fbec60 FW |
402 | /* |
403 | * ksoftirqd time does not get accounted in cpu_softirq_time.
404 | * So, we have to handle it separately here. | |
405 | * Also, p->stime needs to be updated for ksoftirqd. | |
406 | */ | |
fb8b049c | 407 | account_system_index_time(p, cputime, CPUTIME_SOFTIRQ); |
73fbec60 | 408 | } else if (user_tick) { |
40565b5a | 409 | account_user_time(p, cputime); |
73fbec60 | 410 | } else if (p == rq->idle) { |
18b43a9b | 411 | account_idle_time(cputime); |
73fbec60 | 412 | } else if (p->flags & PF_VCPU) { /* System time or guest time */ |
fb8b049c | 413 | account_guest_time(p, cputime); |
73fbec60 | 414 | } else { |
fb8b049c | 415 | account_system_index_time(p, cputime, CPUTIME_SYSTEM); |
73fbec60 FW |
416 | } |
417 | } | |
418 | ||
419 | static void irqtime_account_idle_ticks(int ticks) | |
420 | { | |
73fbec60 FW |
421 | struct rq *rq = this_rq(); |
422 | ||
2d513868 | 423 | irqtime_account_process_tick(current, 0, rq, ticks); |
73fbec60 FW |
424 | } |
425 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | |
3f4724ea FW |
426 | static inline void irqtime_account_idle_ticks(int ticks) {} |
427 | static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick, | |
2d513868 | 428 | struct rq *rq, int nr_ticks) {} |
73fbec60 FW |
429 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ |
430 | ||
73fbec60 FW |
431 | /* |
432 | * Use precise platform statistics if available: | |
433 | */ | |
434 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | |
a7e1a9e3 | 435 | |
e3942ba0 | 436 | #ifndef __ARCH_HAS_VTIME_TASK_SWITCH |
b0493406 | 437 | void vtime_common_task_switch(struct task_struct *prev) |
e3942ba0 FW |
438 | { |
439 | if (is_idle_task(prev)) | |
440 | vtime_account_idle(prev); | |
441 | else | |
442 | vtime_account_system(prev); | |
443 | ||
c8d7dabf | 444 | vtime_flush(prev); |
e3942ba0 FW |
445 | arch_vtime_task_switch(prev); |
446 | } | |
447 | #endif | |
11113334 | 448 | |
0cfdf9a1 FW |
449 | #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ |
450 | ||
451 | ||
452 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE | |
a7e1a9e3 FW |
453 | /* |
454 | * Archs that account the whole time spent in the idle task | |
455 | * (outside irq) as idle time can rely on this and just implement | |
fd25b4c2 | 456 | * vtime_account_system() and vtime_account_idle(). Archs that |
a7e1a9e3 FW |
457 | * have another meaning of idle time (s390 only includes the
458 | * time spent by the CPU when it's in low power mode) must override | |
459 | * vtime_account(). | |
460 | */ | |
461 | #ifndef __ARCH_HAS_VTIME_ACCOUNT | |
0cfdf9a1 | 462 | void vtime_account_irq_enter(struct task_struct *tsk) |
a7e1a9e3 | 463 | { |
0cfdf9a1 FW |
464 | if (!in_interrupt() && is_idle_task(tsk)) |
465 | vtime_account_idle(tsk); | |
466 | else | |
467 | vtime_account_system(tsk); | |
a7e1a9e3 | 468 | } |
0cfdf9a1 | 469 | EXPORT_SYMBOL_GPL(vtime_account_irq_enter); |
a7e1a9e3 | 470 | #endif /* __ARCH_HAS_VTIME_ACCOUNT */ |
9fbc42ea | 471 | |
5613fda9 | 472 | void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) |
9fbc42ea FW |
473 | { |
474 | *ut = p->utime; | |
475 | *st = p->stime; | |
476 | } | |
9eec50b8 | 477 | EXPORT_SYMBOL_GPL(task_cputime_adjusted); |
a7e1a9e3 | 478 | |
5613fda9 | 479 | void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) |
9fbc42ea FW |
480 | { |
481 | struct task_cputime cputime; | |
73fbec60 | 482 | |
9fbc42ea FW |
483 | thread_group_cputime(p, &cputime); |
484 | ||
485 | *ut = cputime.utime; | |
486 | *st = cputime.stime; | |
487 | } | |
488 | #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ | |
489 | /* | |
490 | * Account a single tick of cpu time. | |
491 | * @p: the process that the cpu time gets accounted to | |
492 | * @user_tick: indicates if the tick is a user or a system tick | |
493 | */ | |
494 | void account_process_tick(struct task_struct *p, int user_tick) | |
73fbec60 | 495 | { |
2b1f967d | 496 | u64 cputime, steal; |
9fbc42ea | 497 | struct rq *rq = this_rq(); |
73fbec60 | 498 | |
55dbdcfa | 499 | if (vtime_accounting_cpu_enabled()) |
9fbc42ea FW |
500 | return; |
501 | ||
502 | if (sched_clock_irqtime) { | |
2d513868 | 503 | irqtime_account_process_tick(p, user_tick, rq, 1); |
9fbc42ea FW |
504 | return; |
505 | } | |
506 | ||
2b1f967d | 507 | cputime = TICK_NSEC; |
03cbc732 | 508 | steal = steal_account_process_time(ULONG_MAX); |
57430218 | 509 | |
2b1f967d | 510 | if (steal >= cputime) |
9fbc42ea | 511 | return; |
73fbec60 | 512 | |
2b1f967d | 513 | cputime -= steal; |
57430218 | 514 | |
9fbc42ea | 515 | if (user_tick) |
40565b5a | 516 | account_user_time(p, cputime); |
9fbc42ea | 517 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) |
fb8b049c | 518 | account_system_time(p, HARDIRQ_OFFSET, cputime); |
73fbec60 | 519 | else |
18b43a9b | 520 | account_idle_time(cputime); |
9fbc42ea | 521 | } |
73fbec60 | 522 | |
9fbc42ea FW |
523 | /* |
524 | * Account multiple ticks of idle time. | |
525 | * @ticks: number of stolen ticks | |
526 | */ | |
527 | void account_idle_ticks(unsigned long ticks) | |
528 | { | |
18b43a9b | 529 | u64 cputime, steal; |
26f2c75c | 530 | |
9fbc42ea FW |
531 | if (sched_clock_irqtime) { |
532 | irqtime_account_idle_ticks(ticks); | |
533 | return; | |
534 | } | |
535 | ||
18b43a9b | 536 | cputime = ticks * TICK_NSEC; |
2b1f967d | 537 | steal = steal_account_process_time(ULONG_MAX); |
f9bcf1e0 WL |
538 | |
539 | if (steal >= cputime) | |
540 | return; | |
541 | ||
542 | cputime -= steal; | |
543 | account_idle_time(cputime); | |
9fbc42ea | 544 | } |
73fbec60 | 545 | |
d9a3c982 | 546 | /* |
55eaa7c1 SG |
547 | * Perform (stime * rtime) / total, but avoid multiplication overflow by |
548 | * losing precision when the numbers are big.
d9a3c982 | 549 | */ |
5613fda9 | 550 | static u64 scale_stime(u64 stime, u64 rtime, u64 total) |
73fbec60 | 551 | { |
55eaa7c1 | 552 | u64 scaled; |
73fbec60 | 553 | |
55eaa7c1 SG |
554 | for (;;) { |
555 | /* Make sure "rtime" is the bigger of stime/rtime */ | |
84f9f3a1 SG |
556 | if (stime > rtime) |
557 | swap(rtime, stime); | |
55eaa7c1 SG |
558 | |
559 | /* Make sure 'total' fits in 32 bits */ | |
560 | if (total >> 32) | |
561 | goto drop_precision; | |
562 | ||
563 | /* Does rtime (and thus stime) fit in 32 bits? */ | |
564 | if (!(rtime >> 32)) | |
565 | break; | |
566 | ||
567 | /* Can we just balance rtime/stime rather than dropping bits? */ | |
568 | if (stime >> 31) | |
569 | goto drop_precision; | |
570 | ||
571 | /* We can grow stime and shrink rtime and try to make them both fit */ | |
572 | stime <<= 1; | |
573 | rtime >>= 1; | |
574 | continue; | |
575 | ||
576 | drop_precision: | |
577 | /* We drop from rtime, it has more bits than stime */ | |
578 | rtime >>= 1; | |
579 | total >>= 1; | |
d9a3c982 | 580 | } |
73fbec60 | 581 | |
55eaa7c1 SG |
582 | /* |
583 | * Make sure gcc understands that this is a 32x32->64 multiply, | |
584 | * followed by a 64/32->64 divide. | |
585 | */ | |
586 | scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total); | |
5613fda9 | 587 | return scaled; |
73fbec60 FW |
588 | } |
589 | ||
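To see what the loop above is doing, here is a small userspace sketch of the same algorithm (plain 64-bit division stands in for div_u64(), and the numbers are invented purely for illustration): tick sampling saw 4h of system and 8h of user time, but the scheduler's precise runtime is only 11h, so stime gets scaled by roughly 11/12.

```c
#include <stdint.h>
#include <stdio.h>

/* Same shape as scale_stime(): keep rebalancing or halving until the
 * 32x32->64 multiply and the 64/32 divide can no longer overflow. */
static uint64_t scale_stime_sketch(uint64_t stime, uint64_t rtime, uint64_t total)
{
	for (;;) {
		if (stime > rtime) {		/* keep rtime the bigger one */
			uint64_t tmp = rtime;
			rtime = stime;
			stime = tmp;
		}
		if (total >> 32)		/* total must fit in 32 bits */
			goto drop_precision;
		if (!(rtime >> 32))		/* rtime (and thus stime) fits */
			break;
		if (stime >> 31)		/* cannot rebalance, drop bits */
			goto drop_precision;
		stime <<= 1;			/* grow stime, shrink rtime */
		rtime >>= 1;
		continue;
drop_precision:
		rtime >>= 1;			/* rtime has more bits than stime */
		total >>= 1;
	}
	return (uint64_t)(uint32_t)stime * (uint32_t)rtime / (uint32_t)total;
}

int main(void)
{
	const uint64_t hour = 3600ULL * 1000000000;	/* ns per hour */
	uint64_t stime = 4 * hour, utime = 8 * hour, rtime = 11 * hour;

	/* Expect roughly 4h * 11/12, i.e. about 3.67h worth of nanoseconds. */
	printf("scaled stime: %llu ns\n",
	       (unsigned long long)scale_stime_sketch(stime, rtime, stime + utime));
	return 0;
}
```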
347abad9 | 590 | /* |
9d7fb042 PZ |
591 | * Adjust the imprecise tick based cputime against the precise runtime
592 | * accounting done by the scheduler.
347abad9 | 593 | *
9d7fb042 PZ |
594 | * Tick based cputime accounting depends on whether a task's random
595 | * scheduling timeslices happen to be interrupted by the timer. Depending on
596 | * these circumstances, the number of these interrupts may be over- or
597 | * under-estimated, matching the real user and system cputime with a variable
598 | * precision.
599 | * | |
600 | * Fix this by scaling these tick based values against the total runtime | |
601 | * accounted by the CFS scheduler. | |
602 | * | |
603 | * This code provides the following guarantees: | |
604 | * | |
605 | * stime + utime == rtime | |
606 | * stime_i+1 >= stime_i, utime_i+1 >= utime_i | |
607 | * | |
608 | * Assuming that rtime_i+1 >= rtime_i. | |
fa092057 | 609 | */ |
d37f761d | 610 | static void cputime_adjust(struct task_cputime *curr, |
9d7fb042 | 611 | struct prev_cputime *prev, |
5613fda9 | 612 | u64 *ut, u64 *st) |
73fbec60 | 613 | { |
5613fda9 | 614 | u64 rtime, stime, utime; |
9d7fb042 | 615 | unsigned long flags; |
fa092057 | 616 | |
9d7fb042 PZ |
617 | /* Serialize concurrent callers such that we can honour our guarantees */ |
618 | raw_spin_lock_irqsave(&prev->lock, flags); | |
5613fda9 | 619 | rtime = curr->sum_exec_runtime; |
73fbec60 | 620 | |
772c808a | 621 | /* |
9d7fb042 PZ |
622 | * This is possible under two circumstances: |
623 | * - rtime isn't monotonic after all (a bug); | |
624 | * - we got reordered by the lock. | |
625 | * | |
626 | * In both cases this acts as a filter such that the rest of the code | |
627 | * can assume it is monotonic regardless of anything else. | |
772c808a SG |
628 | */ |
629 | if (prev->stime + prev->utime >= rtime) | |
630 | goto out; | |
631 | ||
5a8e01f8 SG |
632 | stime = curr->stime; |
633 | utime = curr->utime; | |
634 | ||
173be9a1 | 635 | /* |
3b9c08ae IM |
636 | * If either stime or utime is 0, assume all runtime is userspace.
637 | * Once a task gets some ticks, the monotonicity code at 'update:'
638 | * will ensure things converge to the observed ratio. | |
173be9a1 | 639 | */ |
3b9c08ae IM |
640 | if (stime == 0) { |
641 | utime = rtime; | |
642 | goto update; | |
9d7fb042 | 643 | } |
5a8e01f8 | 644 | |
3b9c08ae IM |
645 | if (utime == 0) { |
646 | stime = rtime; | |
647 | goto update; | |
648 | } | |
649 | ||
650 | stime = scale_stime(stime, rtime, stime + utime); | |
651 | ||
652 | update: | |
9d7fb042 PZ |
653 | /* |
654 | * Make sure stime doesn't go backwards; this preserves monotonicity | |
655 | * for utime because rtime is monotonic. | |
656 | * | |
657 | * utime_i+1 = rtime_i+1 - stime_i | |
658 | * = rtime_i+1 - (rtime_i - utime_i) | |
659 | * = (rtime_i+1 - rtime_i) + utime_i | |
660 | * >= utime_i | |
661 | */ | |
662 | if (stime < prev->stime) | |
663 | stime = prev->stime; | |
664 | utime = rtime - stime; | |
665 | ||
666 | /* | |
667 | * Make sure utime doesn't go backwards; this still preserves | |
668 | * monotonicity for stime, analogous argument to above. | |
669 | */ | |
670 | if (utime < prev->utime) { | |
671 | utime = prev->utime; | |
672 | stime = rtime - utime; | |
673 | } | |
d37f761d | 674 | |
9d7fb042 PZ |
675 | prev->stime = stime; |
676 | prev->utime = utime; | |
772c808a | 677 | out: |
d37f761d FW |
678 | *ut = prev->utime; |
679 | *st = prev->stime; | |
9d7fb042 | 680 | raw_spin_unlock_irqrestore(&prev->lock, flags); |
d37f761d | 681 | } |
73fbec60 | 682 | |
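A quick numeric illustration of the clamping above (values invented for the example): suppose the previous report was prev->stime = 100 and prev->utime = 100, so the last rtime was 200. A new snapshot arrives with curr->stime = 50, curr->utime = 150 and rtime = 210; scale_stime(50, 210, 200) yields 52, which is below prev->stime, so stime is held at 100 and utime becomes 210 - 100 = 110. Both fields stay monotonic and still sum to rtime, which are exactly the guarantees stated in the comment above cputime_adjust().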
5613fda9 | 683 | void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) |
d37f761d FW |
684 | { |
685 | struct task_cputime cputime = { | |
d37f761d FW |
686 | .sum_exec_runtime = p->se.sum_exec_runtime, |
687 | }; | |
688 | ||
6fac4829 | 689 | task_cputime(p, &cputime.utime, &cputime.stime); |
d37f761d | 690 | cputime_adjust(&cputime, &p->prev_cputime, ut, st); |
73fbec60 | 691 | } |
9eec50b8 | 692 | EXPORT_SYMBOL_GPL(task_cputime_adjusted); |
73fbec60 | 693 | |
5613fda9 | 694 | void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) |
73fbec60 | 695 | { |
73fbec60 | 696 | struct task_cputime cputime; |
73fbec60 FW |
697 | |
698 | thread_group_cputime(p, &cputime); | |
d37f761d | 699 | cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st); |
73fbec60 | 700 | } |
9fbc42ea | 701 | #endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ |
abf917cd FW |
702 | |
703 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN | |
bac5b6b6 | 704 | static u64 vtime_delta(struct vtime *vtime) |
6a61671b | 705 | { |
2a42eb95 | 706 | unsigned long long clock; |
6a61671b | 707 | |
0e4097c3 | 708 | clock = sched_clock(); |
2a42eb95 | 709 | if (clock < vtime->starttime) |
6a61671b | 710 | return 0; |
abf917cd | 711 | |
2a42eb95 | 712 | return clock - vtime->starttime; |
6a61671b FW |
713 | } |
714 | ||
bac5b6b6 | 715 | static u64 get_vtime_delta(struct vtime *vtime) |
abf917cd | 716 | { |
2a42eb95 WL |
717 | u64 delta = vtime_delta(vtime); |
718 | u64 other; | |
abf917cd | 719 | |
03cbc732 WL |
720 | /* |
721 | * Unlike tick based timing, vtime based timing never has lost | |
722 | * ticks, so there is no need for steal time accounting to make up for
723 | * lost ticks. Vtime accounts a rounded version of actual | |
724 | * elapsed time. Limit account_other_time to prevent rounding | |
725 | * errors from causing elapsed vtime to go negative. | |
726 | */ | |
b58c3584 | 727 | other = account_other_time(delta); |
bac5b6b6 | 728 | WARN_ON_ONCE(vtime->state == VTIME_INACTIVE); |
2a42eb95 | 729 | vtime->starttime += delta; |
abf917cd | 730 | |
b58c3584 | 731 | return delta - other; |
abf917cd FW |
732 | } |
733 | ||
2a42eb95 WL |
734 | static void __vtime_account_system(struct task_struct *tsk, |
735 | struct vtime *vtime) | |
6a61671b | 736 | { |
2a42eb95 WL |
737 | vtime->stime += get_vtime_delta(vtime); |
738 | if (vtime->stime >= TICK_NSEC) { | |
739 | account_system_time(tsk, irq_count(), vtime->stime); | |
740 | vtime->stime = 0; | |
741 | } | |
742 | } | |
743 | ||
744 | static void vtime_account_guest(struct task_struct *tsk, | |
745 | struct vtime *vtime) | |
746 | { | |
747 | vtime->gtime += get_vtime_delta(vtime); | |
748 | if (vtime->gtime >= TICK_NSEC) { | |
749 | account_guest_time(tsk, vtime->gtime); | |
750 | vtime->gtime = 0; | |
751 | } | |
6a61671b FW |
752 | } |
753 | ||
abf917cd FW |
754 | void vtime_account_system(struct task_struct *tsk) |
755 | { | |
bac5b6b6 FW |
756 | struct vtime *vtime = &tsk->vtime; |
757 | ||
758 | if (!vtime_delta(vtime)) | |
ff9a9b4c RR |
759 | return; |
760 | ||
bac5b6b6 | 761 | write_seqcount_begin(&vtime->seqcount); |
2a42eb95 WL |
762 | /* We might have scheduled out from guest path */ |
763 | if (current->flags & PF_VCPU) | |
764 | vtime_account_guest(tsk, vtime); | |
765 | else | |
766 | __vtime_account_system(tsk, vtime); | |
bac5b6b6 | 767 | write_seqcount_end(&vtime->seqcount); |
6a61671b | 768 | } |
3f4724ea | 769 | |
1c3eda01 | 770 | void vtime_user_enter(struct task_struct *tsk) |
abf917cd | 771 | { |
bac5b6b6 FW |
772 | struct vtime *vtime = &tsk->vtime; |
773 | ||
774 | write_seqcount_begin(&vtime->seqcount); | |
2a42eb95 | 775 | __vtime_account_system(tsk, vtime); |
bac5b6b6 FW |
776 | vtime->state = VTIME_USER; |
777 | write_seqcount_end(&vtime->seqcount); | |
6a61671b FW |
778 | } |
779 | ||
1c3eda01 | 780 | void vtime_user_exit(struct task_struct *tsk) |
6a61671b | 781 | { |
bac5b6b6 FW |
782 | struct vtime *vtime = &tsk->vtime; |
783 | ||
784 | write_seqcount_begin(&vtime->seqcount); | |
2a42eb95 WL |
785 | vtime->utime += get_vtime_delta(vtime); |
786 | if (vtime->utime >= TICK_NSEC) { | |
787 | account_user_time(tsk, vtime->utime); | |
788 | vtime->utime = 0; | |
789 | } | |
bac5b6b6 FW |
790 | vtime->state = VTIME_SYS; |
791 | write_seqcount_end(&vtime->seqcount); | |
6a61671b FW |
792 | } |
793 | ||
794 | void vtime_guest_enter(struct task_struct *tsk) | |
795 | { | |
bac5b6b6 | 796 | struct vtime *vtime = &tsk->vtime; |
5b206d48 FW |
797 | /* |
798 | * The flags must be updated under the lock with | |
60a9ce57 | 799 | * the vtime_starttime flush and update. |
5b206d48 FW |
800 | * That enforces the right ordering and update sequence
801 | * synchronization against the reader (task_gtime()) | |
802 | * that can thus safely catch up with a tickless delta. | |
803 | */ | |
bac5b6b6 | 804 | write_seqcount_begin(&vtime->seqcount); |
2a42eb95 | 805 | __vtime_account_system(tsk, vtime); |
6a61671b | 806 | current->flags |= PF_VCPU; |
bac5b6b6 | 807 | write_seqcount_end(&vtime->seqcount); |
6a61671b | 808 | } |
48d6a816 | 809 | EXPORT_SYMBOL_GPL(vtime_guest_enter); |
6a61671b FW |
810 | |
811 | void vtime_guest_exit(struct task_struct *tsk) | |
812 | { | |
bac5b6b6 FW |
813 | struct vtime *vtime = &tsk->vtime; |
814 | ||
815 | write_seqcount_begin(&vtime->seqcount); | |
2a42eb95 | 816 | vtime_account_guest(tsk, vtime); |
6a61671b | 817 | current->flags &= ~PF_VCPU; |
bac5b6b6 | 818 | write_seqcount_end(&vtime->seqcount); |
abf917cd | 819 | } |
48d6a816 | 820 | EXPORT_SYMBOL_GPL(vtime_guest_exit); |
abf917cd FW |
821 | |
822 | void vtime_account_idle(struct task_struct *tsk) | |
823 | { | |
bac5b6b6 | 824 | account_idle_time(get_vtime_delta(&tsk->vtime)); |
abf917cd | 825 | } |
3f4724ea | 826 | |
6a61671b FW |
827 | void arch_vtime_task_switch(struct task_struct *prev) |
828 | { | |
bac5b6b6 | 829 | struct vtime *vtime = &prev->vtime; |
6a61671b | 830 | |
bac5b6b6 FW |
831 | write_seqcount_begin(&vtime->seqcount); |
832 | vtime->state = VTIME_INACTIVE; | |
833 | write_seqcount_end(&vtime->seqcount); | |
834 | ||
835 | vtime = ¤t->vtime; | |
836 | ||
837 | write_seqcount_begin(&vtime->seqcount); | |
838 | vtime->state = VTIME_SYS; | |
0e4097c3 | 839 | vtime->starttime = sched_clock(); |
bac5b6b6 | 840 | write_seqcount_end(&vtime->seqcount); |
6a61671b FW |
841 | } |
842 | ||
45eacc69 | 843 | void vtime_init_idle(struct task_struct *t, int cpu) |
6a61671b | 844 | { |
bac5b6b6 | 845 | struct vtime *vtime = &t->vtime; |
6a61671b FW |
846 | unsigned long flags; |
847 | ||
b7ce2277 | 848 | local_irq_save(flags); |
bac5b6b6 FW |
849 | write_seqcount_begin(&vtime->seqcount); |
850 | vtime->state = VTIME_SYS; | |
0e4097c3 | 851 | vtime->starttime = sched_clock(); |
bac5b6b6 | 852 | write_seqcount_end(&vtime->seqcount); |
b7ce2277 | 853 | local_irq_restore(flags); |
6a61671b FW |
854 | } |
855 | ||
16a6d9be | 856 | u64 task_gtime(struct task_struct *t) |
6a61671b | 857 | { |
bac5b6b6 | 858 | struct vtime *vtime = &t->vtime; |
6a61671b | 859 | unsigned int seq; |
16a6d9be | 860 | u64 gtime; |
6a61671b | 861 | |
e5925394 | 862 | if (!vtime_accounting_enabled()) |
2541117b HS |
863 | return t->gtime; |
864 | ||
6a61671b | 865 | do { |
bac5b6b6 | 866 | seq = read_seqcount_begin(&vtime->seqcount); |
6a61671b FW |
867 | |
868 | gtime = t->gtime; | |
bac5b6b6 | 869 | if (vtime->state == VTIME_SYS && t->flags & PF_VCPU) |
2a42eb95 | 870 | gtime += vtime->gtime + vtime_delta(vtime); |
6a61671b | 871 | |
bac5b6b6 | 872 | } while (read_seqcount_retry(&vtime->seqcount, seq)); |
6a61671b FW |
873 | |
874 | return gtime; | |
875 | } | |
876 | ||
877 | /* | |
878 | * Fetch raw cputime values from the fields of task_struct and
879 | * add up the pending nohz execution time since the last | |
880 | * cputime snapshot. | |
881 | */ | |
5613fda9 | 882 | void task_cputime(struct task_struct *t, u64 *utime, u64 *stime) |
6a61671b | 883 | { |
bac5b6b6 | 884 | struct vtime *vtime = &t->vtime; |
6a61671b | 885 | unsigned int seq; |
bac5b6b6 | 886 | u64 delta; |
6a61671b | 887 | |
353c50eb SG |
888 | if (!vtime_accounting_enabled()) { |
889 | *utime = t->utime; | |
890 | *stime = t->stime; | |
891 | return; | |
892 | } | |
6a61671b | 893 | |
353c50eb | 894 | do { |
bac5b6b6 | 895 | seq = read_seqcount_begin(&vtime->seqcount); |
6a61671b | 896 | |
353c50eb SG |
897 | *utime = t->utime; |
898 | *stime = t->stime; | |
6a61671b FW |
899 | |
900 | /* Task is sleeping, nothing to add */ | |
bac5b6b6 | 901 | if (vtime->state == VTIME_INACTIVE || is_idle_task(t)) |
6a61671b FW |
902 | continue; |
903 | ||
bac5b6b6 | 904 | delta = vtime_delta(vtime); |
6a61671b FW |
905 | |
906 | /* | |
907 | * Task runs either in user or kernel space, add pending nohz time to | |
908 | * the right place. | |
909 | */ | |
bac5b6b6 | 910 | if (vtime->state == VTIME_USER || t->flags & PF_VCPU) |
2a42eb95 | 911 | *utime += vtime->utime + delta; |
bac5b6b6 | 912 | else if (vtime->state == VTIME_SYS) |
2a42eb95 | 913 | *stime += vtime->stime + delta; |
bac5b6b6 | 914 | } while (read_seqcount_retry(&vtime->seqcount, seq)); |
6a61671b | 915 | } |
abf917cd | 916 | #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ |