timers: fix itimer/many thread hang, fix #2
kernel/posix-cpu-timers.c

/*
 * Implement CPU time clocks for the POSIX clock interface.
 */

#include <linux/sched.h>
#include <linux/posix-timers.h>
#include <linux/errno.h>
#include <linux/math64.h>
#include <asm/uaccess.h>

11#ifdef CONFIG_SMP
12/*
13 * Allocate the thread_group_cputime structure appropriately for SMP kernels
14 * and fill in the current values of the fields. Called from copy_signal()
15 * via thread_group_cputime_clone_thread() when adding a second or subsequent
16 * thread to a thread group. Assumes interrupts are enabled when called.
17 */
18int thread_group_cputime_alloc_smp(struct task_struct *tsk)
19{
20 struct signal_struct *sig = tsk->signal;
21 struct task_cputime *cputime;
22
23 /*
24 * If we have multiple threads and we don't already have a
25 * per-CPU task_cputime struct, allocate one and fill it in with
26 * the times accumulated so far.
27 */
28 if (sig->cputime.totals)
29 return 0;
30 cputime = alloc_percpu(struct task_cputime);
31 if (cputime == NULL)
32 return -ENOMEM;
33 read_lock(&tasklist_lock);
34 spin_lock_irq(&tsk->sighand->siglock);
35 if (sig->cputime.totals) {
36 spin_unlock_irq(&tsk->sighand->siglock);
37 read_unlock(&tasklist_lock);
38 free_percpu(cputime);
39 return 0;
40 }
41 sig->cputime.totals = cputime;
42 cputime = per_cpu_ptr(sig->cputime.totals, get_cpu());
43 cputime->utime = tsk->utime;
44 cputime->stime = tsk->stime;
45 cputime->sum_exec_runtime = tsk->se.sum_exec_runtime;
46 put_cpu_no_resched();
47 spin_unlock_irq(&tsk->sighand->siglock);
48 read_unlock(&tasklist_lock);
49 return 0;
50}
51
52/**
53 * thread_group_cputime_smp - Sum the thread group time fields across all CPUs.
54 *
55 * @tsk: The task we use to identify the thread group.
56 * @times: task_cputime structure in which we return the summed fields.
57 *
58 * Walk the list of CPUs to sum the per-CPU time fields in the thread group
59 * time structure.
60 */
61void thread_group_cputime_smp(
62 struct task_struct *tsk,
63 struct task_cputime *times)
64{
65 struct signal_struct *sig;
66 int i;
67 struct task_cputime *tot;
68
69 sig = tsk->signal;
70 if (unlikely(!sig) || !sig->cputime.totals) {
71 times->utime = tsk->utime;
72 times->stime = tsk->stime;
73 times->sum_exec_runtime = tsk->se.sum_exec_runtime;
74 return;
75 }
76 times->stime = times->utime = cputime_zero;
77 times->sum_exec_runtime = 0;
78 for_each_possible_cpu(i) {
79 tot = per_cpu_ptr(tsk->signal->cputime.totals, i);
80 times->utime = cputime_add(times->utime, tot->utime);
81 times->stime = cputime_add(times->stime, tot->stime);
82 times->sum_exec_runtime += tot->sum_exec_runtime;
83 }
84}
85
86#endif /* CONFIG_SMP */
87
88/*
89 * Called after updating RLIMIT_CPU to set timer expiration if necessary.
90 */
91void update_rlimit_cpu(unsigned long rlim_new)
92{
93 cputime_t cputime;
94
95 cputime = secs_to_cputime(rlim_new);
96 if (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
97 cputime_lt(current->signal->it_prof_expires, cputime)) {
98 spin_lock_irq(&current->sighand->siglock);
99 set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
100 spin_unlock_irq(&current->sighand->siglock);
101 }
102}
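
/*
 * Illustration (user space, not part of this file): update_rlimit_cpu()
 * runs when a process changes its CPU limit, typically via setrlimit().
 * A minimal sketch, assuming a POSIX system; RLIMIT_CPU is counted in
 * seconds of CPU time, with SIGXCPU at the soft limit and SIGKILL at
 * the hard limit.
 */
#if 0
#include <sys/resource.h>

int main(void)
{
	struct rlimit rl = { .rlim_cur = 10, .rlim_max = 20 };	/* seconds of CPU */

	/* SIGXCPU after ~10s of CPU time, SIGKILL at the 20s hard limit. */
	setrlimit(RLIMIT_CPU, &rl);
	for (;;)
		;
	return 0;
}
#endif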
103
static int check_clock(const clockid_t which_clock)
{
	int error = 0;
	struct task_struct *p;
	const pid_t pid = CPUCLOCK_PID(which_clock);

	if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_MAX)
		return -EINVAL;

	if (pid == 0)
		return 0;

	read_lock(&tasklist_lock);
	p = find_task_by_vpid(pid);
	if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?
		   same_thread_group(p, current) : thread_group_leader(p))) {
		error = -EINVAL;
	}
	read_unlock(&tasklist_lock);

	return error;
}
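
/*
 * Illustration (user space, not part of this file): the clockid values
 * validated above are normally obtained with clock_getcpuclockid() or
 * pthread_getcpuclockid(); a pid of 0 in the encoding means the caller
 * itself.  A minimal sketch, assuming a POSIX system with _POSIX_CPUTIME:
 */
#if 0
#include <stdio.h>
#include <time.h>
#include <unistd.h>

int main(void)
{
	clockid_t clk;
	struct timespec ts;

	if (clock_getcpuclockid(getpid(), &clk) != 0)
		return 1;
	if (clock_gettime(clk, &ts) != 0)
		return 1;
	printf("process CPU time: %ld.%09ld s\n",
	       (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}
#endif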
126
127static inline union cpu_time_count
a924b04d 128timespec_to_sample(const clockid_t which_clock, const struct timespec *tp)
1da177e4
LT
129{
130 union cpu_time_count ret;
131 ret.sched = 0; /* high half always zero when .cpu used */
132 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
ee500f27 133 ret.sched = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
1da177e4
LT
134 } else {
135 ret.cpu = timespec_to_cputime(tp);
136 }
137 return ret;
138}
139
a924b04d 140static void sample_to_timespec(const clockid_t which_clock,
1da177e4
LT
141 union cpu_time_count cpu,
142 struct timespec *tp)
143{
f8bd2258
RZ
144 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED)
145 *tp = ns_to_timespec(cpu.sched);
146 else
1da177e4 147 cputime_to_timespec(cpu.cpu, tp);
1da177e4
LT
148}
149
a924b04d 150static inline int cpu_time_before(const clockid_t which_clock,
1da177e4
LT
151 union cpu_time_count now,
152 union cpu_time_count then)
153{
154 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
155 return now.sched < then.sched;
156 } else {
157 return cputime_lt(now.cpu, then.cpu);
158 }
159}
a924b04d 160static inline void cpu_time_add(const clockid_t which_clock,
1da177e4
LT
161 union cpu_time_count *acc,
162 union cpu_time_count val)
163{
164 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
165 acc->sched += val.sched;
166 } else {
167 acc->cpu = cputime_add(acc->cpu, val.cpu);
168 }
169}
a924b04d 170static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
1da177e4
LT
171 union cpu_time_count a,
172 union cpu_time_count b)
173{
174 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
175 a.sched -= b.sched;
176 } else {
177 a.cpu = cputime_sub(a.cpu, b.cpu);
178 }
179 return a;
180}
181
182/*
183 * Divide and limit the result to res >= 1
184 *
185 * This is necessary to prevent signal delivery starvation, when the result of
186 * the division would be rounded down to 0.
187 */
188static inline cputime_t cputime_div_non_zero(cputime_t time, unsigned long div)
189{
190 cputime_t res = cputime_div(time, div);
191
192 return max_t(cputime_t, res, 1);
193}
194
195/*
196 * Update expiry time from increment, and increase overrun count,
197 * given the current clock sample.
198 */
7a4ed937 199static void bump_cpu_timer(struct k_itimer *timer,
1da177e4
LT
200 union cpu_time_count now)
201{
202 int i;
203
204 if (timer->it.cpu.incr.sched == 0)
205 return;
206
207 if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
208 unsigned long long delta, incr;
209
210 if (now.sched < timer->it.cpu.expires.sched)
211 return;
212 incr = timer->it.cpu.incr.sched;
213 delta = now.sched + incr - timer->it.cpu.expires.sched;
214 /* Don't use (incr*2 < delta), incr*2 might overflow. */
215 for (i = 0; incr < delta - incr; i++)
216 incr = incr << 1;
217 for (; i >= 0; incr >>= 1, i--) {
7a4ed937 218 if (delta < incr)
219 continue;
220 timer->it.cpu.expires.sched += incr;
221 timer->it_overrun += 1 << i;
222 delta -= incr;
223 }
224 } else {
225 cputime_t delta, incr;
226
227 if (cputime_lt(now.cpu, timer->it.cpu.expires.cpu))
228 return;
229 incr = timer->it.cpu.incr.cpu;
230 delta = cputime_sub(cputime_add(now.cpu, incr),
231 timer->it.cpu.expires.cpu);
232 /* Don't use (incr*2 < delta), incr*2 might overflow. */
233 for (i = 0; cputime_lt(incr, cputime_sub(delta, incr)); i++)
234 incr = cputime_add(incr, incr);
235 for (; i >= 0; incr = cputime_halve(incr), i--) {
7a4ed937 236 if (cputime_lt(delta, incr))
237 continue;
238 timer->it.cpu.expires.cpu =
239 cputime_add(timer->it.cpu.expires.cpu, incr);
240 timer->it_overrun += 1 << i;
241 delta = cputime_sub(delta, incr);
242 }
243 }
244}
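
/*
 * Worked example of the doubling/halving walk above (numbers are
 * illustrative only): expires = 10, incr = 3, now = 20.  Then
 * delta = 20 + 3 - 10 = 13.  The first loop doubles incr to 12
 * (i = 2); the second loop subtracts 12 from delta once, adding
 * 1 << 2 = 4 to it_overrun and 12 to expires.  The timer ends up
 * with expires = 22 > now and four overruns accounted, matching
 * the firings a period-3 timer armed at 10 would have had by
 * time 20 (at 10, 13, 16 and 19).
 */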
245
246static inline cputime_t prof_ticks(struct task_struct *p)
247{
248 return cputime_add(p->utime, p->stime);
249}
250static inline cputime_t virt_ticks(struct task_struct *p)
251{
252 return p->utime;
253}
1da177e4 254
a924b04d 255int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
1da177e4
LT
256{
257 int error = check_clock(which_clock);
258 if (!error) {
259 tp->tv_sec = 0;
260 tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
261 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
262 /*
263 * If sched_clock is using a cycle counter, we
264 * don't have any idea of its true resolution
265 * exported, but it is much more than 1s/HZ.
266 */
267 tp->tv_nsec = 1;
268 }
269 }
270 return error;
271}
272
a924b04d 273int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
1da177e4
LT
274{
275 /*
276 * You can never reset a CPU clock, but we check for other errors
277 * in the call before failing with EPERM.
278 */
279 int error = check_clock(which_clock);
280 if (error == 0) {
281 error = -EPERM;
282 }
283 return error;
284}
285
286
287/*
288 * Sample a per-thread clock for the given task.
289 */
a924b04d 290static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
291 union cpu_time_count *cpu)
292{
293 switch (CPUCLOCK_WHICH(which_clock)) {
294 default:
295 return -EINVAL;
296 case CPUCLOCK_PROF:
297 cpu->cpu = prof_ticks(p);
298 break;
299 case CPUCLOCK_VIRT:
300 cpu->cpu = virt_ticks(p);
301 break;
302 case CPUCLOCK_SCHED:
f06febc9 303 cpu->sched = task_sched_runtime(p);
304 break;
305 }
306 return 0;
307}
308
/*
 * Sample a process (thread group) clock for the given group_leader task.
 * Must be called with tasklist_lock held for reading, and p->sighand->siglock.
 */
314static int cpu_clock_sample_group_locked(unsigned int clock_idx,
315 struct task_struct *p,
316 union cpu_time_count *cpu)
317{
318 struct task_cputime cputime;
319
320 thread_group_cputime(p, &cputime);
321 switch (clock_idx) {
322 default:
323 return -EINVAL;
324 case CPUCLOCK_PROF:
f06febc9 325 cpu->cpu = cputime_add(cputime.utime, cputime.stime);
1da177e4
LT
326 break;
327 case CPUCLOCK_VIRT:
f06febc9 328 cpu->cpu = cputime.utime;
1da177e4
LT
329 break;
330 case CPUCLOCK_SCHED:
f06febc9 331 cpu->sched = thread_group_sched_runtime(p);
332 break;
333 }
334 return 0;
335}
336
337/*
338 * Sample a process (thread group) clock for the given group_leader task.
339 * Must be called with tasklist_lock held for reading.
340 */
a924b04d 341static int cpu_clock_sample_group(const clockid_t which_clock,
342 struct task_struct *p,
343 union cpu_time_count *cpu)
344{
345 int ret;
346 unsigned long flags;
347 spin_lock_irqsave(&p->sighand->siglock, flags);
348 ret = cpu_clock_sample_group_locked(CPUCLOCK_WHICH(which_clock), p,
349 cpu);
350 spin_unlock_irqrestore(&p->sighand->siglock, flags);
351 return ret;
352}
353
354
int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
{
	const pid_t pid = CPUCLOCK_PID(which_clock);
	int error = -EINVAL;
	union cpu_time_count rtn;

	if (pid == 0) {
		/*
		 * Special case constant value for our own clocks.
		 * We don't have to do any lookup to find ourselves.
		 */
		if (CPUCLOCK_PERTHREAD(which_clock)) {
			/*
			 * Sampling just ourselves we can do with no locking.
			 */
			error = cpu_clock_sample(which_clock,
						 current, &rtn);
		} else {
			read_lock(&tasklist_lock);
			error = cpu_clock_sample_group(which_clock,
						       current, &rtn);
			read_unlock(&tasklist_lock);
		}
	} else {
		/*
		 * Find the given PID, and validate that the caller
		 * should be able to see it.
		 */
		struct task_struct *p;
		rcu_read_lock();
		p = find_task_by_vpid(pid);
		if (p) {
			if (CPUCLOCK_PERTHREAD(which_clock)) {
				if (same_thread_group(p, current)) {
					error = cpu_clock_sample(which_clock,
								 p, &rtn);
				}
			} else {
				read_lock(&tasklist_lock);
				if (thread_group_leader(p) && p->signal) {
					error =
					    cpu_clock_sample_group(which_clock,
								   p, &rtn);
				}
				read_unlock(&tasklist_lock);
			}
		}
		rcu_read_unlock();
	}

	if (error)
		return error;
	sample_to_timespec(which_clock, rtn, tp);
	return 0;
}
410
411
412/*
413 * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
414 * This is called from sys_timer_create with the new timer already locked.
415 */
416int posix_cpu_timer_create(struct k_itimer *new_timer)
417{
418 int ret = 0;
419 const pid_t pid = CPUCLOCK_PID(new_timer->it_clock);
420 struct task_struct *p;
421
422 if (CPUCLOCK_WHICH(new_timer->it_clock) >= CPUCLOCK_MAX)
423 return -EINVAL;
424
425 INIT_LIST_HEAD(&new_timer->it.cpu.entry);
426 new_timer->it.cpu.incr.sched = 0;
427 new_timer->it.cpu.expires.sched = 0;
428
429 read_lock(&tasklist_lock);
430 if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
431 if (pid == 0) {
432 p = current;
433 } else {
8dc86af0 434 p = find_task_by_vpid(pid);
bac0abd6 435 if (p && !same_thread_group(p, current))
436 p = NULL;
437 }
438 } else {
439 if (pid == 0) {
440 p = current->group_leader;
441 } else {
8dc86af0 442 p = find_task_by_vpid(pid);
bac0abd6 443 if (p && !thread_group_leader(p))
444 p = NULL;
445 }
446 }
447 new_timer->it.cpu.task = p;
448 if (p) {
449 get_task_struct(p);
450 } else {
451 ret = -EINVAL;
452 }
453 read_unlock(&tasklist_lock);
454
455 return ret;
456}
457
/*
 * Clean up a CPU-clock timer that is about to be destroyed.
 * This is called from timer deletion with the timer already locked.
 * If we return TIMER_RETRY, it's necessary to release the timer's lock
 * and try again. (This happens when the timer is in the middle of firing.)
 */
int posix_cpu_timer_del(struct k_itimer *timer)
{
	struct task_struct *p = timer->it.cpu.task;
	int ret = 0;

	if (likely(p != NULL)) {
		read_lock(&tasklist_lock);
		if (unlikely(p->signal == NULL)) {
			/*
			 * We raced with the reaping of the task.
			 * The deletion should have cleared us off the list.
			 */
			BUG_ON(!list_empty(&timer->it.cpu.entry));
		} else {
			spin_lock(&p->sighand->siglock);
			if (timer->it.cpu.firing)
				ret = TIMER_RETRY;
			else
				list_del(&timer->it.cpu.entry);
			spin_unlock(&p->sighand->siglock);
		}
		read_unlock(&tasklist_lock);

		if (!ret)
			put_task_struct(p);
	}

	return ret;
}
493
494/*
495 * Clean out CPU timers still ticking when a thread exited. The task
496 * pointer is cleared, and the expiry time is replaced with the residual
497 * time for later timer_gettime calls to return.
498 * This must be called with the siglock held.
499 */
500static void cleanup_timers(struct list_head *head,
501 cputime_t utime, cputime_t stime,
41b86e9c 502 unsigned long long sum_exec_runtime)
503{
504 struct cpu_timer_list *timer, *next;
505 cputime_t ptime = cputime_add(utime, stime);
506
507 list_for_each_entry_safe(timer, next, head, entry) {
1da177e4
LT
508 list_del_init(&timer->entry);
509 if (cputime_lt(timer->expires.cpu, ptime)) {
510 timer->expires.cpu = cputime_zero;
511 } else {
512 timer->expires.cpu = cputime_sub(timer->expires.cpu,
513 ptime);
514 }
515 }
516
517 ++head;
518 list_for_each_entry_safe(timer, next, head, entry) {
519 list_del_init(&timer->entry);
520 if (cputime_lt(timer->expires.cpu, utime)) {
521 timer->expires.cpu = cputime_zero;
522 } else {
523 timer->expires.cpu = cputime_sub(timer->expires.cpu,
524 utime);
525 }
526 }
527
528 ++head;
529 list_for_each_entry_safe(timer, next, head, entry) {
1da177e4 530 list_del_init(&timer->entry);
41b86e9c 531 if (timer->expires.sched < sum_exec_runtime) {
532 timer->expires.sched = 0;
533 } else {
41b86e9c 534 timer->expires.sched -= sum_exec_runtime;
1da177e4
LT
535 }
536 }
537}
538
539/*
540 * These are both called with the siglock held, when the current thread
541 * is being reaped. When the final (leader) thread in the group is reaped,
542 * posix_cpu_timers_exit_group will be called after posix_cpu_timers_exit.
543 */
544void posix_cpu_timers_exit(struct task_struct *tsk)
545{
546 cleanup_timers(tsk->cpu_timers,
41b86e9c 547 tsk->utime, tsk->stime, tsk->se.sum_exec_runtime);
548
549}
550void posix_cpu_timers_exit_group(struct task_struct *tsk)
551{
f06febc9 552 struct task_cputime cputime;
ca531a0a 553
554 thread_group_cputime(tsk, &cputime);
555 cleanup_timers(tsk->signal->cpu_timers,
556 cputime.utime, cputime.stime, cputime.sum_exec_runtime);
1da177e4
LT
557}
558
559static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
560{
561 /*
562 * That's all for this thread or process.
563 * We leave our residual in expires to be reported.
564 */
565 put_task_struct(timer->it.cpu.task);
566 timer->it.cpu.task = NULL;
567 timer->it.cpu.expires = cpu_time_sub(timer->it_clock,
568 timer->it.cpu.expires,
569 now);
570}
571
572/*
573 * Insert the timer on the appropriate list before any timers that
574 * expire later. This must be called with the tasklist_lock held
575 * for reading, and interrupts disabled.
576 */
577static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
578{
579 struct task_struct *p = timer->it.cpu.task;
580 struct list_head *head, *listpos;
581 struct cpu_timer_list *const nt = &timer->it.cpu;
582 struct cpu_timer_list *next;
583 unsigned long i;
584
585 head = (CPUCLOCK_PERTHREAD(timer->it_clock) ?
586 p->cpu_timers : p->signal->cpu_timers);
587 head += CPUCLOCK_WHICH(timer->it_clock);
588
589 BUG_ON(!irqs_disabled());
590 spin_lock(&p->sighand->siglock);
591
592 listpos = head;
593 if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
594 list_for_each_entry(next, head, entry) {
70ab81c2 595 if (next->expires.sched > nt->expires.sched)
1da177e4 596 break;
70ab81c2 597 listpos = &next->entry;
598 }
599 } else {
600 list_for_each_entry(next, head, entry) {
70ab81c2 601 if (cputime_gt(next->expires.cpu, nt->expires.cpu))
1da177e4 602 break;
70ab81c2 603 listpos = &next->entry;
1da177e4
LT
604 }
605 }
606 list_add(&nt->entry, listpos);
607
608 if (listpos == head) {
609 /*
610 * We are the new earliest-expiring timer.
611 * If we are a thread timer, there can always
612 * be a process timer telling us to stop earlier.
613 */
614
615 if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
616 switch (CPUCLOCK_WHICH(timer->it_clock)) {
617 default:
618 BUG();
619 case CPUCLOCK_PROF:
f06febc9 620 if (cputime_eq(p->cputime_expires.prof_exp,
1da177e4 621 cputime_zero) ||
f06febc9 622 cputime_gt(p->cputime_expires.prof_exp,
1da177e4 623 nt->expires.cpu))
624 p->cputime_expires.prof_exp =
625 nt->expires.cpu;
1da177e4
LT
626 break;
627 case CPUCLOCK_VIRT:
f06febc9 628 if (cputime_eq(p->cputime_expires.virt_exp,
1da177e4 629 cputime_zero) ||
f06febc9 630 cputime_gt(p->cputime_expires.virt_exp,
1da177e4 631 nt->expires.cpu))
632 p->cputime_expires.virt_exp =
633 nt->expires.cpu;
1da177e4
LT
634 break;
635 case CPUCLOCK_SCHED:
636 if (p->cputime_expires.sched_exp == 0 ||
637 p->cputime_expires.sched_exp >
638 nt->expires.sched)
639 p->cputime_expires.sched_exp =
640 nt->expires.sched;
1da177e4
LT
641 break;
642 }
643 } else {
644 /*
f06febc9 645 * For a process timer, set the cached expiration time.
646 */
647 switch (CPUCLOCK_WHICH(timer->it_clock)) {
648 default:
649 BUG();
650 case CPUCLOCK_VIRT:
651 if (!cputime_eq(p->signal->it_virt_expires,
652 cputime_zero) &&
653 cputime_lt(p->signal->it_virt_expires,
654 timer->it.cpu.expires.cpu))
655 break;
656 p->signal->cputime_expires.virt_exp =
657 timer->it.cpu.expires.cpu;
658 break;
1da177e4
LT
659 case CPUCLOCK_PROF:
660 if (!cputime_eq(p->signal->it_prof_expires,
661 cputime_zero) &&
662 cputime_lt(p->signal->it_prof_expires,
663 timer->it.cpu.expires.cpu))
664 break;
665 i = p->signal->rlim[RLIMIT_CPU].rlim_cur;
666 if (i != RLIM_INFINITY &&
667 i <= cputime_to_secs(timer->it.cpu.expires.cpu))
668 break;
669 p->signal->cputime_expires.prof_exp =
670 timer->it.cpu.expires.cpu;
671 break;
1da177e4 672 case CPUCLOCK_SCHED:
f06febc9
FM
673 p->signal->cputime_expires.sched_exp =
674 timer->it.cpu.expires.sched;
675 break;
676 }
677 }
678 }
679
680 spin_unlock(&p->sighand->siglock);
681}
682
/*
 * The timer is locked, fire it and arrange for its reload.
 */
static void cpu_timer_fire(struct k_itimer *timer)
{
	if (unlikely(timer->sigq == NULL)) {
		/*
		 * This is a special case for clock_nanosleep,
		 * not a normal timer from sys_timer_create.
		 */
		wake_up_process(timer->it_process);
		timer->it.cpu.expires.sched = 0;
	} else if (timer->it.cpu.incr.sched == 0) {
		/*
		 * One-shot timer. Clear it as soon as it's fired.
		 */
		posix_timer_event(timer, 0);
		timer->it.cpu.expires.sched = 0;
	} else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
		/*
		 * The signal did not get queued because the signal
		 * was ignored, so we won't get any callback to
		 * reload the timer. But we need to keep it
		 * ticking in case the signal is deliverable next time.
		 */
		posix_cpu_timer_schedule(timer);
	}
}
711
712/*
713 * Guts of sys_timer_settime for CPU timers.
714 * This is called with the timer locked and interrupts disabled.
715 * If we return TIMER_RETRY, it's necessary to release the timer's lock
716 * and try again. (This happens when the timer is in the middle of firing.)
717 */
718int posix_cpu_timer_set(struct k_itimer *timer, int flags,
719 struct itimerspec *new, struct itimerspec *old)
720{
721 struct task_struct *p = timer->it.cpu.task;
722 union cpu_time_count old_expires, new_expires, val;
723 int ret;
724
725 if (unlikely(p == NULL)) {
726 /*
727 * Timer refers to a dead task's clock.
728 */
729 return -ESRCH;
730 }
731
732 new_expires = timespec_to_sample(timer->it_clock, &new->it_value);
733
734 read_lock(&tasklist_lock);
735 /*
736 * We need the tasklist_lock to protect against reaping that
737 * clears p->signal. If p has just been reaped, we can no
738 * longer get any information about it at all.
739 */
740 if (unlikely(p->signal == NULL)) {
741 read_unlock(&tasklist_lock);
742 put_task_struct(p);
743 timer->it.cpu.task = NULL;
744 return -ESRCH;
745 }
746
747 /*
748 * Disarm any old timer after extracting its expiry time.
749 */
750 BUG_ON(!irqs_disabled());
751
752 ret = 0;
1da177e4
LT
753 spin_lock(&p->sighand->siglock);
754 old_expires = timer->it.cpu.expires;
755 if (unlikely(timer->it.cpu.firing)) {
756 timer->it.cpu.firing = -1;
757 ret = TIMER_RETRY;
758 } else
759 list_del_init(&timer->it.cpu.entry);
1da177e4
LT
760 spin_unlock(&p->sighand->siglock);
761
	/*
	 * We need to sample the current value to convert the new
	 * value from relative to absolute, and to convert the
	 * old value from absolute to relative. To set a process
	 * timer, we need a sample to balance the thread expiry
	 * times (in arm_timer). With an absolute time, we must
	 * check if it's already passed. In short, we need a sample.
	 */
770 if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
771 cpu_clock_sample(timer->it_clock, p, &val);
772 } else {
773 cpu_clock_sample_group(timer->it_clock, p, &val);
774 }
775
776 if (old) {
777 if (old_expires.sched == 0) {
778 old->it_value.tv_sec = 0;
779 old->it_value.tv_nsec = 0;
780 } else {
781 /*
782 * Update the timer in case it has
783 * overrun already. If it has,
784 * we'll report it as having overrun
785 * and with the next reloaded timer
786 * already ticking, though we are
787 * swallowing that pending
788 * notification here to install the
789 * new setting.
790 */
791 bump_cpu_timer(timer, val);
792 if (cpu_time_before(timer->it_clock, val,
793 timer->it.cpu.expires)) {
794 old_expires = cpu_time_sub(
795 timer->it_clock,
796 timer->it.cpu.expires, val);
797 sample_to_timespec(timer->it_clock,
798 old_expires,
799 &old->it_value);
800 } else {
801 old->it_value.tv_nsec = 1;
802 old->it_value.tv_sec = 0;
803 }
804 }
805 }
806
a69ac4a7 807 if (unlikely(ret)) {
808 /*
809 * We are colliding with the timer actually firing.
810 * Punt after filling in the timer's old value, and
811 * disable this firing since we are already reporting
812 * it as an overrun (thanks to bump_cpu_timer above).
813 */
814 read_unlock(&tasklist_lock);
815 goto out;
816 }
817
818 if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) {
819 cpu_time_add(timer->it_clock, &new_expires, val);
820 }
821
822 /*
823 * Install the new expiry time (or zero).
824 * For a timer with no notification action, we don't actually
825 * arm the timer (we'll just fake it for timer_gettime).
826 */
827 timer->it.cpu.expires = new_expires;
828 if (new_expires.sched != 0 &&
829 (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
830 cpu_time_before(timer->it_clock, val, new_expires)) {
831 arm_timer(timer, val);
832 }
833
834 read_unlock(&tasklist_lock);
835
836 /*
837 * Install the new reload setting, and
838 * set up the signal and overrun bookkeeping.
839 */
840 timer->it.cpu.incr = timespec_to_sample(timer->it_clock,
841 &new->it_interval);
842
843 /*
844 * This acts as a modification timestamp for the timer,
845 * so any automatic reload attempt will punt on seeing
846 * that we have reset the timer manually.
847 */
848 timer->it_requeue_pending = (timer->it_requeue_pending + 2) &
849 ~REQUEUE_PENDING;
850 timer->it_overrun_last = 0;
851 timer->it_overrun = -1;
852
853 if (new_expires.sched != 0 &&
854 (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
855 !cpu_time_before(timer->it_clock, val, new_expires)) {
856 /*
857 * The designated time already passed, so we notify
858 * immediately, even if the thread never runs to
859 * accumulate more time on this clock.
860 */
861 cpu_timer_fire(timer);
862 }
863
864 ret = 0;
865 out:
866 if (old) {
867 sample_to_timespec(timer->it_clock,
868 timer->it.cpu.incr, &old->it_interval);
869 }
870 return ret;
871}
872
873void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
874{
875 union cpu_time_count now;
876 struct task_struct *p = timer->it.cpu.task;
877 int clear_dead;
878
879 /*
880 * Easy part: convert the reload time.
881 */
882 sample_to_timespec(timer->it_clock,
883 timer->it.cpu.incr, &itp->it_interval);
884
885 if (timer->it.cpu.expires.sched == 0) { /* Timer not armed at all. */
886 itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
887 return;
888 }
889
890 if (unlikely(p == NULL)) {
891 /*
892 * This task already died and the timer will never fire.
893 * In this case, expires is actually the dead value.
894 */
895 dead:
896 sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
897 &itp->it_value);
898 return;
899 }
900
901 /*
902 * Sample the clock to take the difference with the expiry time.
903 */
904 if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
905 cpu_clock_sample(timer->it_clock, p, &now);
906 clear_dead = p->exit_state;
907 } else {
908 read_lock(&tasklist_lock);
909 if (unlikely(p->signal == NULL)) {
910 /*
911 * The process has been reaped.
912 * We can't even collect a sample any more.
913 * Call the timer disarmed, nothing else to do.
914 */
915 put_task_struct(p);
916 timer->it.cpu.task = NULL;
917 timer->it.cpu.expires.sched = 0;
918 read_unlock(&tasklist_lock);
919 goto dead;
920 } else {
921 cpu_clock_sample_group(timer->it_clock, p, &now);
922 clear_dead = (unlikely(p->exit_state) &&
923 thread_group_empty(p));
924 }
925 read_unlock(&tasklist_lock);
926 }
927
928 if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
929 if (timer->it.cpu.incr.sched == 0 &&
930 cpu_time_before(timer->it_clock,
931 timer->it.cpu.expires, now)) {
932 /*
933 * Do-nothing timer expired and has no reload,
934 * so it's as if it was never set.
935 */
936 timer->it.cpu.expires.sched = 0;
937 itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
938 return;
939 }
940 /*
941 * Account for any expirations and reloads that should
942 * have happened.
943 */
944 bump_cpu_timer(timer, now);
945 }
946
947 if (unlikely(clear_dead)) {
948 /*
949 * We've noticed that the thread is dead, but
950 * not yet reaped. Take this opportunity to
951 * drop our task ref.
952 */
953 clear_dead_task(timer, now);
954 goto dead;
955 }
956
957 if (cpu_time_before(timer->it_clock, now, timer->it.cpu.expires)) {
958 sample_to_timespec(timer->it_clock,
959 cpu_time_sub(timer->it_clock,
960 timer->it.cpu.expires, now),
961 &itp->it_value);
962 } else {
963 /*
964 * The timer should have expired already, but the firing
965 * hasn't taken place yet. Say it's just about to expire.
966 */
967 itp->it_value.tv_nsec = 1;
968 itp->it_value.tv_sec = 0;
969 }
970}
971
/*
 * Check for any per-thread CPU timers that have fired and move them off
 * the tsk->cpu_timers[N] list onto the firing list.  Here we update the
 * tsk->cputime_expires values to reflect the remaining thread CPU timers.
 */
977static void check_thread_timers(struct task_struct *tsk,
978 struct list_head *firing)
979{
e80eda94 980 int maxfire;
1da177e4 981 struct list_head *timers = tsk->cpu_timers;
78f2c7db 982 struct signal_struct *const sig = tsk->signal;
1da177e4 983
e80eda94 984 maxfire = 20;
f06febc9 985 tsk->cputime_expires.prof_exp = cputime_zero;
1da177e4 986 while (!list_empty(timers)) {
b5e61818 987 struct cpu_timer_list *t = list_first_entry(timers,
988 struct cpu_timer_list,
989 entry);
e80eda94 990 if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) {
f06febc9 991 tsk->cputime_expires.prof_exp = t->expires.cpu;
1da177e4
LT
992 break;
993 }
994 t->firing = 1;
995 list_move_tail(&t->entry, firing);
996 }
997
998 ++timers;
e80eda94 999 maxfire = 20;
f06febc9 1000 tsk->cputime_expires.virt_exp = cputime_zero;
1da177e4 1001 while (!list_empty(timers)) {
b5e61818 1002 struct cpu_timer_list *t = list_first_entry(timers,
1da177e4
LT
1003 struct cpu_timer_list,
1004 entry);
e80eda94 1005 if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) {
f06febc9 1006 tsk->cputime_expires.virt_exp = t->expires.cpu;
1da177e4
LT
1007 break;
1008 }
1009 t->firing = 1;
1010 list_move_tail(&t->entry, firing);
1011 }
1012
1013 ++timers;
e80eda94 1014 maxfire = 20;
f06febc9 1015 tsk->cputime_expires.sched_exp = 0;
1da177e4 1016 while (!list_empty(timers)) {
b5e61818 1017 struct cpu_timer_list *t = list_first_entry(timers,
1da177e4
LT
1018 struct cpu_timer_list,
1019 entry);
41b86e9c 1020 if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
f06febc9 1021 tsk->cputime_expires.sched_exp = t->expires.sched;
1da177e4
LT
1022 break;
1023 }
1024 t->firing = 1;
1025 list_move_tail(&t->entry, firing);
1026 }
1027
1028 /*
1029 * Check for the special case thread timers.
1030 */
1031 if (sig->rlim[RLIMIT_RTTIME].rlim_cur != RLIM_INFINITY) {
1032 unsigned long hard = sig->rlim[RLIMIT_RTTIME].rlim_max;
1033 unsigned long *soft = &sig->rlim[RLIMIT_RTTIME].rlim_cur;
1034
5a52dd50
PZ
1035 if (hard != RLIM_INFINITY &&
1036 tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
78f2c7db
PZ
1037 /*
1038 * At the hard limit, we just die.
1039 * No need to calculate anything else now.
1040 */
1041 __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
1042 return;
1043 }
1044 if (tsk->rt.timeout > DIV_ROUND_UP(*soft, USEC_PER_SEC/HZ)) {
1045 /*
1046 * At the soft limit, send a SIGXCPU every second.
1047 */
1048 if (sig->rlim[RLIMIT_RTTIME].rlim_cur
1049 < sig->rlim[RLIMIT_RTTIME].rlim_max) {
1050 sig->rlim[RLIMIT_RTTIME].rlim_cur +=
1051 USEC_PER_SEC;
1052 }
1053 printk(KERN_INFO
1054 "RT Watchdog Timeout: %s[%d]\n",
1055 tsk->comm, task_pid_nr(tsk));
78f2c7db
PZ
1056 __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
1057 }
1058 }
1059}
1060
/*
 * Check for any per-process (thread group) CPU timers that have fired
 * and move them off the sig->cpu_timers lists onto the firing list.
 * Per-thread timers have already been taken off.
 */
1066static void check_process_timers(struct task_struct *tsk,
1067 struct list_head *firing)
1068{
e80eda94 1069 int maxfire;
1da177e4 1070 struct signal_struct *const sig = tsk->signal;
f06febc9 1071 cputime_t utime, ptime, virt_expires, prof_expires;
41b86e9c 1072 unsigned long long sum_sched_runtime, sched_expires;
1da177e4 1073 struct list_head *timers = sig->cpu_timers;
f06febc9 1074 struct task_cputime cputime;
1075
1076 /*
1077 * Don't sample the current process CPU clocks if there are no timers.
1078 */
1079 if (list_empty(&timers[CPUCLOCK_PROF]) &&
1080 cputime_eq(sig->it_prof_expires, cputime_zero) &&
1081 sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
1082 list_empty(&timers[CPUCLOCK_VIRT]) &&
1083 cputime_eq(sig->it_virt_expires, cputime_zero) &&
1084 list_empty(&timers[CPUCLOCK_SCHED]))
1085 return;
1086
1087 /*
1088 * Collect the current process totals.
1089 */
1090 thread_group_cputime(tsk, &cputime);
1091 utime = cputime.utime;
1092 ptime = cputime_add(utime, cputime.stime);
1093 sum_sched_runtime = cputime.sum_exec_runtime;
e80eda94 1094 maxfire = 20;
1da177e4
LT
1095 prof_expires = cputime_zero;
1096 while (!list_empty(timers)) {
ee7dd205 1097 struct cpu_timer_list *tl = list_first_entry(timers,
1da177e4
LT
1098 struct cpu_timer_list,
1099 entry);
1100 if (!--maxfire || cputime_lt(ptime, tl->expires.cpu)) {
1101 prof_expires = tl->expires.cpu;
1da177e4
LT
1102 break;
1103 }
ee7dd205
WC
1104 tl->firing = 1;
1105 list_move_tail(&tl->entry, firing);
1da177e4
LT
1106 }
1107
1108 ++timers;
e80eda94 1109 maxfire = 20;
1da177e4
LT
1110 virt_expires = cputime_zero;
1111 while (!list_empty(timers)) {
ee7dd205 1112 struct cpu_timer_list *tl = list_first_entry(timers,
1da177e4
LT
1113 struct cpu_timer_list,
1114 entry);
ee7dd205
WC
1115 if (!--maxfire || cputime_lt(utime, tl->expires.cpu)) {
1116 virt_expires = tl->expires.cpu;
1da177e4
LT
1117 break;
1118 }
ee7dd205
WC
1119 tl->firing = 1;
1120 list_move_tail(&tl->entry, firing);
1da177e4
LT
1121 }
1122
1123 ++timers;
e80eda94 1124 maxfire = 20;
1125 sched_expires = 0;
1126 while (!list_empty(timers)) {
ee7dd205 1127 struct cpu_timer_list *tl = list_first_entry(timers,
1da177e4
LT
1128 struct cpu_timer_list,
1129 entry);
ee7dd205
WC
1130 if (!--maxfire || sum_sched_runtime < tl->expires.sched) {
1131 sched_expires = tl->expires.sched;
1da177e4
LT
1132 break;
1133 }
ee7dd205
WC
1134 tl->firing = 1;
1135 list_move_tail(&tl->entry, firing);
1da177e4
LT
1136 }
1137
1138 /*
1139 * Check for the special case process timers.
1140 */
1141 if (!cputime_eq(sig->it_prof_expires, cputime_zero)) {
1142 if (cputime_ge(ptime, sig->it_prof_expires)) {
1143 /* ITIMER_PROF fires and reloads. */
1144 sig->it_prof_expires = sig->it_prof_incr;
1145 if (!cputime_eq(sig->it_prof_expires, cputime_zero)) {
1146 sig->it_prof_expires = cputime_add(
1147 sig->it_prof_expires, ptime);
1148 }
1149 __group_send_sig_info(SIGPROF, SEND_SIG_PRIV, tsk);
1150 }
1151 if (!cputime_eq(sig->it_prof_expires, cputime_zero) &&
1152 (cputime_eq(prof_expires, cputime_zero) ||
1153 cputime_lt(sig->it_prof_expires, prof_expires))) {
1154 prof_expires = sig->it_prof_expires;
1155 }
1156 }
1157 if (!cputime_eq(sig->it_virt_expires, cputime_zero)) {
1158 if (cputime_ge(utime, sig->it_virt_expires)) {
1159 /* ITIMER_VIRTUAL fires and reloads. */
1160 sig->it_virt_expires = sig->it_virt_incr;
1161 if (!cputime_eq(sig->it_virt_expires, cputime_zero)) {
1162 sig->it_virt_expires = cputime_add(
1163 sig->it_virt_expires, utime);
1164 }
1165 __group_send_sig_info(SIGVTALRM, SEND_SIG_PRIV, tsk);
1166 }
1167 if (!cputime_eq(sig->it_virt_expires, cputime_zero) &&
1168 (cputime_eq(virt_expires, cputime_zero) ||
1169 cputime_lt(sig->it_virt_expires, virt_expires))) {
1170 virt_expires = sig->it_virt_expires;
1171 }
1172 }
1173 if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
1174 unsigned long psecs = cputime_to_secs(ptime);
1175 cputime_t x;
1176 if (psecs >= sig->rlim[RLIMIT_CPU].rlim_max) {
1177 /*
1178 * At the hard limit, we just die.
1179 * No need to calculate anything else now.
1180 */
1181 __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
1182 return;
1183 }
1184 if (psecs >= sig->rlim[RLIMIT_CPU].rlim_cur) {
1185 /*
1186 * At the soft limit, send a SIGXCPU every second.
1187 */
1188 __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
1189 if (sig->rlim[RLIMIT_CPU].rlim_cur
1190 < sig->rlim[RLIMIT_CPU].rlim_max) {
1191 sig->rlim[RLIMIT_CPU].rlim_cur++;
1192 }
1193 }
1194 x = secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
1195 if (cputime_eq(prof_expires, cputime_zero) ||
1196 cputime_lt(x, prof_expires)) {
1197 prof_expires = x;
1198 }
1199 }
1200
1201 if (!cputime_eq(prof_expires, cputime_zero) &&
1202 (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) ||
1203 cputime_gt(sig->cputime_expires.prof_exp, prof_expires)))
1204 sig->cputime_expires.prof_exp = prof_expires;
1205 if (!cputime_eq(virt_expires, cputime_zero) &&
1206 (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) ||
1207 cputime_gt(sig->cputime_expires.virt_exp, virt_expires)))
1208 sig->cputime_expires.virt_exp = virt_expires;
1209 if (sched_expires != 0 &&
1210 (sig->cputime_expires.sched_exp == 0 ||
1211 sig->cputime_expires.sched_exp > sched_expires))
1212 sig->cputime_expires.sched_exp = sched_expires;
1da177e4
LT
1213}
1214
1215/*
1216 * This is called from the signal code (via do_schedule_next_timer)
1217 * when the last timer signal was delivered and we have to reload the timer.
1218 */
1219void posix_cpu_timer_schedule(struct k_itimer *timer)
1220{
1221 struct task_struct *p = timer->it.cpu.task;
1222 union cpu_time_count now;
1223
1224 if (unlikely(p == NULL))
1225 /*
1226 * The task was cleaned up already, no future firings.
1227 */
708f430d 1228 goto out;
1229
1230 /*
1231 * Fetch the current sample and update the timer's expiry time.
1232 */
1233 if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
1234 cpu_clock_sample(timer->it_clock, p, &now);
1235 bump_cpu_timer(timer, now);
1236 if (unlikely(p->exit_state)) {
1237 clear_dead_task(timer, now);
708f430d 1238 goto out;
1da177e4
LT
1239 }
1240 read_lock(&tasklist_lock); /* arm_timer needs it. */
1241 } else {
1242 read_lock(&tasklist_lock);
1243 if (unlikely(p->signal == NULL)) {
1244 /*
1245 * The process has been reaped.
1246 * We can't even collect a sample any more.
1247 */
1248 put_task_struct(p);
1249 timer->it.cpu.task = p = NULL;
1250 timer->it.cpu.expires.sched = 0;
708f430d 1251 goto out_unlock;
1252 } else if (unlikely(p->exit_state) && thread_group_empty(p)) {
1253 /*
1254 * We've noticed that the thread is dead, but
1255 * not yet reaped. Take this opportunity to
1256 * drop our task ref.
1257 */
1258 clear_dead_task(timer, now);
708f430d 1259 goto out_unlock;
1da177e4
LT
1260 }
1261 cpu_clock_sample_group(timer->it_clock, p, &now);
1262 bump_cpu_timer(timer, now);
1263 /* Leave the tasklist_lock locked for the call below. */
1264 }
1265
1266 /*
1267 * Now re-arm for the new expiry time.
1268 */
1269 arm_timer(timer, now);
1270
708f430d 1271out_unlock:
1da177e4 1272 read_unlock(&tasklist_lock);
1273
1274out:
1275 timer->it_overrun_last = timer->it_overrun;
1276 timer->it_overrun = -1;
1277 ++timer->it_requeue_pending;
1da177e4
LT
1278}
1279
1280/**
1281 * task_cputime_zero - Check a task_cputime struct for all zero fields.
1282 *
1283 * @cputime: The struct to compare.
1284 *
1285 * Checks @cputime to see if all fields are zero. Returns true if all fields
1286 * are zero, false if any field is nonzero.
1287 */
1288static inline int task_cputime_zero(const struct task_cputime *cputime)
1289{
1290 if (cputime_eq(cputime->utime, cputime_zero) &&
1291 cputime_eq(cputime->stime, cputime_zero) &&
1292 cputime->sum_exec_runtime == 0)
1293 return 1;
1294 return 0;
1295}
1296
1297/**
1298 * task_cputime_expired - Compare two task_cputime entities.
1299 *
1300 * @sample: The task_cputime structure to be checked for expiration.
1301 * @expires: Expiration times, against which @sample will be checked.
1302 *
1303 * Checks @sample against @expires to see if any field of @sample has expired.
1304 * Returns true if any field of the former is greater than the corresponding
1305 * field of the latter if the latter field is set. Otherwise returns false.
1306 */
1307static inline int task_cputime_expired(const struct task_cputime *sample,
1308 const struct task_cputime *expires)
1309{
1310 if (!cputime_eq(expires->utime, cputime_zero) &&
1311 cputime_ge(sample->utime, expires->utime))
1312 return 1;
1313 if (!cputime_eq(expires->stime, cputime_zero) &&
1314 cputime_ge(cputime_add(sample->utime, sample->stime),
1315 expires->stime))
1316 return 1;
1317 if (expires->sum_exec_runtime != 0 &&
1318 sample->sum_exec_runtime >= expires->sum_exec_runtime)
1319 return 1;
1320 return 0;
1321}
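
/*
 * For instance (illustrative values only): with expires = { .utime = 0,
 * .stime = 10, .sum_exec_runtime = 0 }, only the middle test is armed,
 * and a sample whose utime + stime reaches 10 reports expiry; the utime
 * and runtime fields are ignored because their expiration values are
 * zero, i.e. unset.
 */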
1322
1323/**
1324 * fastpath_timer_check - POSIX CPU timers fast path.
1325 *
1326 * @tsk: The task (thread) being checked.
1327 * @sig: The signal pointer for that task.
1328 *
 * If there are no timers set return false. Otherwise snapshot the task and
 * thread group timers, then compare them with the corresponding expiration
 * times. Returns true if a timer has expired, else returns false.
1332 */
1333static inline int fastpath_timer_check(struct task_struct *tsk,
1334 struct signal_struct *sig)
1335{
1336 struct task_cputime task_sample = {
1337 .utime = tsk->utime,
1338 .stime = tsk->stime,
1339 .sum_exec_runtime = tsk->se.sum_exec_runtime
1340 };
1341 struct task_cputime group_sample;
1342
1343 if (task_cputime_zero(&tsk->cputime_expires) &&
1344 task_cputime_zero(&sig->cputime_expires))
1345 return 0;
1346 if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
1347 return 1;
1348 thread_group_cputime(tsk, &group_sample);
1349 return task_cputime_expired(&group_sample, &sig->cputime_expires);
1350}
1351
1da177e4
LT
1352/*
1353 * This is called from the timer interrupt handler. The irq handler has
1354 * already updated our counts. We need to check if any timers fire now.
1355 * Interrupts are disabled.
1356 */
1357void run_posix_cpu_timers(struct task_struct *tsk)
1358{
1359 LIST_HEAD(firing);
1360 struct k_itimer *timer, *next;
1361 struct signal_struct *sig;
1362 struct sighand_struct *sighand;
1363 unsigned long flags;
1da177e4
LT
1364
1365 BUG_ON(!irqs_disabled());
1366
f06febc9
FM
1367 /* Pick up tsk->signal and make sure it's valid. */
1368 sig = tsk->signal;
1da177e4 1369 /*
f06febc9
FM
1370 * The fast path checks that there are no expired thread or thread
1371 * group timers. If that's so, just return. Also check that
1372 * tsk->signal is non-NULL; this probably can't happen but cover the
1373 * possibility anyway.
1da177e4 1374 */
1375 if (unlikely(!sig) || !fastpath_timer_check(tsk, sig)) {
1376 return;
1377 }
1378 sighand = lock_task_sighand(tsk, &flags);
1379 if (likely(sighand)) {
30f1e3dd 1380 /*
1381 * Here we take off tsk->signal->cpu_timers[N] and
1382 * tsk->cpu_timers[N] all the timers that are firing, and
1383 * put them on the firing list.
30f1e3dd
ON
1384 */
1385 check_thread_timers(tsk, &firing);
1386 check_process_timers(tsk, &firing);
1da177e4 1387
1388 /*
1389 * We must release these locks before taking any timer's lock.
1390 * There is a potential race with timer deletion here, as the
1391 * siglock now protects our private firing list. We have set
1392 * the firing flag in each timer, so that a deletion attempt
1393 * that gets the timer lock before we do will give it up and
1394 * spin until we've taken care of that timer below.
1395 */
30f1e3dd 1396 }
f06febc9 1397 unlock_task_sighand(tsk, &flags);
1398
	/*
	 * Now that all the timers on our list have the firing flag,
	 * no one will touch their list entries but us. We'll take
	 * each timer's lock before clearing its firing flag, so no
	 * timer call will interfere.
	 */
1405 list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
1406 int firing;
1407 spin_lock(&timer->it_lock);
1408 list_del_init(&timer->it.cpu.entry);
1409 firing = timer->it.cpu.firing;
1410 timer->it.cpu.firing = 0;
1411 /*
1412 * The firing flag is -1 if we collided with a reset
1413 * of the timer, which already reported this
1414 * almost-firing as an overrun. So don't generate an event.
1415 */
1416 if (likely(firing >= 0)) {
1417 cpu_timer_fire(timer);
1418 }
1419 spin_unlock(&timer->it_lock);
1420 }
1421}
1422
1423/*
1424 * Set one of the process-wide special case CPU timers.
1425 * The tsk->sighand->siglock must be held by the caller.
1426 * The *newval argument is relative and we update it to be absolute, *oldval
1427 * is absolute and we update it to be relative.
1da177e4
LT
1428 */
1429void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1430 cputime_t *newval, cputime_t *oldval)
1431{
1432 union cpu_time_count now;
1433 struct list_head *head;
1434
1435 BUG_ON(clock_idx == CPUCLOCK_SCHED);
1436 cpu_clock_sample_group_locked(clock_idx, tsk, &now);
1437
1438 if (oldval) {
1439 if (!cputime_eq(*oldval, cputime_zero)) {
1440 if (cputime_le(*oldval, now.cpu)) {
1441 /* Just about to fire. */
1442 *oldval = jiffies_to_cputime(1);
1443 } else {
1444 *oldval = cputime_sub(*oldval, now.cpu);
1445 }
1446 }
1447
1448 if (cputime_eq(*newval, cputime_zero))
1449 return;
1450 *newval = cputime_add(*newval, now.cpu);
1451
1452 /*
1453 * If the RLIMIT_CPU timer will expire before the
1454 * ITIMER_PROF timer, we have nothing else to do.
1455 */
1456 if (tsk->signal->rlim[RLIMIT_CPU].rlim_cur
1457 < cputime_to_secs(*newval))
1458 return;
1459 }
1460
1461 /*
1462 * Check whether there are any process timers already set to fire
1463 * before this one. If so, we don't have anything more to do.
1464 */
1465 head = &tsk->signal->cpu_timers[clock_idx];
1466 if (list_empty(head) ||
b5e61818 1467 cputime_ge(list_first_entry(head,
1468 struct cpu_timer_list, entry)->expires.cpu,
1469 *newval)) {
f06febc9
FM
1470 switch (clock_idx) {
1471 case CPUCLOCK_PROF:
1472 tsk->signal->cputime_expires.prof_exp = *newval;
1473 break;
1474 case CPUCLOCK_VIRT:
1475 tsk->signal->cputime_expires.virt_exp = *newval;
1476 break;
1477 }
1da177e4
LT
1478 }
1479}
1480
1481static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
1482 struct timespec *rqtp, struct itimerspec *it)
1da177e4 1483{
1da177e4
LT
1484 struct k_itimer timer;
1485 int error;
1486
1da177e4
LT
1487 /*
1488 * Set up a temporary timer and then wait for it to go off.
1489 */
1490 memset(&timer, 0, sizeof timer);
1491 spin_lock_init(&timer.it_lock);
1492 timer.it_clock = which_clock;
1493 timer.it_overrun = -1;
1494 error = posix_cpu_timer_create(&timer);
1495 timer.it_process = current;
1496 if (!error) {
1da177e4 1497 static struct itimerspec zero_it;
1498
1499 memset(it, 0, sizeof *it);
1500 it->it_value = *rqtp;
1da177e4
LT
1501
1502 spin_lock_irq(&timer.it_lock);
e4b76555 1503 error = posix_cpu_timer_set(&timer, flags, it, NULL);
1da177e4
LT
1504 if (error) {
1505 spin_unlock_irq(&timer.it_lock);
1506 return error;
1507 }
1508
1509 while (!signal_pending(current)) {
1510 if (timer.it.cpu.expires.sched == 0) {
1511 /*
1512 * Our timer fired and was reset.
1513 */
1514 spin_unlock_irq(&timer.it_lock);
1515 return 0;
1516 }
1517
1518 /*
1519 * Block until cpu_timer_fire (or a signal) wakes us.
1520 */
1521 __set_current_state(TASK_INTERRUPTIBLE);
1522 spin_unlock_irq(&timer.it_lock);
1523 schedule();
1524 spin_lock_irq(&timer.it_lock);
1525 }
1526
1527 /*
1528 * We were interrupted by a signal.
1529 */
1530 sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp);
e4b76555 1531 posix_cpu_timer_set(&timer, 0, &zero_it, it);
1da177e4
LT
1532 spin_unlock_irq(&timer.it_lock);
1533
e4b76555 1534 if ((it->it_value.tv_sec | it->it_value.tv_nsec) == 0) {
1da177e4
LT
1535 /*
1536 * It actually did fire already.
1537 */
1538 return 0;
1539 }
1540
1541 error = -ERESTART_RESTARTBLOCK;
1542 }
1543
1544 return error;
1545}
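
/*
 * Illustration (user space, not part of this file): the path above backs
 * clock_nanosleep() on a CPU-time clock.  A minimal sketch, assuming a
 * POSIX system with pthreads (compile with -pthread); the sleep completes
 * only as the thread group accumulates CPU time, which the spinning
 * worker provides here:
 */
#if 0
#include <pthread.h>
#include <time.h>

static void *spin(void *arg)
{
	for (;;)
		;		/* consume CPU time in another thread */
	return NULL;
}

int main(void)
{
	pthread_t worker;
	struct timespec req = { .tv_sec = 0, .tv_nsec = 100 * 1000 * 1000 };

	pthread_create(&worker, NULL, spin, NULL);
	/*
	 * Returns once the whole thread group has consumed ~100ms more
	 * CPU time; wall-clock time spent blocked does not count.
	 */
	clock_nanosleep(CLOCK_PROCESS_CPUTIME_ID, 0, &req, NULL);
	return 0;
}
#endif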
1546
1547int posix_cpu_nsleep(const clockid_t which_clock, int flags,
1548 struct timespec *rqtp, struct timespec __user *rmtp)
1549{
1550 struct restart_block *restart_block =
1551 &current_thread_info()->restart_block;
1552 struct itimerspec it;
1553 int error;
1554
1555 /*
1556 * Diagnose required errors first.
1557 */
1558 if (CPUCLOCK_PERTHREAD(which_clock) &&
1559 (CPUCLOCK_PID(which_clock) == 0 ||
1560 CPUCLOCK_PID(which_clock) == current->pid))
1561 return -EINVAL;
1562
1563 error = do_cpu_nanosleep(which_clock, flags, rqtp, &it);
1564
1565 if (error == -ERESTART_RESTARTBLOCK) {
1566
1567 if (flags & TIMER_ABSTIME)
1568 return -ERESTARTNOHAND;
1da177e4 1569 /*
1570 * Report back to the user the time still remaining.
1571 */
1572 if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
1da177e4
LT
1573 return -EFAULT;
1574
1711ef38 1575 restart_block->fn = posix_cpu_nsleep_restart;
1da177e4 1576 restart_block->arg0 = which_clock;
97735f25 1577 restart_block->arg1 = (unsigned long) rmtp;
1da177e4
LT
1578 restart_block->arg2 = rqtp->tv_sec;
1579 restart_block->arg3 = rqtp->tv_nsec;
1da177e4 1580 }
1da177e4
LT
1581 return error;
1582}
1583
1711ef38 1584long posix_cpu_nsleep_restart(struct restart_block *restart_block)
1da177e4
LT
1585{
1586 clockid_t which_clock = restart_block->arg0;
97735f25
TG
1587 struct timespec __user *rmtp;
1588 struct timespec t;
1589 struct itimerspec it;
1590 int error;
97735f25
TG
1591
1592 rmtp = (struct timespec __user *) restart_block->arg1;
1593 t.tv_sec = restart_block->arg2;
1594 t.tv_nsec = restart_block->arg3;
1595
1da177e4 1596 restart_block->fn = do_no_restart_syscall;
e4b76555
TA
1597 error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it);
1598
1599 if (error == -ERESTART_RESTARTBLOCK) {
1600 /*
1601 * Report back to the user the time still remaining.
1602 */
1603 if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
1604 return -EFAULT;
1605
1606 restart_block->fn = posix_cpu_nsleep_restart;
1607 restart_block->arg0 = which_clock;
1608 restart_block->arg1 = (unsigned long) rmtp;
1609 restart_block->arg2 = t.tv_sec;
1610 restart_block->arg3 = t.tv_nsec;
1611 }
1612 return error;
1613
1614}
1615
1616
1617#define PROCESS_CLOCK MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED)
1618#define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED)
1619
1620static int process_cpu_clock_getres(const clockid_t which_clock,
1621 struct timespec *tp)
1da177e4
LT
1622{
1623 return posix_cpu_clock_getres(PROCESS_CLOCK, tp);
1624}
a924b04d
TG
1625static int process_cpu_clock_get(const clockid_t which_clock,
1626 struct timespec *tp)
1da177e4
LT
1627{
1628 return posix_cpu_clock_get(PROCESS_CLOCK, tp);
1629}
1630static int process_cpu_timer_create(struct k_itimer *timer)
1631{
1632 timer->it_clock = PROCESS_CLOCK;
1633 return posix_cpu_timer_create(timer);
1634}
a924b04d 1635static int process_cpu_nsleep(const clockid_t which_clock, int flags,
1636 struct timespec *rqtp,
1637 struct timespec __user *rmtp)
1da177e4 1638{
97735f25 1639 return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp, rmtp);
1da177e4 1640}
1711ef38
TA
1641static long process_cpu_nsleep_restart(struct restart_block *restart_block)
1642{
1643 return -EINVAL;
1644}
1645static int thread_cpu_clock_getres(const clockid_t which_clock,
1646 struct timespec *tp)
1da177e4
LT
1647{
1648 return posix_cpu_clock_getres(THREAD_CLOCK, tp);
1649}
a924b04d
TG
1650static int thread_cpu_clock_get(const clockid_t which_clock,
1651 struct timespec *tp)
1da177e4
LT
1652{
1653 return posix_cpu_clock_get(THREAD_CLOCK, tp);
1654}
1655static int thread_cpu_timer_create(struct k_itimer *timer)
1656{
1657 timer->it_clock = THREAD_CLOCK;
1658 return posix_cpu_timer_create(timer);
1659}
a924b04d 1660static int thread_cpu_nsleep(const clockid_t which_clock, int flags,
97735f25 1661 struct timespec *rqtp, struct timespec __user *rmtp)
1da177e4
LT
1662{
1663 return -EINVAL;
1664}
1665static long thread_cpu_nsleep_restart(struct restart_block *restart_block)
1666{
1667 return -EINVAL;
1668}
1da177e4
LT
1669
1670static __init int init_posix_cpu_timers(void)
1671{
1672 struct k_clock process = {
1673 .clock_getres = process_cpu_clock_getres,
1674 .clock_get = process_cpu_clock_get,
1675 .clock_set = do_posix_clock_nosettime,
1676 .timer_create = process_cpu_timer_create,
1677 .nsleep = process_cpu_nsleep,
1711ef38 1678 .nsleep_restart = process_cpu_nsleep_restart,
1679 };
1680 struct k_clock thread = {
1681 .clock_getres = thread_cpu_clock_getres,
1682 .clock_get = thread_cpu_clock_get,
1683 .clock_set = do_posix_clock_nosettime,
1684 .timer_create = thread_cpu_timer_create,
1685 .nsleep = thread_cpu_nsleep,
1711ef38 1686 .nsleep_restart = thread_cpu_nsleep_restart,
1687 };
1688
1689 register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
1690 register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
1691
1692 return 0;
1693}
1694__initcall(init_posix_cpu_timers);
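
/*
 * Illustration (user space, not part of this file): once the clocks
 * registered above are available, a per-process CPU interval timer can
 * be driven through the regular POSIX timer API.  A minimal sketch,
 * assuming a POSIX system (link with -lrt on older glibc):
 */
#if 0
#include <signal.h>
#include <time.h>

int main(void)
{
	timer_t tid;
	struct sigevent sev = {
		.sigev_notify = SIGEV_SIGNAL,
		.sigev_signo  = SIGPROF,
	};
	struct itimerspec its = {
		.it_value    = { .tv_sec = 1, .tv_nsec = 0 },	/* first expiry */
		.it_interval = { .tv_sec = 1, .tv_nsec = 0 },	/* then every 1s of CPU */
	};

	if (timer_create(CLOCK_PROCESS_CPUTIME_ID, &sev, &tid) != 0)
		return 1;
	if (timer_settime(tid, 0, &its, NULL) != 0)
		return 1;
	/* ... do work; SIGPROF arrives each time 1s of CPU time elapses ... */
	for (;;)
		;
	return 0;
}
#endif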