From 5613fda9a503cd6137b120298902a34a1386b2c1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:23 +0100 Subject: [PATCH] sched/cputime: Convert task/group cputime to nsecs Now that most cputime readers use the transition API which returns the task cputime in old-style cputime_t, we can safely store the cputime in nsecs. This will eventually make cputime statistics less opaque and more granular. Back-and-forth conversions between cputime_t and nsecs in order to deal with cputime_t random granularity won't be needed anymore. Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-8-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- arch/alpha/kernel/osf_sys.c | 4 +-- arch/powerpc/kernel/time.c | 4 +-- arch/s390/kernel/vtime.c | 6 ++-- arch/x86/kvm/hyperv.c | 5 ++-- fs/binfmt_elf.c | 11 ++++++-- fs/binfmt_elf_fdpic.c | 4 +-- fs/proc/array.c | 10 +++---- include/linux/sched.h | 55 +++++++++++++++++++++++-------------- kernel/exit.c | 4 +-- kernel/sched/cputime.c | 35 ++++++++++++----------- kernel/signal.c | 4 +-- kernel/sys.c | 16 +++++------ 12 files changed, 89 insertions(+), 69 deletions(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 0f92438d736b..82ccb43b795b 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1163,8 +1163,8 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru) r.ru_majflt = current->maj_flt; break; case RUSAGE_CHILDREN: - utime_jiffies = cputime_to_jiffies(current->signal->cutime); - stime_jiffies = cputime_to_jiffies(current->signal->cstime); + utime_jiffies = nsecs_to_jiffies(current->signal->cutime); + stime_jiffies = 
nsecs_to_jiffies(current->signal->cstime); jiffies_to_timeval32(utime_jiffies, &r.ru_utime); jiffies_to_timeval32(stime_jiffies, &r.ru_stime); r.ru_minflt = current->signal->cmin_flt; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 02e97305d22b..3cca82e065c9 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -396,7 +396,7 @@ void vtime_flush(struct task_struct *tsk) account_user_time(tsk, acct->utime); if (acct->utime_scaled) - tsk->utimescaled += acct->utime_scaled; + tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled); if (acct->gtime) account_guest_time(tsk, acct->gtime); @@ -411,7 +411,7 @@ void vtime_flush(struct task_struct *tsk) account_system_index_time(tsk, acct->stime, CPUTIME_SYSTEM); if (acct->stime_scaled) - tsk->stimescaled += acct->stime_scaled; + tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled); if (acct->hardirq_time) account_system_index_time(tsk, acct->hardirq_time, CPUTIME_IRQ); diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 0a9e5d67547d..f2fc27491604 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -114,7 +114,7 @@ static void account_system_index_scaled(struct task_struct *p, cputime_t cputime, cputime_t scaled, enum cpu_usage_stat index) { - p->stimescaled += scaled; + p->stimescaled += cputime_to_nsecs(scaled); account_system_index_time(p, cputime, index); } @@ -167,12 +167,12 @@ static int do_account_vtime(struct task_struct *tsk) /* Push account value */ if (user) { account_user_time(tsk, user); - tsk->utimescaled += scale_vtime(user); + tsk->utimescaled += cputime_to_nsecs(scale_vtime(user)); } if (guest) { account_guest_time(tsk, guest); - tsk->utimescaled += scale_vtime(guest); + tsk->utimescaled += cputime_to_nsecs(scale_vtime(guest)); } if (system) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 1572c35b4f1a..2ecd7dab4631 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -964,10 +964,11 @@ 
static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, /* Calculate cpu time spent by current task in 100ns units */ static u64 current_task_runtime_100ns(void) { - cputime_t utime, stime; + u64 utime, stime; task_cputime_adjusted(current, &utime, &stime); - return div_u64(cputime_to_nsecs(utime + stime), 100); + + return div_u64(utime + stime, 100); } static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 68b915650cae..6d451936a858 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1411,6 +1411,8 @@ static void fill_note(struct memelfnote *note, const char *name, int type, static void fill_prstatus(struct elf_prstatus *prstatus, struct task_struct *p, long signr) { + struct timeval tv; + prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; prstatus->pr_sigpend = p->pending.signal.sig[0]; prstatus->pr_sighold = p->blocked.sig[0]; @@ -1437,8 +1439,13 @@ static void fill_prstatus(struct elf_prstatus *prstatus, cputime_to_timeval(utime, &prstatus->pr_utime); cputime_to_timeval(stime, &prstatus->pr_stime); } - cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); - cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); + tv = ns_to_timeval(p->signal->cutime); + prstatus->pr_cutime.tv_sec = tv.tv_sec; + prstatus->pr_cutime.tv_usec = tv.tv_usec; + + tv = ns_to_timeval(p->signal->cstime); + prstatus->pr_cstime.tv_sec = tv.tv_sec; + prstatus->pr_cstime.tv_usec = tv.tv_usec; } static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 6ccd9df7247a..e1f373460257 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1358,8 +1358,8 @@ static void fill_prstatus(struct elf_prstatus *prstatus, cputime_to_timeval(utime, &prstatus->pr_utime); cputime_to_timeval(stime, &prstatus->pr_stime); } - cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); - 
cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); + prstatus->pr_cutime = ns_to_timeval(p->signal->cutime); + prstatus->pr_cstime = ns_to_timeval(p->signal->cstime); prstatus->pr_exec_fdpic_loadmap = p->mm->context.exec_fdpic_loadmap; prstatus->pr_interp_fdpic_loadmap = p->mm->context.interp_fdpic_loadmap; diff --git a/fs/proc/array.c b/fs/proc/array.c index 25b54cf0c042..fe12b519d09b 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -401,7 +401,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, unsigned long long start_time; unsigned long cmin_flt = 0, cmaj_flt = 0; unsigned long min_flt = 0, maj_flt = 0; - cputime_t cutime, cstime, utime, stime; + u64 cutime, cstime, utime, stime; u64 cgtime, gtime; unsigned long rsslim = 0; char tcomm[sizeof(task->comm)]; @@ -497,10 +497,10 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, seq_put_decimal_ull(m, " ", cmin_flt); seq_put_decimal_ull(m, " ", maj_flt); seq_put_decimal_ull(m, " ", cmaj_flt); - seq_put_decimal_ull(m, " ", cputime_to_clock_t(utime)); - seq_put_decimal_ull(m, " ", cputime_to_clock_t(stime)); - seq_put_decimal_ll(m, " ", cputime_to_clock_t(cutime)); - seq_put_decimal_ll(m, " ", cputime_to_clock_t(cstime)); + seq_put_decimal_ull(m, " ", nsec_to_clock_t(utime)); + seq_put_decimal_ull(m, " ", nsec_to_clock_t(stime)); + seq_put_decimal_ll(m, " ", nsec_to_clock_t(cutime)); + seq_put_decimal_ll(m, " ", nsec_to_clock_t(cstime)); seq_put_decimal_ll(m, " ", priority); seq_put_decimal_ll(m, " ", nice); seq_put_decimal_ll(m, " ", num_threads); diff --git a/include/linux/sched.h b/include/linux/sched.h index 9cc722f77799..b7ccc54b35cc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -585,8 +585,8 @@ struct cpu_itimer { */ struct prev_cputime { #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - cputime_t utime; - cputime_t stime; + u64 utime; + u64 stime; raw_spinlock_t lock; #endif }; @@ -601,8 +601,8 @@ static inline void 
prev_cputime_init(struct prev_cputime *prev) /** * struct task_cputime - collected CPU time counts - * @utime: time spent in user mode, in &cputime_t units - * @stime: time spent in kernel mode, in &cputime_t units + * @utime: time spent in user mode, in nanoseconds + * @stime: time spent in kernel mode, in nanoseconds * @sum_exec_runtime: total time spent on the CPU, in nanoseconds * * This structure groups together three kinds of CPU time that are tracked for @@ -610,8 +610,8 @@ static inline void prev_cputime_init(struct prev_cputime *prev) * these counts together and treat all three of them in parallel. */ struct task_cputime { - cputime_t utime; - cputime_t stime; + u64 utime; + u64 stime; unsigned long long sum_exec_runtime; }; @@ -780,7 +780,7 @@ struct signal_struct { * in __exit_signal, except for the group leader. */ seqlock_t stats_lock; - cputime_t utime, stime, cutime, cstime; + u64 utime, stime, cutime, cstime; u64 gtime; u64 cgtime; struct prev_cputime prev_cputime; @@ -1661,9 +1661,9 @@ struct task_struct { int __user *set_child_tid; /* CLONE_CHILD_SETTID */ int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ - cputime_t utime, stime; + u64 utime, stime; #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME - cputime_t utimescaled, stimescaled; + u64 utimescaled, stimescaled; #endif u64 gtime; struct prev_cputime prev_cputime; @@ -2260,11 +2260,11 @@ struct task_struct *try_get_task_struct(struct task_struct **ptask); #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern void task_cputime(struct task_struct *t, - cputime_t *utime, cputime_t *stime); + u64 *utime, u64 *stime); extern u64 task_gtime(struct task_struct *t); #else static inline void task_cputime(struct task_struct *t, - cputime_t *utime, cputime_t *stime) + u64 *utime, u64 *stime) { *utime = t->utime; *stime = t->stime; @@ -2278,16 +2278,16 @@ static inline u64 task_gtime(struct task_struct *t) #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME static inline void task_cputime_scaled(struct task_struct *t, - cputime_t 
*utimescaled, - cputime_t *stimescaled) + u64 *utimescaled, + u64 *stimescaled) { *utimescaled = t->utimescaled; *stimescaled = t->stimescaled; } #else static inline void task_cputime_scaled(struct task_struct *t, - cputime_t *utimescaled, - cputime_t *stimescaled) + u64 *utimescaled, + u64 *stimescaled) { task_cputime(t, utimescaled, stimescaled); } @@ -2296,18 +2296,26 @@ static inline void task_cputime_scaled(struct task_struct *t, static inline void task_cputime_t(struct task_struct *t, cputime_t *utime, cputime_t *stime) { - task_cputime(t, utime, stime); + u64 ut, st; + + task_cputime(t, &ut, &st); + *utime = nsecs_to_cputime(ut); + *stime = nsecs_to_cputime(st); } static inline void task_cputime_t_scaled(struct task_struct *t, cputime_t *utimescaled, cputime_t *stimescaled) { - task_cputime_scaled(t, utimescaled, stimescaled); + u64 ut, st; + + task_cputime_scaled(t, &ut, &st); + *utimescaled = nsecs_to_cputime(ut); + *stimescaled = nsecs_to_cputime(st); } -extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); -extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); +extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); +extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); /* * Per process flags @@ -3522,9 +3530,14 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); void thread_group_cputimer(struct task_struct *tsk, struct task_cputime_t *times); static inline void thread_group_cputime_t(struct task_struct *tsk, - struct task_cputime_t *times) + struct task_cputime_t *cputime) { - thread_group_cputime(tsk, (struct task_cputime *)times); + struct task_cputime times; + + thread_group_cputime(tsk, &times); + cputime->utime = nsecs_to_cputime(times.utime); + cputime->stime = nsecs_to_cputime(times.stime); + cputime->sum_exec_runtime = times.sum_exec_runtime; } /* diff --git a/kernel/exit.c b/kernel/exit.c index 
8f14b866f9f6..8e5e21338b3a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -86,7 +86,7 @@ static void __exit_signal(struct task_struct *tsk) bool group_dead = thread_group_leader(tsk); struct sighand_struct *sighand; struct tty_struct *uninitialized_var(tty); - cputime_t utime, stime; + u64 utime, stime; sighand = rcu_dereference_check(tsk->sighand, lockdep_tasklist_lock_is_held()); @@ -1091,7 +1091,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) struct signal_struct *sig = p->signal; struct signal_struct *psig = current->signal; unsigned long maxrss; - cputime_t tgutime, tgstime; + u64 tgutime, tgstime; /* * The resource counters for the group leader are in its diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 8bcd98e2b821..0bdef50d88bc 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -134,7 +134,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime) int index; /* Add user time to process. */ - p->utime += cputime; + p->utime += cputime_to_nsecs(cputime); account_group_user_time(p, cputime); index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; @@ -156,7 +156,7 @@ void account_guest_time(struct task_struct *p, cputime_t cputime) u64 *cpustat = kcpustat_this_cpu->cpustat; /* Add guest time to process. */ - p->utime += cputime; + p->utime += cputime_to_nsecs(cputime); account_group_user_time(p, cputime); p->gtime += cputime_to_nsecs(cputime); @@ -180,7 +180,7 @@ void account_system_index_time(struct task_struct *p, cputime_t cputime, enum cpu_usage_stat index) { /* Add system time to process. */ - p->stime += cputime; + p->stime += cputime_to_nsecs(cputime); account_group_system_time(p, cputime); /* Add system time to cpustat. 
*/ @@ -315,7 +315,7 @@ static u64 read_sum_exec_runtime(struct task_struct *t) void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) { struct signal_struct *sig = tsk->signal; - cputime_t utime, stime; + u64 utime, stime; struct task_struct *t; unsigned int seq, nextseq; unsigned long flags; @@ -465,14 +465,14 @@ void vtime_account_irq_enter(struct task_struct *tsk) EXPORT_SYMBOL_GPL(vtime_account_irq_enter); #endif /* __ARCH_HAS_VTIME_ACCOUNT */ -void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) +void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) { *ut = p->utime; *st = p->stime; } EXPORT_SYMBOL_GPL(task_cputime_adjusted); -void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) +void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) { struct task_cputime cputime; @@ -543,7 +543,7 @@ void account_idle_ticks(unsigned long ticks) * Perform (stime * rtime) / total, but avoid multiplication overflow by * loosing precision when the numbers are big. */ -static cputime_t scale_stime(u64 stime, u64 rtime, u64 total) +static u64 scale_stime(u64 stime, u64 rtime, u64 total) { u64 scaled; @@ -580,7 +580,7 @@ drop_precision: * followed by a 64/32->64 divide. 
*/ scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total); - return (__force cputime_t) scaled; + return scaled; } /* @@ -605,14 +605,14 @@ drop_precision: */ static void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, - cputime_t *ut, cputime_t *st) + u64 *ut, u64 *st) { - cputime_t rtime, stime, utime; + u64 rtime, stime, utime; unsigned long flags; /* Serialize concurrent callers such that we can honour our guarantees */ raw_spin_lock_irqsave(&prev->lock, flags); - rtime = nsecs_to_cputime(curr->sum_exec_runtime); + rtime = curr->sum_exec_runtime; /* * This is possible under two circumstances: @@ -643,8 +643,7 @@ static void cputime_adjust(struct task_cputime *curr, goto update; } - stime = scale_stime((__force u64)stime, (__force u64)rtime, - (__force u64)(stime + utime)); + stime = scale_stime(stime, rtime, stime + utime); update: /* @@ -677,7 +676,7 @@ out: raw_spin_unlock_irqrestore(&prev->lock, flags); } -void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) +void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) { struct task_cputime cputime = { .sum_exec_runtime = p->se.sum_exec_runtime, @@ -688,7 +687,7 @@ void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) } EXPORT_SYMBOL_GPL(task_cputime_adjusted); -void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) +void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) { struct task_cputime cputime; @@ -849,9 +848,9 @@ u64 task_gtime(struct task_struct *t) * add up the pending nohz execution time since the last * cputime snapshot. 
*/ -void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime) +void task_cputime(struct task_struct *t, u64 *utime, u64 *stime) { - cputime_t delta; + u64 delta; unsigned int seq; if (!vtime_accounting_enabled()) { @@ -870,7 +869,7 @@ void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime) if (t->vtime_snap_whence == VTIME_INACTIVE || is_idle_task(t)) continue; - delta = vtime_delta(t); + delta = cputime_to_nsecs(vtime_delta(t)); /* * Task runs either in user or kernel space, add pending nohz time to diff --git a/kernel/signal.c b/kernel/signal.c index 218048a837ea..b63522193076 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1620,8 +1620,8 @@ bool do_notify_parent(struct task_struct *tsk, int sig) rcu_read_unlock(); task_cputime_t(tsk, &utime, &stime); - info.si_utime = cputime_to_clock_t(utime + tsk->signal->utime); - info.si_stime = cputime_to_clock_t(stime + tsk->signal->stime); + info.si_utime = cputime_to_clock_t(utime + nsecs_to_cputime(tsk->signal->utime)); + info.si_stime = cputime_to_clock_t(stime + nsecs_to_cputime(tsk->signal->stime)); info.si_status = tsk->exit_code & 0x7f; if (tsk->exit_code & 0x80) diff --git a/kernel/sys.c b/kernel/sys.c index 842914ef7de4..7d4a9a6df956 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -881,15 +881,15 @@ SYSCALL_DEFINE0(getegid) void do_sys_times(struct tms *tms) { - cputime_t tgutime, tgstime, cutime, cstime; + u64 tgutime, tgstime, cutime, cstime; thread_group_cputime_adjusted(current, &tgutime, &tgstime); cutime = current->signal->cutime; cstime = current->signal->cstime; - tms->tms_utime = cputime_to_clock_t(tgutime); - tms->tms_stime = cputime_to_clock_t(tgstime); - tms->tms_cutime = cputime_to_clock_t(cutime); - tms->tms_cstime = cputime_to_clock_t(cstime); + tms->tms_utime = nsec_to_clock_t(tgutime); + tms->tms_stime = nsec_to_clock_t(tgstime); + tms->tms_cutime = nsec_to_clock_t(cutime); + tms->tms_cstime = nsec_to_clock_t(cstime); } SYSCALL_DEFINE1(times, struct 
tms __user *, tbuf) @@ -1544,7 +1544,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) { struct task_struct *t; unsigned long flags; - cputime_t tgutime, tgstime, utime, stime; + u64 tgutime, tgstime, utime, stime; unsigned long maxrss = 0; memset((char *)r, 0, sizeof (*r)); @@ -1600,8 +1600,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) unlock_task_sighand(p, &flags); out: - cputime_to_timeval(utime, &r->ru_utime); - cputime_to_timeval(stime, &r->ru_stime); + r->ru_utime = ns_to_timeval(utime); + r->ru_stime = ns_to_timeval(stime); if (who != RUSAGE_CHILDREN) { struct mm_struct *mm = get_task_mm(p); -- 2.20.1