ANDROID: cpufreq: track per-task time in state
author Connor O'Brien <connoro@google.com>
Thu, 1 Feb 2018 02:11:57 +0000 (18:11 -0800)
committer Connor O'Brien <connoro@google.com>
Thu, 12 Apr 2018 20:00:33 +0000 (13:00 -0700)
Add time in state data to task structs, and create
/proc/<pid>/time_in_state files to show how long each individual task
has run at each frequency.
Create a CONFIG_CPU_FREQ_TIMES option to enable/disable this tracking.

Signed-off-by: Connor O'Brien <connoro@google.com>
Bug: 72339335
Test: Read /proc/<pid>/time_in_state
Change-Id: Ia6456754f4cb1e83b2bc35efa8fbe9f8696febc8

drivers/cpufreq/Kconfig
drivers/cpufreq/Makefile
drivers/cpufreq/cpufreq.c
drivers/cpufreq/cpufreq_times.c [new file with mode: 0644]
fs/proc/base.c
include/linux/cpufreq_times.h [new file with mode: 0644]
include/linux/sched.h
kernel/exit.c
kernel/sched/core.c
kernel/sched/cputime.c

index d8addbce40bcc4f9c6a29e32c98cd0c15bac15b4..b374515f98138f4db3c6e87a15433790645366b9 100644 (file)
@@ -37,6 +37,13 @@ config CPU_FREQ_STAT
 
          If in doubt, say N.
 
+config CPU_FREQ_TIMES
+       bool "CPU frequency time-in-state statistics"
+       help
+         Export CPU time-in-state information through procfs.
+
+         If in doubt, say N.
+
 choice
        prompt "Default CPUFreq governor"
        default CPU_FREQ_DEFAULT_GOV_USERSPACE if ARM_SA1100_CPUFREQ || ARM_SA1110_CPUFREQ
index 812f9e0d01a343a1477ecc91fee0fbaa353fb26d..3ad8aeb687ef0e79827dac36169a268507f4bd5a 100644 (file)
@@ -5,7 +5,10 @@ obj-$(CONFIG_CPU_FREQ)                 += cpufreq.o freq_table.o
 # CPUfreq stats
 obj-$(CONFIG_CPU_FREQ_STAT)             += cpufreq_stats.o
 
-# CPUfreq governors 
+# CPUfreq times
+obj-$(CONFIG_CPU_FREQ_TIMES)           += cpufreq_times.o
+
+# CPUfreq governors
 obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE) += cpufreq_performance.o
 obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE)   += cpufreq_powersave.o
 obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE)   += cpufreq_userspace.o
index 183e1edaeece583fb30f5551af8e1973477707f8..8e9c2c0f8576f56ccc571cfcef7d5f90c74e1daf 100644 (file)
@@ -19,6 +19,7 @@
 
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
+#include <linux/cpufreq_times.h>
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/init.h>
@@ -339,6 +340,7 @@ static void __cpufreq_notify_transition(struct cpufreq_policy *policy,
                         (unsigned long)freqs->new, (unsigned long)freqs->cpu);
                trace_cpu_frequency(freqs->new, freqs->cpu);
                cpufreq_stats_record_transition(policy, freqs->new);
+               cpufreq_times_record_transition(freqs);
                srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
                                CPUFREQ_POSTCHANGE, freqs);
                if (likely(policy) && likely(policy->cpu == freqs->cpu))
@@ -1287,6 +1289,7 @@ static int cpufreq_online(unsigned int cpu)
                        goto out_exit_policy;
 
                cpufreq_stats_create_table(policy);
+               cpufreq_times_create_policy(policy);
 
                write_lock_irqsave(&cpufreq_driver_lock, flags);
                list_add(&policy->policy_list, &cpufreq_policy_list);
diff --git a/drivers/cpufreq/cpufreq_times.c b/drivers/cpufreq/cpufreq_times.c
new file mode 100644 (file)
index 0000000..fa46fce
--- /dev/null
@@ -0,0 +1,204 @@
+/* drivers/cpufreq/cpufreq_times.c
+ *
+ * Copyright (C) 2018 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/cpufreq_times.h>
+#include <linux/jiffies.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/threads.h>
+
+static DEFINE_SPINLOCK(task_time_in_state_lock); /* task->time_in_state */
+
+/**
+ * struct cpu_freqs - frequency information shared by all CPUs of one policy
+ * @offset: start of these freqs' stats in task time_in_state array
+ * @max_state: number of entries in freq_table
+ * @last_index: index in freq_table of last frequency switched to
+ * @freq_table: list of available frequencies
+ */
+struct cpu_freqs {
+       unsigned int offset;
+       unsigned int max_state;
+       unsigned int last_index;
+       unsigned int freq_table[0];
+};
+
+static struct cpu_freqs *all_freqs[NR_CPUS];
+
+static unsigned int next_offset;
+
+void cpufreq_task_times_init(struct task_struct *p)
+{
+       void *temp;
+       unsigned long flags;
+       unsigned int max_state;
+
+       spin_lock_irqsave(&task_time_in_state_lock, flags);
+       p->time_in_state = NULL;
+       spin_unlock_irqrestore(&task_time_in_state_lock, flags);
+       p->max_state = 0;
+
+       max_state = READ_ONCE(next_offset);
+
+       /* We use one array to avoid multiple allocs per task */
+       temp = kcalloc(max_state, sizeof(p->time_in_state[0]), GFP_ATOMIC);
+       if (!temp)
+               return;
+
+       spin_lock_irqsave(&task_time_in_state_lock, flags);
+       p->time_in_state = temp;
+       spin_unlock_irqrestore(&task_time_in_state_lock, flags);
+       p->max_state = max_state;
+}
+
+/* Caller must hold task_time_in_state_lock */
+static int cpufreq_task_times_realloc_locked(struct task_struct *p)
+{
+       void *temp;
+       unsigned int max_state = READ_ONCE(next_offset);
+
+       temp = krealloc(p->time_in_state, max_state * sizeof(u64), GFP_ATOMIC);
+       if (!temp)
+               return -ENOMEM;
+       p->time_in_state = temp;
+       memset(p->time_in_state + p->max_state, 0,
+              (max_state - p->max_state) * sizeof(u64));
+       p->max_state = max_state;
+       return 0;
+}
+
+void cpufreq_task_times_exit(struct task_struct *p)
+{
+       unsigned long flags;
+       void *temp;
+
+       spin_lock_irqsave(&task_time_in_state_lock, flags);
+       temp = p->time_in_state;
+       p->time_in_state = NULL;
+       spin_unlock_irqrestore(&task_time_in_state_lock, flags);
+       kfree(temp);
+}
+
+int proc_time_in_state_show(struct seq_file *m, struct pid_namespace *ns,
+       struct pid *pid, struct task_struct *p)
+{
+       unsigned int cpu, i;
+       u64 cputime;
+       unsigned long flags;
+       struct cpu_freqs *freqs;
+       struct cpu_freqs *last_freqs = NULL;
+
+       spin_lock_irqsave(&task_time_in_state_lock, flags);
+       for_each_possible_cpu(cpu) {
+               freqs = all_freqs[cpu];
+               if (!freqs || freqs == last_freqs)
+                       continue;
+               last_freqs = freqs;
+
+               seq_printf(m, "cpu%u\n", cpu);
+               for (i = 0; i < freqs->max_state; i++) {
+                       if (freqs->freq_table[i] == CPUFREQ_ENTRY_INVALID)
+                               continue;
+                       cputime = 0;
+                       if (freqs->offset + i < p->max_state &&
+                           p->time_in_state)
+                               cputime = p->time_in_state[freqs->offset + i];
+                       seq_printf(m, "%u %lu\n", freqs->freq_table[i],
+                                  (unsigned long)nsec_to_clock_t(cputime));
+               }
+       }
+       spin_unlock_irqrestore(&task_time_in_state_lock, flags);
+       return 0;
+}
+
+void cpufreq_acct_update_power(struct task_struct *p, u64 cputime)
+{
+       unsigned long flags;
+       unsigned int state;
+       struct cpu_freqs *freqs = all_freqs[task_cpu(p)];
+
+       if (!freqs || p->flags & PF_EXITING)
+               return;
+
+       state = freqs->offset + READ_ONCE(freqs->last_index);
+
+       spin_lock_irqsave(&task_time_in_state_lock, flags);
+       if ((state < p->max_state || !cpufreq_task_times_realloc_locked(p)) &&
+           p->time_in_state)
+               p->time_in_state[state] += cputime;
+       spin_unlock_irqrestore(&task_time_in_state_lock, flags);
+}
+
+void cpufreq_times_create_policy(struct cpufreq_policy *policy)
+{
+       int cpu, index;
+       unsigned int count = 0;
+       struct cpufreq_frequency_table *pos, *table;
+       struct cpu_freqs *freqs;
+       void *tmp;
+
+       if (all_freqs[policy->cpu])
+               return;
+
+       table = policy->freq_table;
+       if (!table)
+               return;
+
+       cpufreq_for_each_entry(pos, table)
+               count++;
+
+       tmp =  kzalloc(sizeof(*freqs) + sizeof(freqs->freq_table[0]) * count,
+                      GFP_KERNEL);
+       if (!tmp)
+               return;
+
+       freqs = tmp;
+       freqs->max_state = count;
+
+       index = cpufreq_frequency_table_get_index(policy, policy->cur);
+       if (index >= 0)
+               WRITE_ONCE(freqs->last_index, index);
+
+       cpufreq_for_each_entry(pos, table)
+               freqs->freq_table[pos - table] = pos->frequency;
+
+       freqs->offset = next_offset;
+       WRITE_ONCE(next_offset, freqs->offset + count);
+       for_each_cpu(cpu, policy->related_cpus)
+               all_freqs[cpu] = freqs;
+}
+
+void cpufreq_times_record_transition(struct cpufreq_freqs *freq)
+{
+       int index;
+       struct cpu_freqs *freqs = all_freqs[freq->cpu];
+       struct cpufreq_policy *policy;
+
+       if (!freqs)
+               return;
+
+       policy = cpufreq_cpu_get(freq->cpu);
+       if (!policy)
+               return;
+
+       index = cpufreq_frequency_table_get_index(policy, freq->new);
+       if (index >= 0)
+               WRITE_ONCE(freqs->last_index, index);
+
+       cpufreq_cpu_put(policy);
+}
index 9d357b2ea6cb59af34af699cb46689464aef419e..b3c05c509ad33a049946a9056df39c014c3dabcb 100644 (file)
@@ -93,6 +93,7 @@
 #include <linux/sched/stat.h>
 #include <linux/flex_array.h>
 #include <linux/posix-timers.h>
+#include <linux/cpufreq_times.h>
 #ifdef CONFIG_HARDWALL
 #include <asm/hardwall.h>
 #endif
@@ -2989,6 +2990,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_LIVEPATCH
        ONE("patch_state",  S_IRUSR, proc_pid_patch_state),
 #endif
+#ifdef CONFIG_CPU_FREQ_TIMES
+       ONE("time_in_state", 0444, proc_time_in_state_show),
+#endif
 };
 
 static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@ -3376,6 +3380,9 @@ static const struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_LIVEPATCH
        ONE("patch_state",  S_IRUSR, proc_pid_patch_state),
 #endif
+#ifdef CONFIG_CPU_FREQ_TIMES
+       ONE("time_in_state", 0444, proc_time_in_state_show),
+#endif
 };
 
 static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/include/linux/cpufreq_times.h b/include/linux/cpufreq_times.h
new file mode 100644 (file)
index 0000000..8cdbbc8
--- /dev/null
@@ -0,0 +1,35 @@
+/* include/linux/cpufreq_times.h
+ *
+ * Copyright (C) 2018 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_CPUFREQ_TIMES_H
+#define _LINUX_CPUFREQ_TIMES_H
+
+#include <linux/cpufreq.h>
+#include <linux/pid.h>
+
+#ifdef CONFIG_CPU_FREQ_TIMES
+void cpufreq_task_times_init(struct task_struct *p);
+void cpufreq_task_times_exit(struct task_struct *p);
+int proc_time_in_state_show(struct seq_file *m, struct pid_namespace *ns,
+                           struct pid *pid, struct task_struct *p);
+void cpufreq_acct_update_power(struct task_struct *p, u64 cputime);
+void cpufreq_times_create_policy(struct cpufreq_policy *policy);
+void cpufreq_times_record_transition(struct cpufreq_freqs *freq);
+#else
+static inline void cpufreq_times_create_policy(struct cpufreq_policy *policy) {}
+static inline void cpufreq_times_record_transition(
+       struct cpufreq_freqs *freq) {}
+#endif /* CONFIG_CPU_FREQ_TIMES */
+#endif /* _LINUX_CPUFREQ_TIMES_H */
index 5d9f33742aab7af40b327587c7a1f4a9362ce3d7..b4b1008729714ad060188ee9b4c800e49fb8eafc 100644 (file)
@@ -766,6 +766,10 @@ struct task_struct {
        u64                             stimescaled;
 #endif
        u64                             gtime;
+#ifdef CONFIG_CPU_FREQ_TIMES
+       u64                             *time_in_state;
+       unsigned int                    max_state;
+#endif
        struct prev_cputime             prev_cputime;
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
        struct vtime                    vtime;
index e3a08761eb4074216ecd29e98a2973023c62db1b..1a58e9947f4514ef3ed926f71b9623e3e6fe462d 100644 (file)
@@ -62,6 +62,7 @@
 #include <linux/random.h>
 #include <linux/rcuwait.h>
 #include <linux/compat.h>
+#include <linux/cpufreq_times.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -185,6 +186,9 @@ void release_task(struct task_struct *p)
 {
        struct task_struct *leader;
        int zap_leader;
+#ifdef CONFIG_CPU_FREQ_TIMES
+       cpufreq_task_times_exit(p);
+#endif
 repeat:
        /* don't need to get the RCU readlock here - the process is dead and
         * can't be modifying its own credentials. But shut RCU-lockdep up */
index 8a4e64f74bf8ff3c42012e5d4cb5fa980249e63e..a6e9017b60ca31e6e5e35bea597ebcd049ad72aa 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/rcupdate_wait.h>
 
 #include <linux/blkdev.h>
+#include <linux/cpufreq_times.h>
 #include <linux/kprobes.h>
 #include <linux/mmu_context.h>
 #include <linux/module.h>
@@ -2222,6 +2223,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
        memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif
 
+#ifdef CONFIG_CPU_FREQ_TIMES
+       cpufreq_task_times_init(p);
+#endif
+
        RB_CLEAR_NODE(&p->dl.rb_node);
        init_dl_task_timer(&p->dl);
        init_dl_inactive_task_timer(&p->dl);
index 029b505aca494c58ec224eeb1c6bead63450c169..e7954007b54eef0382699887ce8d113c753f2eb6 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/static_key.h>
 #include <linux/context_tracking.h>
 #include <linux/sched/cputime.h>
+#include <linux/cpufreq_times.h>
 #include "sched.h"
 #include "walt.h"
 
@@ -147,6 +148,11 @@ void account_user_time(struct task_struct *p, u64 cputime)
 
        /* Account for user time used */
        acct_account_cputime(p);
+
+#ifdef CONFIG_CPU_FREQ_TIMES
+       /* Account power usage for user time */
+       cpufreq_acct_update_power(p, cputime);
+#endif
 }
 
 /*
@@ -191,6 +197,10 @@ void account_system_index_time(struct task_struct *p,
 
        /* Account for system time used */
        acct_account_cputime(p);
+#ifdef CONFIG_CPU_FREQ_TIMES
+       /* Account power usage for system time */
+       cpufreq_acct_update_power(p, cputime);
+#endif
 }
 
 /*