Commit | Line | Data |
---|---|---|
82f67cd9 IM |
1 | /* |
2 | * kernel/time/timer_stats.c | |
3 | * | |
4 | * Collect timer usage statistics. | |
5 | * | |
6 | * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar | |
7 | * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> | |
8 | * | |
9 | * timer_stats is based on timer_top, a similar functionality which was part of | |
10 | * Con Kolivas dyntick patch set. It was developed by Daniel Petrini at the | |
11 | * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based | |
12 | * on dynamic allocation of the statistics entries and linear search based | |
13 | * lookup combined with a global lock, rather than the static array, hash | |
14 | * and per-CPU locking which is used by timer_stats. It was written for the | |
15 | * pre hrtimer kernel code and therefore did not take hrtimers into account. | |
16 | * Nevertheless it provided the base for the timer_stats implementation and | |
17 | * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks | |
18 | * for this effort. | |
19 | * | |
20 | * timer_top.c is | |
21 | * Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus | |
22 | * Written by Daniel Petrini <d.pensator@gmail.com> | |
23 | * timer_top.c was released under the GNU General Public License version 2 | |
24 | * | |
25 | * We export the addresses and counting of timer functions being called, | |
26 | * the pid and cmdline from the owner process if applicable. | |
27 | * | |
28 | * Start/stop data collection: | |
29 | * # echo 1[0] >/proc/timer_stats | |
30 | * | |
31 | * Display the information collected so far: | |
32 | * # cat /proc/timer_stats | |
33 | * | |
34 | * This program is free software; you can redistribute it and/or modify | |
35 | * it under the terms of the GNU General Public License version 2 as | |
36 | * published by the Free Software Foundation. | |
37 | */ | |
38 | ||
39 | #include <linux/proc_fs.h> | |
40 | #include <linux/module.h> | |
41 | #include <linux/spinlock.h> | |
42 | #include <linux/sched.h> | |
43 | #include <linux/seq_file.h> | |
44 | #include <linux/kallsyms.h> | |
45 | ||
46 | #include <asm/uaccess.h> | |
47 | ||
48 | /* | |
49 | * This is our basic unit of interest: a timer expiry event identified | |
50 | * by the timer, its start/expire functions and the PID of the task that | |
51 | * started the timer. We count the number of times an event happens: | |
52 | */ | |
53 | struct entry { | |
54 | /* | |
55 | * Hash list: | |
56 | */ | |
57 | struct entry *next; | |
58 | ||
59 | /* | |
60 | * Hash keys: | |
61 | */ | |
62 | void *timer; | |
63 | void *start_func; | |
64 | void *expire_func; | |
65 | pid_t pid; | |
66 | ||
67 | /* | |
68 | * Number of timeout events: | |
69 | */ | |
70 | unsigned long count; | |
71 | ||
72 | /* | |
73 | * We save the command-line string to preserve | |
74 | * this information past task exit: | |
75 | */ | |
76 | char comm[TASK_COMM_LEN + 1]; | |
77 | ||
78 | } ____cacheline_aligned_in_smp; | |
79 | ||
80 | /* | |
81 | * Spinlock protecting the tables - not taken during lookup: | |
82 | */ | |
83 | static DEFINE_SPINLOCK(table_lock); | |
84 | ||
85 | /* | |
86 | * Per-CPU lookup locks for fast hash lookup: | |
87 | */ | |
88 | static DEFINE_PER_CPU(spinlock_t, lookup_lock); | |
89 | ||
90 | /* | |
91 | * Mutex to serialize state changes with show-stats activities: | |
92 | */ | |
93 | static DEFINE_MUTEX(show_mutex); | |
94 | ||
95 | /* | |
96 | * Collection status, active/inactive: | |
97 | */ | |
98 | static int __read_mostly active; | |
99 | ||
100 | /* | |
101 | * Beginning/end timestamps of measurement: | |
102 | */ | |
103 | static ktime_t time_start, time_stop; | |
104 | ||
105 | /* | |
106 | * tstat entry structs only get allocated while collection is | |
107 | * active and never freed during that time - this simplifies | |
108 | * things quite a bit. | |
109 | * | |
110 | * They get freed when a new collection period is started. | |
111 | */ | |
112 | #define MAX_ENTRIES_BITS 10 | |
113 | #define MAX_ENTRIES (1UL << MAX_ENTRIES_BITS) | |
114 | ||
115 | static unsigned long nr_entries; | |
116 | static struct entry entries[MAX_ENTRIES]; | |
117 | ||
118 | static atomic_t overflow_count; | |
119 | ||
82f67cd9 IM |
120 | /* |
121 | * The entries are in a hash-table, for fast lookup: | |
122 | */ | |
123 | #define TSTAT_HASH_BITS (MAX_ENTRIES_BITS - 1) | |
124 | #define TSTAT_HASH_SIZE (1UL << TSTAT_HASH_BITS) | |
125 | #define TSTAT_HASH_MASK (TSTAT_HASH_SIZE - 1) | |
126 | ||
127 | #define __tstat_hashfn(entry) \ | |
128 | (((unsigned long)(entry)->timer ^ \ | |
129 | (unsigned long)(entry)->start_func ^ \ | |
130 | (unsigned long)(entry)->expire_func ^ \ | |
131 | (unsigned long)(entry)->pid ) & TSTAT_HASH_MASK) | |
132 | ||
133 | #define tstat_hashentry(entry) (tstat_hash_table + __tstat_hashfn(entry)) | |
134 | ||
135 | static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly; | |
136 | ||
9fcc15ec BS |
137 | static void reset_entries(void) |
138 | { | |
139 | nr_entries = 0; | |
140 | memset(entries, 0, sizeof(entries)); | |
141 | memset(tstat_hash_table, 0, sizeof(tstat_hash_table)); | |
142 | atomic_set(&overflow_count, 0); | |
143 | } | |
144 | ||
145 | static struct entry *alloc_entry(void) | |
146 | { | |
147 | if (nr_entries >= MAX_ENTRIES) | |
148 | return NULL; | |
149 | ||
150 | return entries + nr_entries++; | |
151 | } | |
152 | ||
82f67cd9 IM |
153 | static int match_entries(struct entry *entry1, struct entry *entry2) |
154 | { | |
155 | return entry1->timer == entry2->timer && | |
156 | entry1->start_func == entry2->start_func && | |
157 | entry1->expire_func == entry2->expire_func && | |
158 | entry1->pid == entry2->pid; | |
159 | } | |
160 | ||
161 | /* | |
162 | * Look up whether an entry matching this item is present | |
163 | * in the hash already. Must be called with irqs off and the | |
164 | * lookup lock held: | |
165 | */ | |
166 | static struct entry *tstat_lookup(struct entry *entry, char *comm) | |
167 | { | |
168 | struct entry **head, *curr, *prev; | |
169 | ||
170 | head = tstat_hashentry(entry); | |
171 | curr = *head; | |
172 | ||
173 | /* | |
174 | * The fastpath is when the entry is already hashed, | |
175 | * we do this with the lookup lock held, but with the | |
176 | * table lock not held: | |
177 | */ | |
178 | while (curr) { | |
179 | if (match_entries(curr, entry)) | |
180 | return curr; | |
181 | ||
182 | curr = curr->next; | |
183 | } | |
184 | /* | |
185 | * Slowpath: allocate, set up and link a new hash entry: | |
186 | */ | |
187 | prev = NULL; | |
188 | curr = *head; | |
189 | ||
190 | spin_lock(&table_lock); | |
191 | /* | |
192 | * Make sure we have not raced with another CPU: | |
193 | */ | |
194 | while (curr) { | |
195 | if (match_entries(curr, entry)) | |
196 | goto out_unlock; | |
197 | ||
198 | prev = curr; | |
199 | curr = curr->next; | |
200 | } | |
201 | ||
202 | curr = alloc_entry(); | |
203 | if (curr) { | |
204 | *curr = *entry; | |
205 | curr->count = 0; | |
9fcc15ec | 206 | curr->next = NULL; |
82f67cd9 | 207 | memcpy(curr->comm, comm, TASK_COMM_LEN); |
9fcc15ec BS |
208 | |
209 | smp_mb(); /* Ensure that curr is initialized before insert */ | |
210 | ||
82f67cd9 IM |
211 | if (prev) |
212 | prev->next = curr; | |
213 | else | |
214 | *head = curr; | |
82f67cd9 IM |
215 | } |
216 | out_unlock: | |
217 | spin_unlock(&table_lock); | |
218 | ||
219 | return curr; | |
220 | } | |
221 | ||
222 | /** | |
223 | * timer_stats_update_stats - Update the statistics for a timer. | |
224 | * @timer: pointer to either a timer_list or a hrtimer | |
225 | * @pid: the pid of the task which set up the timer | |
226 | * @startf: pointer to the function which did the timer setup | |
227 | * @timerf: pointer to the timer callback function of the timer | |
228 | * @comm: name of the process which set up the timer | |
229 | * | |
230 | * When the timer is already registered, then the event counter is | |
231 | * incremented. Otherwise the timer is registered in a free slot. | |
232 | */ | |
233 | void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | |
234 | void *timerf, char * comm) | |
235 | { | |
236 | /* | |
237 | * It doesnt matter which lock we take: | |
238 | */ | |
c1a834dc | 239 | spinlock_t *lock; |
82f67cd9 IM |
240 | struct entry *entry, input; |
241 | unsigned long flags; | |
242 | ||
c1a834dc IM |
243 | if (likely(!active)) |
244 | return; | |
245 | ||
246 | lock = &per_cpu(lookup_lock, raw_smp_processor_id()); | |
247 | ||
82f67cd9 IM |
248 | input.timer = timer; |
249 | input.start_func = startf; | |
250 | input.expire_func = timerf; | |
251 | input.pid = pid; | |
252 | ||
253 | spin_lock_irqsave(lock, flags); | |
254 | if (!active) | |
255 | goto out_unlock; | |
256 | ||
257 | entry = tstat_lookup(&input, comm); | |
258 | if (likely(entry)) | |
259 | entry->count++; | |
260 | else | |
261 | atomic_inc(&overflow_count); | |
262 | ||
263 | out_unlock: | |
264 | spin_unlock_irqrestore(lock, flags); | |
265 | } | |
266 | ||
267 | static void print_name_offset(struct seq_file *m, unsigned long addr) | |
268 | { | |
9d65cb4a | 269 | char symname[KSYM_NAME_LEN+1]; |
82f67cd9 | 270 | |
9d65cb4a | 271 | if (lookup_symbol_name(addr, symname) < 0) |
82f67cd9 | 272 | seq_printf(m, "<%p>", (void *)addr); |
9d65cb4a AD |
273 | else |
274 | seq_printf(m, "%s", symname); | |
82f67cd9 IM |
275 | } |
276 | ||
277 | static int tstats_show(struct seq_file *m, void *v) | |
278 | { | |
279 | struct timespec period; | |
280 | struct entry *entry; | |
281 | unsigned long ms; | |
282 | long events = 0; | |
283 | ktime_t time; | |
284 | int i; | |
285 | ||
286 | mutex_lock(&show_mutex); | |
287 | /* | |
288 | * If still active then calculate up to now: | |
289 | */ | |
290 | if (active) | |
291 | time_stop = ktime_get(); | |
292 | ||
293 | time = ktime_sub(time_stop, time_start); | |
294 | ||
295 | period = ktime_to_timespec(time); | |
296 | ms = period.tv_nsec / 1000000; | |
297 | ||
298 | seq_puts(m, "Timer Stats Version: v0.1\n"); | |
299 | seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms); | |
300 | if (atomic_read(&overflow_count)) | |
301 | seq_printf(m, "Overflow: %d entries\n", | |
302 | atomic_read(&overflow_count)); | |
303 | ||
304 | for (i = 0; i < nr_entries; i++) { | |
305 | entry = entries + i; | |
306 | seq_printf(m, "%4lu, %5d %-16s ", | |
307 | entry->count, entry->pid, entry->comm); | |
308 | ||
309 | print_name_offset(m, (unsigned long)entry->start_func); | |
310 | seq_puts(m, " ("); | |
311 | print_name_offset(m, (unsigned long)entry->expire_func); | |
312 | seq_puts(m, ")\n"); | |
313 | ||
314 | events += entry->count; | |
315 | } | |
316 | ||
317 | ms += period.tv_sec * 1000; | |
318 | if (!ms) | |
319 | ms = 1; | |
320 | ||
321 | if (events && period.tv_sec) | |
322 | seq_printf(m, "%ld total events, %ld.%ld events/sec\n", events, | |
323 | events / period.tv_sec, events * 1000 / ms); | |
324 | else | |
325 | seq_printf(m, "%ld total events\n", events); | |
326 | ||
327 | mutex_unlock(&show_mutex); | |
328 | ||
329 | return 0; | |
330 | } | |
331 | ||
332 | /* | |
333 | * After a state change, make sure all concurrent lookup/update | |
334 | * activities have stopped: | |
335 | */ | |
336 | static void sync_access(void) | |
337 | { | |
338 | unsigned long flags; | |
339 | int cpu; | |
340 | ||
341 | for_each_online_cpu(cpu) { | |
342 | spin_lock_irqsave(&per_cpu(lookup_lock, cpu), flags); | |
343 | /* nothing */ | |
344 | spin_unlock_irqrestore(&per_cpu(lookup_lock, cpu), flags); | |
345 | } | |
346 | } | |
347 | ||
348 | static ssize_t tstats_write(struct file *file, const char __user *buf, | |
349 | size_t count, loff_t *offs) | |
350 | { | |
351 | char ctl[2]; | |
352 | ||
353 | if (count != 2 || *offs) | |
354 | return -EINVAL; | |
355 | ||
356 | if (copy_from_user(ctl, buf, count)) | |
357 | return -EFAULT; | |
358 | ||
359 | mutex_lock(&show_mutex); | |
360 | switch (ctl[0]) { | |
361 | case '0': | |
362 | if (active) { | |
363 | active = 0; | |
364 | time_stop = ktime_get(); | |
365 | sync_access(); | |
366 | } | |
367 | break; | |
368 | case '1': | |
369 | if (!active) { | |
370 | reset_entries(); | |
371 | time_start = ktime_get(); | |
9fcc15ec | 372 | smp_mb(); |
82f67cd9 IM |
373 | active = 1; |
374 | } | |
375 | break; | |
376 | default: | |
377 | count = -EINVAL; | |
378 | } | |
379 | mutex_unlock(&show_mutex); | |
380 | ||
381 | return count; | |
382 | } | |
383 | ||
384 | static int tstats_open(struct inode *inode, struct file *filp) | |
385 | { | |
386 | return single_open(filp, tstats_show, NULL); | |
387 | } | |
388 | ||
389 | static struct file_operations tstats_fops = { | |
390 | .open = tstats_open, | |
391 | .read = seq_read, | |
392 | .write = tstats_write, | |
393 | .llseek = seq_lseek, | |
394 | .release = seq_release, | |
395 | }; | |
396 | ||
397 | void __init init_timer_stats(void) | |
398 | { | |
399 | int cpu; | |
400 | ||
401 | for_each_possible_cpu(cpu) | |
402 | spin_lock_init(&per_cpu(lookup_lock, cpu)); | |
403 | } | |
404 | ||
405 | static int __init init_tstats_procfs(void) | |
406 | { | |
407 | struct proc_dir_entry *pe; | |
408 | ||
409 | pe = create_proc_entry("timer_stats", 0644, NULL); | |
410 | if (!pe) | |
411 | return -ENOMEM; | |
412 | ||
413 | pe->proc_fops = &tstats_fops; | |
414 | ||
415 | return 0; | |
416 | } | |
417 | __initcall(init_tstats_procfs); |