kernel/trace/trace.c
1 /*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally taken from the RT patch by:
8 * Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code from the latency_tracer, that is:
11 * Copyright (C) 2004-2006 Ingo Molnar
12 * Copyright (C) 2004 Nadia Yvette Chambers
13 */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45
46 #include "trace.h"
47 #include "trace_output.h"
48
49 /*
50 * On boot up, the ring buffer is set to the minimum size, so that
51 * we do not waste memory on systems that are not using tracing.
52 */
53 bool ring_buffer_expanded;
54
55 /*
56 * We need to change this state when a selftest is running.
57  * A selftest will peek into the ring-buffer to count the
58  * entries inserted during the selftest, but concurrent
59  * insertions into the ring-buffer (such as trace_printk) could occur
60  * at the same time, giving false positive or negative results.
61 */
62 static bool __read_mostly tracing_selftest_running;
63
64 /*
65 * If a tracer is running, we do not want to run SELFTEST.
66 */
67 bool __read_mostly tracing_selftest_disabled;
68
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76 { }
77 };
78
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82 return 0;
83 }
84
85 /*
86 * To prevent the comm cache from being overwritten when no
87 * tracing is active, only save the comm when a trace event
88 * occurred.
89 */
90 static DEFINE_PER_CPU(bool, trace_cmdline_save);
91
92 /*
93 * Kill all tracing for good (never come back).
94  * It is initialized to 1 and is set to zero only if the
95  * initialization of the tracer is successful. That is the only
96  * place that sets this back to zero.
97 */
98 static int tracing_disabled = 1;
99
100 cpumask_var_t __read_mostly tracing_buffer_mask;
101
102 /*
103 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104 *
105 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106 * is set, then ftrace_dump is called. This will output the contents
107 * of the ftrace buffers to the console. This is very useful for
108  * capturing traces that lead to crashes and outputting them to a
109  * serial console.
110  *
111  * It is off by default, but you can enable it either by specifying
112  * "ftrace_dump_on_oops" on the kernel command line, or by setting
113 * /proc/sys/kernel/ftrace_dump_on_oops
114 * Set 1 if you want to dump buffers of all CPUs
115 * Set 2 if you want to dump the buffer of the CPU that triggered oops
116 */
117
118 enum ftrace_dump_mode ftrace_dump_on_oops;
119
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122
123 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
124 /* Map of enums to their values, for "enum_map" file */
125 struct trace_enum_map_head {
126 struct module *mod;
127 unsigned long length;
128 };
129
130 union trace_enum_map_item;
131
132 struct trace_enum_map_tail {
133 /*
134 * "end" is first and points to NULL as it must be different
135 * than "mod" or "enum_string"
136 */
137 union trace_enum_map_item *next;
138 const char *end; /* points to NULL */
139 };
140
141 static DEFINE_MUTEX(trace_enum_mutex);
142
143 /*
144 * The trace_enum_maps are saved in an array with two extra elements,
145 * one at the beginning, and one at the end. The beginning item contains
146 * the count of the saved maps (head.length), and the module they
147 * belong to if not built in (head.mod). The ending item contains a
148 * pointer to the next array of saved enum_map items.
149 */
150 union trace_enum_map_item {
151 struct trace_enum_map map;
152 struct trace_enum_map_head head;
153 struct trace_enum_map_tail tail;
154 };
155
156 static union trace_enum_map_item *trace_enum_maps;
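/*
 * Illustrative layout (editor's sketch, not from the original source): with
 * N saved maps, one array pointed to by trace_enum_maps looks roughly like
 *
 *	[0]      head: { .mod = owning module (or NULL if built in), .length = N }
 *	[1..N]   map:  the N saved struct trace_enum_map entries
 *	[N + 1]  tail: { .next = next saved array (or NULL), .end = NULL }
 *
 * so a walker can start at element 1, step over N map entries, and then
 * follow tail.next to the next saved array.
 */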
157 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
158
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160
161 #define MAX_TRACER_SIZE 100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164
165 static bool allocate_snapshot;
166
167 static int __init set_cmdline_ftrace(char *str)
168 {
169 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170 default_bootup_tracer = bootup_tracer_buf;
171 /* We are using ftrace early, expand it */
172 ring_buffer_expanded = true;
173 return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179 if (*str++ != '=' || !*str) {
180 ftrace_dump_on_oops = DUMP_ALL;
181 return 1;
182 }
183
184 if (!strcmp("orig_cpu", str)) {
185 ftrace_dump_on_oops = DUMP_ORIG;
186 return 1;
187 }
188
189 return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192
193 static int __init stop_trace_on_warning(char *str)
194 {
195 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196 __disable_trace_on_warning = 1;
197 return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200
201 static int __init boot_alloc_snapshot(char *str)
202 {
203 allocate_snapshot = true;
204 /* We also need the main ring buffer expanded */
205 ring_buffer_expanded = true;
206 return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209
210
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212
213 static int __init set_trace_boot_options(char *str)
214 {
215 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216 return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222
223 static int __init set_trace_boot_clock(char *str)
224 {
225 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226 trace_boot_clock = trace_boot_clock_buf;
227 return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230
231 static int __init set_tracepoint_printk(char *str)
232 {
233 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234 tracepoint_printk = 1;
235 return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238
239 unsigned long long ns2usecs(u64 nsec)
240 {
241 nsec += 500;
242 do_div(nsec, 1000);
243 return nsec;
244 }
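/*
 * For example: ns2usecs(1499) == 1 and ns2usecs(1500) == 2; the +500
 * rounds to the nearest microsecond before the integer divide by 1000.
 */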
245
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS \
248 (FUNCTION_DEFAULT_FLAGS | \
249 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
250 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
251 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
252 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
256 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
261
262 /*
263 * The global_trace is the descriptor that holds the top-level tracing
264 * buffers for the live tracing.
265 */
266 static struct trace_array global_trace = {
267 .trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269
270 LIST_HEAD(ftrace_trace_arrays);
271
272 int trace_array_get(struct trace_array *this_tr)
273 {
274 struct trace_array *tr;
275 int ret = -ENODEV;
276
277 mutex_lock(&trace_types_lock);
278 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279 if (tr == this_tr) {
280 tr->ref++;
281 ret = 0;
282 break;
283 }
284 }
285 mutex_unlock(&trace_types_lock);
286
287 return ret;
288 }
289
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292 WARN_ON(!this_tr->ref);
293 this_tr->ref--;
294 }
295
296 void trace_array_put(struct trace_array *this_tr)
297 {
298 mutex_lock(&trace_types_lock);
299 __trace_array_put(this_tr);
300 mutex_unlock(&trace_types_lock);
301 }
302
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304 struct ring_buffer *buffer,
305 struct ring_buffer_event *event)
306 {
307 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308 !filter_match_preds(call->filter, rec)) {
309 __trace_event_discard_commit(buffer, event);
310 return 1;
311 }
312
313 return 0;
314 }
315
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318 vfree(pid_list->pids);
319 kfree(pid_list);
320 }
321
322 /**
323 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324 * @filtered_pids: The list of pids to check
325 * @search_pid: The PID to find in @filtered_pids
326 *
327  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
328 */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332 /*
333 * If pid_max changed after filtered_pids was created, we
334 * by default ignore all pids greater than the previous pid_max.
335 */
336 if (search_pid >= filtered_pids->pid_max)
337 return false;
338
339 return test_bit(search_pid, filtered_pids->pids);
340 }
341
342 /**
343 * trace_ignore_this_task - should a task be ignored for tracing
344 * @filtered_pids: The list of pids to check
345 * @task: The task that should be ignored if not filtered
346 *
347 * Checks if @task should be traced or not from @filtered_pids.
348 * Returns true if @task should *NOT* be traced.
349 * Returns false if @task should be traced.
350 */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354 /*
355 * Return false, because if filtered_pids does not exist,
356 * all pids are good to trace.
357 */
358 if (!filtered_pids)
359 return false;
360
361 return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
363
364 /**
365 * trace_pid_filter_add_remove - Add or remove a task from a pid_list
366 * @pid_list: The list to modify
367 * @self: The current task for fork or NULL for exit
368 * @task: The task to add or remove
369 *
370 * If adding a task, if @self is defined, the task is only added if @self
371 * is also included in @pid_list. This happens on fork and tasks should
372 * only be added when the parent is listed. If @self is NULL, then the
373 * @task pid will be removed from the list, which would happen on exit
374 * of a task.
375 */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377 struct task_struct *self,
378 struct task_struct *task)
379 {
380 if (!pid_list)
381 return;
382
383 /* For forks, we only add if the forking task is listed */
384 if (self) {
385 if (!trace_find_filtered_pid(pid_list, self->pid))
386 return;
387 }
388
389 /* Sorry, but we don't support pid_max changing after setting */
390 if (task->pid >= pid_list->pid_max)
391 return;
392
393 /* "self" is set for forks, and NULL for exits */
394 if (self)
395 set_bit(task->pid, pid_list->pids);
396 else
397 clear_bit(task->pid, pid_list->pids);
398 }
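/*
 * Usage sketch (illustrative only; the function names below are hypothetical
 * stand-ins for the real sched_process_fork/exit probes registered by the
 * pid-filtering users):
 *
 *	static void example_fork_probe(void *data, struct task_struct *self,
 *				       struct task_struct *task)
 *	{
 *		struct trace_pid_list *pid_list = ...;	// fetched under RCU
 *
 *		trace_filter_add_remove_task(pid_list, self, task);
 *	}
 *
 *	static void example_exit_probe(void *data, struct task_struct *task)
 *	{
 *		struct trace_pid_list *pid_list = ...;
 *
 *		trace_filter_add_remove_task(pid_list, NULL, task);
 *	}
 */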
399
400 /**
401 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402 * @pid_list: The pid list to show
403  * @v: The last pid that was shown (+1 of the actual pid, to let zero be displayed)
404 * @pos: The position of the file
405 *
406 * This is used by the seq_file "next" operation to iterate the pids
407 * listed in a trace_pid_list structure.
408 *
409 * Returns the pid+1 as we want to display pid of zero, but NULL would
410 * stop the iteration.
411 */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414 unsigned long pid = (unsigned long)v;
415
416 (*pos)++;
417
418	/* pid is already +1 of the actual previous bit */
419 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420
421 /* Return pid + 1 to allow zero to be represented */
422 if (pid < pid_list->pid_max)
423 return (void *)(pid + 1);
424
425 return NULL;
426 }
427
428 /**
429 * trace_pid_start - Used for seq_file to start reading pid lists
430 * @pid_list: The pid list to show
431 * @pos: The position of the file
432 *
433 * This is used by seq_file "start" operation to start the iteration
434 * of listing pids.
435 *
436 * Returns the pid+1 as we want to display pid of zero, but NULL would
437 * stop the iteration.
438 */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441 unsigned long pid;
442 loff_t l = 0;
443
444 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445 if (pid >= pid_list->pid_max)
446 return NULL;
447
448 /* Return pid + 1 so that zero can be the exit value */
449 for (pid++; pid && l < *pos;
450 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451 ;
452 return (void *)pid;
453 }
454
455 /**
456 * trace_pid_show - show the current pid in seq_file processing
457 * @m: The seq_file structure to write into
458 * @v: A void pointer of the pid (+1) value to display
459 *
460 * Can be directly used by seq_file operations to display the current
461 * pid value.
462 */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465 unsigned long pid = (unsigned long)v - 1;
466
467 seq_printf(m, "%lu\n", pid);
468 return 0;
469 }
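/*
 * Putting the three helpers above together (illustrative sketch; the wrapper
 * names are hypothetical): a pid listing file typically wires them into a
 * seq_file, with thin ->start/->next wrappers that look up the relevant
 * pid_list before delegating:
 *
 *	static void *example_pid_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(some_pid_list, pos);
 *	}
 *
 *	static void *example_pid_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(some_pid_list, v, pos);
 *	}
 *
 *	static void example_pid_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations example_pid_sops = {
 *		.start	= example_pid_start,
 *		.next	= example_pid_next,
 *		.stop	= example_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 */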
470
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE 127
473
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475 struct trace_pid_list **new_pid_list,
476 const char __user *ubuf, size_t cnt)
477 {
478 struct trace_pid_list *pid_list;
479 struct trace_parser parser;
480 unsigned long val;
481 int nr_pids = 0;
482 ssize_t read = 0;
483 ssize_t ret = 0;
484 loff_t pos;
485 pid_t pid;
486
487 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488 return -ENOMEM;
489
490 /*
491	 * Always create a new array. The write is an all or nothing
492	 * operation: a new array is always built when the user adds
493	 * new pids. If the operation fails, then the current list is
494	 * not modified.
495 */
496 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497 if (!pid_list)
498 return -ENOMEM;
499
500 pid_list->pid_max = READ_ONCE(pid_max);
501
502 /* Only truncating will shrink pid_max */
503 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
504 pid_list->pid_max = filtered_pids->pid_max;
505
506 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
507 if (!pid_list->pids) {
508 kfree(pid_list);
509 return -ENOMEM;
510 }
511
512 if (filtered_pids) {
513 /* copy the current bits to the new max */
514 for_each_set_bit(pid, filtered_pids->pids,
515 filtered_pids->pid_max) {
516 set_bit(pid, pid_list->pids);
517 nr_pids++;
518 }
519 }
520
521 while (cnt > 0) {
522
523 pos = 0;
524
525 ret = trace_get_user(&parser, ubuf, cnt, &pos);
526 if (ret < 0 || !trace_parser_loaded(&parser))
527 break;
528
529 read += ret;
530 ubuf += ret;
531 cnt -= ret;
532
533 parser.buffer[parser.idx] = 0;
534
535 ret = -EINVAL;
536 if (kstrtoul(parser.buffer, 0, &val))
537 break;
538 if (val >= pid_list->pid_max)
539 break;
540
541 pid = (pid_t)val;
542
543 set_bit(pid, pid_list->pids);
544 nr_pids++;
545
546 trace_parser_clear(&parser);
547 ret = 0;
548 }
549 trace_parser_put(&parser);
550
551 if (ret < 0) {
552 trace_free_pid_list(pid_list);
553 return ret;
554 }
555
556 if (!nr_pids) {
557 /* Cleared the list of pids */
558 trace_free_pid_list(pid_list);
559 read = ret;
560 pid_list = NULL;
561 }
562
563 *new_pid_list = pid_list;
564
565 return read;
566 }
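/*
 * Usage sketch for trace_pid_write() (illustrative; names are hypothetical,
 * but the callers follow roughly this pattern): build the new list from user
 * input, publish it with rcu_assign_pointer(), wait for readers, and only
 * then free the old list:
 *
 *	ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
 *	if (ret < 0)
 *		return ret;
 *
 *	rcu_assign_pointer(tr->some_pid_list, pid_list);
 *	synchronize_sched();
 *	if (filtered_pids)
 *		trace_free_pid_list(filtered_pids);
 */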
567
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570 u64 ts;
571
572 /* Early boot up does not have a buffer yet */
573 if (!buf->buffer)
574 return trace_clock_local();
575
576 ts = ring_buffer_time_stamp(buf->buffer, cpu);
577 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578
579 return ts;
580 }
581
582 u64 ftrace_now(int cpu)
583 {
584 return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586
587 /**
588 * tracing_is_enabled - Show if global_trace has been disabled
589 *
590 * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" so that it can be used in fast paths
592  * such as by the irqsoff tracer. But it may be inaccurate due to races. If you
593 * need to know the accurate state, use tracing_is_on() which is a little
594 * slower, but accurate.
595 */
596 int tracing_is_enabled(void)
597 {
598 /*
599 * For quick access (irqsoff uses this in fast path), just
600 * return the mirror variable of the state of the ring buffer.
601 * It's a little racy, but we don't really care.
602 */
603 smp_rmb();
604 return !global_trace.buffer_disabled;
605 }
606
607 /*
608 * trace_buf_size is the size in bytes that is allocated
609 * for a buffer. Note, the number of bytes is always rounded
610 * to page size.
611 *
612 * This number is purposely set to a low number of 16384.
613  * If a dump on oops happens, it is much appreciated not to
614  * have to wait for all that output. In any case, this is
615  * configurable at both boot time and run time.
616 */
617 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
618
619 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer *trace_types __read_mostly;
623
624 /*
625 * trace_types_lock is used to protect the trace_types list.
626 */
627 DEFINE_MUTEX(trace_types_lock);
628
629 /*
630  * Serialize access to the ring buffer.
631  *
632  * The ring buffer serializes readers, but that is only low-level protection.
633  * The validity of the events (returned by ring_buffer_peek() etc.)
634  * is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow other processes
637  * to consume these events concurrently:
638  * A) the page of the consumed events may become a normal page
639  * (not a reader page) in the ring buffer, and will be rewritten
640  * by the events producer.
641  * B) The page of the consumed events may become a page for splice_read,
642  * and will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different cpu
645  * ring buffers concurrently.
646  *
647  * These primitives don't distinguish read-only and read-consume access.
648  * Multiple read-only accesses are also serialized.
649 */
650
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654
655 static inline void trace_access_lock(int cpu)
656 {
657 if (cpu == RING_BUFFER_ALL_CPUS) {
658 /* gain it for accessing the whole ring buffer. */
659 down_write(&all_cpu_access_lock);
660 } else {
661 /* gain it for accessing a cpu ring buffer. */
662
663 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664 down_read(&all_cpu_access_lock);
665
666 /* Secondly block other access to this @cpu ring buffer. */
667 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668 }
669 }
670
671 static inline void trace_access_unlock(int cpu)
672 {
673 if (cpu == RING_BUFFER_ALL_CPUS) {
674 up_write(&all_cpu_access_lock);
675 } else {
676 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677 up_read(&all_cpu_access_lock);
678 }
679 }
680
681 static inline void trace_access_lock_init(void)
682 {
683 int cpu;
684
685 for_each_possible_cpu(cpu)
686 mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688
689 #else
690
691 static DEFINE_MUTEX(access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695 (void)cpu;
696 mutex_lock(&access_lock);
697 }
698
699 static inline void trace_access_unlock(int cpu)
700 {
701 (void)cpu;
702 mutex_unlock(&access_lock);
703 }
704
705 static inline void trace_access_lock_init(void)
706 {
707 }
708
709 #endif
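/*
 * A reader of the ring buffer is expected to bracket its accesses with these
 * primitives, roughly as follows (illustrative sketch; the real users are
 * the trace_pipe and splice read paths later in this file):
 *
 *	trace_access_lock(cpu);		// or RING_BUFFER_ALL_CPUS
 *	... consume or peek at events for @cpu ...
 *	trace_access_unlock(cpu);
 */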
710
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713 unsigned long flags,
714 int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716 struct ring_buffer *buffer,
717 unsigned long flags,
718 int skip, int pc, struct pt_regs *regs);
719
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722 unsigned long flags,
723 int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727 struct ring_buffer *buffer,
728 unsigned long flags,
729 int skip, int pc, struct pt_regs *regs)
730 {
731 }
732
733 #endif
734
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737 int type, unsigned long flags, int pc)
738 {
739 struct trace_entry *ent = ring_buffer_event_data(event);
740
741 tracing_generic_entry_update(ent, flags, pc);
742 ent->type = type;
743 }
744
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747 int type,
748 unsigned long len,
749 unsigned long flags, int pc)
750 {
751 struct ring_buffer_event *event;
752
753 event = ring_buffer_lock_reserve(buffer, len);
754 if (event != NULL)
755 trace_event_setup(event, type, flags, pc);
756
757 return event;
758 }
759
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762 if (tr->trace_buffer.buffer)
763 ring_buffer_record_on(tr->trace_buffer.buffer);
764 /*
765 * This flag is looked at when buffers haven't been allocated
766 * yet, or by some tracers (like irqsoff), that just want to
767 * know if the ring buffer has been disabled, but it can handle
768 * races of where it gets disabled but we still do a record.
769 * As the check is in the fast path of the tracers, it is more
770 * important to be fast than accurate.
771 */
772 tr->buffer_disabled = 0;
773 /* Make the flag seen by readers */
774 smp_wmb();
775 }
776
777 /**
778 * tracing_on - enable tracing buffers
779 *
780 * This function enables tracing buffers that may have been
781 * disabled with tracing_off.
782 */
783 void tracing_on(void)
784 {
785 tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788
789
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793 __this_cpu_write(trace_cmdline_save, true);
794
795 /* If this is the temp buffer, we need to commit fully */
796 if (this_cpu_read(trace_buffered_event) == event) {
797 /* Length is in event->array[0] */
798 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799 /* Release the temp buffer */
800 this_cpu_dec(trace_buffered_event_cnt);
801 } else
802 ring_buffer_unlock_commit(buffer, event);
803 }
804
805 /**
806 * __trace_puts - write a constant string into the trace buffer.
807 * @ip: The address of the caller
808 * @str: The constant string to write
809 * @size: The size of the string.
810 */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813 struct ring_buffer_event *event;
814 struct ring_buffer *buffer;
815 struct print_entry *entry;
816 unsigned long irq_flags;
817 int alloc;
818 int pc;
819
820 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821 return 0;
822
823 pc = preempt_count();
824
825 if (unlikely(tracing_selftest_running || tracing_disabled))
826 return 0;
827
828 alloc = sizeof(*entry) + size + 2; /* possible \n added */
829
830 local_save_flags(irq_flags);
831 buffer = global_trace.trace_buffer.buffer;
832 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833 irq_flags, pc);
834 if (!event)
835 return 0;
836
837 entry = ring_buffer_event_data(event);
838 entry->ip = ip;
839
840 memcpy(&entry->buf, str, size);
841
842 /* Add a newline if necessary */
843 if (entry->buf[size - 1] != '\n') {
844 entry->buf[size] = '\n';
845 entry->buf[size + 1] = '\0';
846 } else
847 entry->buf[size] = '\0';
848
849 __buffer_unlock_commit(buffer, event);
850 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851
852 return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
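/*
 * Example (illustrative): kernel code normally reaches this through the
 * trace_puts() macro, which uses __trace_bputs() for compile-time constant
 * strings and falls back to this function otherwise, roughly:
 *
 *	trace_puts("hit the slow path\n");
 *
 * which, for a non-constant string s, ends up as something like:
 *
 *	__trace_puts(_THIS_IP_, s, strlen(s));
 */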
855
856 /**
857 * __trace_bputs - write the pointer to a constant string into trace buffer
858 * @ip: The address of the caller
859 * @str: The constant string to write to the buffer to
860 */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863 struct ring_buffer_event *event;
864 struct ring_buffer *buffer;
865 struct bputs_entry *entry;
866 unsigned long irq_flags;
867 int size = sizeof(struct bputs_entry);
868 int pc;
869
870 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871 return 0;
872
873 pc = preempt_count();
874
875 if (unlikely(tracing_selftest_running || tracing_disabled))
876 return 0;
877
878 local_save_flags(irq_flags);
879 buffer = global_trace.trace_buffer.buffer;
880 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881 irq_flags, pc);
882 if (!event)
883 return 0;
884
885 entry = ring_buffer_event_data(event);
886 entry->ip = ip;
887 entry->str = str;
888
889 __buffer_unlock_commit(buffer, event);
890 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891
892 return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 static void tracing_snapshot_instance(struct trace_array *tr)
898 {
899 struct tracer *tracer = tr->current_trace;
900 unsigned long flags;
901
902 if (in_nmi()) {
903 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904 internal_trace_puts("*** snapshot is being ignored ***\n");
905 return;
906 }
907
908 if (!tr->allocated_snapshot) {
909 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910 internal_trace_puts("*** stopping trace here! ***\n");
911 tracing_off();
912 return;
913 }
914
915 /* Note, snapshot can not be used when the tracer uses it */
916 if (tracer->use_max_tr) {
917 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919 return;
920 }
921
922 local_irq_save(flags);
923 update_max_tr(tr, current, smp_processor_id());
924 local_irq_restore(flags);
925 }
926
927 /**
928 * trace_snapshot - take a snapshot of the current buffer.
929 *
930 * This causes a swap between the snapshot buffer and the current live
931 * tracing buffer. You can use this to take snapshots of the live
932 * trace when some condition is triggered, but continue to trace.
933 *
934 * Note, make sure to allocate the snapshot with either
935 * a tracing_snapshot_alloc(), or by doing it manually
936 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
937 *
938 * If the snapshot buffer is not allocated, it will stop tracing.
939 * Basically making a permanent snapshot.
940 */
941 void tracing_snapshot(void)
942 {
943 struct trace_array *tr = &global_trace;
944
945 tracing_snapshot_instance(tr);
946 }
947 EXPORT_SYMBOL_GPL(tracing_snapshot);
948
949 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
950 struct trace_buffer *size_buf, int cpu_id);
951 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
952
953 static int alloc_snapshot(struct trace_array *tr)
954 {
955 int ret;
956
957 if (!tr->allocated_snapshot) {
958
959 /* allocate spare buffer */
960 ret = resize_buffer_duplicate_size(&tr->max_buffer,
961 &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
962 if (ret < 0)
963 return ret;
964
965 tr->allocated_snapshot = true;
966 }
967
968 return 0;
969 }
970
971 static void free_snapshot(struct trace_array *tr)
972 {
973 /*
974	 * We don't free the ring buffer; instead, we resize it because
975	 * the max_tr ring buffer has some state (e.g. ring->clock) and
976	 * we want to preserve it.
977 */
978 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
979 set_buffer_entries(&tr->max_buffer, 1);
980 tracing_reset_online_cpus(&tr->max_buffer);
981 tr->allocated_snapshot = false;
982 }
983
984 /**
985 * tracing_alloc_snapshot - allocate snapshot buffer.
986 *
987 * This only allocates the snapshot buffer if it isn't already
988 * allocated - it doesn't also take a snapshot.
989 *
990 * This is meant to be used in cases where the snapshot buffer needs
991 * to be set up for events that can't sleep but need to be able to
992 * trigger a snapshot.
993 */
994 int tracing_alloc_snapshot(void)
995 {
996 struct trace_array *tr = &global_trace;
997 int ret;
998
999 ret = alloc_snapshot(tr);
1000 WARN_ON(ret < 0);
1001
1002 return ret;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005
1006 /**
1007 * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008 *
1009 * This is similar to trace_snapshot(), but it will allocate the
1010 * snapshot buffer if it isn't already allocated. Use this only
1011 * where it is safe to sleep, as the allocation may sleep.
1012 *
1013 * This causes a swap between the snapshot buffer and the current live
1014 * tracing buffer. You can use this to take snapshots of the live
1015 * trace when some condition is triggered, but continue to trace.
1016 */
1017 void tracing_snapshot_alloc(void)
1018 {
1019 int ret;
1020
1021 ret = tracing_alloc_snapshot();
1022 if (ret < 0)
1023 return;
1024
1025 tracing_snapshot();
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
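/*
 * Typical usage sketch (illustrative): allocate the spare buffer once from a
 * context that may sleep, then take snapshots wherever the interesting
 * condition fires:
 *
 *	if (tracing_alloc_snapshot() == 0) {
 *		...
 *		tracing_snapshot();	// any context except NMI
 *	}
 *
 * The result can then be read back from the "snapshot" file in the tracing
 * directory (e.g. /sys/kernel/debug/tracing/snapshot).
 */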
1028 #else
1029 void tracing_snapshot(void)
1030 {
1031 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_snapshot);
1034 int tracing_alloc_snapshot(void)
1035 {
1036 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1037 return -ENODEV;
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040 void tracing_snapshot_alloc(void)
1041 {
1042 /* Give warning */
1043 tracing_snapshot();
1044 }
1045 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046 #endif /* CONFIG_TRACER_SNAPSHOT */
1047
1048 void tracer_tracing_off(struct trace_array *tr)
1049 {
1050 if (tr->trace_buffer.buffer)
1051 ring_buffer_record_off(tr->trace_buffer.buffer);
1052 /*
1053 * This flag is looked at when buffers haven't been allocated
1054 * yet, or by some tracers (like irqsoff), that just want to
1055 * know if the ring buffer has been disabled, but it can handle
1056 * races of where it gets disabled but we still do a record.
1057 * As the check is in the fast path of the tracers, it is more
1058 * important to be fast than accurate.
1059 */
1060 tr->buffer_disabled = 1;
1061 /* Make the flag seen by readers */
1062 smp_wmb();
1063 }
1064
1065 /**
1066 * tracing_off - turn off tracing buffers
1067 *
1068 * This function stops the tracing buffers from recording data.
1069 * It does not disable any overhead the tracers themselves may
1070 * be causing. This function simply causes all recording to
1071 * the ring buffers to fail.
1072 */
1073 void tracing_off(void)
1074 {
1075 tracer_tracing_off(&global_trace);
1076 }
1077 EXPORT_SYMBOL_GPL(tracing_off);
1078
1079 void disable_trace_on_warning(void)
1080 {
1081 if (__disable_trace_on_warning)
1082 tracing_off();
1083 }
1084
1085 /**
1086  * tracer_tracing_is_on - show the real enabled state of the ring buffer
1087  * @tr: the trace array whose ring buffer state is queried
1088  *
1089  * Shows the real state of the ring buffer: whether it is enabled or not.
1090 */
1091 int tracer_tracing_is_on(struct trace_array *tr)
1092 {
1093 if (tr->trace_buffer.buffer)
1094 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095 return !tr->buffer_disabled;
1096 }
1097
1098 /**
1099 * tracing_is_on - show state of ring buffers enabled
1100 */
1101 int tracing_is_on(void)
1102 {
1103 return tracer_tracing_is_on(&global_trace);
1104 }
1105 EXPORT_SYMBOL_GPL(tracing_is_on);
1106
1107 static int __init set_buf_size(char *str)
1108 {
1109 unsigned long buf_size;
1110
1111 if (!str)
1112 return 0;
1113 buf_size = memparse(str, &str);
1114 /* nr_entries can not be zero */
1115 if (buf_size == 0)
1116 return 0;
1117 trace_buf_size = buf_size;
1118 return 1;
1119 }
1120 __setup("trace_buf_size=", set_buf_size);
1121
1122 static int __init set_tracing_thresh(char *str)
1123 {
1124 unsigned long threshold;
1125 int ret;
1126
1127 if (!str)
1128 return 0;
1129 ret = kstrtoul(str, 0, &threshold);
1130 if (ret < 0)
1131 return 0;
1132 tracing_thresh = threshold * 1000;
1133 return 1;
1134 }
1135 __setup("tracing_thresh=", set_tracing_thresh);
1136
1137 unsigned long nsecs_to_usecs(unsigned long nsecs)
1138 {
1139 return nsecs / 1000;
1140 }
1141
1142 /*
1143 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144 * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1145 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146 * of strings in the order that the enums were defined.
1147 */
1148 #undef C
1149 #define C(a, b) b
1150
1151 /* These must match the bit positions in trace_iterator_flags */
1152 static const char *trace_options[] = {
1153 TRACE_FLAGS
1154 NULL
1155 };
1156
1157 static struct {
1158 u64 (*func)(void);
1159 const char *name;
1160 int in_ns; /* is this clock in nanoseconds? */
1161 } trace_clocks[] = {
1162 { trace_clock_local, "local", 1 },
1163 { trace_clock_global, "global", 1 },
1164 { trace_clock_counter, "counter", 0 },
1165 { trace_clock_jiffies, "uptime", 0 },
1166 { trace_clock, "perf", 1 },
1167 { ktime_get_mono_fast_ns, "mono", 1 },
1168 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1169 { ktime_get_boot_fast_ns, "boot", 1 },
1170 ARCH_TRACE_CLOCKS
1171 };
1172
1173 /*
1174 * trace_parser_get_init - gets the buffer for trace parser
1175 */
1176 int trace_parser_get_init(struct trace_parser *parser, int size)
1177 {
1178 memset(parser, 0, sizeof(*parser));
1179
1180 parser->buffer = kmalloc(size, GFP_KERNEL);
1181 if (!parser->buffer)
1182 return 1;
1183
1184 parser->size = size;
1185 return 0;
1186 }
1187
1188 /*
1189 * trace_parser_put - frees the buffer for trace parser
1190 */
1191 void trace_parser_put(struct trace_parser *parser)
1192 {
1193 kfree(parser->buffer);
1194 parser->buffer = NULL;
1195 }
1196
1197 /*
1198 * trace_get_user - reads the user input string separated by space
1199 * (matched by isspace(ch))
1200 *
1201 * For each string found the 'struct trace_parser' is updated,
1202 * and the function returns.
1203 *
1204 * Returns number of bytes read.
1205 *
1206 * See kernel/trace/trace.h for 'struct trace_parser' details.
1207 */
1208 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1209 size_t cnt, loff_t *ppos)
1210 {
1211 char ch;
1212 size_t read = 0;
1213 ssize_t ret;
1214
1215 if (!*ppos)
1216 trace_parser_clear(parser);
1217
1218 ret = get_user(ch, ubuf++);
1219 if (ret)
1220 goto out;
1221
1222 read++;
1223 cnt--;
1224
1225 /*
1226 * The parser is not finished with the last write,
1227 * continue reading the user input without skipping spaces.
1228 */
1229 if (!parser->cont) {
1230 /* skip white space */
1231 while (cnt && isspace(ch)) {
1232 ret = get_user(ch, ubuf++);
1233 if (ret)
1234 goto out;
1235 read++;
1236 cnt--;
1237 }
1238
1239 /* only spaces were written */
1240 if (isspace(ch)) {
1241 *ppos += read;
1242 ret = read;
1243 goto out;
1244 }
1245
1246 parser->idx = 0;
1247 }
1248
1249 /* read the non-space input */
1250 while (cnt && !isspace(ch)) {
1251 if (parser->idx < parser->size - 1)
1252 parser->buffer[parser->idx++] = ch;
1253 else {
1254 ret = -EINVAL;
1255 goto out;
1256 }
1257 ret = get_user(ch, ubuf++);
1258 if (ret)
1259 goto out;
1260 read++;
1261 cnt--;
1262 }
1263
1264 /* We either got finished input or we have to wait for another call. */
1265 if (isspace(ch)) {
1266 parser->buffer[parser->idx] = 0;
1267 parser->cont = false;
1268 } else if (parser->idx < parser->size - 1) {
1269 parser->cont = true;
1270 parser->buffer[parser->idx++] = ch;
1271 } else {
1272 ret = -EINVAL;
1273 goto out;
1274 }
1275
1276 *ppos += read;
1277 ret = read;
1278
1279 out:
1280 return ret;
1281 }
1282
1283 /* TODO add a seq_buf_to_buffer() */
1284 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1285 {
1286 int len;
1287
1288 if (trace_seq_used(s) <= s->seq.readpos)
1289 return -EBUSY;
1290
1291 len = trace_seq_used(s) - s->seq.readpos;
1292 if (cnt > len)
1293 cnt = len;
1294 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1295
1296 s->seq.readpos += cnt;
1297 return cnt;
1298 }
1299
1300 unsigned long __read_mostly tracing_thresh;
1301
1302 #ifdef CONFIG_TRACER_MAX_TRACE
1303 /*
1304 * Copy the new maximum trace into the separate maximum-trace
1305 * structure. (this way the maximum trace is permanently saved,
1306 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1307 */
1308 static void
1309 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310 {
1311 struct trace_buffer *trace_buf = &tr->trace_buffer;
1312 struct trace_buffer *max_buf = &tr->max_buffer;
1313 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1314 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1315
1316 max_buf->cpu = cpu;
1317 max_buf->time_start = data->preempt_timestamp;
1318
1319 max_data->saved_latency = tr->max_latency;
1320 max_data->critical_start = data->critical_start;
1321 max_data->critical_end = data->critical_end;
1322
1323 memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1324 max_data->pid = tsk->pid;
1325 /*
1326 * If tsk == current, then use current_uid(), as that does not use
1327 * RCU. The irq tracer can be called out of RCU scope.
1328 */
1329 if (tsk == current)
1330 max_data->uid = current_uid();
1331 else
1332 max_data->uid = task_uid(tsk);
1333
1334 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1335 max_data->policy = tsk->policy;
1336 max_data->rt_priority = tsk->rt_priority;
1337
1338 /* record this tasks comm */
1339 tracing_record_cmdline(tsk);
1340 }
1341
1342 /**
1343 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1344 * @tr: tracer
1345 * @tsk: the task with the latency
1346 * @cpu: The cpu that initiated the trace.
1347 *
1348 * Flip the buffers between the @tr and the max_tr and record information
1349 * about which task was the cause of this latency.
1350 */
1351 void
1352 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1353 {
1354 struct ring_buffer *buf;
1355
1356 if (tr->stop_count)
1357 return;
1358
1359 WARN_ON_ONCE(!irqs_disabled());
1360
1361 if (!tr->allocated_snapshot) {
1362 /* Only the nop tracer should hit this when disabling */
1363 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1364 return;
1365 }
1366
1367 arch_spin_lock(&tr->max_lock);
1368
1369 buf = tr->trace_buffer.buffer;
1370 tr->trace_buffer.buffer = tr->max_buffer.buffer;
1371 tr->max_buffer.buffer = buf;
1372
1373 __update_max_tr(tr, tsk, cpu);
1374 arch_spin_unlock(&tr->max_lock);
1375 }
1376
1377 /**
1378 * update_max_tr_single - only copy one trace over, and reset the rest
1379  * @tr: tracer
1380  * @tsk: task with the latency
1381  * @cpu: the cpu of the buffer to copy.
1382 *
1383 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1384 */
1385 void
1386 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1387 {
1388 int ret;
1389
1390 if (tr->stop_count)
1391 return;
1392
1393 WARN_ON_ONCE(!irqs_disabled());
1394 if (!tr->allocated_snapshot) {
1395 /* Only the nop tracer should hit this when disabling */
1396 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1397 return;
1398 }
1399
1400 arch_spin_lock(&tr->max_lock);
1401
1402 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1403
1404 if (ret == -EBUSY) {
1405 /*
1406 * We failed to swap the buffer due to a commit taking
1407 * place on this CPU. We fail to record, but we reset
1408 * the max trace buffer (no one writes directly to it)
1409 * and flag that it failed.
1410 */
1411 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1412 "Failed to swap buffers due to commit in progress\n");
1413 }
1414
1415 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1416
1417 __update_max_tr(tr, tsk, cpu);
1418 arch_spin_unlock(&tr->max_lock);
1419 }
1420 #endif /* CONFIG_TRACER_MAX_TRACE */
1421
1422 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1423 {
1424 /* Iterators are static, they should be filled or empty */
1425 if (trace_buffer_iter(iter, iter->cpu_file))
1426 return 0;
1427
1428 return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1429 full);
1430 }
1431
1432 #ifdef CONFIG_FTRACE_STARTUP_TEST
1433 static bool selftests_can_run;
1434
1435 struct trace_selftests {
1436 struct list_head list;
1437 struct tracer *type;
1438 };
1439
1440 static LIST_HEAD(postponed_selftests);
1441
1442 static int save_selftest(struct tracer *type)
1443 {
1444 struct trace_selftests *selftest;
1445
1446 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1447 if (!selftest)
1448 return -ENOMEM;
1449
1450 selftest->type = type;
1451 list_add(&selftest->list, &postponed_selftests);
1452 return 0;
1453 }
1454
1455 static int run_tracer_selftest(struct tracer *type)
1456 {
1457 struct trace_array *tr = &global_trace;
1458 struct tracer *saved_tracer = tr->current_trace;
1459 int ret;
1460
1461 if (!type->selftest || tracing_selftest_disabled)
1462 return 0;
1463
1464 /*
1465 * If a tracer registers early in boot up (before scheduling is
1466 * initialized and such), then do not run its selftests yet.
1467 * Instead, run it a little later in the boot process.
1468 */
1469 if (!selftests_can_run)
1470 return save_selftest(type);
1471
1472 /*
1473 * Run a selftest on this tracer.
1474 * Here we reset the trace buffer, and set the current
1475 * tracer to be this tracer. The tracer can then run some
1476 * internal tracing to verify that everything is in order.
1477 * If we fail, we do not register this tracer.
1478 */
1479 tracing_reset_online_cpus(&tr->trace_buffer);
1480
1481 tr->current_trace = type;
1482
1483 #ifdef CONFIG_TRACER_MAX_TRACE
1484 if (type->use_max_tr) {
1485 /* If we expanded the buffers, make sure the max is expanded too */
1486 if (ring_buffer_expanded)
1487 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1488 RING_BUFFER_ALL_CPUS);
1489 tr->allocated_snapshot = true;
1490 }
1491 #endif
1492
1493 /* the test is responsible for initializing and enabling */
1494 pr_info("Testing tracer %s: ", type->name);
1495 ret = type->selftest(type, tr);
1496 /* the test is responsible for resetting too */
1497 tr->current_trace = saved_tracer;
1498 if (ret) {
1499 printk(KERN_CONT "FAILED!\n");
1500 /* Add the warning after printing 'FAILED' */
1501 WARN_ON(1);
1502 return -1;
1503 }
1504 /* Only reset on passing, to avoid touching corrupted buffers */
1505 tracing_reset_online_cpus(&tr->trace_buffer);
1506
1507 #ifdef CONFIG_TRACER_MAX_TRACE
1508 if (type->use_max_tr) {
1509 tr->allocated_snapshot = false;
1510
1511 /* Shrink the max buffer again */
1512 if (ring_buffer_expanded)
1513 ring_buffer_resize(tr->max_buffer.buffer, 1,
1514 RING_BUFFER_ALL_CPUS);
1515 }
1516 #endif
1517
1518 printk(KERN_CONT "PASSED\n");
1519 return 0;
1520 }
1521
1522 static __init int init_trace_selftests(void)
1523 {
1524 struct trace_selftests *p, *n;
1525 struct tracer *t, **last;
1526 int ret;
1527
1528 selftests_can_run = true;
1529
1530 mutex_lock(&trace_types_lock);
1531
1532 if (list_empty(&postponed_selftests))
1533 goto out;
1534
1535 pr_info("Running postponed tracer tests:\n");
1536
1537 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1538 ret = run_tracer_selftest(p->type);
1539 /* If the test fails, then warn and remove from available_tracers */
1540 if (ret < 0) {
1541 WARN(1, "tracer: %s failed selftest, disabling\n",
1542 p->type->name);
1543 last = &trace_types;
1544 for (t = trace_types; t; t = t->next) {
1545 if (t == p->type) {
1546 *last = t->next;
1547 break;
1548 }
1549 last = &t->next;
1550 }
1551 }
1552 list_del(&p->list);
1553 kfree(p);
1554 }
1555
1556 out:
1557 mutex_unlock(&trace_types_lock);
1558
1559 return 0;
1560 }
1561 early_initcall(init_trace_selftests);
1562 #else
1563 static inline int run_tracer_selftest(struct tracer *type)
1564 {
1565 return 0;
1566 }
1567 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1568
1569 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1570
1571 static void __init apply_trace_boot_options(void);
1572
1573 /**
1574 * register_tracer - register a tracer with the ftrace system.
1575  * @type: the plugin for the tracer
1576 *
1577 * Register a new plugin tracer.
1578 */
1579 int __init register_tracer(struct tracer *type)
1580 {
1581 struct tracer *t;
1582 int ret = 0;
1583
1584 if (!type->name) {
1585 pr_info("Tracer must have a name\n");
1586 return -1;
1587 }
1588
1589 if (strlen(type->name) >= MAX_TRACER_SIZE) {
1590 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1591 return -1;
1592 }
1593
1594 mutex_lock(&trace_types_lock);
1595
1596 tracing_selftest_running = true;
1597
1598 for (t = trace_types; t; t = t->next) {
1599 if (strcmp(type->name, t->name) == 0) {
1600 /* already found */
1601 pr_info("Tracer %s already registered\n",
1602 type->name);
1603 ret = -1;
1604 goto out;
1605 }
1606 }
1607
1608 if (!type->set_flag)
1609 type->set_flag = &dummy_set_flag;
1610 if (!type->flags) {
1611		/* allocate a dummy tracer_flags */
1612 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1613 if (!type->flags) {
1614 ret = -ENOMEM;
1615 goto out;
1616 }
1617 type->flags->val = 0;
1618 type->flags->opts = dummy_tracer_opt;
1619 } else
1620 if (!type->flags->opts)
1621 type->flags->opts = dummy_tracer_opt;
1622
1623 /* store the tracer for __set_tracer_option */
1624 type->flags->trace = type;
1625
1626 ret = run_tracer_selftest(type);
1627 if (ret < 0)
1628 goto out;
1629
1630 type->next = trace_types;
1631 trace_types = type;
1632 add_tracer_options(&global_trace, type);
1633
1634 out:
1635 tracing_selftest_running = false;
1636 mutex_unlock(&trace_types_lock);
1637
1638 if (ret || !default_bootup_tracer)
1639 goto out_unlock;
1640
1641 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1642 goto out_unlock;
1643
1644 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1645 /* Do we want this tracer to start on bootup? */
1646 tracing_set_tracer(&global_trace, type->name);
1647 default_bootup_tracer = NULL;
1648
1649 apply_trace_boot_options();
1650
1651	/* disable other selftests, since this will break them. */
1652 tracing_selftest_disabled = true;
1653 #ifdef CONFIG_FTRACE_STARTUP_TEST
1654 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1655 type->name);
1656 #endif
1657
1658 out_unlock:
1659 return ret;
1660 }
1661
1662 void tracing_reset(struct trace_buffer *buf, int cpu)
1663 {
1664 struct ring_buffer *buffer = buf->buffer;
1665
1666 if (!buffer)
1667 return;
1668
1669 ring_buffer_record_disable(buffer);
1670
1671 /* Make sure all commits have finished */
1672 synchronize_sched();
1673 ring_buffer_reset_cpu(buffer, cpu);
1674
1675 ring_buffer_record_enable(buffer);
1676 }
1677
1678 void tracing_reset_online_cpus(struct trace_buffer *buf)
1679 {
1680 struct ring_buffer *buffer = buf->buffer;
1681 int cpu;
1682
1683 if (!buffer)
1684 return;
1685
1686 ring_buffer_record_disable(buffer);
1687
1688 /* Make sure all commits have finished */
1689 synchronize_sched();
1690
1691 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1692
1693 for_each_online_cpu(cpu)
1694 ring_buffer_reset_cpu(buffer, cpu);
1695
1696 ring_buffer_record_enable(buffer);
1697 }
1698
1699 /* Must have trace_types_lock held */
1700 void tracing_reset_all_online_cpus(void)
1701 {
1702 struct trace_array *tr;
1703
1704 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1705 tracing_reset_online_cpus(&tr->trace_buffer);
1706 #ifdef CONFIG_TRACER_MAX_TRACE
1707 tracing_reset_online_cpus(&tr->max_buffer);
1708 #endif
1709 }
1710 }
1711
1712 #define SAVED_CMDLINES_DEFAULT 128
1713 #define NO_CMDLINE_MAP UINT_MAX
1714 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1715 struct saved_cmdlines_buffer {
1716 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1717 unsigned *map_cmdline_to_pid;
1718 unsigned cmdline_num;
1719 int cmdline_idx;
1720 char *saved_cmdlines;
1721 };
1722 static struct saved_cmdlines_buffer *savedcmd;
1723
1724 /* temporary disable recording */
1725 static atomic_t trace_record_cmdline_disabled __read_mostly;
1726
1727 static inline char *get_saved_cmdlines(int idx)
1728 {
1729 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1730 }
1731
1732 static inline void set_cmdline(int idx, const char *cmdline)
1733 {
1734 memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1735 }
1736
1737 static int allocate_cmdlines_buffer(unsigned int val,
1738 struct saved_cmdlines_buffer *s)
1739 {
1740 s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1741 GFP_KERNEL);
1742 if (!s->map_cmdline_to_pid)
1743 return -ENOMEM;
1744
1745 s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1746 if (!s->saved_cmdlines) {
1747 kfree(s->map_cmdline_to_pid);
1748 return -ENOMEM;
1749 }
1750
1751 s->cmdline_idx = 0;
1752 s->cmdline_num = val;
1753 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1754 sizeof(s->map_pid_to_cmdline));
1755 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1756 val * sizeof(*s->map_cmdline_to_pid));
1757
1758 return 0;
1759 }
1760
1761 static int trace_create_savedcmd(void)
1762 {
1763 int ret;
1764
1765 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1766 if (!savedcmd)
1767 return -ENOMEM;
1768
1769 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1770 if (ret < 0) {
1771 kfree(savedcmd);
1772 savedcmd = NULL;
1773 return -ENOMEM;
1774 }
1775
1776 return 0;
1777 }
1778
1779 int is_tracing_stopped(void)
1780 {
1781 return global_trace.stop_count;
1782 }
1783
1784 /**
1785 * tracing_start - quick start of the tracer
1786 *
1787 * If tracing is enabled but was stopped by tracing_stop,
1788 * this will start the tracer back up.
1789 */
1790 void tracing_start(void)
1791 {
1792 struct ring_buffer *buffer;
1793 unsigned long flags;
1794
1795 if (tracing_disabled)
1796 return;
1797
1798 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1799 if (--global_trace.stop_count) {
1800 if (global_trace.stop_count < 0) {
1801 /* Someone screwed up their debugging */
1802 WARN_ON_ONCE(1);
1803 global_trace.stop_count = 0;
1804 }
1805 goto out;
1806 }
1807
1808 /* Prevent the buffers from switching */
1809 arch_spin_lock(&global_trace.max_lock);
1810
1811 buffer = global_trace.trace_buffer.buffer;
1812 if (buffer)
1813 ring_buffer_record_enable(buffer);
1814
1815 #ifdef CONFIG_TRACER_MAX_TRACE
1816 buffer = global_trace.max_buffer.buffer;
1817 if (buffer)
1818 ring_buffer_record_enable(buffer);
1819 #endif
1820
1821 arch_spin_unlock(&global_trace.max_lock);
1822
1823 out:
1824 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1825 }
1826
1827 static void tracing_start_tr(struct trace_array *tr)
1828 {
1829 struct ring_buffer *buffer;
1830 unsigned long flags;
1831
1832 if (tracing_disabled)
1833 return;
1834
1835 /* If global, we need to also start the max tracer */
1836 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1837 return tracing_start();
1838
1839 raw_spin_lock_irqsave(&tr->start_lock, flags);
1840
1841 if (--tr->stop_count) {
1842 if (tr->stop_count < 0) {
1843 /* Someone screwed up their debugging */
1844 WARN_ON_ONCE(1);
1845 tr->stop_count = 0;
1846 }
1847 goto out;
1848 }
1849
1850 buffer = tr->trace_buffer.buffer;
1851 if (buffer)
1852 ring_buffer_record_enable(buffer);
1853
1854 out:
1855 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1856 }
1857
1858 /**
1859 * tracing_stop - quick stop of the tracer
1860 *
1861  * Lightweight way to stop tracing. Use in conjunction with
1862 * tracing_start.
1863 */
1864 void tracing_stop(void)
1865 {
1866 struct ring_buffer *buffer;
1867 unsigned long flags;
1868
1869 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1870 if (global_trace.stop_count++)
1871 goto out;
1872
1873 /* Prevent the buffers from switching */
1874 arch_spin_lock(&global_trace.max_lock);
1875
1876 buffer = global_trace.trace_buffer.buffer;
1877 if (buffer)
1878 ring_buffer_record_disable(buffer);
1879
1880 #ifdef CONFIG_TRACER_MAX_TRACE
1881 buffer = global_trace.max_buffer.buffer;
1882 if (buffer)
1883 ring_buffer_record_disable(buffer);
1884 #endif
1885
1886 arch_spin_unlock(&global_trace.max_lock);
1887
1888 out:
1889 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1890 }
1891
1892 static void tracing_stop_tr(struct trace_array *tr)
1893 {
1894 struct ring_buffer *buffer;
1895 unsigned long flags;
1896
1897 /* If global, we need to also stop the max tracer */
1898 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1899 return tracing_stop();
1900
1901 raw_spin_lock_irqsave(&tr->start_lock, flags);
1902 if (tr->stop_count++)
1903 goto out;
1904
1905 buffer = tr->trace_buffer.buffer;
1906 if (buffer)
1907 ring_buffer_record_disable(buffer);
1908
1909 out:
1910 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1911 }
1912
1913 void trace_stop_cmdline_recording(void);
1914
1915 static int trace_save_cmdline(struct task_struct *tsk)
1916 {
1917 unsigned pid, idx;
1918
1919 if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1920 return 0;
1921
1922 /*
1923 * It's not the end of the world if we don't get
1924 * the lock, but we also don't want to spin
1925 * nor do we want to disable interrupts,
1926 * so if we miss here, then better luck next time.
1927 */
1928 if (!arch_spin_trylock(&trace_cmdline_lock))
1929 return 0;
1930
1931 idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1932 if (idx == NO_CMDLINE_MAP) {
1933 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1934
1935 /*
1936 * Check whether the cmdline buffer at idx has a pid
1937 * mapped. We are going to overwrite that entry so we
1938 * need to clear the map_pid_to_cmdline. Otherwise we
1939 * would read the new comm for the old pid.
1940 */
1941 pid = savedcmd->map_cmdline_to_pid[idx];
1942 if (pid != NO_CMDLINE_MAP)
1943 savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1944
1945 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1946 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1947
1948 savedcmd->cmdline_idx = idx;
1949 }
1950
1951 set_cmdline(idx, tsk->comm);
1952
1953 arch_spin_unlock(&trace_cmdline_lock);
1954
1955 return 1;
1956 }
1957
1958 static void __trace_find_cmdline(int pid, char comm[])
1959 {
1960 unsigned map;
1961
1962 if (!pid) {
1963 strcpy(comm, "<idle>");
1964 return;
1965 }
1966
1967 if (WARN_ON_ONCE(pid < 0)) {
1968 strcpy(comm, "<XXX>");
1969 return;
1970 }
1971
1972 if (pid > PID_MAX_DEFAULT) {
1973 strcpy(comm, "<...>");
1974 return;
1975 }
1976
1977 map = savedcmd->map_pid_to_cmdline[pid];
1978 if (map != NO_CMDLINE_MAP)
1979 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1980 else
1981 strcpy(comm, "<...>");
1982 }
1983
1984 void trace_find_cmdline(int pid, char comm[])
1985 {
1986 preempt_disable();
1987 arch_spin_lock(&trace_cmdline_lock);
1988
1989 __trace_find_cmdline(pid, comm);
1990
1991 arch_spin_unlock(&trace_cmdline_lock);
1992 preempt_enable();
1993 }
1994
1995 void tracing_record_cmdline(struct task_struct *tsk)
1996 {
1997 if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1998 return;
1999
2000 if (!__this_cpu_read(trace_cmdline_save))
2001 return;
2002
2003 if (trace_save_cmdline(tsk))
2004 __this_cpu_write(trace_cmdline_save, false);
2005 }
2006
2007 /*
2008 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2009 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2010 * simplifies those functions and keeps them in sync.
2011 */
2012 enum print_line_t trace_handle_return(struct trace_seq *s)
2013 {
2014 return trace_seq_has_overflowed(s) ?
2015 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2016 }
2017 EXPORT_SYMBOL_GPL(trace_handle_return);
2018
2019 void
2020 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2021 int pc)
2022 {
2023 struct task_struct *tsk = current;
2024
2025 entry->preempt_count = pc & 0xff;
2026 entry->pid = (tsk) ? tsk->pid : 0;
2027 entry->flags =
2028 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2029 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2030 #else
2031 TRACE_FLAG_IRQS_NOSUPPORT |
2032 #endif
2033 ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
2034 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2035 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2036 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2037 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2038 }
2039 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2040
2041 struct ring_buffer_event *
2042 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2043 int type,
2044 unsigned long len,
2045 unsigned long flags, int pc)
2046 {
2047 return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2048 }
2049
2050 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2051 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2052 static int trace_buffered_event_ref;
2053
2054 /**
2055 * trace_buffered_event_enable - enable buffering events
2056 *
2057 * When events are being filtered, it is quicker to use a temporary
2058 * buffer to write the event data into if there's a likely chance
2059 * that it will not be committed. Discarding an event from the ring
2060 * buffer is not as fast as committing one, and is much slower than
2061 * copying into a buffered event and committing that.
2062 *
2063 * When events are being filtered, allocate per cpu buffers to write
2064 * the event data into. If the event is then filtered and discarded,
2065 * it is simply dropped; otherwise the entire data is committed to
2066 * the ring buffer in one shot.
2067 */
2068 void trace_buffered_event_enable(void)
2069 {
2070 struct ring_buffer_event *event;
2071 struct page *page;
2072 int cpu;
2073
2074 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2075
2076 if (trace_buffered_event_ref++)
2077 return;
2078
2079 for_each_tracing_cpu(cpu) {
2080 page = alloc_pages_node(cpu_to_node(cpu),
2081 GFP_KERNEL | __GFP_NORETRY, 0);
2082 if (!page)
2083 goto failed;
2084
2085 event = page_address(page);
2086 memset(event, 0, sizeof(*event));
2087
2088 per_cpu(trace_buffered_event, cpu) = event;
2089
2090 preempt_disable();
2091 if (cpu == smp_processor_id() &&
2092 this_cpu_read(trace_buffered_event) !=
2093 per_cpu(trace_buffered_event, cpu))
2094 WARN_ON_ONCE(1);
2095 preempt_enable();
2096 }
2097
2098 return;
2099 failed:
2100 trace_buffered_event_disable();
2101 }
2102
2103 static void enable_trace_buffered_event(void *data)
2104 {
2105 /* Probably not needed, but do it anyway */
2106 smp_rmb();
2107 this_cpu_dec(trace_buffered_event_cnt);
2108 }
2109
2110 static void disable_trace_buffered_event(void *data)
2111 {
2112 this_cpu_inc(trace_buffered_event_cnt);
2113 }
2114
2115 /**
2116 * trace_buffered_event_disable - disable buffering events
2117 *
2118 * When a filter is removed, it is faster to not use the buffered
2119 * events, and to commit directly into the ring buffer. Free up
2120 * the temp buffers when there are no more users. This requires
2121 * special synchronization with current events.
2122 */
2123 void trace_buffered_event_disable(void)
2124 {
2125 int cpu;
2126
2127 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2128
2129 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2130 return;
2131
2132 if (--trace_buffered_event_ref)
2133 return;
2134
2135 preempt_disable();
2136 /* For each CPU, set the buffer as used. */
2137 smp_call_function_many(tracing_buffer_mask,
2138 disable_trace_buffered_event, NULL, 1);
2139 preempt_enable();
2140
2141 /* Wait for all current users to finish */
2142 synchronize_sched();
2143
2144 for_each_tracing_cpu(cpu) {
2145 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2146 per_cpu(trace_buffered_event, cpu) = NULL;
2147 }
2148 /*
2149 * Make sure trace_buffered_event is NULL before clearing
2150 * trace_buffered_event_cnt.
2151 */
2152 smp_wmb();
2153
2154 preempt_disable();
2155 /* Do the work on each cpu */
2156 smp_call_function_many(tracing_buffer_mask,
2157 enable_trace_buffered_event, NULL, 1);
2158 preempt_enable();
2159 }
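
/*
 * Illustrative sketch, not part of the original file: the calling
 * pattern the two helpers above expect. Both assert that event_mutex
 * is held, so a (hypothetical) filter add/remove path would bracket
 * its update like this.
 */
static void __maybe_unused example_toggle_buffered_events(bool enable)
{
	mutex_lock(&event_mutex);
	if (enable)
		trace_buffered_event_enable();
	else
		trace_buffered_event_disable();
	mutex_unlock(&event_mutex);
}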
2160
2161 static struct ring_buffer *temp_buffer;
2162
2163 struct ring_buffer_event *
2164 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2165 struct trace_event_file *trace_file,
2166 int type, unsigned long len,
2167 unsigned long flags, int pc)
2168 {
2169 struct ring_buffer_event *entry;
2170 int val;
2171
2172 *current_rb = trace_file->tr->trace_buffer.buffer;
2173
2174 if ((trace_file->flags &
2175 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2176 (entry = this_cpu_read(trace_buffered_event))) {
2177 /* Try to use the per cpu buffer first */
2178 val = this_cpu_inc_return(trace_buffered_event_cnt);
2179 if (val == 1) {
2180 trace_event_setup(entry, type, flags, pc);
2181 entry->array[0] = len;
2182 return entry;
2183 }
2184 this_cpu_dec(trace_buffered_event_cnt);
2185 }
2186
2187 entry = __trace_buffer_lock_reserve(*current_rb,
2188 type, len, flags, pc);
2189 /*
2190 * If tracing is off, but we have triggers enabled,
2191 * we still need to look at the event data. Use the temp_buffer
2192 * to store the trace event for the trigger to use. It's recursion
2193 * safe and will not be recorded anywhere.
2194 */
2195 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2196 *current_rb = temp_buffer;
2197 entry = __trace_buffer_lock_reserve(*current_rb,
2198 type, len, flags, pc);
2199 }
2200 return entry;
2201 }
2202 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2203
2204 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2205 static DEFINE_MUTEX(tracepoint_printk_mutex);
2206
2207 static void output_printk(struct trace_event_buffer *fbuffer)
2208 {
2209 struct trace_event_call *event_call;
2210 struct trace_event *event;
2211 unsigned long flags;
2212 struct trace_iterator *iter = tracepoint_print_iter;
2213
2214 /* We should never get here if iter is NULL */
2215 if (WARN_ON_ONCE(!iter))
2216 return;
2217
2218 event_call = fbuffer->trace_file->event_call;
2219 if (!event_call || !event_call->event.funcs ||
2220 !event_call->event.funcs->trace)
2221 return;
2222
2223 event = &fbuffer->trace_file->event_call->event;
2224
2225 spin_lock_irqsave(&tracepoint_iter_lock, flags);
2226 trace_seq_init(&iter->seq);
2227 iter->ent = fbuffer->entry;
2228 event_call->event.funcs->trace(iter, 0, event);
2229 trace_seq_putc(&iter->seq, 0);
2230 printk("%s", iter->seq.buffer);
2231
2232 spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2233 }
2234
2235 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2236 void __user *buffer, size_t *lenp,
2237 loff_t *ppos)
2238 {
2239 int save_tracepoint_printk;
2240 int ret;
2241
2242 mutex_lock(&tracepoint_printk_mutex);
2243 save_tracepoint_printk = tracepoint_printk;
2244
2245 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2246
2247 /*
2248 * This will force an early exit, as tracepoint_printk
2249 * is always zero when tracepoint_print_iter is not allocated.
2250 */
2251 if (!tracepoint_print_iter)
2252 tracepoint_printk = 0;
2253
2254 if (save_tracepoint_printk == tracepoint_printk)
2255 goto out;
2256
2257 if (tracepoint_printk)
2258 static_key_enable(&tracepoint_printk_key.key);
2259 else
2260 static_key_disable(&tracepoint_printk_key.key);
2261
2262 out:
2263 mutex_unlock(&tracepoint_printk_mutex);
2264
2265 return ret;
2266 }
2267
2268 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2269 {
2270 if (static_key_false(&tracepoint_printk_key.key))
2271 output_printk(fbuffer);
2272
2273 event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2274 fbuffer->event, fbuffer->entry,
2275 fbuffer->flags, fbuffer->pc);
2276 }
2277 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2278
2279 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2280 struct ring_buffer *buffer,
2281 struct ring_buffer_event *event,
2282 unsigned long flags, int pc,
2283 struct pt_regs *regs)
2284 {
2285 __buffer_unlock_commit(buffer, event);
2286
2287 /*
2288 * If regs is not set, then skip the following callers:
2289 * trace_buffer_unlock_commit_regs
2290 * event_trigger_unlock_commit
2291 * trace_event_buffer_commit
2292 * trace_event_raw_event_sched_switch
2293 * Note, we can still get here via blktrace, wakeup tracer
2294 * and mmiotrace, but that's ok if they lose a function or
2295 * two. They are not that meaningful.
2296 */
2297 ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2298 ftrace_trace_userstack(buffer, flags, pc);
2299 }
2300
2301 /*
2302 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2303 */
2304 void
2305 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2306 struct ring_buffer_event *event)
2307 {
2308 __buffer_unlock_commit(buffer, event);
2309 }
2310
2311 static void
2312 trace_process_export(struct trace_export *export,
2313 struct ring_buffer_event *event)
2314 {
2315 struct trace_entry *entry;
2316 unsigned int size = 0;
2317
2318 entry = ring_buffer_event_data(event);
2319 size = ring_buffer_event_length(event);
2320 export->write(entry, size);
2321 }
2322
2323 static DEFINE_MUTEX(ftrace_export_lock);
2324
2325 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2326
2327 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2328
2329 static inline void ftrace_exports_enable(void)
2330 {
2331 static_branch_enable(&ftrace_exports_enabled);
2332 }
2333
2334 static inline void ftrace_exports_disable(void)
2335 {
2336 static_branch_disable(&ftrace_exports_enabled);
2337 }
2338
2339 void ftrace_exports(struct ring_buffer_event *event)
2340 {
2341 struct trace_export *export;
2342
2343 preempt_disable_notrace();
2344
2345 export = rcu_dereference_raw_notrace(ftrace_exports_list);
2346 while (export) {
2347 trace_process_export(export, event);
2348 export = rcu_dereference_raw_notrace(export->next);
2349 }
2350
2351 preempt_enable_notrace();
2352 }
2353
2354 static inline void
2355 add_trace_export(struct trace_export **list, struct trace_export *export)
2356 {
2357 rcu_assign_pointer(export->next, *list);
2358 /*
2359 * We are adding export to the list, but another CPU
2360 * might be walking that list. We need to make sure the
2361 * export->next pointer is valid before another CPU sees
2362 * the export pointer inserted into the list.
2363 */
2364 rcu_assign_pointer(*list, export);
2365 }
2366
2367 static inline int
2368 rm_trace_export(struct trace_export **list, struct trace_export *export)
2369 {
2370 struct trace_export **p;
2371
2372 for (p = list; *p != NULL; p = &(*p)->next)
2373 if (*p == export)
2374 break;
2375
2376 if (*p != export)
2377 return -1;
2378
2379 rcu_assign_pointer(*p, (*p)->next);
2380
2381 return 0;
2382 }
2383
2384 static inline void
2385 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2386 {
2387 if (*list == NULL)
2388 ftrace_exports_enable();
2389
2390 add_trace_export(list, export);
2391 }
2392
2393 static inline int
2394 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2395 {
2396 int ret;
2397
2398 ret = rm_trace_export(list, export);
2399 if (*list == NULL)
2400 ftrace_exports_disable();
2401
2402 return ret;
2403 }
2404
2405 int register_ftrace_export(struct trace_export *export)
2406 {
2407 if (WARN_ON_ONCE(!export->write))
2408 return -1;
2409
2410 mutex_lock(&ftrace_export_lock);
2411
2412 add_ftrace_export(&ftrace_exports_list, export);
2413
2414 mutex_unlock(&ftrace_export_lock);
2415
2416 return 0;
2417 }
2418 EXPORT_SYMBOL_GPL(register_ftrace_export);
2419
2420 int unregister_ftrace_export(struct trace_export *export)
2421 {
2422 int ret;
2423
2424 mutex_lock(&ftrace_export_lock);
2425
2426 ret = rm_ftrace_export(&ftrace_exports_list, export);
2427
2428 mutex_unlock(&ftrace_export_lock);
2429
2430 return ret;
2431 }
2432 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
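
/*
 * Illustrative sketch, not part of the original file: a minimal
 * trace_export client. The names and the pr_debug() sink are
 * hypothetical; the write() callback takes the buffer pointer and
 * length that trace_process_export() passes above.
 */
static void example_export_write(const void *buf, unsigned int len)
{
	/* Forward the raw trace entry to some out-of-band sink. */
	pr_debug("exporting %u bytes of trace data\n", len);
}

static struct trace_export example_export = {
	.write	= example_export_write,
};

static int __maybe_unused example_export_register(void)
{
	return register_ftrace_export(&example_export);
}

static void __maybe_unused example_export_unregister(void)
{
	unregister_ftrace_export(&example_export);
}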
2433
2434 void
2435 trace_function(struct trace_array *tr,
2436 unsigned long ip, unsigned long parent_ip, unsigned long flags,
2437 int pc)
2438 {
2439 struct trace_event_call *call = &event_function;
2440 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2441 struct ring_buffer_event *event;
2442 struct ftrace_entry *entry;
2443
2444 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2445 flags, pc);
2446 if (!event)
2447 return;
2448 entry = ring_buffer_event_data(event);
2449 entry->ip = ip;
2450 entry->parent_ip = parent_ip;
2451
2452 if (!call_filter_check_discard(call, entry, buffer, event)) {
2453 if (static_branch_unlikely(&ftrace_exports_enabled))
2454 ftrace_exports(event);
2455 __buffer_unlock_commit(buffer, event);
2456 }
2457 }
2458
2459 #ifdef CONFIG_STACKTRACE
2460
2461 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2462 struct ftrace_stack {
2463 unsigned long calls[FTRACE_STACK_MAX_ENTRIES];
2464 };
2465
2466 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2467 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2468
2469 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2470 unsigned long flags,
2471 int skip, int pc, struct pt_regs *regs)
2472 {
2473 struct trace_event_call *call = &event_kernel_stack;
2474 struct ring_buffer_event *event;
2475 struct stack_entry *entry;
2476 struct stack_trace trace;
2477 int use_stack;
2478 int size = FTRACE_STACK_ENTRIES;
2479
2480 trace.nr_entries = 0;
2481 trace.skip = skip;
2482
2483 /*
2484 * Add two, for this function and the call to save_stack_trace().
2485 * If regs is set, then these functions will not be in the way.
2486 */
2487 if (!regs)
2488 trace.skip += 2;
2489
2490 /*
2491 * Since events can happen in NMIs there's no safe way to
2492 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2493 * or NMI comes in, it will just have to use the default
2494 * FTRACE_STACK_ENTRIES.
2495 */
2496 preempt_disable_notrace();
2497
2498 use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2499 /*
2500 * We don't need any atomic variables, just a barrier.
2501 * If an interrupt comes in, we don't care, because it would
2502 * have exited and put the counter back to what we want.
2503 * We just need a barrier to keep gcc from moving things
2504 * around.
2505 */
2506 barrier();
2507 if (use_stack == 1) {
2508 trace.entries = this_cpu_ptr(ftrace_stack.calls);
2509 trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
2510
2511 if (regs)
2512 save_stack_trace_regs(regs, &trace);
2513 else
2514 save_stack_trace(&trace);
2515
2516 if (trace.nr_entries > size)
2517 size = trace.nr_entries;
2518 } else
2519 /* From now on, use_stack is a boolean */
2520 use_stack = 0;
2521
2522 size *= sizeof(unsigned long);
2523
2524 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2525 sizeof(*entry) + size, flags, pc);
2526 if (!event)
2527 goto out;
2528 entry = ring_buffer_event_data(event);
2529
2530 memset(&entry->caller, 0, size);
2531
2532 if (use_stack)
2533 memcpy(&entry->caller, trace.entries,
2534 trace.nr_entries * sizeof(unsigned long));
2535 else {
2536 trace.max_entries = FTRACE_STACK_ENTRIES;
2537 trace.entries = entry->caller;
2538 if (regs)
2539 save_stack_trace_regs(regs, &trace);
2540 else
2541 save_stack_trace(&trace);
2542 }
2543
2544 entry->size = trace.nr_entries;
2545
2546 if (!call_filter_check_discard(call, entry, buffer, event))
2547 __buffer_unlock_commit(buffer, event);
2548
2549 out:
2550 /* Again, don't let gcc optimize things here */
2551 barrier();
2552 __this_cpu_dec(ftrace_stack_reserve);
2553 preempt_enable_notrace();
2554
2555 }
2556
2557 static inline void ftrace_trace_stack(struct trace_array *tr,
2558 struct ring_buffer *buffer,
2559 unsigned long flags,
2560 int skip, int pc, struct pt_regs *regs)
2561 {
2562 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2563 return;
2564
2565 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2566 }
2567
2568 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2569 int pc)
2570 {
2571 __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2572 }
2573
2574 /**
2575 * trace_dump_stack - record a stack back trace in the trace buffer
2576 * @skip: Number of functions to skip (helper handlers)
2577 */
2578 void trace_dump_stack(int skip)
2579 {
2580 unsigned long flags;
2581
2582 if (tracing_disabled || tracing_selftest_running)
2583 return;
2584
2585 local_save_flags(flags);
2586
2587 /*
2588 * Skip 3 more, which seems to get us to the caller of
2589 * this function.
2590 */
2591 skip += 3;
2592 __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2593 flags, skip, preempt_count(), NULL);
2594 }
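
/*
 * Illustrative sketch, not part of the original file: recording the
 * current call chain from a (hypothetical) debug hook. A skip of 0
 * makes the recorded stack start at the caller of trace_dump_stack().
 */
static void __maybe_unused example_debug_hook(void)
{
	trace_dump_stack(0);
}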
2595
2596 static DEFINE_PER_CPU(int, user_stack_count);
2597
2598 void
2599 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2600 {
2601 struct trace_event_call *call = &event_user_stack;
2602 struct ring_buffer_event *event;
2603 struct userstack_entry *entry;
2604 struct stack_trace trace;
2605
2606 if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2607 return;
2608
2609 /*
2610 * NMIs can not handle page faults, even with fixups.
2611 * Saving the user stack can (and often does) fault.
2612 */
2613 if (unlikely(in_nmi()))
2614 return;
2615
2616 /*
2617 * prevent recursion, since the user stack tracing may
2618 * trigger other kernel events.
2619 */
2620 preempt_disable();
2621 if (__this_cpu_read(user_stack_count))
2622 goto out;
2623
2624 __this_cpu_inc(user_stack_count);
2625
2626 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2627 sizeof(*entry), flags, pc);
2628 if (!event)
2629 goto out_drop_count;
2630 entry = ring_buffer_event_data(event);
2631
2632 entry->tgid = current->tgid;
2633 memset(&entry->caller, 0, sizeof(entry->caller));
2634
2635 trace.nr_entries = 0;
2636 trace.max_entries = FTRACE_STACK_ENTRIES;
2637 trace.skip = 0;
2638 trace.entries = entry->caller;
2639
2640 save_stack_trace_user(&trace);
2641 if (!call_filter_check_discard(call, entry, buffer, event))
2642 __buffer_unlock_commit(buffer, event);
2643
2644 out_drop_count:
2645 __this_cpu_dec(user_stack_count);
2646 out:
2647 preempt_enable();
2648 }
2649
2650 #ifdef UNUSED
2651 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2652 {
2653 ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
2654 }
2655 #endif /* UNUSED */
2656
2657 #endif /* CONFIG_STACKTRACE */
2658
2659 /* created for use with alloc_percpu */
2660 struct trace_buffer_struct {
2661 int nesting;
2662 char buffer[4][TRACE_BUF_SIZE];
2663 };
2664
2665 static struct trace_buffer_struct *trace_percpu_buffer;
2666
2667 /*
2668 * This allows for lockless recording. If we're nested too deeply, then
2669 * this returns NULL.
2670 */
2671 static char *get_trace_buf(void)
2672 {
2673 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2674
2675 if (!buffer || buffer->nesting >= 4)
2676 return NULL;
2677
2678 return &buffer->buffer[buffer->nesting++][0];
2679 }
2680
2681 static void put_trace_buf(void)
2682 {
2683 this_cpu_dec(trace_percpu_buffer->nesting);
2684 }
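
/*
 * Illustrative sketch, not part of the original file: the usage
 * pattern get_trace_buf()/put_trace_buf() expect. Preemption must stay
 * disabled for the whole window so the per-cpu nesting count stays
 * consistent; the formatting done with the buffer is hypothetical.
 */
static int __maybe_unused example_use_trace_buf(const char *msg)
{
	char *tbuffer;
	int len = 0;

	preempt_disable_notrace();

	tbuffer = get_trace_buf();
	if (!tbuffer)
		goto out;

	len = scnprintf(tbuffer, TRACE_BUF_SIZE, "%s", msg);
	put_trace_buf();

out:
	preempt_enable_notrace();
	return len;
}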
2685
2686 static int alloc_percpu_trace_buffer(void)
2687 {
2688 struct trace_buffer_struct *buffers;
2689
2690 buffers = alloc_percpu(struct trace_buffer_struct);
2691 if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2692 return -ENOMEM;
2693
2694 trace_percpu_buffer = buffers;
2695 return 0;
2696 }
2697
2698 static int buffers_allocated;
2699
2700 void trace_printk_init_buffers(void)
2701 {
2702 if (buffers_allocated)
2703 return;
2704
2705 if (alloc_percpu_trace_buffer())
2706 return;
2707
2708 /* trace_printk() is for debug use only. Don't use it in production. */
2709
2710 pr_warn("\n");
2711 pr_warn("**********************************************************\n");
2712 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2713 pr_warn("** **\n");
2714 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
2715 pr_warn("** **\n");
2716 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
2717 pr_warn("** unsafe for production use. **\n");
2718 pr_warn("** **\n");
2719 pr_warn("** If you see this message and you are not debugging **\n");
2720 pr_warn("** the kernel, report this immediately to your vendor! **\n");
2721 pr_warn("** **\n");
2722 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2723 pr_warn("**********************************************************\n");
2724
2725 /* Expand the buffers to set size */
2726 tracing_update_buffers();
2727
2728 buffers_allocated = 1;
2729
2730 /*
2731 * trace_printk_init_buffers() can be called by modules.
2732 * If that happens, then we need to start cmdline recording
2733 * directly here. If the global_trace.buffer is already
2734 * allocated here, then this was called by module code.
2735 */
2736 if (global_trace.trace_buffer.buffer)
2737 tracing_start_cmdline_record();
2738 }
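
/*
 * Illustrative sketch, not part of the original file: the kind of
 * debug-only trace_printk() call the banner above warns about. The
 * function and message are hypothetical; building in call sites like
 * this is what leads to trace_printk_init_buffers() allocating the
 * extra per-cpu buffers and printing the notice.
 */
static void __maybe_unused example_debug_latency(int cpu, u64 delta_ns)
{
	trace_printk("cpu %d latency %llu ns\n", cpu, delta_ns);
}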
2739
2740 void trace_printk_start_comm(void)
2741 {
2742 /* Start tracing comms if trace printk is set */
2743 if (!buffers_allocated)
2744 return;
2745 tracing_start_cmdline_record();
2746 }
2747
2748 static void trace_printk_start_stop_comm(int enabled)
2749 {
2750 if (!buffers_allocated)
2751 return;
2752
2753 if (enabled)
2754 tracing_start_cmdline_record();
2755 else
2756 tracing_stop_cmdline_record();
2757 }
2758
2759 /**
2760 * trace_vbprintk - write a binary msg to the tracing buffer
2761 * @ip: caller address; @fmt: printf format string; @args: arguments for @fmt
2762 */
2763 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2764 {
2765 struct trace_event_call *call = &event_bprint;
2766 struct ring_buffer_event *event;
2767 struct ring_buffer *buffer;
2768 struct trace_array *tr = &global_trace;
2769 struct bprint_entry *entry;
2770 unsigned long flags;
2771 char *tbuffer;
2772 int len = 0, size, pc;
2773
2774 if (unlikely(tracing_selftest_running || tracing_disabled))
2775 return 0;
2776
2777 /* Don't pollute graph traces with trace_vprintk internals */
2778 pause_graph_tracing();
2779
2780 pc = preempt_count();
2781 preempt_disable_notrace();
2782
2783 tbuffer = get_trace_buf();
2784 if (!tbuffer) {
2785 len = 0;
2786 goto out_nobuffer;
2787 }
2788
2789 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2790
2791 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2792 goto out;
2793
2794 local_save_flags(flags);
2795 size = sizeof(*entry) + sizeof(u32) * len;
2796 buffer = tr->trace_buffer.buffer;
2797 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2798 flags, pc);
2799 if (!event)
2800 goto out;
2801 entry = ring_buffer_event_data(event);
2802 entry->ip = ip;
2803 entry->fmt = fmt;
2804
2805 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2806 if (!call_filter_check_discard(call, entry, buffer, event)) {
2807 __buffer_unlock_commit(buffer, event);
2808 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2809 }
2810
2811 out:
2812 put_trace_buf();
2813
2814 out_nobuffer:
2815 preempt_enable_notrace();
2816 unpause_graph_tracing();
2817
2818 return len;
2819 }
2820 EXPORT_SYMBOL_GPL(trace_vbprintk);
2821
2822 static int
2823 __trace_array_vprintk(struct ring_buffer *buffer,
2824 unsigned long ip, const char *fmt, va_list args)
2825 {
2826 struct trace_event_call *call = &event_print;
2827 struct ring_buffer_event *event;
2828 int len = 0, size, pc;
2829 struct print_entry *entry;
2830 unsigned long flags;
2831 char *tbuffer;
2832
2833 if (tracing_disabled || tracing_selftest_running)
2834 return 0;
2835
2836 /* Don't pollute graph traces with trace_vprintk internals */
2837 pause_graph_tracing();
2838
2839 pc = preempt_count();
2840 preempt_disable_notrace();
2841
2843 tbuffer = get_trace_buf();
2844 if (!tbuffer) {
2845 len = 0;
2846 goto out_nobuffer;
2847 }
2848
2849 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2850
2851 local_save_flags(flags);
2852 size = sizeof(*entry) + len + 1;
2853 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2854 flags, pc);
2855 if (!event)
2856 goto out;
2857 entry = ring_buffer_event_data(event);
2858 entry->ip = ip;
2859
2860 memcpy(&entry->buf, tbuffer, len + 1);
2861 if (!call_filter_check_discard(call, entry, buffer, event)) {
2862 __buffer_unlock_commit(buffer, event);
2863 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2864 }
2865
2866 out:
2867 put_trace_buf();
2868
2869 out_nobuffer:
2870 preempt_enable_notrace();
2871 unpause_graph_tracing();
2872
2873 return len;
2874 }
2875
2876 int trace_array_vprintk(struct trace_array *tr,
2877 unsigned long ip, const char *fmt, va_list args)
2878 {
2879 return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2880 }
2881
2882 int trace_array_printk(struct trace_array *tr,
2883 unsigned long ip, const char *fmt, ...)
2884 {
2885 int ret;
2886 va_list ap;
2887
2888 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2889 return 0;
2890
2891 va_start(ap, fmt);
2892 ret = trace_array_vprintk(tr, ip, fmt, ap);
2893 va_end(ap);
2894 return ret;
2895 }
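
/*
 * Illustrative sketch, not part of the original file: writing into a
 * specific trace instance instead of the global buffer. The caller and
 * message are hypothetical; _THIS_IP_ records this call site as the
 * event's instruction pointer.
 */
static void __maybe_unused example_instance_log(struct trace_array *tr,
						int value)
{
	trace_array_printk(tr, _THIS_IP_, "value is now %d\n", value);
}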
2896
2897 int trace_array_printk_buf(struct ring_buffer *buffer,
2898 unsigned long ip, const char *fmt, ...)
2899 {
2900 int ret;
2901 va_list ap;
2902
2903 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2904 return 0;
2905
2906 va_start(ap, fmt);
2907 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2908 va_end(ap);
2909 return ret;
2910 }
2911
2912 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2913 {
2914 return trace_array_vprintk(&global_trace, ip, fmt, args);
2915 }
2916 EXPORT_SYMBOL_GPL(trace_vprintk);
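
/*
 * Illustrative sketch, not part of the original file: a varargs front
 * end layered on trace_vprintk(), similar in shape to the non-binary
 * trace_printk() path. The wrapper name is hypothetical.
 */
static __printf(2, 3) int __maybe_unused
example_trace_log(unsigned long ip, const char *fmt, ...)
{
	va_list ap;
	int ret;

	va_start(ap, fmt);
	ret = trace_vprintk(ip, fmt, ap);
	va_end(ap);

	return ret;
}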
2917
2918 static void trace_iterator_increment(struct trace_iterator *iter)
2919 {
2920 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2921
2922 iter->idx++;
2923 if (buf_iter)
2924 ring_buffer_read(buf_iter, NULL);
2925 }
2926
2927 static struct trace_entry *
2928 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2929 unsigned long *lost_events)
2930 {
2931 struct ring_buffer_event *event;
2932 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2933
2934 if (buf_iter)
2935 event = ring_buffer_iter_peek(buf_iter, ts);
2936 else
2937 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2938 lost_events);
2939
2940 if (event) {
2941 iter->ent_size = ring_buffer_event_length(event);
2942 return ring_buffer_event_data(event);
2943 }
2944 iter->ent_size = 0;
2945 return NULL;
2946 }
2947
2948 static struct trace_entry *
2949 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2950 unsigned long *missing_events, u64 *ent_ts)
2951 {
2952 struct ring_buffer *buffer = iter->trace_buffer->buffer;
2953 struct trace_entry *ent, *next = NULL;
2954 unsigned long lost_events = 0, next_lost = 0;
2955 int cpu_file = iter->cpu_file;
2956 u64 next_ts = 0, ts;
2957 int next_cpu = -1;
2958 int next_size = 0;
2959 int cpu;
2960
2961 /*
2962 * If we are in a per_cpu trace file, don't bother iterating over
2963 * all cpus; just peek at that cpu directly.
2964 */
2965 if (cpu_file > RING_BUFFER_ALL_CPUS) {
2966 if (ring_buffer_empty_cpu(buffer, cpu_file))
2967 return NULL;
2968 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2969 if (ent_cpu)
2970 *ent_cpu = cpu_file;
2971
2972 return ent;
2973 }
2974
2975 for_each_tracing_cpu(cpu) {
2976
2977 if (ring_buffer_empty_cpu(buffer, cpu))
2978 continue;
2979
2980 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2981
2982 /*
2983 * Pick the entry with the smallest timestamp:
2984 */
2985 if (ent && (!next || ts < next_ts)) {
2986 next = ent;
2987 next_cpu = cpu;
2988 next_ts = ts;
2989 next_lost = lost_events;
2990 next_size = iter->ent_size;
2991 }
2992 }
2993
2994 iter->ent_size = next_size;
2995
2996 if (ent_cpu)
2997 *ent_cpu = next_cpu;
2998
2999 if (ent_ts)
3000 *ent_ts = next_ts;
3001
3002 if (missing_events)
3003 *missing_events = next_lost;
3004
3005 return next;
3006 }
3007
3008 /* Find the next real entry, without updating the iterator itself */
3009 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3010 int *ent_cpu, u64 *ent_ts)
3011 {
3012 return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3013 }
3014
3015 /* Find the next real entry, and increment the iterator to the next entry */
3016 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3017 {
3018 iter->ent = __find_next_entry(iter, &iter->cpu,
3019 &iter->lost_events, &iter->ts);
3020
3021 if (iter->ent)
3022 trace_iterator_increment(iter);
3023
3024 return iter->ent ? iter : NULL;
3025 }
3026
3027 static void trace_consume(struct trace_iterator *iter)
3028 {
3029 ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3030 &iter->lost_events);
3031 }
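
/*
 * Illustrative sketch, not part of the original file: draining a fully
 * initialized iterator with the helpers above, roughly what the pipe
 * readers and ftrace_dump() do. Locking and output of iter->seq are
 * the caller's responsibility and are omitted here.
 */
static void __maybe_unused example_drain_iterator(struct trace_iterator *iter)
{
	while (trace_find_next_entry_inc(iter)) {
		/* Render the current entry into iter->seq ... */
		print_trace_line(iter);
		/* ... and advance past it in the ring buffer. */
		trace_consume(iter);
	}
}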
3032
3033 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3034 {
3035 struct trace_iterator *iter = m->private;
3036 int i = (int)*pos;
3037 void *ent;
3038
3039 WARN_ON_ONCE(iter->leftover);
3040
3041 (*pos)++;
3042
3043 /* can't go backwards */
3044 if (iter->idx > i)
3045 return NULL;
3046
3047 if (iter->idx < 0)
3048 ent = trace_find_next_entry_inc(iter);
3049 else
3050 ent = iter;
3051
3052 while (ent && iter->idx < i)
3053 ent = trace_find_next_entry_inc(iter);
3054
3055 iter->pos = *pos;
3056
3057 return ent;
3058 }
3059
3060 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3061 {
3062 struct ring_buffer_event *event;
3063 struct ring_buffer_iter *buf_iter;
3064 unsigned long entries = 0;
3065 u64 ts;
3066
3067 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3068
3069 buf_iter = trace_buffer_iter(iter, cpu);
3070 if (!buf_iter)
3071 return;
3072
3073 ring_buffer_iter_reset(buf_iter);
3074
3075 /*
3076 * With the max latency tracers, a reset may never have
3077 * taken place on a cpu. This is evident when an entry's
3078 * timestamp is before the start of the buffer.
3079 */
3080 while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3081 if (ts >= iter->trace_buffer->time_start)
3082 break;
3083 entries++;
3084 ring_buffer_read(buf_iter, NULL);
3085 }
3086
3087 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3088 }
3089
3090 /*
3091 * The current tracer is copied to avoid global locking
3092 * all around.
3093 */
3094 static void *s_start(struct seq_file *m, loff_t *pos)
3095 {
3096 struct trace_iterator *iter = m->private;
3097 struct trace_array *tr = iter->tr;
3098 int cpu_file = iter->cpu_file;
3099 void *p = NULL;
3100 loff_t l = 0;
3101 int cpu;
3102
3103 /*
3104 * copy the tracer to avoid using a global lock all around.
3105 * iter->trace is a copy of current_trace, the pointer to the
3106 * name may be used instead of a strcmp(), as iter->trace->name
3107 * will point to the same string as current_trace->name.
3108 */
3109 mutex_lock(&trace_types_lock);
3110 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3111 *iter->trace = *tr->current_trace;
3112 mutex_unlock(&trace_types_lock);
3113
3114 #ifdef CONFIG_TRACER_MAX_TRACE
3115 if (iter->snapshot && iter->trace->use_max_tr)
3116 return ERR_PTR(-EBUSY);
3117 #endif
3118
3119 if (!iter->snapshot)
3120 atomic_inc(&trace_record_cmdline_disabled);
3121
3122 if (*pos != iter->pos) {
3123 iter->ent = NULL;
3124 iter->cpu = 0;
3125 iter->idx = -1;
3126
3127 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3128 for_each_tracing_cpu(cpu)
3129 tracing_iter_reset(iter, cpu);
3130 } else
3131 tracing_iter_reset(iter, cpu_file);
3132
3133 iter->leftover = 0;
3134 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3135 ;
3136
3137 } else {
3138 /*
3139 * If we overflowed the seq_file before, then we want
3140 * to just reuse the trace_seq buffer again.
3141 */
3142 if (iter->leftover)
3143 p = iter;
3144 else {
3145 l = *pos - 1;
3146 p = s_next(m, p, &l);
3147 }
3148 }
3149
3150 trace_event_read_lock();
3151 trace_access_lock(cpu_file);
3152 return p;
3153 }
3154
3155 static void s_stop(struct seq_file *m, void *p)
3156 {
3157 struct trace_iterator *iter = m->private;
3158
3159 #ifdef CONFIG_TRACER_MAX_TRACE
3160 if (iter->snapshot && iter->trace->use_max_tr)
3161 return;
3162 #endif
3163
3164 if (!iter->snapshot)
3165 atomic_dec(&trace_record_cmdline_disabled);
3166
3167 trace_access_unlock(iter->cpu_file);
3168 trace_event_read_unlock();
3169 }
3170
3171 static void
3172 get_total_entries(struct trace_buffer *buf,
3173 unsigned long *total, unsigned long *entries)
3174 {
3175 unsigned long count;
3176 int cpu;
3177
3178 *total = 0;
3179 *entries = 0;
3180
3181 for_each_tracing_cpu(cpu) {
3182 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3183 /*
3184 * If this buffer has skipped entries, then we hold all
3185 * entries for the trace and we need to ignore the
3186 * ones before the time stamp.
3187 */
3188 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3189 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3190 /* total is the same as the entries */
3191 *total += count;
3192 } else
3193 *total += count +
3194 ring_buffer_overrun_cpu(buf->buffer, cpu);
3195 *entries += count;
3196 }
3197 }
3198
3199 static void print_lat_help_header(struct seq_file *m)
3200 {
3201 seq_puts(m, "# _------=> CPU# \n"
3202 "# / _-----=> irqs-off \n"
3203 "# | / _----=> need-resched \n"
3204 "# || / _---=> hardirq/softirq \n"
3205 "# ||| / _--=> preempt-depth \n"
3206 "# |||| / delay \n"
3207 "# cmd pid ||||| time | caller \n"
3208 "# \\ / ||||| \\ | / \n");
3209 }
3210
3211 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3212 {
3213 unsigned long total;
3214 unsigned long entries;
3215
3216 get_total_entries(buf, &total, &entries);
3217 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
3218 entries, total, num_online_cpus());
3219 seq_puts(m, "#\n");
3220 }
3221
3222 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
3223 {
3224 print_event_info(buf, m);
3225 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"
3226 "# | | | | |\n");
3227 }
3228
3229 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
3230 {
3231 print_event_info(buf, m);
3232 seq_puts(m, "# _-----=> irqs-off\n"
3233 "# / _----=> need-resched\n"
3234 "# | / _---=> hardirq/softirq\n"
3235 "# || / _--=> preempt-depth\n"
3236 "# ||| / delay\n"
3237 "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"
3238 "# | | | |||| | |\n");
3239 }
3240
3241 void
3242 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3243 {
3244 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3245 struct trace_buffer *buf = iter->trace_buffer;
3246 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3247 struct tracer *type = iter->trace;
3248 unsigned long entries;
3249 unsigned long total;
3250 const char *name = type->name;
3253
3254 get_total_entries(buf, &total, &entries);
3255
3256 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3257 name, UTS_RELEASE);
3258 seq_puts(m, "# -----------------------------------"
3259 "---------------------------------\n");
3260 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3261 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3262 nsecs_to_usecs(data->saved_latency),
3263 entries,
3264 total,
3265 buf->cpu,
3266 #if defined(CONFIG_PREEMPT_NONE)
3267 "server",
3268 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3269 "desktop",
3270 #elif defined(CONFIG_PREEMPT)
3271 "preempt",
3272 #else
3273 "unknown",
3274 #endif
3275 /* These are reserved for later use */
3276 0, 0, 0, 0);
3277 #ifdef CONFIG_SMP
3278 seq_printf(m, " #P:%d)\n", num_online_cpus());
3279 #else
3280 seq_puts(m, ")\n");
3281 #endif
3282 seq_puts(m, "# -----------------\n");
3283 seq_printf(m, "# | task: %.16s-%d "
3284 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3285 data->comm, data->pid,
3286 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3287 data->policy, data->rt_priority);
3288 seq_puts(m, "# -----------------\n");
3289
3290 if (data->critical_start) {
3291 seq_puts(m, "# => started at: ");
3292 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3293 trace_print_seq(m, &iter->seq);
3294 seq_puts(m, "\n# => ended at: ");
3295 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3296 trace_print_seq(m, &iter->seq);
3297 seq_puts(m, "\n#\n");
3298 }
3299
3300 seq_puts(m, "#\n");
3301 }
3302
3303 static void test_cpu_buff_start(struct trace_iterator *iter)
3304 {
3305 struct trace_seq *s = &iter->seq;
3306 struct trace_array *tr = iter->tr;
3307
3308 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3309 return;
3310
3311 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3312 return;
3313
3314 if (cpumask_available(iter->started) &&
3315 cpumask_test_cpu(iter->cpu, iter->started))
3316 return;
3317
3318 if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3319 return;
3320
3321 if (cpumask_available(iter->started))
3322 cpumask_set_cpu(iter->cpu, iter->started);
3323
3324 /* Don't print started cpu buffer for the first entry of the trace */
3325 if (iter->idx > 1)
3326 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3327 iter->cpu);
3328 }
3329
3330 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3331 {
3332 struct trace_array *tr = iter->tr;
3333 struct trace_seq *s = &iter->seq;
3334 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3335 struct trace_entry *entry;
3336 struct trace_event *event;
3337
3338 entry = iter->ent;
3339
3340 test_cpu_buff_start(iter);
3341
3342 event = ftrace_find_event(entry->type);
3343
3344 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3345 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3346 trace_print_lat_context(iter);
3347 else
3348 trace_print_context(iter);
3349 }
3350
3351 if (trace_seq_has_overflowed(s))
3352 return TRACE_TYPE_PARTIAL_LINE;
3353
3354 if (event)
3355 return event->funcs->trace(iter, sym_flags, event);
3356
3357 trace_seq_printf(s, "Unknown type %d\n", entry->type);
3358
3359 return trace_handle_return(s);
3360 }
3361
3362 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3363 {
3364 struct trace_array *tr = iter->tr;
3365 struct trace_seq *s = &iter->seq;
3366 struct trace_entry *entry;
3367 struct trace_event *event;
3368
3369 entry = iter->ent;
3370
3371 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3372 trace_seq_printf(s, "%d %d %llu ",
3373 entry->pid, iter->cpu, iter->ts);
3374
3375 if (trace_seq_has_overflowed(s))
3376 return TRACE_TYPE_PARTIAL_LINE;
3377
3378 event = ftrace_find_event(entry->type);
3379 if (event)
3380 return event->funcs->raw(iter, 0, event);
3381
3382 trace_seq_printf(s, "%d ?\n", entry->type);
3383
3384 return trace_handle_return(s);
3385 }
3386
3387 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3388 {
3389 struct trace_array *tr = iter->tr;
3390 struct trace_seq *s = &iter->seq;
3391 unsigned char newline = '\n';
3392 struct trace_entry *entry;
3393 struct trace_event *event;
3394
3395 entry = iter->ent;
3396
3397 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3398 SEQ_PUT_HEX_FIELD(s, entry->pid);
3399 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3400 SEQ_PUT_HEX_FIELD(s, iter->ts);
3401 if (trace_seq_has_overflowed(s))
3402 return TRACE_TYPE_PARTIAL_LINE;
3403 }
3404
3405 event = ftrace_find_event(entry->type);
3406 if (event) {
3407 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3408 if (ret != TRACE_TYPE_HANDLED)
3409 return ret;
3410 }
3411
3412 SEQ_PUT_FIELD(s, newline);
3413
3414 return trace_handle_return(s);
3415 }
3416
3417 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3418 {
3419 struct trace_array *tr = iter->tr;
3420 struct trace_seq *s = &iter->seq;
3421 struct trace_entry *entry;
3422 struct trace_event *event;
3423
3424 entry = iter->ent;
3425
3426 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3427 SEQ_PUT_FIELD(s, entry->pid);
3428 SEQ_PUT_FIELD(s, iter->cpu);
3429 SEQ_PUT_FIELD(s, iter->ts);
3430 if (trace_seq_has_overflowed(s))
3431 return TRACE_TYPE_PARTIAL_LINE;
3432 }
3433
3434 event = ftrace_find_event(entry->type);
3435 return event ? event->funcs->binary(iter, 0, event) :
3436 TRACE_TYPE_HANDLED;
3437 }
3438
3439 int trace_empty(struct trace_iterator *iter)
3440 {
3441 struct ring_buffer_iter *buf_iter;
3442 int cpu;
3443
3444 /* If we are looking at one CPU buffer, only check that one */
3445 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3446 cpu = iter->cpu_file;
3447 buf_iter = trace_buffer_iter(iter, cpu);
3448 if (buf_iter) {
3449 if (!ring_buffer_iter_empty(buf_iter))
3450 return 0;
3451 } else {
3452 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3453 return 0;
3454 }
3455 return 1;
3456 }
3457
3458 for_each_tracing_cpu(cpu) {
3459 buf_iter = trace_buffer_iter(iter, cpu);
3460 if (buf_iter) {
3461 if (!ring_buffer_iter_empty(buf_iter))
3462 return 0;
3463 } else {
3464 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3465 return 0;
3466 }
3467 }
3468
3469 return 1;
3470 }
3471
3472 /* Called with trace_event_read_lock() held. */
3473 enum print_line_t print_trace_line(struct trace_iterator *iter)
3474 {
3475 struct trace_array *tr = iter->tr;
3476 unsigned long trace_flags = tr->trace_flags;
3477 enum print_line_t ret;
3478
3479 if (iter->lost_events) {
3480 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3481 iter->cpu, iter->lost_events);
3482 if (trace_seq_has_overflowed(&iter->seq))
3483 return TRACE_TYPE_PARTIAL_LINE;
3484 }
3485
3486 if (iter->trace && iter->trace->print_line) {
3487 ret = iter->trace->print_line(iter);
3488 if (ret != TRACE_TYPE_UNHANDLED)
3489 return ret;
3490 }
3491
3492 if (iter->ent->type == TRACE_BPUTS &&
3493 trace_flags & TRACE_ITER_PRINTK &&
3494 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3495 return trace_print_bputs_msg_only(iter);
3496
3497 if (iter->ent->type == TRACE_BPRINT &&
3498 trace_flags & TRACE_ITER_PRINTK &&
3499 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3500 return trace_print_bprintk_msg_only(iter);
3501
3502 if (iter->ent->type == TRACE_PRINT &&
3503 trace_flags & TRACE_ITER_PRINTK &&
3504 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3505 return trace_print_printk_msg_only(iter);
3506
3507 if (trace_flags & TRACE_ITER_BIN)
3508 return print_bin_fmt(iter);
3509
3510 if (trace_flags & TRACE_ITER_HEX)
3511 return print_hex_fmt(iter);
3512
3513 if (trace_flags & TRACE_ITER_RAW)
3514 return print_raw_fmt(iter);
3515
3516 return print_trace_fmt(iter);
3517 }
3518
3519 void trace_latency_header(struct seq_file *m)
3520 {
3521 struct trace_iterator *iter = m->private;
3522 struct trace_array *tr = iter->tr;
3523
3524 /* print nothing if the buffers are empty */
3525 if (trace_empty(iter))
3526 return;
3527
3528 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3529 print_trace_header(m, iter);
3530
3531 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3532 print_lat_help_header(m);
3533 }
3534
3535 void trace_default_header(struct seq_file *m)
3536 {
3537 struct trace_iterator *iter = m->private;
3538 struct trace_array *tr = iter->tr;
3539 unsigned long trace_flags = tr->trace_flags;
3540
3541 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3542 return;
3543
3544 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3545 /* print nothing if the buffers are empty */
3546 if (trace_empty(iter))
3547 return;
3548 print_trace_header(m, iter);
3549 if (!(trace_flags & TRACE_ITER_VERBOSE))
3550 print_lat_help_header(m);
3551 } else {
3552 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3553 if (trace_flags & TRACE_ITER_IRQ_INFO)
3554 print_func_help_header_irq(iter->trace_buffer, m);
3555 else
3556 print_func_help_header(iter->trace_buffer, m);
3557 }
3558 }
3559 }
3560
3561 static void test_ftrace_alive(struct seq_file *m)
3562 {
3563 if (!ftrace_is_dead())
3564 return;
3565 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3566 "# MAY BE MISSING FUNCTION EVENTS\n");
3567 }
3568
3569 #ifdef CONFIG_TRACER_MAX_TRACE
3570 static void show_snapshot_main_help(struct seq_file *m)
3571 {
3572 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3573 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3574 "# Takes a snapshot of the main buffer.\n"
3575 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3576 "# (Doesn't have to be '2'; works with any number that\n"
3577 "# is not a '0' or '1')\n");
3578 }
3579
3580 static void show_snapshot_percpu_help(struct seq_file *m)
3581 {
3582 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3583 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3584 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3585 "# Takes a snapshot of the main buffer for this cpu.\n");
3586 #else
3587 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3588 "# Must use main snapshot file to allocate.\n");
3589 #endif
3590 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3591 "# (Doesn't have to be '2'; works with any number that\n"
3592 "# is not a '0' or '1')\n");
3593 }
3594
3595 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3596 {
3597 if (iter->tr->allocated_snapshot)
3598 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3599 else
3600 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3601
3602 seq_puts(m, "# Snapshot commands:\n");
3603 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3604 show_snapshot_main_help(m);
3605 else
3606 show_snapshot_percpu_help(m);
3607 }
3608 #else
3609 /* Should never be called */
3610 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3611 #endif
3612
3613 static int s_show(struct seq_file *m, void *v)
3614 {
3615 struct trace_iterator *iter = v;
3616 int ret;
3617
3618 if (iter->ent == NULL) {
3619 if (iter->tr) {
3620 seq_printf(m, "# tracer: %s\n", iter->trace->name);
3621 seq_puts(m, "#\n");
3622 test_ftrace_alive(m);
3623 }
3624 if (iter->snapshot && trace_empty(iter))
3625 print_snapshot_help(m, iter);
3626 else if (iter->trace && iter->trace->print_header)
3627 iter->trace->print_header(m);
3628 else
3629 trace_default_header(m);
3630
3631 } else if (iter->leftover) {
3632 /*
3633 * If we filled the seq_file buffer earlier, we
3634 * want to just show it now.
3635 */
3636 ret = trace_print_seq(m, &iter->seq);
3637
3638 /* ret should this time be zero, but you never know */
3639 iter->leftover = ret;
3640
3641 } else {
3642 print_trace_line(iter);
3643 ret = trace_print_seq(m, &iter->seq);
3644 /*
3645 * If we overflow the seq_file buffer, then it will
3646 * ask us for this data again at start up.
3647 * Use that instead.
3648 * ret is 0 if seq_file write succeeded.
3649 * -1 otherwise.
3650 */
3651 iter->leftover = ret;
3652 }
3653
3654 return 0;
3655 }
3656
3657 /*
3658 * Should be used after trace_array_get(); trace_types_lock
3659 * ensures that i_cdev was already initialized.
3660 */
3661 static inline int tracing_get_cpu(struct inode *inode)
3662 {
3663 if (inode->i_cdev) /* See trace_create_cpu_file() */
3664 return (long)inode->i_cdev - 1;
3665 return RING_BUFFER_ALL_CPUS;
3666 }
3667
3668 static const struct seq_operations tracer_seq_ops = {
3669 .start = s_start,
3670 .next = s_next,
3671 .stop = s_stop,
3672 .show = s_show,
3673 };
3674
3675 static struct trace_iterator *
3676 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3677 {
3678 struct trace_array *tr = inode->i_private;
3679 struct trace_iterator *iter;
3680 int cpu;
3681
3682 if (tracing_disabled)
3683 return ERR_PTR(-ENODEV);
3684
3685 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3686 if (!iter)
3687 return ERR_PTR(-ENOMEM);
3688
3689 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3690 GFP_KERNEL);
3691 if (!iter->buffer_iter)
3692 goto release;
3693
3694 /*
3695 * We make a copy of the current tracer to avoid concurrent
3696 * changes on it while we are reading.
3697 */
3698 mutex_lock(&trace_types_lock);
3699 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3700 if (!iter->trace)
3701 goto fail;
3702
3703 *iter->trace = *tr->current_trace;
3704
3705 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3706 goto fail;
3707
3708 iter->tr = tr;
3709
3710 #ifdef CONFIG_TRACER_MAX_TRACE
3711 /* Currently only the top directory has a snapshot */
3712 if (tr->current_trace->print_max || snapshot)
3713 iter->trace_buffer = &tr->max_buffer;
3714 else
3715 #endif
3716 iter->trace_buffer = &tr->trace_buffer;
3717 iter->snapshot = snapshot;
3718 iter->pos = -1;
3719 iter->cpu_file = tracing_get_cpu(inode);
3720 mutex_init(&iter->mutex);
3721
3722 /* Notify the tracer early; before we stop tracing. */
3723 if (iter->trace && iter->trace->open)
3724 iter->trace->open(iter);
3725
3726 /* Annotate start of buffers if we had overruns */
3727 if (ring_buffer_overruns(iter->trace_buffer->buffer))
3728 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3729
3730 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3731 if (trace_clocks[tr->clock_id].in_ns)
3732 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3733
3734 /* stop the trace while dumping if we are not opening "snapshot" */
3735 if (!iter->snapshot)
3736 tracing_stop_tr(tr);
3737
3738 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3739 for_each_tracing_cpu(cpu) {
3740 iter->buffer_iter[cpu] =
3741 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3742 }
3743 ring_buffer_read_prepare_sync();
3744 for_each_tracing_cpu(cpu) {
3745 ring_buffer_read_start(iter->buffer_iter[cpu]);
3746 tracing_iter_reset(iter, cpu);
3747 }
3748 } else {
3749 cpu = iter->cpu_file;
3750 iter->buffer_iter[cpu] =
3751 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3752 ring_buffer_read_prepare_sync();
3753 ring_buffer_read_start(iter->buffer_iter[cpu]);
3754 tracing_iter_reset(iter, cpu);
3755 }
3756
3757 mutex_unlock(&trace_types_lock);
3758
3759 return iter;
3760
3761 fail:
3762 mutex_unlock(&trace_types_lock);
3763 kfree(iter->trace);
3764 kfree(iter->buffer_iter);
3765 release:
3766 seq_release_private(inode, file);
3767 return ERR_PTR(-ENOMEM);
3768 }
3769
3770 int tracing_open_generic(struct inode *inode, struct file *filp)
3771 {
3772 if (tracing_disabled)
3773 return -ENODEV;
3774
3775 filp->private_data = inode->i_private;
3776 return 0;
3777 }
3778
3779 bool tracing_is_disabled(void)
3780 {
3781 return tracing_disabled ? true : false;
3782 }
3783
3784 /*
3785 * Open and update trace_array ref count.
3786 * Must have the current trace_array passed to it.
3787 */
3788 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3789 {
3790 struct trace_array *tr = inode->i_private;
3791
3792 if (tracing_disabled)
3793 return -ENODEV;
3794
3795 if (trace_array_get(tr) < 0)
3796 return -ENODEV;
3797
3798 filp->private_data = inode->i_private;
3799
3800 return 0;
3801 }
3802
3803 static int tracing_release(struct inode *inode, struct file *file)
3804 {
3805 struct trace_array *tr = inode->i_private;
3806 struct seq_file *m = file->private_data;
3807 struct trace_iterator *iter;
3808 int cpu;
3809
3810 if (!(file->f_mode & FMODE_READ)) {
3811 trace_array_put(tr);
3812 return 0;
3813 }
3814
3815 /* Writes do not use seq_file */
3816 iter = m->private;
3817 mutex_lock(&trace_types_lock);
3818
3819 for_each_tracing_cpu(cpu) {
3820 if (iter->buffer_iter[cpu])
3821 ring_buffer_read_finish(iter->buffer_iter[cpu]);
3822 }
3823
3824 if (iter->trace && iter->trace->close)
3825 iter->trace->close(iter);
3826
3827 if (!iter->snapshot)
3828 /* reenable tracing if it was previously enabled */
3829 tracing_start_tr(tr);
3830
3831 __trace_array_put(tr);
3832
3833 mutex_unlock(&trace_types_lock);
3834
3835 mutex_destroy(&iter->mutex);
3836 free_cpumask_var(iter->started);
3837 kfree(iter->trace);
3838 kfree(iter->buffer_iter);
3839 seq_release_private(inode, file);
3840
3841 return 0;
3842 }
3843
3844 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3845 {
3846 struct trace_array *tr = inode->i_private;
3847
3848 trace_array_put(tr);
3849 return 0;
3850 }
3851
3852 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3853 {
3854 struct trace_array *tr = inode->i_private;
3855
3856 trace_array_put(tr);
3857
3858 return single_release(inode, file);
3859 }
3860
3861 static int tracing_open(struct inode *inode, struct file *file)
3862 {
3863 struct trace_array *tr = inode->i_private;
3864 struct trace_iterator *iter;
3865 int ret = 0;
3866
3867 if (trace_array_get(tr) < 0)
3868 return -ENODEV;
3869
3870 /* If this file was open for write, then erase contents */
3871 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3872 int cpu = tracing_get_cpu(inode);
3873
3874 if (cpu == RING_BUFFER_ALL_CPUS)
3875 tracing_reset_online_cpus(&tr->trace_buffer);
3876 else
3877 tracing_reset(&tr->trace_buffer, cpu);
3878 }
3879
3880 if (file->f_mode & FMODE_READ) {
3881 iter = __tracing_open(inode, file, false);
3882 if (IS_ERR(iter))
3883 ret = PTR_ERR(iter);
3884 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3885 iter->iter_flags |= TRACE_FILE_LAT_FMT;
3886 }
3887
3888 if (ret < 0)
3889 trace_array_put(tr);
3890
3891 return ret;
3892 }
3893
3894 /*
3895 * Some tracers are not suitable for instance buffers.
3896 * A tracer is always available for the global array (toplevel)
3897 * or if it explicitly states that it is.
3898 */
3899 static bool
3900 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3901 {
3902 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3903 }
3904
3905 /* Find the next tracer that this trace array may use */
3906 static struct tracer *
3907 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3908 {
3909 while (t && !trace_ok_for_array(t, tr))
3910 t = t->next;
3911
3912 return t;
3913 }
3914
3915 static void *
3916 t_next(struct seq_file *m, void *v, loff_t *pos)
3917 {
3918 struct trace_array *tr = m->private;
3919 struct tracer *t = v;
3920
3921 (*pos)++;
3922
3923 if (t)
3924 t = get_tracer_for_array(tr, t->next);
3925
3926 return t;
3927 }
3928
3929 static void *t_start(struct seq_file *m, loff_t *pos)
3930 {
3931 struct trace_array *tr = m->private;
3932 struct tracer *t;
3933 loff_t l = 0;
3934
3935 mutex_lock(&trace_types_lock);
3936
3937 t = get_tracer_for_array(tr, trace_types);
3938 for (; t && l < *pos; t = t_next(m, t, &l))
3939 ;
3940
3941 return t;
3942 }
3943
3944 static void t_stop(struct seq_file *m, void *p)
3945 {
3946 mutex_unlock(&trace_types_lock);
3947 }
3948
3949 static int t_show(struct seq_file *m, void *v)
3950 {
3951 struct tracer *t = v;
3952
3953 if (!t)
3954 return 0;
3955
3956 seq_puts(m, t->name);
3957 if (t->next)
3958 seq_putc(m, ' ');
3959 else
3960 seq_putc(m, '\n');
3961
3962 return 0;
3963 }
3964
3965 static const struct seq_operations show_traces_seq_ops = {
3966 .start = t_start,
3967 .next = t_next,
3968 .stop = t_stop,
3969 .show = t_show,
3970 };
3971
3972 static int show_traces_open(struct inode *inode, struct file *file)
3973 {
3974 struct trace_array *tr = inode->i_private;
3975 struct seq_file *m;
3976 int ret;
3977
3978 if (tracing_disabled)
3979 return -ENODEV;
3980
3981 ret = seq_open(file, &show_traces_seq_ops);
3982 if (ret)
3983 return ret;
3984
3985 m = file->private_data;
3986 m->private = tr;
3987
3988 return 0;
3989 }
3990
3991 static ssize_t
3992 tracing_write_stub(struct file *filp, const char __user *ubuf,
3993 size_t count, loff_t *ppos)
3994 {
3995 return count;
3996 }
3997
3998 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3999 {
4000 int ret;
4001
4002 if (file->f_mode & FMODE_READ)
4003 ret = seq_lseek(file, offset, whence);
4004 else
4005 file->f_pos = ret = 0;
4006
4007 return ret;
4008 }
4009
4010 static const struct file_operations tracing_fops = {
4011 .open = tracing_open,
4012 .read = seq_read,
4013 .write = tracing_write_stub,
4014 .llseek = tracing_lseek,
4015 .release = tracing_release,
4016 };
4017
4018 static const struct file_operations show_traces_fops = {
4019 .open = show_traces_open,
4020 .read = seq_read,
4021 .release = seq_release,
4022 .llseek = seq_lseek,
4023 };
4024
4025 /*
4026 * The tracer itself will not take this lock, but still we want
4027 * to provide a consistent cpumask to user-space:
4028 */
4029 static DEFINE_MUTEX(tracing_cpumask_update_lock);
4030
4031 /*
4032 * Temporary storage for the character representation of the
4033 * CPU bitmask (and one more byte for the newline):
4034 */
4035 static char mask_str[NR_CPUS + 1];
4036
4037 static ssize_t
4038 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4039 size_t count, loff_t *ppos)
4040 {
4041 struct trace_array *tr = file_inode(filp)->i_private;
4042 int len;
4043
4044 mutex_lock(&tracing_cpumask_update_lock);
4045
4046 len = snprintf(mask_str, count, "%*pb\n",
4047 cpumask_pr_args(tr->tracing_cpumask));
4048 if (len >= count) {
4049 count = -EINVAL;
4050 goto out_err;
4051 }
4052 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
4053
4054 out_err:
4055 mutex_unlock(&tracing_cpumask_update_lock);
4056
4057 return count;
4058 }
4059
4060 static ssize_t
4061 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4062 size_t count, loff_t *ppos)
4063 {
4064 struct trace_array *tr = file_inode(filp)->i_private;
4065 cpumask_var_t tracing_cpumask_new;
4066 int err, cpu;
4067
4068 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4069 return -ENOMEM;
4070
4071 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4072 if (err)
4073 goto err_unlock;
4074
4075 mutex_lock(&tracing_cpumask_update_lock);
4076
4077 local_irq_disable();
4078 arch_spin_lock(&tr->max_lock);
4079 for_each_tracing_cpu(cpu) {
4080 /*
4081 * Increase/decrease the disabled counter if we are
4082 * about to flip a bit in the cpumask:
4083 */
4084 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4085 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4086 atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4087 ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4088 }
4089 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4090 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4091 atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4092 ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4093 }
4094 }
4095 arch_spin_unlock(&tr->max_lock);
4096 local_irq_enable();
4097
4098 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4099
4100 mutex_unlock(&tracing_cpumask_update_lock);
4101 free_cpumask_var(tracing_cpumask_new);
4102
4103 return count;
4104
4105 err_unlock:
4106 free_cpumask_var(tracing_cpumask_new);
4107
4108 return err;
4109 }
4110
4111 static const struct file_operations tracing_cpumask_fops = {
4112 .open = tracing_open_generic_tr,
4113 .read = tracing_cpumask_read,
4114 .write = tracing_cpumask_write,
4115 .release = tracing_release_generic_tr,
4116 .llseek = generic_file_llseek,
4117 };
4118
4119 static int tracing_trace_options_show(struct seq_file *m, void *v)
4120 {
4121 struct tracer_opt *trace_opts;
4122 struct trace_array *tr = m->private;
4123 u32 tracer_flags;
4124 int i;
4125
4126 mutex_lock(&trace_types_lock);
4127 tracer_flags = tr->current_trace->flags->val;
4128 trace_opts = tr->current_trace->flags->opts;
4129
4130 for (i = 0; trace_options[i]; i++) {
4131 if (tr->trace_flags & (1 << i))
4132 seq_printf(m, "%s\n", trace_options[i]);
4133 else
4134 seq_printf(m, "no%s\n", trace_options[i]);
4135 }
4136
4137 for (i = 0; trace_opts[i].name; i++) {
4138 if (tracer_flags & trace_opts[i].bit)
4139 seq_printf(m, "%s\n", trace_opts[i].name);
4140 else
4141 seq_printf(m, "no%s\n", trace_opts[i].name);
4142 }
4143 mutex_unlock(&trace_types_lock);
4144
4145 return 0;
4146 }
4147
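/*
 * Apply a single tracer-specific option: give the tracer's set_flag()
 * callback a chance to reject the change, then update tracer_flags->val.
 */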
4148 static int __set_tracer_option(struct trace_array *tr,
4149 struct tracer_flags *tracer_flags,
4150 struct tracer_opt *opts, int neg)
4151 {
4152 struct tracer *trace = tracer_flags->trace;
4153 int ret;
4154
4155 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4156 if (ret)
4157 return ret;
4158
4159 if (neg)
4160 tracer_flags->val &= ~opts->bit;
4161 else
4162 tracer_flags->val |= opts->bit;
4163 return 0;
4164 }
4165
4166 /* Try to assign a tracer specific option */
4167 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4168 {
4169 struct tracer *trace = tr->current_trace;
4170 struct tracer_flags *tracer_flags = trace->flags;
4171 struct tracer_opt *opts = NULL;
4172 int i;
4173
4174 for (i = 0; tracer_flags->opts[i].name; i++) {
4175 opts = &tracer_flags->opts[i];
4176
4177 if (strcmp(cmp, opts->name) == 0)
4178 return __set_tracer_option(tr, trace->flags, opts, neg);
4179 }
4180
4181 return -EINVAL;
4182 }
4183
4184 /* Some tracers require overwrite to stay enabled */
4185 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4186 {
4187 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4188 return -1;
4189
4190 return 0;
4191 }
4192
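/*
 * Set or clear one TRACE_ITER_* flag on @tr and propagate its side effects
 * (cmdline recording, fork following, ring buffer overwrite, trace_printk).
 * Returns -EINVAL if the current tracer rejects the change.
 */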
4193 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4194 {
4195 /* do nothing if flag is already set */
4196 if (!!(tr->trace_flags & mask) == !!enabled)
4197 return 0;
4198
4199 /* Give the tracer a chance to approve the change */
4200 if (tr->current_trace->flag_changed)
4201 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4202 return -EINVAL;
4203
4204 if (enabled)
4205 tr->trace_flags |= mask;
4206 else
4207 tr->trace_flags &= ~mask;
4208
4209 if (mask == TRACE_ITER_RECORD_CMD)
4210 trace_event_enable_cmd_record(enabled);
4211
4212 if (mask == TRACE_ITER_EVENT_FORK)
4213 trace_event_follow_fork(tr, enabled);
4214
4215 if (mask == TRACE_ITER_FUNC_FORK)
4216 ftrace_pid_follow_fork(tr, enabled);
4217
4218 if (mask == TRACE_ITER_OVERWRITE) {
4219 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4220 #ifdef CONFIG_TRACER_MAX_TRACE
4221 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4222 #endif
4223 }
4224
4225 if (mask == TRACE_ITER_PRINTK) {
4226 trace_printk_start_stop_comm(enabled);
4227 trace_printk_control(enabled);
4228 }
4229
4230 return 0;
4231 }
4232
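/*
 * Parse one option token ("option" or "nooption") as used by both the
 * trace_options file and the trace_options= boot parameter: core flags are
 * tried first, then tracer-specific options.
 */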
4233 static int trace_set_options(struct trace_array *tr, char *option)
4234 {
4235 char *cmp;
4236 int neg = 0;
4237 int ret = -ENODEV;
4238 int i;
4239 size_t orig_len = strlen(option);
4240
4241 cmp = strstrip(option);
4242
4243 if (strncmp(cmp, "no", 2) == 0) {
4244 neg = 1;
4245 cmp += 2;
4246 }
4247
4248 mutex_lock(&trace_types_lock);
4249
4250 for (i = 0; trace_options[i]; i++) {
4251 if (strcmp(cmp, trace_options[i]) == 0) {
4252 ret = set_tracer_flag(tr, 1 << i, !neg);
4253 break;
4254 }
4255 }
4256
4257 /* If no option could be set, test the specific tracer options */
4258 if (!trace_options[i])
4259 ret = set_tracer_option(tr, cmp, neg);
4260
4261 mutex_unlock(&trace_types_lock);
4262
4263 /*
4264 * If the first trailing whitespace is replaced with '\0' by strstrip,
4265 * turn it back into a space.
4266 */
4267 if (orig_len > strlen(option))
4268 option[strlen(option)] = ' ';
4269
4270 return ret;
4271 }
4272
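/*
 * Apply the comma-separated option list saved from the boot command line
 * to the global trace array, restoring each comma so the buffer can be
 * parsed again.
 */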
4273 static void __init apply_trace_boot_options(void)
4274 {
4275 char *buf = trace_boot_options_buf;
4276 char *option;
4277
4278 while (true) {
4279 option = strsep(&buf, ",");
4280
4281 if (!option)
4282 break;
4283
4284 if (*option)
4285 trace_set_options(&global_trace, option);
4286
4287 /* Put back the comma to allow this to be called again */
4288 if (buf)
4289 *(buf - 1) = ',';
4290 }
4291 }
4292
4293 static ssize_t
4294 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4295 size_t cnt, loff_t *ppos)
4296 {
4297 struct seq_file *m = filp->private_data;
4298 struct trace_array *tr = m->private;
4299 char buf[64];
4300 int ret;
4301
4302 if (cnt >= sizeof(buf))
4303 return -EINVAL;
4304
4305 if (copy_from_user(buf, ubuf, cnt))
4306 return -EFAULT;
4307
4308 buf[cnt] = 0;
4309
4310 ret = trace_set_options(tr, buf);
4311 if (ret < 0)
4312 return ret;
4313
4314 *ppos += cnt;
4315
4316 return cnt;
4317 }
4318
4319 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4320 {
4321 struct trace_array *tr = inode->i_private;
4322 int ret;
4323
4324 if (tracing_disabled)
4325 return -ENODEV;
4326
4327 if (trace_array_get(tr) < 0)
4328 return -ENODEV;
4329
4330 ret = single_open(file, tracing_trace_options_show, inode->i_private);
4331 if (ret < 0)
4332 trace_array_put(tr);
4333
4334 return ret;
4335 }
4336
4337 static const struct file_operations tracing_iter_fops = {
4338 .open = tracing_trace_options_open,
4339 .read = seq_read,
4340 .llseek = seq_lseek,
4341 .release = tracing_single_release_tr,
4342 .write = tracing_trace_options_write,
4343 };
4344
4345 static const char readme_msg[] =
4346 "tracing mini-HOWTO:\n\n"
4347 "# echo 0 > tracing_on : quick way to disable tracing\n"
4348 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4349 " Important files:\n"
4350 " trace\t\t\t- The static contents of the buffer\n"
4351 "\t\t\t To clear the buffer write into this file: echo > trace\n"
4352 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4353 " current_tracer\t- function and latency tracers\n"
4354 " available_tracers\t- list of configured tracers for current_tracer\n"
4355 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
4356 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
4357 " trace_clock\t\t-change the clock used to order events\n"
4358 " local: Per cpu clock but may not be synced across CPUs\n"
4359 " global: Synced across CPUs but slows tracing down.\n"
4360 " counter: Not a clock, but just an increment\n"
4361 " uptime: Jiffy counter from time of boot\n"
4362 " perf: Same clock that perf events use\n"
4363 #ifdef CONFIG_X86_64
4364 " x86-tsc: TSC cycle counter\n"
4365 #endif
4366 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4367 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4368 " tracing_cpumask\t- Limit which CPUs to trace\n"
4369 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4370 "\t\t\t Remove sub-buffer with rmdir\n"
4371 " trace_options\t\t- Set format or modify how tracing happens\n"
4372 "\t\t\t Disable an option by adding a suffix 'no' to the\n"
4373 "\t\t\t option name\n"
4374 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4375 #ifdef CONFIG_DYNAMIC_FTRACE
4376 "\n available_filter_functions - list of functions that can be filtered on\n"
4377 " set_ftrace_filter\t- echo function name in here to only trace these\n"
4378 "\t\t\t functions\n"
4379 "\t accepts: func_full_name or glob-matching-pattern\n"
4380 "\t modules: Can select a group via module\n"
4381 "\t Format: :mod:<module-name>\n"
4382 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
4383 "\t triggers: a command to perform when function is hit\n"
4384 "\t Format: <function>:<trigger>[:count]\n"
4385 "\t trigger: traceon, traceoff\n"
4386 "\t\t enable_event:<system>:<event>\n"
4387 "\t\t disable_event:<system>:<event>\n"
4388 #ifdef CONFIG_STACKTRACE
4389 "\t\t stacktrace\n"
4390 #endif
4391 #ifdef CONFIG_TRACER_SNAPSHOT
4392 "\t\t snapshot\n"
4393 #endif
4394 "\t\t dump\n"
4395 "\t\t cpudump\n"
4396 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
4397 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
4398 "\t The first one will disable tracing every time do_fault is hit\n"
4399 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
4400 "\t The first time do trap is hit and it disables tracing, the\n"
4401 "\t counter will decrement to 2. If tracing is already disabled,\n"
4402 "\t the counter will not decrement. It only decrements when the\n"
4403 "\t trigger did work\n"
4404 "\t To remove trigger without count:\n"
4405 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
4406 "\t To remove trigger with a count:\n"
4407 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4408 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
4409 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4410 "\t modules: Can select a group via module command :mod:\n"
4411 "\t Does not accept triggers\n"
4412 #endif /* CONFIG_DYNAMIC_FTRACE */
4413 #ifdef CONFIG_FUNCTION_TRACER
4414 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4415 "\t\t (function)\n"
4416 #endif
4417 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4418 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4419 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4420 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4421 #endif
4422 #ifdef CONFIG_TRACER_SNAPSHOT
4423 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
4424 "\t\t\t snapshot buffer. Read the contents for more\n"
4425 "\t\t\t information\n"
4426 #endif
4427 #ifdef CONFIG_STACK_TRACER
4428 " stack_trace\t\t- Shows the max stack trace when active\n"
4429 " stack_max_size\t- Shows current max stack size that was traced\n"
4430 "\t\t\t Write into this file to reset the max size (trigger a\n"
4431 "\t\t\t new trace)\n"
4432 #ifdef CONFIG_DYNAMIC_FTRACE
4433 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4434 "\t\t\t traces\n"
4435 #endif
4436 #endif /* CONFIG_STACK_TRACER */
4437 #ifdef CONFIG_KPROBE_EVENTS
4438 " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4439 "\t\t\t Write into this file to define/undefine new trace events.\n"
4440 #endif
4441 #ifdef CONFIG_UPROBE_EVENTS
4442 " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4443 "\t\t\t Write into this file to define/undefine new trace events.\n"
4444 #endif
4445 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4446 "\t accepts: event-definitions (one definition per line)\n"
4447 "\t Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4448 "\t -:[<group>/]<event>\n"
4449 #ifdef CONFIG_KPROBE_EVENTS
4450 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4451 "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4452 #endif
4453 #ifdef CONFIG_UPROBE_EVENTS
4454 "\t place: <path>:<offset>\n"
4455 #endif
4456 "\t args: <name>=fetcharg[:type]\n"
4457 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4458 "\t $stack<index>, $stack, $retval, $comm\n"
4459 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4460 "\t b<bit-width>@<bit-offset>/<container-size>\n"
4461 #endif
4462 " events/\t\t- Directory containing all trace event subsystems:\n"
4463 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4464 " events/<system>/\t- Directory containing all trace events for <system>:\n"
4465 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4466 "\t\t\t events\n"
4467 " filter\t\t- If set, only events passing filter are traced\n"
4468 " events/<system>/<event>/\t- Directory containing control files for\n"
4469 "\t\t\t <event>:\n"
4470 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4471 " filter\t\t- If set, only events passing filter are traced\n"
4472 " trigger\t\t- If set, a command to perform when event is hit\n"
4473 "\t Format: <trigger>[:count][if <filter>]\n"
4474 "\t trigger: traceon, traceoff\n"
4475 "\t enable_event:<system>:<event>\n"
4476 "\t disable_event:<system>:<event>\n"
4477 #ifdef CONFIG_HIST_TRIGGERS
4478 "\t enable_hist:<system>:<event>\n"
4479 "\t disable_hist:<system>:<event>\n"
4480 #endif
4481 #ifdef CONFIG_STACKTRACE
4482 "\t\t stacktrace\n"
4483 #endif
4484 #ifdef CONFIG_TRACER_SNAPSHOT
4485 "\t\t snapshot\n"
4486 #endif
4487 #ifdef CONFIG_HIST_TRIGGERS
4488 "\t\t hist (see below)\n"
4489 #endif
4490 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
4491 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
4492 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4493 "\t events/block/block_unplug/trigger\n"
4494 "\t The first disables tracing every time block_unplug is hit.\n"
4495 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
4496 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
4497 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4498 "\t Like function triggers, the counter is only decremented if it\n"
4499 "\t enabled or disabled tracing.\n"
4500 "\t To remove a trigger without a count:\n"
4501 "\t echo '!<trigger> > <system>/<event>/trigger\n"
4502 "\t To remove a trigger with a count:\n"
4503 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
4504 "\t Filters can be ignored when removing a trigger.\n"
4505 #ifdef CONFIG_HIST_TRIGGERS
4506 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
4507 "\t Format: hist:keys=<field1[,field2,...]>\n"
4508 "\t [:values=<field1[,field2,...]>]\n"
4509 "\t [:sort=<field1[,field2,...]>]\n"
4510 "\t [:size=#entries]\n"
4511 "\t [:pause][:continue][:clear]\n"
4512 "\t [:name=histname1]\n"
4513 "\t [if <filter>]\n\n"
4514 "\t When a matching event is hit, an entry is added to a hash\n"
4515 "\t table using the key(s) and value(s) named, and the value of a\n"
4516 "\t sum called 'hitcount' is incremented. Keys and values\n"
4517 "\t correspond to fields in the event's format description. Keys\n"
4518 "\t can be any field, or the special string 'stacktrace'.\n"
4519 "\t Compound keys consisting of up to two fields can be specified\n"
4520 "\t by the 'keys' keyword. Values must correspond to numeric\n"
4521 "\t fields. Sort keys consisting of up to two fields can be\n"
4522 "\t specified using the 'sort' keyword. The sort direction can\n"
4523 "\t be modified by appending '.descending' or '.ascending' to a\n"
4524 "\t sort field. The 'size' parameter can be used to specify more\n"
4525 "\t or fewer than the default 2048 entries for the hashtable size.\n"
4526 "\t If a hist trigger is given a name using the 'name' parameter,\n"
4527 "\t its histogram data will be shared with other triggers of the\n"
4528 "\t same name, and trigger hits will update this common data.\n\n"
4529 "\t Reading the 'hist' file for the event will dump the hash\n"
4530 "\t table in its entirety to stdout. If there are multiple hist\n"
4531 "\t triggers attached to an event, there will be a table for each\n"
4532 "\t trigger in the output. The table displayed for a named\n"
4533 "\t trigger will be the same as any other instance having the\n"
4534 "\t same name. The default format used to display a given field\n"
4535 "\t can be modified by appending any of the following modifiers\n"
4536 "\t to the field name, as applicable:\n\n"
4537 "\t .hex display a number as a hex value\n"
4538 "\t .sym display an address as a symbol\n"
4539 "\t .sym-offset display an address as a symbol and offset\n"
4540 "\t .execname display a common_pid as a program name\n"
4541 "\t .syscall display a syscall id as a syscall name\n\n"
4542 "\t .log2 display log2 value rather than raw number\n\n"
4543 "\t The 'pause' parameter can be used to pause an existing hist\n"
4544 "\t trigger or to start a hist trigger but not log any events\n"
4545 "\t until told to do so. 'continue' can be used to start or\n"
4546 "\t restart a paused hist trigger.\n\n"
4547 "\t The 'clear' parameter will clear the contents of a running\n"
4548 "\t hist trigger and leave its current paused/active state\n"
4549 "\t unchanged.\n\n"
4550 "\t The enable_hist and disable_hist triggers can be used to\n"
4551 "\t have one event conditionally start and stop another event's\n"
4552 "\t already-attached hist trigger. The syntax is analagous to\n"
4553 "\t the enable_event and disable_event triggers.\n"
4554 #endif
4555 ;
4556
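/*
 * Example of the workflow described in readme_msg above (a sketch only;
 * assumes tracefs is mounted at /sys/kernel/debug/tracing and the function
 * tracer is configured in):
 *
 *   # cd /sys/kernel/debug/tracing
 *   # echo function > current_tracer
 *   # echo 1 > tracing_on
 *   # cat trace_pipe
 *   # echo 0 > tracing_on
 */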
4557 static ssize_t
4558 tracing_readme_read(struct file *filp, char __user *ubuf,
4559 size_t cnt, loff_t *ppos)
4560 {
4561 return simple_read_from_buffer(ubuf, cnt, ppos,
4562 readme_msg, strlen(readme_msg));
4563 }
4564
4565 static const struct file_operations tracing_readme_fops = {
4566 .open = tracing_open_generic,
4567 .read = tracing_readme_read,
4568 .llseek = generic_file_llseek,
4569 };
4570
4571 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4572 {
4573 unsigned int *ptr = v;
4574
4575 if (*pos || m->count)
4576 ptr++;
4577
4578 (*pos)++;
4579
4580 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4581 ptr++) {
4582 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4583 continue;
4584
4585 return ptr;
4586 }
4587
4588 return NULL;
4589 }
4590
4591 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4592 {
4593 void *v;
4594 loff_t l = 0;
4595
4596 preempt_disable();
4597 arch_spin_lock(&trace_cmdline_lock);
4598
4599 v = &savedcmd->map_cmdline_to_pid[0];
4600 while (l <= *pos) {
4601 v = saved_cmdlines_next(m, v, &l);
4602 if (!v)
4603 return NULL;
4604 }
4605
4606 return v;
4607 }
4608
4609 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4610 {
4611 arch_spin_unlock(&trace_cmdline_lock);
4612 preempt_enable();
4613 }
4614
4615 static int saved_cmdlines_show(struct seq_file *m, void *v)
4616 {
4617 char buf[TASK_COMM_LEN];
4618 unsigned int *pid = v;
4619
4620 __trace_find_cmdline(*pid, buf);
4621 seq_printf(m, "%d %s\n", *pid, buf);
4622 return 0;
4623 }
4624
4625 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4626 .start = saved_cmdlines_start,
4627 .next = saved_cmdlines_next,
4628 .stop = saved_cmdlines_stop,
4629 .show = saved_cmdlines_show,
4630 };
4631
4632 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4633 {
4634 if (tracing_disabled)
4635 return -ENODEV;
4636
4637 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4638 }
4639
4640 static const struct file_operations tracing_saved_cmdlines_fops = {
4641 .open = tracing_saved_cmdlines_open,
4642 .read = seq_read,
4643 .llseek = seq_lseek,
4644 .release = seq_release,
4645 };
4646
4647 static ssize_t
4648 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4649 size_t cnt, loff_t *ppos)
4650 {
4651 char buf[64];
4652 int r;
4653
4654 arch_spin_lock(&trace_cmdline_lock);
4655 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4656 arch_spin_unlock(&trace_cmdline_lock);
4657
4658 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4659 }
4660
4661 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4662 {
4663 kfree(s->saved_cmdlines);
4664 kfree(s->map_cmdline_to_pid);
4665 kfree(s);
4666 }
4667
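/*
 * Replace the saved_cmdlines buffer with one holding @val entries: allocate
 * the new buffer first, swap it in under trace_cmdline_lock, then free the
 * old one.
 */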
4668 static int tracing_resize_saved_cmdlines(unsigned int val)
4669 {
4670 struct saved_cmdlines_buffer *s, *savedcmd_temp;
4671
4672 s = kmalloc(sizeof(*s), GFP_KERNEL);
4673 if (!s)
4674 return -ENOMEM;
4675
4676 if (allocate_cmdlines_buffer(val, s) < 0) {
4677 kfree(s);
4678 return -ENOMEM;
4679 }
4680
4681 arch_spin_lock(&trace_cmdline_lock);
4682 savedcmd_temp = savedcmd;
4683 savedcmd = s;
4684 arch_spin_unlock(&trace_cmdline_lock);
4685 free_saved_cmdlines_buffer(savedcmd_temp);
4686
4687 return 0;
4688 }
4689
4690 static ssize_t
4691 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4692 size_t cnt, loff_t *ppos)
4693 {
4694 unsigned long val;
4695 int ret;
4696
4697 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4698 if (ret)
4699 return ret;
4700
4701 /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4702 if (!val || val > PID_MAX_DEFAULT)
4703 return -EINVAL;
4704
4705 ret = tracing_resize_saved_cmdlines((unsigned int)val);
4706 if (ret < 0)
4707 return ret;
4708
4709 *ppos += cnt;
4710
4711 return cnt;
4712 }
4713
4714 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4715 .open = tracing_open_generic,
4716 .read = tracing_saved_cmdlines_size_read,
4717 .write = tracing_saved_cmdlines_size_write,
4718 };
4719
4720 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
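/*
 * seq_file callbacks for the enum_map file. trace_enum_maps is a chain of
 * arrays of union trace_enum_map_item, each with a head item (module and
 * length) and a tail item (pointer to the next array); update_enum_map()
 * skips over those bookkeeping items while iterating.
 */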
4721 static union trace_enum_map_item *
4722 update_enum_map(union trace_enum_map_item *ptr)
4723 {
4724 if (!ptr->map.enum_string) {
4725 if (ptr->tail.next) {
4726 ptr = ptr->tail.next;
4727 /* Set ptr to the next real item (skip head) */
4728 ptr++;
4729 } else
4730 return NULL;
4731 }
4732 return ptr;
4733 }
4734
4735 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4736 {
4737 union trace_enum_map_item *ptr = v;
4738
4739 /*
4740 * Paranoid! If ptr points to end, we don't want to increment past it.
4741 * This really should never happen.
4742 */
4743 ptr = update_enum_map(ptr);
4744 if (WARN_ON_ONCE(!ptr))
4745 return NULL;
4746
4747 ptr++;
4748
4749 (*pos)++;
4750
4751 ptr = update_enum_map(ptr);
4752
4753 return ptr;
4754 }
4755
4756 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4757 {
4758 union trace_enum_map_item *v;
4759 loff_t l = 0;
4760
4761 mutex_lock(&trace_enum_mutex);
4762
4763 v = trace_enum_maps;
4764 if (v)
4765 v++;
4766
4767 while (v && l < *pos) {
4768 v = enum_map_next(m, v, &l);
4769 }
4770
4771 return v;
4772 }
4773
4774 static void enum_map_stop(struct seq_file *m, void *v)
4775 {
4776 mutex_unlock(&trace_enum_mutex);
4777 }
4778
4779 static int enum_map_show(struct seq_file *m, void *v)
4780 {
4781 union trace_enum_map_item *ptr = v;
4782
4783 seq_printf(m, "%s %ld (%s)\n",
4784 ptr->map.enum_string, ptr->map.enum_value,
4785 ptr->map.system);
4786
4787 return 0;
4788 }
4789
4790 static const struct seq_operations tracing_enum_map_seq_ops = {
4791 .start = enum_map_start,
4792 .next = enum_map_next,
4793 .stop = enum_map_stop,
4794 .show = enum_map_show,
4795 };
4796
4797 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4798 {
4799 if (tracing_disabled)
4800 return -ENODEV;
4801
4802 return seq_open(filp, &tracing_enum_map_seq_ops);
4803 }
4804
4805 static const struct file_operations tracing_enum_map_fops = {
4806 .open = tracing_enum_map_open,
4807 .read = seq_read,
4808 .llseek = seq_lseek,
4809 .release = seq_release,
4810 };
4811
4812 static inline union trace_enum_map_item *
4813 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4814 {
4815 /* Return tail of array given the head */
4816 return ptr + ptr->head.length + 1;
4817 }
4818
4819 static void
4820 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4821 int len)
4822 {
4823 struct trace_enum_map **stop;
4824 struct trace_enum_map **map;
4825 union trace_enum_map_item *map_array;
4826 union trace_enum_map_item *ptr;
4827
4828 stop = start + len;
4829
4830 /*
4831 * The trace_enum_maps contains the map plus a head and tail item,
4832 * where the head holds the module and length of array, and the
4833 * tail holds a pointer to the next list.
4834 */
4835 map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4836 if (!map_array) {
4837 pr_warn("Unable to allocate trace enum mapping\n");
4838 return;
4839 }
4840
4841 mutex_lock(&trace_enum_mutex);
4842
4843 if (!trace_enum_maps)
4844 trace_enum_maps = map_array;
4845 else {
4846 ptr = trace_enum_maps;
4847 for (;;) {
4848 ptr = trace_enum_jmp_to_tail(ptr);
4849 if (!ptr->tail.next)
4850 break;
4851 ptr = ptr->tail.next;
4853 }
4854 ptr->tail.next = map_array;
4855 }
4856 map_array->head.mod = mod;
4857 map_array->head.length = len;
4858 map_array++;
4859
4860 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4861 map_array->map = **map;
4862 map_array++;
4863 }
4864 memset(map_array, 0, sizeof(*map_array));
4865
4866 mutex_unlock(&trace_enum_mutex);
4867 }
4868
4869 static void trace_create_enum_file(struct dentry *d_tracer)
4870 {
4871 trace_create_file("enum_map", 0444, d_tracer,
4872 NULL, &tracing_enum_map_fops);
4873 }
4874
4875 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4876 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4877 static inline void trace_insert_enum_map_file(struct module *mod,
4878 struct trace_enum_map **start, int len) { }
4879 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4880
4881 static void trace_insert_enum_map(struct module *mod,
4882 struct trace_enum_map **start, int len)
4883 {
4884 struct trace_enum_map **map;
4885
4886 if (len <= 0)
4887 return;
4888
4889 map = start;
4890
4891 trace_event_enum_update(map, len);
4892
4893 trace_insert_enum_map_file(mod, start, len);
4894 }
4895
4896 static ssize_t
4897 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4898 size_t cnt, loff_t *ppos)
4899 {
4900 struct trace_array *tr = filp->private_data;
4901 char buf[MAX_TRACER_SIZE+2];
4902 int r;
4903
4904 mutex_lock(&trace_types_lock);
4905 r = sprintf(buf, "%s\n", tr->current_trace->name);
4906 mutex_unlock(&trace_types_lock);
4907
4908 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4909 }
4910
4911 int tracer_init(struct tracer *t, struct trace_array *tr)
4912 {
4913 tracing_reset_online_cpus(&tr->trace_buffer);
4914 return t->init(tr);
4915 }
4916
4917 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4918 {
4919 int cpu;
4920
4921 for_each_tracing_cpu(cpu)
4922 per_cpu_ptr(buf->data, cpu)->entries = val;
4923 }
4924
4925 #ifdef CONFIG_TRACER_MAX_TRACE
4926 /* resize @trace_buf's per-cpu entry counts to match @size_buf's entries */
4927 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4928 struct trace_buffer *size_buf, int cpu_id)
4929 {
4930 int cpu, ret = 0;
4931
4932 if (cpu_id == RING_BUFFER_ALL_CPUS) {
4933 for_each_tracing_cpu(cpu) {
4934 ret = ring_buffer_resize(trace_buf->buffer,
4935 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4936 if (ret < 0)
4937 break;
4938 per_cpu_ptr(trace_buf->data, cpu)->entries =
4939 per_cpu_ptr(size_buf->data, cpu)->entries;
4940 }
4941 } else {
4942 ret = ring_buffer_resize(trace_buf->buffer,
4943 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4944 if (ret == 0)
4945 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4946 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4947 }
4948
4949 return ret;
4950 }
4951 #endif /* CONFIG_TRACER_MAX_TRACE */
4952
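/*
 * Resize the trace buffer of @tr to @size bytes for @cpu (or for every CPU
 * when @cpu is RING_BUFFER_ALL_CPUS). If the current tracer of the global
 * trace array uses the max (snapshot) buffer, that buffer is resized to
 * match, falling back to the old size on failure.
 */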
4953 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4954 unsigned long size, int cpu)
4955 {
4956 int ret;
4957
4958 /*
4959 * If kernel or user changes the size of the ring buffer
4960 * we use the size that was given, and we can forget about
4961 * expanding it later.
4962 */
4963 ring_buffer_expanded = true;
4964
4965 /* May be called before buffers are initialized */
4966 if (!tr->trace_buffer.buffer)
4967 return 0;
4968
4969 ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4970 if (ret < 0)
4971 return ret;
4972
4973 #ifdef CONFIG_TRACER_MAX_TRACE
4974 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4975 !tr->current_trace->use_max_tr)
4976 goto out;
4977
4978 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4979 if (ret < 0) {
4980 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4981 &tr->trace_buffer, cpu);
4982 if (r < 0) {
4983 /*
4984 * AARGH! We are left with different
4985 * size max buffer!!!!
4986 * The max buffer is our "snapshot" buffer.
4987 * When a tracer needs a snapshot (one of the
4988 * latency tracers), it swaps the max buffer
4989 * with the saved snapshot. We succeeded in
4990 * updating the size of the main buffer, but failed to
4991 * update the size of the max buffer. But when we tried
4992 * to reset the main buffer to the original size, we
4993 * failed there too. This is very unlikely to
4994 * happen, but if it does, warn and kill all
4995 * tracing.
4996 */
4997 WARN_ON(1);
4998 tracing_disabled = 1;
4999 }
5000 return ret;
5001 }
5002
5003 if (cpu == RING_BUFFER_ALL_CPUS)
5004 set_buffer_entries(&tr->max_buffer, size);
5005 else
5006 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5007
5008 out:
5009 #endif /* CONFIG_TRACER_MAX_TRACE */
5010
5011 if (cpu == RING_BUFFER_ALL_CPUS)
5012 set_buffer_entries(&tr->trace_buffer, size);
5013 else
5014 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5015
5016 return ret;
5017 }
5018
5019 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5020 unsigned long size, int cpu_id)
5021 {
5022 int ret = size;
5023
5024 mutex_lock(&trace_types_lock);
5025
5026 if (cpu_id != RING_BUFFER_ALL_CPUS) {
5027 /* make sure this cpu is enabled in the mask */
5028 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5029 ret = -EINVAL;
5030 goto out;
5031 }
5032 }
5033
5034 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5035 if (ret < 0)
5036 ret = -ENOMEM;
5037
5038 out:
5039 mutex_unlock(&trace_types_lock);
5040
5041 return ret;
5042 }
5043
5044
5045 /**
5046 * tracing_update_buffers - used by tracing facility to expand ring buffers
5047 *
5048 * To save memory when tracing is never used on a system that has it
5049 * configured in, the ring buffers are set to a minimum size. Once
5050 * a user starts to use the tracing facility, they need to grow
5051 * to their default size.
5052 *
5053 * This function is to be called when a tracer is about to be used.
5054 */
5055 int tracing_update_buffers(void)
5056 {
5057 int ret = 0;
5058
5059 mutex_lock(&trace_types_lock);
5060 if (!ring_buffer_expanded)
5061 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5062 RING_BUFFER_ALL_CPUS);
5063 mutex_unlock(&trace_types_lock);
5064
5065 return ret;
5066 }
5067
5068 struct trace_option_dentry;
5069
5070 static void
5071 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5072
5073 /*
5074 * Used to clear out the tracer before deletion of an instance.
5075 * Must have trace_types_lock held.
5076 */
5077 static void tracing_set_nop(struct trace_array *tr)
5078 {
5079 if (tr->current_trace == &nop_trace)
5080 return;
5081
5082 tr->current_trace->enabled--;
5083
5084 if (tr->current_trace->reset)
5085 tr->current_trace->reset(tr);
5086
5087 tr->current_trace = &nop_trace;
5088 }
5089
5090 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5091 {
5092 /* Only enable if the directory has been created already. */
5093 if (!tr->dir)
5094 return;
5095
5096 create_trace_option_files(tr, t);
5097 }
5098
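/*
 * Switch @tr to the tracer named @buf: expand the ring buffer if needed,
 * shut down the old tracer, allocate or free the snapshot buffer as the new
 * tracer requires, then run its init() callback. Fails with -EBUSY while
 * trace_pipe readers hold a reference on the current tracer.
 */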
5099 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5100 {
5101 struct tracer *t;
5102 #ifdef CONFIG_TRACER_MAX_TRACE
5103 bool had_max_tr;
5104 #endif
5105 int ret = 0;
5106
5107 mutex_lock(&trace_types_lock);
5108
5109 if (!ring_buffer_expanded) {
5110 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5111 RING_BUFFER_ALL_CPUS);
5112 if (ret < 0)
5113 goto out;
5114 ret = 0;
5115 }
5116
5117 for (t = trace_types; t; t = t->next) {
5118 if (strcmp(t->name, buf) == 0)
5119 break;
5120 }
5121 if (!t) {
5122 ret = -EINVAL;
5123 goto out;
5124 }
5125 if (t == tr->current_trace)
5126 goto out;
5127
5128 /* Some tracers are only allowed for the top level buffer */
5129 if (!trace_ok_for_array(t, tr)) {
5130 ret = -EINVAL;
5131 goto out;
5132 }
5133
5134 /* If trace pipe files are being read, we can't change the tracer */
5135 if (tr->current_trace->ref) {
5136 ret = -EBUSY;
5137 goto out;
5138 }
5139
5140 trace_branch_disable();
5141
5142 tr->current_trace->enabled--;
5143
5144 if (tr->current_trace->reset)
5145 tr->current_trace->reset(tr);
5146
5147 /* Current trace needs to be nop_trace before synchronize_sched */
5148 tr->current_trace = &nop_trace;
5149
5150 #ifdef CONFIG_TRACER_MAX_TRACE
5151 had_max_tr = tr->allocated_snapshot;
5152
5153 if (had_max_tr && !t->use_max_tr) {
5154 /*
5155 * We need to make sure that the update_max_tr sees that
5156 * current_trace changed to nop_trace to keep it from
5157 * swapping the buffers after we resize it.
5158 * The update_max_tr is called with interrupts disabled
5159 * so a synchronize_sched() is sufficient.
5160 */
5161 synchronize_sched();
5162 free_snapshot(tr);
5163 }
5164 #endif
5165
5166 #ifdef CONFIG_TRACER_MAX_TRACE
5167 if (t->use_max_tr && !had_max_tr) {
5168 ret = alloc_snapshot(tr);
5169 if (ret < 0)
5170 goto out;
5171 }
5172 #endif
5173
5174 if (t->init) {
5175 ret = tracer_init(t, tr);
5176 if (ret)
5177 goto out;
5178 }
5179
5180 tr->current_trace = t;
5181 tr->current_trace->enabled++;
5182 trace_branch_enable(tr);
5183 out:
5184 mutex_unlock(&trace_types_lock);
5185
5186 return ret;
5187 }
5188
5189 static ssize_t
5190 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5191 size_t cnt, loff_t *ppos)
5192 {
5193 struct trace_array *tr = filp->private_data;
5194 char buf[MAX_TRACER_SIZE+1];
5195 int i;
5196 size_t ret;
5197 int err;
5198
5199 ret = cnt;
5200
5201 if (cnt > MAX_TRACER_SIZE)
5202 cnt = MAX_TRACER_SIZE;
5203
5204 if (copy_from_user(buf, ubuf, cnt))
5205 return -EFAULT;
5206
5207 buf[cnt] = 0;
5208
5209 /* strip ending whitespace. */
5210 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5211 buf[i] = 0;
5212
5213 err = tracing_set_tracer(tr, buf);
5214 if (err)
5215 return err;
5216
5217 *ppos += ret;
5218
5219 return ret;
5220 }
5221
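/*
 * Helpers for files that store a nanosecond value but present it in
 * microseconds (tracing_thresh and the max latency file): reads convert
 * ns to us, writes take us and store ns.
 */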
5222 static ssize_t
5223 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5224 size_t cnt, loff_t *ppos)
5225 {
5226 char buf[64];
5227 int r;
5228
5229 r = snprintf(buf, sizeof(buf), "%ld\n",
5230 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5231 if (r > sizeof(buf))
5232 r = sizeof(buf);
5233 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5234 }
5235
5236 static ssize_t
5237 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5238 size_t cnt, loff_t *ppos)
5239 {
5240 unsigned long val;
5241 int ret;
5242
5243 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5244 if (ret)
5245 return ret;
5246
5247 *ptr = val * 1000;
5248
5249 return cnt;
5250 }
5251
5252 static ssize_t
5253 tracing_thresh_read(struct file *filp, char __user *ubuf,
5254 size_t cnt, loff_t *ppos)
5255 {
5256 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5257 }
5258
5259 static ssize_t
5260 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5261 size_t cnt, loff_t *ppos)
5262 {
5263 struct trace_array *tr = filp->private_data;
5264 int ret;
5265
5266 mutex_lock(&trace_types_lock);
5267 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5268 if (ret < 0)
5269 goto out;
5270
5271 if (tr->current_trace->update_thresh) {
5272 ret = tr->current_trace->update_thresh(tr);
5273 if (ret < 0)
5274 goto out;
5275 }
5276
5277 ret = cnt;
5278 out:
5279 mutex_unlock(&trace_types_lock);
5280
5281 return ret;
5282 }
5283
5284 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5285
5286 static ssize_t
5287 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5288 size_t cnt, loff_t *ppos)
5289 {
5290 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5291 }
5292
5293 static ssize_t
5294 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5295 size_t cnt, loff_t *ppos)
5296 {
5297 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5298 }
5299
5300 #endif
5301
5302 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5303 {
5304 struct trace_array *tr = inode->i_private;
5305 struct trace_iterator *iter;
5306 int ret = 0;
5307
5308 if (tracing_disabled)
5309 return -ENODEV;
5310
5311 if (trace_array_get(tr) < 0)
5312 return -ENODEV;
5313
5314 mutex_lock(&trace_types_lock);
5315
5316 /* create a buffer to store the information to pass to userspace */
5317 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5318 if (!iter) {
5319 ret = -ENOMEM;
5320 __trace_array_put(tr);
5321 goto out;
5322 }
5323
5324 trace_seq_init(&iter->seq);
5325 iter->trace = tr->current_trace;
5326
5327 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5328 ret = -ENOMEM;
5329 goto fail;
5330 }
5331
5332 /* trace pipe does not show start of buffer */
5333 cpumask_setall(iter->started);
5334
5335 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5336 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5337
5338 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5339 if (trace_clocks[tr->clock_id].in_ns)
5340 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5341
5342 iter->tr = tr;
5343 iter->trace_buffer = &tr->trace_buffer;
5344 iter->cpu_file = tracing_get_cpu(inode);
5345 mutex_init(&iter->mutex);
5346 filp->private_data = iter;
5347
5348 if (iter->trace->pipe_open)
5349 iter->trace->pipe_open(iter);
5350
5351 nonseekable_open(inode, filp);
5352
5353 tr->current_trace->ref++;
5354 out:
5355 mutex_unlock(&trace_types_lock);
5356 return ret;
5357
5358 fail:
5359 kfree(iter->trace);
5360 kfree(iter);
5361 __trace_array_put(tr);
5362 mutex_unlock(&trace_types_lock);
5363 return ret;
5364 }
5365
5366 static int tracing_release_pipe(struct inode *inode, struct file *file)
5367 {
5368 struct trace_iterator *iter = file->private_data;
5369 struct trace_array *tr = inode->i_private;
5370
5371 mutex_lock(&trace_types_lock);
5372
5373 tr->current_trace->ref--;
5374
5375 if (iter->trace->pipe_close)
5376 iter->trace->pipe_close(iter);
5377
5378 mutex_unlock(&trace_types_lock);
5379
5380 free_cpumask_var(iter->started);
5381 mutex_destroy(&iter->mutex);
5382 kfree(iter);
5383
5384 trace_array_put(tr);
5385
5386 return 0;
5387 }
5388
5389 static unsigned int
5390 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5391 {
5392 struct trace_array *tr = iter->tr;
5393
5394 /* Iterators are static, they should be filled or empty */
5395 if (trace_buffer_iter(iter, iter->cpu_file))
5396 return POLLIN | POLLRDNORM;
5397
5398 if (tr->trace_flags & TRACE_ITER_BLOCK)
5399 /*
5400 * Always select as readable when in blocking mode
5401 */
5402 return POLLIN | POLLRDNORM;
5403 else
5404 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5405 filp, poll_table);
5406 }
5407
5408 static unsigned int
5409 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5410 {
5411 struct trace_iterator *iter = filp->private_data;
5412
5413 return trace_poll(iter, filp, poll_table);
5414 }
5415
5416 /* Must be called with iter->mutex held. */
5417 static int tracing_wait_pipe(struct file *filp)
5418 {
5419 struct trace_iterator *iter = filp->private_data;
5420 int ret;
5421
5422 while (trace_empty(iter)) {
5423
5424 if ((filp->f_flags & O_NONBLOCK)) {
5425 return -EAGAIN;
5426 }
5427
5428 /*
5429 * We block until we read something and tracing is disabled.
5430 * We still block if tracing is disabled, but we have never
5431 * read anything. This allows a user to cat this file, and
5432 * then enable tracing. But after we have read something,
5433 * we give an EOF when tracing is again disabled.
5434 *
5435 * iter->pos will be 0 if we haven't read anything.
5436 */
5437 if (!tracing_is_on() && iter->pos)
5438 break;
5439
5440 mutex_unlock(&iter->mutex);
5441
5442 ret = wait_on_pipe(iter, false);
5443
5444 mutex_lock(&iter->mutex);
5445
5446 if (ret)
5447 return ret;
5448 }
5449
5450 return 1;
5451 }
5452
5453 /*
5454 * Consumer reader.
5455 */
5456 static ssize_t
5457 tracing_read_pipe(struct file *filp, char __user *ubuf,
5458 size_t cnt, loff_t *ppos)
5459 {
5460 struct trace_iterator *iter = filp->private_data;
5461 ssize_t sret;
5462
5463 /*
5464 * Avoid more than one consumer on a single file descriptor.
5465 * This is just a matter of trace coherency; the ring buffer itself
5466 * is protected.
5467 */
5468 mutex_lock(&iter->mutex);
5469
5470 /* return any leftover data */
5471 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5472 if (sret != -EBUSY)
5473 goto out;
5474
5475 trace_seq_init(&iter->seq);
5476
5477 if (iter->trace->read) {
5478 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5479 if (sret)
5480 goto out;
5481 }
5482
5483 waitagain:
5484 sret = tracing_wait_pipe(filp);
5485 if (sret <= 0)
5486 goto out;
5487
5488 /* stop when tracing is finished */
5489 if (trace_empty(iter)) {
5490 sret = 0;
5491 goto out;
5492 }
5493
5494 if (cnt >= PAGE_SIZE)
5495 cnt = PAGE_SIZE - 1;
5496
5497 /* reset all but tr, trace, and overruns */
5498 memset(&iter->seq, 0,
5499 sizeof(struct trace_iterator) -
5500 offsetof(struct trace_iterator, seq));
5501 cpumask_clear(iter->started);
5502 iter->pos = -1;
5503
5504 trace_event_read_lock();
5505 trace_access_lock(iter->cpu_file);
5506 while (trace_find_next_entry_inc(iter) != NULL) {
5507 enum print_line_t ret;
5508 int save_len = iter->seq.seq.len;
5509
5510 ret = print_trace_line(iter);
5511 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5512 /* don't print partial lines */
5513 iter->seq.seq.len = save_len;
5514 break;
5515 }
5516 if (ret != TRACE_TYPE_NO_CONSUME)
5517 trace_consume(iter);
5518
5519 if (trace_seq_used(&iter->seq) >= cnt)
5520 break;
5521
5522 /*
5523 * Setting the full flag means we reached the trace_seq buffer
5524 * size and we should have left via the partial output condition above.
5525 * One of the trace_seq_* functions is not used properly.
5526 */
5527 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5528 iter->ent->type);
5529 }
5530 trace_access_unlock(iter->cpu_file);
5531 trace_event_read_unlock();
5532
5533 /* Now copy what we have to the user */
5534 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5535 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5536 trace_seq_init(&iter->seq);
5537
5538 /*
5539 * If there was nothing to send to user, in spite of consuming trace
5540 * entries, go back to wait for more entries.
5541 */
5542 if (sret == -EBUSY)
5543 goto waitagain;
5544
5545 out:
5546 mutex_unlock(&iter->mutex);
5547
5548 return sret;
5549 }
5550
5551 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5552 unsigned int idx)
5553 {
5554 __free_page(spd->pages[idx]);
5555 }
5556
5557 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5558 .can_merge = 0,
5559 .confirm = generic_pipe_buf_confirm,
5560 .release = generic_pipe_buf_release,
5561 .steal = generic_pipe_buf_steal,
5562 .get = generic_pipe_buf_get,
5563 };
5564
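/*
 * Fill iter->seq with formatted trace lines for one splice page: stop when
 * the page-sized seq buffer is full or @rem bytes have been produced, and
 * return how many of the requested bytes are still wanted.
 */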
5565 static size_t
5566 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5567 {
5568 size_t count;
5569 int save_len;
5570 int ret;
5571
5572 /* Seq buffer is page-sized, exactly what we need. */
5573 for (;;) {
5574 save_len = iter->seq.seq.len;
5575 ret = print_trace_line(iter);
5576
5577 if (trace_seq_has_overflowed(&iter->seq)) {
5578 iter->seq.seq.len = save_len;
5579 break;
5580 }
5581
5582 /*
5583 * This should not be hit, because it should only
5584 * be set if the iter->seq overflowed. But check it
5585 * anyway to be safe.
5586 */
5587 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5588 iter->seq.seq.len = save_len;
5589 break;
5590 }
5591
5592 count = trace_seq_used(&iter->seq) - save_len;
5593 if (rem < count) {
5594 rem = 0;
5595 iter->seq.seq.len = save_len;
5596 break;
5597 }
5598
5599 if (ret != TRACE_TYPE_NO_CONSUME)
5600 trace_consume(iter);
5601 rem -= count;
5602 if (!trace_find_next_entry_inc(iter)) {
5603 rem = 0;
5604 iter->ent = NULL;
5605 break;
5606 }
5607 }
5608
5609 return rem;
5610 }
5611
5612 static ssize_t tracing_splice_read_pipe(struct file *filp,
5613 loff_t *ppos,
5614 struct pipe_inode_info *pipe,
5615 size_t len,
5616 unsigned int flags)
5617 {
5618 struct page *pages_def[PIPE_DEF_BUFFERS];
5619 struct partial_page partial_def[PIPE_DEF_BUFFERS];
5620 struct trace_iterator *iter = filp->private_data;
5621 struct splice_pipe_desc spd = {
5622 .pages = pages_def,
5623 .partial = partial_def,
5624 .nr_pages = 0, /* This gets updated below. */
5625 .nr_pages_max = PIPE_DEF_BUFFERS,
5626 .ops = &tracing_pipe_buf_ops,
5627 .spd_release = tracing_spd_release_pipe,
5628 };
5629 ssize_t ret;
5630 size_t rem;
5631 unsigned int i;
5632
5633 if (splice_grow_spd(pipe, &spd))
5634 return -ENOMEM;
5635
5636 mutex_lock(&iter->mutex);
5637
5638 if (iter->trace->splice_read) {
5639 ret = iter->trace->splice_read(iter, filp,
5640 ppos, pipe, len, flags);
5641 if (ret)
5642 goto out_err;
5643 }
5644
5645 ret = tracing_wait_pipe(filp);
5646 if (ret <= 0)
5647 goto out_err;
5648
5649 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5650 ret = -EFAULT;
5651 goto out_err;
5652 }
5653
5654 trace_event_read_lock();
5655 trace_access_lock(iter->cpu_file);
5656
5657 /* Fill as many pages as possible. */
5658 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5659 spd.pages[i] = alloc_page(GFP_KERNEL);
5660 if (!spd.pages[i])
5661 break;
5662
5663 rem = tracing_fill_pipe_page(rem, iter);
5664
5665 /* Copy the data into the page, so we can start over. */
5666 ret = trace_seq_to_buffer(&iter->seq,
5667 page_address(spd.pages[i]),
5668 trace_seq_used(&iter->seq));
5669 if (ret < 0) {
5670 __free_page(spd.pages[i]);
5671 break;
5672 }
5673 spd.partial[i].offset = 0;
5674 spd.partial[i].len = trace_seq_used(&iter->seq);
5675
5676 trace_seq_init(&iter->seq);
5677 }
5678
5679 trace_access_unlock(iter->cpu_file);
5680 trace_event_read_unlock();
5681 mutex_unlock(&iter->mutex);
5682
5683 spd.nr_pages = i;
5684
5685 if (i)
5686 ret = splice_to_pipe(pipe, &spd);
5687 else
5688 ret = 0;
5689 out:
5690 splice_shrink_spd(&spd);
5691 return ret;
5692
5693 out_err:
5694 mutex_unlock(&iter->mutex);
5695 goto out;
5696 }
5697
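/*
 * Read handler for buffer_size_kb: report the per-cpu buffer size in KB,
 * or "X" when all CPUs were requested but their sizes differ.
 */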
5698 static ssize_t
5699 tracing_entries_read(struct file *filp, char __user *ubuf,
5700 size_t cnt, loff_t *ppos)
5701 {
5702 struct inode *inode = file_inode(filp);
5703 struct trace_array *tr = inode->i_private;
5704 int cpu = tracing_get_cpu(inode);
5705 char buf[64];
5706 int r = 0;
5707 ssize_t ret;
5708
5709 mutex_lock(&trace_types_lock);
5710
5711 if (cpu == RING_BUFFER_ALL_CPUS) {
5712 int cpu, buf_size_same;
5713 unsigned long size;
5714
5715 size = 0;
5716 buf_size_same = 1;
5717 /* check if all cpu sizes are the same */
5718 for_each_tracing_cpu(cpu) {
5719 /* fill in the size from first enabled cpu */
5720 if (size == 0)
5721 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5722 if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5723 buf_size_same = 0;
5724 break;
5725 }
5726 }
5727
5728 if (buf_size_same) {
5729 if (!ring_buffer_expanded)
5730 r = sprintf(buf, "%lu (expanded: %lu)\n",
5731 size >> 10,
5732 trace_buf_size >> 10);
5733 else
5734 r = sprintf(buf, "%lu\n", size >> 10);
5735 } else
5736 r = sprintf(buf, "X\n");
5737 } else
5738 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5739
5740 mutex_unlock(&trace_types_lock);
5741
5742 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5743 return ret;
5744 }
5745
5746 static ssize_t
5747 tracing_entries_write(struct file *filp, const char __user *ubuf,
5748 size_t cnt, loff_t *ppos)
5749 {
5750 struct inode *inode = file_inode(filp);
5751 struct trace_array *tr = inode->i_private;
5752 unsigned long val;
5753 int ret;
5754
5755 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5756 if (ret)
5757 return ret;
5758
5759 /* must have at least 1 entry */
5760 if (!val)
5761 return -EINVAL;
5762
5763 /* value is in KB */
5764 val <<= 10;
5765 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5766 if (ret < 0)
5767 return ret;
5768
5769 *ppos += cnt;
5770
5771 return cnt;
5772 }
5773
5774 static ssize_t
5775 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5776 size_t cnt, loff_t *ppos)
5777 {
5778 struct trace_array *tr = filp->private_data;
5779 char buf[64];
5780 int r, cpu;
5781 unsigned long size = 0, expanded_size = 0;
5782
5783 mutex_lock(&trace_types_lock);
5784 for_each_tracing_cpu(cpu) {
5785 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5786 if (!ring_buffer_expanded)
5787 expanded_size += trace_buf_size >> 10;
5788 }
5789 if (ring_buffer_expanded)
5790 r = sprintf(buf, "%lu\n", size);
5791 else
5792 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5793 mutex_unlock(&trace_types_lock);
5794
5795 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5796 }
5797
5798 static ssize_t
5799 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5800 size_t cnt, loff_t *ppos)
5801 {
5802 /*
5803 * There is no need to read what the user has written; this function
5804 * only makes sure that there is no error when "echo" is used.
5805 */
5806
5807 *ppos += cnt;
5808
5809 return cnt;
5810 }
5811
5812 static int
5813 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5814 {
5815 struct trace_array *tr = inode->i_private;
5816
5817 /* disable tracing ? */
5818 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5819 tracer_tracing_off(tr);
5820 /* resize the ring buffer to 0 */
5821 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5822
5823 trace_array_put(tr);
5824
5825 return 0;
5826 }
5827
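/*
 * Write handler for trace_marker: copy the user string directly into a
 * TRACE_PRINT event in the ring buffer, substituting "<faulted>" if the
 * copy from user space faults.
 */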
5828 static ssize_t
5829 tracing_mark_write(struct file *filp, const char __user *ubuf,
5830 size_t cnt, loff_t *fpos)
5831 {
5832 struct trace_array *tr = filp->private_data;
5833 struct ring_buffer_event *event;
5834 struct ring_buffer *buffer;
5835 struct print_entry *entry;
5836 unsigned long irq_flags;
5837 const char faulted[] = "<faulted>";
5838 ssize_t written;
5839 int size;
5840 int len;
5841
5842 /* Used in tracing_mark_raw_write() as well */
5843 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
5844
5845 if (tracing_disabled)
5846 return -EINVAL;
5847
5848 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5849 return -EINVAL;
5850
5851 if (cnt > TRACE_BUF_SIZE)
5852 cnt = TRACE_BUF_SIZE;
5853
5854 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5855
5856 local_save_flags(irq_flags);
5857 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
5858
5859 /* If less than "<faulted>", then make sure we can still add that */
5860 if (cnt < FAULTED_SIZE)
5861 size += FAULTED_SIZE - cnt;
5862
5863 buffer = tr->trace_buffer.buffer;
5864 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5865 irq_flags, preempt_count());
5866 if (unlikely(!event))
5867 /* Ring buffer disabled, return as if not open for write */
5868 return -EBADF;
5869
5870 entry = ring_buffer_event_data(event);
5871 entry->ip = _THIS_IP_;
5872
5873 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
5874 if (len) {
5875 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5876 cnt = FAULTED_SIZE;
5877 written = -EFAULT;
5878 } else
5879 written = cnt;
5880 len = cnt;
5881
5882 if (entry->buf[cnt - 1] != '\n') {
5883 entry->buf[cnt] = '\n';
5884 entry->buf[cnt + 1] = '\0';
5885 } else
5886 entry->buf[cnt] = '\0';
5887
5888 __buffer_unlock_commit(buffer, event);
5889
5890 if (written > 0)
5891 *fpos += written;
5892
5893 return written;
5894 }
5895
5896 /* Limit it for now to 3K (including tag) */
5897 #define RAW_DATA_MAX_SIZE (1024*3)
5898
5899 static ssize_t
5900 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
5901 size_t cnt, loff_t *fpos)
5902 {
5903 struct trace_array *tr = filp->private_data;
5904 struct ring_buffer_event *event;
5905 struct ring_buffer *buffer;
5906 struct raw_data_entry *entry;
5907 const char faulted[] = "<faulted>";
5908 unsigned long irq_flags;
5909 ssize_t written;
5910 int size;
5911 int len;
5912
5913 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
5914
5915 if (tracing_disabled)
5916 return -EINVAL;
5917
5918 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5919 return -EINVAL;
5920
5921 /* The marker must at least have a tag id */
5922 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
5923 return -EINVAL;
5924
5925 if (cnt > TRACE_BUF_SIZE)
5926 cnt = TRACE_BUF_SIZE;
5927
5928 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5929
5930 local_save_flags(irq_flags);
5931 size = sizeof(*entry) + cnt;
5932 if (cnt < FAULT_SIZE_ID)
5933 size += FAULT_SIZE_ID - cnt;
5934
5935 buffer = tr->trace_buffer.buffer;
5936 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
5937 irq_flags, preempt_count());
5938 if (!event)
5939 /* Ring buffer disabled, return as if not open for write */
5940 return -EBADF;
5941
5942 entry = ring_buffer_event_data(event);
5943
5944 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
5945 if (len) {
5946 entry->id = -1;
5947 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5948 written = -EFAULT;
5949 } else
5950 written = cnt;
5951
5952 __buffer_unlock_commit(buffer, event);
5953
5954 if (written > 0)
5955 *fpos += written;
5956
5957 return written;
5958 }
5959
5960 static int tracing_clock_show(struct seq_file *m, void *v)
5961 {
5962 struct trace_array *tr = m->private;
5963 int i;
5964
5965 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5966 seq_printf(m,
5967 "%s%s%s%s", i ? " " : "",
5968 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5969 i == tr->clock_id ? "]" : "");
5970 seq_putc(m, '\n');
5971
5972 return 0;
5973 }
5974
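/*
 * Select the trace clock named @clockstr from trace_clocks[] and reset the
 * buffers, since timestamps taken from different clocks are not comparable.
 */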
5975 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5976 {
5977 int i;
5978
5979 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5980 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5981 break;
5982 }
5983 if (i == ARRAY_SIZE(trace_clocks))
5984 return -EINVAL;
5985
5986 mutex_lock(&trace_types_lock);
5987
5988 tr->clock_id = i;
5989
5990 ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5991
5992 /*
5993 * New clock may not be consistent with the previous clock.
5994 * Reset the buffer so that it doesn't have incomparable timestamps.
5995 */
5996 tracing_reset_online_cpus(&tr->trace_buffer);
5997
5998 #ifdef CONFIG_TRACER_MAX_TRACE
5999 if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
6000 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6001 tracing_reset_online_cpus(&tr->max_buffer);
6002 #endif
6003
6004 mutex_unlock(&trace_types_lock);
6005
6006 return 0;
6007 }
6008
6009 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6010 size_t cnt, loff_t *fpos)
6011 {
6012 struct seq_file *m = filp->private_data;
6013 struct trace_array *tr = m->private;
6014 char buf[64];
6015 const char *clockstr;
6016 int ret;
6017
6018 if (cnt >= sizeof(buf))
6019 return -EINVAL;
6020
6021 if (copy_from_user(buf, ubuf, cnt))
6022 return -EFAULT;
6023
6024 buf[cnt] = 0;
6025
6026 clockstr = strstrip(buf);
6027
6028 ret = tracing_set_clock(tr, clockstr);
6029 if (ret)
6030 return ret;
6031
6032 *fpos += cnt;
6033
6034 return cnt;
6035 }
6036
6037 static int tracing_clock_open(struct inode *inode, struct file *file)
6038 {
6039 struct trace_array *tr = inode->i_private;
6040 int ret;
6041
6042 if (tracing_disabled)
6043 return -ENODEV;
6044
6045 if (trace_array_get(tr))
6046 return -ENODEV;
6047
6048 ret = single_open(file, tracing_clock_show, inode->i_private);
6049 if (ret < 0)
6050 trace_array_put(tr);
6051
6052 return ret;
6053 }
6054
6055 struct ftrace_buffer_info {
6056 struct trace_iterator iter;
6057 void *spare;
6058 unsigned int spare_cpu;
6059 unsigned int read;
6060 };
6061
6062 #ifdef CONFIG_TRACER_SNAPSHOT
6063 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6064 {
6065 struct trace_array *tr = inode->i_private;
6066 struct trace_iterator *iter;
6067 struct seq_file *m;
6068 int ret = 0;
6069
6070 if (trace_array_get(tr) < 0)
6071 return -ENODEV;
6072
6073 if (file->f_mode & FMODE_READ) {
6074 iter = __tracing_open(inode, file, true);
6075 if (IS_ERR(iter))
6076 ret = PTR_ERR(iter);
6077 } else {
6078 /* Writes still need the seq_file to hold the private data */
6079 ret = -ENOMEM;
6080 m = kzalloc(sizeof(*m), GFP_KERNEL);
6081 if (!m)
6082 goto out;
6083 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6084 if (!iter) {
6085 kfree(m);
6086 goto out;
6087 }
6088 ret = 0;
6089
6090 iter->tr = tr;
6091 iter->trace_buffer = &tr->max_buffer;
6092 iter->cpu_file = tracing_get_cpu(inode);
6093 m->private = iter;
6094 file->private_data = m;
6095 }
6096 out:
6097 if (ret < 0)
6098 trace_array_put(tr);
6099
6100 return ret;
6101 }
6102
6103 static ssize_t
6104 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6105 loff_t *ppos)
6106 {
6107 struct seq_file *m = filp->private_data;
6108 struct trace_iterator *iter = m->private;
6109 struct trace_array *tr = iter->tr;
6110 unsigned long val;
6111 int ret;
6112
6113 ret = tracing_update_buffers();
6114 if (ret < 0)
6115 return ret;
6116
6117 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6118 if (ret)
6119 return ret;
6120
6121 mutex_lock(&trace_types_lock);
6122
6123 if (tr->current_trace->use_max_tr) {
6124 ret = -EBUSY;
6125 goto out;
6126 }
6127
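/*
 * Values accepted by the snapshot file:
 *   0 - free the snapshot buffer (only via the all-CPUs file)
 *   1 - allocate the snapshot buffer if needed and swap it with the
 *       live buffer (per-CPU swap only if the ring buffer supports it)
 *   other - clear the snapshot buffer without freeing it
 */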
6128 switch (val) {
6129 case 0:
6130 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6131 ret = -EINVAL;
6132 break;
6133 }
6134 if (tr->allocated_snapshot)
6135 free_snapshot(tr);
6136 break;
6137 case 1:
6138 /* Only allow per-cpu swap if the ring buffer supports it */
6139 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6140 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6141 ret = -EINVAL;
6142 break;
6143 }
6144 #endif
6145 if (!tr->allocated_snapshot) {
6146 ret = alloc_snapshot(tr);
6147 if (ret < 0)
6148 break;
6149 }
6150 local_irq_disable();
6151 /* Now, we're going to swap */
6152 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6153 update_max_tr(tr, current, smp_processor_id());
6154 else
6155 update_max_tr_single(tr, current, iter->cpu_file);
6156 local_irq_enable();
6157 break;
6158 default:
6159 if (tr->allocated_snapshot) {
6160 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6161 tracing_reset_online_cpus(&tr->max_buffer);
6162 else
6163 tracing_reset(&tr->max_buffer, iter->cpu_file);
6164 }
6165 break;
6166 }
6167
6168 if (ret >= 0) {
6169 *ppos += cnt;
6170 ret = cnt;
6171 }
6172 out:
6173 mutex_unlock(&trace_types_lock);
6174 return ret;
6175 }
6176
6177 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6178 {
6179 struct seq_file *m = file->private_data;
6180 int ret;
6181
6182 ret = tracing_release(inode, file);
6183
6184 if (file->f_mode & FMODE_READ)
6185 return ret;
6186
6187 /* If write only, the seq_file is just a stub */
6188 if (m)
6189 kfree(m->private);
6190 kfree(m);
6191
6192 return 0;
6193 }
6194
6195 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6196 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6197 size_t count, loff_t *ppos);
6198 static int tracing_buffers_release(struct inode *inode, struct file *file);
6199 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6200 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6201
6202 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6203 {
6204 struct ftrace_buffer_info *info;
6205 int ret;
6206
6207 ret = tracing_buffers_open(inode, filp);
6208 if (ret < 0)
6209 return ret;
6210
6211 info = filp->private_data;
6212
6213 if (info->iter.trace->use_max_tr) {
6214 tracing_buffers_release(inode, filp);
6215 return -EBUSY;
6216 }
6217
6218 info->iter.snapshot = true;
6219 info->iter.trace_buffer = &info->iter.tr->max_buffer;
6220
6221 return ret;
6222 }
6223
6224 #endif /* CONFIG_TRACER_SNAPSHOT */
6225
6226
6227 static const struct file_operations tracing_thresh_fops = {
6228 .open = tracing_open_generic,
6229 .read = tracing_thresh_read,
6230 .write = tracing_thresh_write,
6231 .llseek = generic_file_llseek,
6232 };
6233
6234 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6235 static const struct file_operations tracing_max_lat_fops = {
6236 .open = tracing_open_generic,
6237 .read = tracing_max_lat_read,
6238 .write = tracing_max_lat_write,
6239 .llseek = generic_file_llseek,
6240 };
6241 #endif
6242
6243 static const struct file_operations set_tracer_fops = {
6244 .open = tracing_open_generic,
6245 .read = tracing_set_trace_read,
6246 .write = tracing_set_trace_write,
6247 .llseek = generic_file_llseek,
6248 };
6249
6250 static const struct file_operations tracing_pipe_fops = {
6251 .open = tracing_open_pipe,
6252 .poll = tracing_poll_pipe,
6253 .read = tracing_read_pipe,
6254 .splice_read = tracing_splice_read_pipe,
6255 .release = tracing_release_pipe,
6256 .llseek = no_llseek,
6257 };
6258
6259 static const struct file_operations tracing_entries_fops = {
6260 .open = tracing_open_generic_tr,
6261 .read = tracing_entries_read,
6262 .write = tracing_entries_write,
6263 .llseek = generic_file_llseek,
6264 .release = tracing_release_generic_tr,
6265 };
6266
6267 static const struct file_operations tracing_total_entries_fops = {
6268 .open = tracing_open_generic_tr,
6269 .read = tracing_total_entries_read,
6270 .llseek = generic_file_llseek,
6271 .release = tracing_release_generic_tr,
6272 };
6273
6274 static const struct file_operations tracing_free_buffer_fops = {
6275 .open = tracing_open_generic_tr,
6276 .write = tracing_free_buffer_write,
6277 .release = tracing_free_buffer_release,
6278 };
6279
6280 static const struct file_operations tracing_mark_fops = {
6281 .open = tracing_open_generic_tr,
6282 .write = tracing_mark_write,
6283 .llseek = generic_file_llseek,
6284 .release = tracing_release_generic_tr,
6285 };
6286
6287 static const struct file_operations tracing_mark_raw_fops = {
6288 .open = tracing_open_generic_tr,
6289 .write = tracing_mark_raw_write,
6290 .llseek = generic_file_llseek,
6291 .release = tracing_release_generic_tr,
6292 };
6293
6294 static const struct file_operations trace_clock_fops = {
6295 .open = tracing_clock_open,
6296 .read = seq_read,
6297 .llseek = seq_lseek,
6298 .release = tracing_single_release_tr,
6299 .write = tracing_clock_write,
6300 };
6301
6302 #ifdef CONFIG_TRACER_SNAPSHOT
6303 static const struct file_operations snapshot_fops = {
6304 .open = tracing_snapshot_open,
6305 .read = seq_read,
6306 .write = tracing_snapshot_write,
6307 .llseek = tracing_lseek,
6308 .release = tracing_snapshot_release,
6309 };
6310
6311 static const struct file_operations snapshot_raw_fops = {
6312 .open = snapshot_raw_open,
6313 .read = tracing_buffers_read,
6314 .release = tracing_buffers_release,
6315 .splice_read = tracing_buffers_splice_read,
6316 .llseek = no_llseek,
6317 };
6318
6319 #endif /* CONFIG_TRACER_SNAPSHOT */
6320
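/*
 * Open handler for the per-cpu trace_pipe_raw files (and, via
 * snapshot_raw_open(), snapshot_raw). These are consuming, non-seekable
 * readers of raw ring-buffer pages; opening one takes a reference on the
 * current tracer so it cannot go away while the buffer is being read.
 */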
6321 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6322 {
6323 struct trace_array *tr = inode->i_private;
6324 struct ftrace_buffer_info *info;
6325 int ret;
6326
6327 if (tracing_disabled)
6328 return -ENODEV;
6329
6330 if (trace_array_get(tr) < 0)
6331 return -ENODEV;
6332
6333 info = kzalloc(sizeof(*info), GFP_KERNEL);
6334 if (!info) {
6335 trace_array_put(tr);
6336 return -ENOMEM;
6337 }
6338
6339 mutex_lock(&trace_types_lock);
6340
6341 info->iter.tr = tr;
6342 info->iter.cpu_file = tracing_get_cpu(inode);
6343 info->iter.trace = tr->current_trace;
6344 info->iter.trace_buffer = &tr->trace_buffer;
6345 info->spare = NULL;
6346 /* Force reading ring buffer for first read */
6347 info->read = (unsigned int)-1;
6348
6349 filp->private_data = info;
6350
6351 tr->current_trace->ref++;
6352
6353 mutex_unlock(&trace_types_lock);
6354
6355 ret = nonseekable_open(inode, filp);
6356 if (ret < 0)
6357 trace_array_put(tr);
6358
6359 return ret;
6360 }
6361
6362 static unsigned int
6363 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6364 {
6365 struct ftrace_buffer_info *info = filp->private_data;
6366 struct trace_iterator *iter = &info->iter;
6367
6368 return trace_poll(iter, filp, poll_table);
6369 }
6370
6371 static ssize_t
6372 tracing_buffers_read(struct file *filp, char __user *ubuf,
6373 size_t count, loff_t *ppos)
6374 {
6375 struct ftrace_buffer_info *info = filp->private_data;
6376 struct trace_iterator *iter = &info->iter;
6377 ssize_t ret;
6378 ssize_t size;
6379
6380 if (!count)
6381 return 0;
6382
6383 #ifdef CONFIG_TRACER_MAX_TRACE
6384 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6385 return -EBUSY;
6386 #endif
6387
6388 if (!info->spare) {
6389 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6390 iter->cpu_file);
6391 info->spare_cpu = iter->cpu_file;
6392 }
6393 if (!info->spare)
6394 return -ENOMEM;
6395
6396 /* Do we have previous read data to read? */
6397 if (info->read < PAGE_SIZE)
6398 goto read;
6399
6400 again:
6401 trace_access_lock(iter->cpu_file);
6402 ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6403 &info->spare,
6404 count,
6405 iter->cpu_file, 0);
6406 trace_access_unlock(iter->cpu_file);
6407
6408 if (ret < 0) {
6409 if (trace_empty(iter)) {
6410 if ((filp->f_flags & O_NONBLOCK))
6411 return -EAGAIN;
6412
6413 ret = wait_on_pipe(iter, false);
6414 if (ret)
6415 return ret;
6416
6417 goto again;
6418 }
6419 return 0;
6420 }
6421
6422 info->read = 0;
6423 read:
6424 size = PAGE_SIZE - info->read;
6425 if (size > count)
6426 size = count;
6427
6428 ret = copy_to_user(ubuf, info->spare + info->read, size);
6429 if (ret == size)
6430 return -EFAULT;
6431
6432 size -= ret;
6433
6434 *ppos += size;
6435 info->read += size;
6436
6437 return size;
6438 }
6439
6440 static int tracing_buffers_release(struct inode *inode, struct file *file)
6441 {
6442 struct ftrace_buffer_info *info = file->private_data;
6443 struct trace_iterator *iter = &info->iter;
6444
6445 mutex_lock(&trace_types_lock);
6446
6447 iter->tr->current_trace->ref--;
6448
6449 __trace_array_put(iter->tr);
6450
6451 if (info->spare)
6452 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6453 info->spare_cpu, info->spare);
6454 kfree(info);
6455
6456 mutex_unlock(&trace_types_lock);
6457
6458 return 0;
6459 }
6460
6461 struct buffer_ref {
6462 struct ring_buffer *buffer;
6463 void *page;
6464 int cpu;
6465 int ref;
6466 };
6467
6468 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6469 struct pipe_buffer *buf)
6470 {
6471 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6472
6473 if (--ref->ref)
6474 return;
6475
6476 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6477 kfree(ref);
6478 buf->private = 0;
6479 }
6480
6481 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6482 struct pipe_buffer *buf)
6483 {
6484 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6485
6486 ref->ref++;
6487 }
6488
6489 /* Pipe buffer operations for a buffer. */
6490 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6491 .can_merge = 0,
6492 .confirm = generic_pipe_buf_confirm,
6493 .release = buffer_pipe_buf_release,
6494 .steal = generic_pipe_buf_steal,
6495 .get = buffer_pipe_buf_get,
6496 };
6497
6498 /*
6499 * Callback from splice_to_pipe(), used if we need to release some pages
6500 * at the end of the spd in case we errored out while filling the pipe.
6501 */
6502 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6503 {
6504 struct buffer_ref *ref =
6505 (struct buffer_ref *)spd->partial[i].private;
6506
6507 if (--ref->ref)
6508 return;
6509
6510 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6511 kfree(ref);
6512 spd->partial[i].private = 0;
6513 }
6514
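/*
 * Splice support for trace_pipe_raw: complete ring-buffer pages are
 * handed to the pipe without copying. Each spliced page carries a
 * buffer_ref, and the page is only returned to the ring buffer once the
 * last pipe reference is dropped (see buffer_pipe_buf_release() above).
 */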
6515 static ssize_t
6516 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6517 struct pipe_inode_info *pipe, size_t len,
6518 unsigned int flags)
6519 {
6520 struct ftrace_buffer_info *info = file->private_data;
6521 struct trace_iterator *iter = &info->iter;
6522 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6523 struct page *pages_def[PIPE_DEF_BUFFERS];
6524 struct splice_pipe_desc spd = {
6525 .pages = pages_def,
6526 .partial = partial_def,
6527 .nr_pages_max = PIPE_DEF_BUFFERS,
6528 .ops = &buffer_pipe_buf_ops,
6529 .spd_release = buffer_spd_release,
6530 };
6531 struct buffer_ref *ref;
6532 int entries, size, i;
6533 ssize_t ret = 0;
6534
6535 #ifdef CONFIG_TRACER_MAX_TRACE
6536 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6537 return -EBUSY;
6538 #endif
6539
6540 if (*ppos & (PAGE_SIZE - 1))
6541 return -EINVAL;
6542
6543 if (len & (PAGE_SIZE - 1)) {
6544 if (len < PAGE_SIZE)
6545 return -EINVAL;
6546 len &= PAGE_MASK;
6547 }
6548
6549 if (splice_grow_spd(pipe, &spd))
6550 return -ENOMEM;
6551
6552 again:
6553 trace_access_lock(iter->cpu_file);
6554 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6555
6556 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6557 struct page *page;
6558 int r;
6559
6560 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6561 if (!ref) {
6562 ret = -ENOMEM;
6563 break;
6564 }
6565
6566 ref->ref = 1;
6567 ref->buffer = iter->trace_buffer->buffer;
6568 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6569 if (!ref->page) {
6570 ret = -ENOMEM;
6571 kfree(ref);
6572 break;
6573 }
6574 ref->cpu = iter->cpu_file;
6575
6576 r = ring_buffer_read_page(ref->buffer, &ref->page,
6577 len, iter->cpu_file, 1);
6578 if (r < 0) {
6579 ring_buffer_free_read_page(ref->buffer, ref->cpu,
6580 ref->page);
6581 kfree(ref);
6582 break;
6583 }
6584
6585 /*
6586 * zero out any leftover data; this page is going to
6587 * user land.
6588 */
6589 size = ring_buffer_page_len(ref->page);
6590 if (size < PAGE_SIZE)
6591 memset(ref->page + size, 0, PAGE_SIZE - size);
6592
6593 page = virt_to_page(ref->page);
6594
6595 spd.pages[i] = page;
6596 spd.partial[i].len = PAGE_SIZE;
6597 spd.partial[i].offset = 0;
6598 spd.partial[i].private = (unsigned long)ref;
6599 spd.nr_pages++;
6600 *ppos += PAGE_SIZE;
6601
6602 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6603 }
6604
6605 trace_access_unlock(iter->cpu_file);
6606 spd.nr_pages = i;
6607
6608 /* did we read anything? */
6609 if (!spd.nr_pages) {
6610 if (ret)
6611 goto out;
6612
6613 ret = -EAGAIN;
6614 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6615 goto out;
6616
6617 ret = wait_on_pipe(iter, true);
6618 if (ret)
6619 goto out;
6620
6621 goto again;
6622 }
6623
6624 ret = splice_to_pipe(pipe, &spd);
6625 out:
6626 splice_shrink_spd(&spd);
6627
6628 return ret;
6629 }
6630
6631 static const struct file_operations tracing_buffers_fops = {
6632 .open = tracing_buffers_open,
6633 .read = tracing_buffers_read,
6634 .poll = tracing_buffers_poll,
6635 .release = tracing_buffers_release,
6636 .splice_read = tracing_buffers_splice_read,
6637 .llseek = no_llseek,
6638 };
6639
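/*
 * The per_cpu/cpuN/stats file: dumps entry, overrun, byte, dropped and
 * read event counts for one CPU's ring buffer, plus the oldest and
 * current timestamps (printed in seconds when the trace clock counts
 * in nanoseconds).
 */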
6640 static ssize_t
6641 tracing_stats_read(struct file *filp, char __user *ubuf,
6642 size_t count, loff_t *ppos)
6643 {
6644 struct inode *inode = file_inode(filp);
6645 struct trace_array *tr = inode->i_private;
6646 struct trace_buffer *trace_buf = &tr->trace_buffer;
6647 int cpu = tracing_get_cpu(inode);
6648 struct trace_seq *s;
6649 unsigned long cnt;
6650 unsigned long long t;
6651 unsigned long usec_rem;
6652
6653 s = kmalloc(sizeof(*s), GFP_KERNEL);
6654 if (!s)
6655 return -ENOMEM;
6656
6657 trace_seq_init(s);
6658
6659 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6660 trace_seq_printf(s, "entries: %ld\n", cnt);
6661
6662 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6663 trace_seq_printf(s, "overrun: %ld\n", cnt);
6664
6665 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6666 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6667
6668 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6669 trace_seq_printf(s, "bytes: %ld\n", cnt);
6670
6671 if (trace_clocks[tr->clock_id].in_ns) {
6672 /* local or global for trace_clock */
6673 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6674 usec_rem = do_div(t, USEC_PER_SEC);
6675 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6676 t, usec_rem);
6677
6678 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6679 usec_rem = do_div(t, USEC_PER_SEC);
6680 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6681 } else {
6682 /* counter or tsc mode for trace_clock */
6683 trace_seq_printf(s, "oldest event ts: %llu\n",
6684 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6685
6686 trace_seq_printf(s, "now ts: %llu\n",
6687 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6688 }
6689
6690 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6691 trace_seq_printf(s, "dropped events: %ld\n", cnt);
6692
6693 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6694 trace_seq_printf(s, "read events: %ld\n", cnt);
6695
6696 count = simple_read_from_buffer(ubuf, count, ppos,
6697 s->buffer, trace_seq_used(s));
6698
6699 kfree(s);
6700
6701 return count;
6702 }
6703
6704 static const struct file_operations tracing_stats_fops = {
6705 .open = tracing_open_generic_tr,
6706 .read = tracing_stats_read,
6707 .llseek = generic_file_llseek,
6708 .release = tracing_release_generic_tr,
6709 };
6710
6711 #ifdef CONFIG_DYNAMIC_FTRACE
6712
6713 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6714 {
6715 return 0;
6716 }
6717
6718 static ssize_t
6719 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6720 size_t cnt, loff_t *ppos)
6721 {
6722 static char ftrace_dyn_info_buffer[1024];
6723 static DEFINE_MUTEX(dyn_info_mutex);
6724 unsigned long *p = filp->private_data;
6725 char *buf = ftrace_dyn_info_buffer;
6726 int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6727 int r;
6728
6729 mutex_lock(&dyn_info_mutex);
6730 r = sprintf(buf, "%ld ", *p);
6731
6732 r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6733 buf[r++] = '\n';
6734
6735 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6736
6737 mutex_unlock(&dyn_info_mutex);
6738
6739 return r;
6740 }
6741
6742 static const struct file_operations tracing_dyn_info_fops = {
6743 .open = tracing_open_generic,
6744 .read = tracing_read_dyn_info,
6745 .llseek = generic_file_llseek,
6746 };
6747 #endif /* CONFIG_DYNAMIC_FTRACE */
6748
6749 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6750 static void
6751 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
6752 struct trace_array *tr, struct ftrace_probe_ops *ops,
6753 void *data)
6754 {
6755 tracing_snapshot_instance(tr);
6756 }
6757
6758 static void
6759 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
6760 struct trace_array *tr, struct ftrace_probe_ops *ops,
6761 void *data)
6762 {
6763 struct ftrace_func_mapper *mapper = data;
6764 long *count = NULL;
6765
6766 if (mapper)
6767 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6768
6769 if (count) {
6770
6771 if (*count <= 0)
6772 return;
6773
6774 (*count)--;
6775 }
6776
6777 tracing_snapshot_instance(tr);
6778 }
6779
6780 static int
6781 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6782 struct ftrace_probe_ops *ops, void *data)
6783 {
6784 struct ftrace_func_mapper *mapper = data;
6785 long *count = NULL;
6786
6787 seq_printf(m, "%ps:", (void *)ip);
6788
6789 seq_puts(m, "snapshot");
6790
6791 if (mapper)
6792 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6793
6794 if (count)
6795 seq_printf(m, ":count=%ld\n", *count);
6796 else
6797 seq_puts(m, ":unlimited\n");
6798
6799 return 0;
6800 }
6801
6802 static int
6803 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
6804 unsigned long ip, void *init_data, void **data)
6805 {
6806 struct ftrace_func_mapper *mapper = *data;
6807
6808 if (!mapper) {
6809 mapper = allocate_ftrace_func_mapper();
6810 if (!mapper)
6811 return -ENOMEM;
6812 *data = mapper;
6813 }
6814
6815 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
6816 }
6817
6818 static void
6819 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
6820 unsigned long ip, void *data)
6821 {
6822 struct ftrace_func_mapper *mapper = data;
6823
6824 if (!ip) {
6825 if (!mapper)
6826 return;
6827 free_ftrace_func_mapper(mapper, NULL);
6828 return;
6829 }
6830
6831 ftrace_func_mapper_remove_ip(mapper, ip);
6832 }
6833
6834 static struct ftrace_probe_ops snapshot_probe_ops = {
6835 .func = ftrace_snapshot,
6836 .print = ftrace_snapshot_print,
6837 };
6838
6839 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6840 .func = ftrace_count_snapshot,
6841 .print = ftrace_snapshot_print,
6842 .init = ftrace_snapshot_init,
6843 .free = ftrace_snapshot_free,
6844 };
6845
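/*
 * Back end for the "snapshot" command of set_ftrace_filter, e.g.:
 *
 *   echo 'function:snapshot' > set_ftrace_filter
 *   echo 'function:snapshot:3' > set_ftrace_filter
 *
 * The optional count limits how many snapshots the probe will take, and
 * prefixing the glob with '!' removes the probe again.
 */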
6846 static int
6847 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
6848 char *glob, char *cmd, char *param, int enable)
6849 {
6850 struct ftrace_probe_ops *ops;
6851 void *count = (void *)-1;
6852 char *number;
6853 int ret;
6854
6855 /* hash funcs only work with set_ftrace_filter */
6856 if (!enable)
6857 return -EINVAL;
6858
6859 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6860
6861 if (glob[0] == '!')
6862 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
6863
6864 if (!param)
6865 goto out_reg;
6866
6867 number = strsep(&param, ":");
6868
6869 if (!strlen(number))
6870 goto out_reg;
6871
6872 /*
6873 * We use the callback data field (which is a pointer)
6874 * as our counter.
6875 */
6876 ret = kstrtoul(number, 0, (unsigned long *)&count);
6877 if (ret)
6878 return ret;
6879
6880 out_reg:
6881 ret = alloc_snapshot(tr);
6882 if (ret < 0)
6883 goto out;
6884
6885 ret = register_ftrace_function_probe(glob, tr, ops, count);
6886
6887 out:
6888 return ret < 0 ? ret : 0;
6889 }
6890
6891 static struct ftrace_func_command ftrace_snapshot_cmd = {
6892 .name = "snapshot",
6893 .func = ftrace_trace_snapshot_callback,
6894 };
6895
6896 static __init int register_snapshot_cmd(void)
6897 {
6898 return register_ftrace_command(&ftrace_snapshot_cmd);
6899 }
6900 #else
6901 static inline __init int register_snapshot_cmd(void) { return 0; }
6902 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6903
6904 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6905 {
6906 if (WARN_ON(!tr->dir))
6907 return ERR_PTR(-ENODEV);
6908
6909 /* Top directory uses NULL as the parent */
6910 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6911 return NULL;
6912
6913 /* All sub buffers have a descriptor */
6914 return tr->dir;
6915 }
6916
6917 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6918 {
6919 struct dentry *d_tracer;
6920
6921 if (tr->percpu_dir)
6922 return tr->percpu_dir;
6923
6924 d_tracer = tracing_get_dentry(tr);
6925 if (IS_ERR(d_tracer))
6926 return NULL;
6927
6928 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6929
6930 WARN_ONCE(!tr->percpu_dir,
6931 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6932
6933 return tr->percpu_dir;
6934 }
6935
6936 static struct dentry *
6937 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6938 void *data, long cpu, const struct file_operations *fops)
6939 {
6940 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6941
6942 if (ret) /* See tracing_get_cpu() */
6943 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6944 return ret;
6945 }
6946
6947 static void
6948 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6949 {
6950 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6951 struct dentry *d_cpu;
6952 char cpu_dir[30]; /* 30 characters should be more than enough */
6953
6954 if (!d_percpu)
6955 return;
6956
6957 snprintf(cpu_dir, 30, "cpu%ld", cpu);
6958 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6959 if (!d_cpu) {
6960 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6961 return;
6962 }
6963
6964 /* per cpu trace_pipe */
6965 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6966 tr, cpu, &tracing_pipe_fops);
6967
6968 /* per cpu trace */
6969 trace_create_cpu_file("trace", 0644, d_cpu,
6970 tr, cpu, &tracing_fops);
6971
6972 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6973 tr, cpu, &tracing_buffers_fops);
6974
6975 trace_create_cpu_file("stats", 0444, d_cpu,
6976 tr, cpu, &tracing_stats_fops);
6977
6978 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6979 tr, cpu, &tracing_entries_fops);
6980
6981 #ifdef CONFIG_TRACER_SNAPSHOT
6982 trace_create_cpu_file("snapshot", 0644, d_cpu,
6983 tr, cpu, &snapshot_fops);
6984
6985 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6986 tr, cpu, &snapshot_raw_fops);
6987 #endif
6988 }
6989
6990 #ifdef CONFIG_FTRACE_SELFTEST
6991 /* Let selftest have access to static functions in this file */
6992 #include "trace_selftest.c"
6993 #endif
6994
6995 static ssize_t
6996 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6997 loff_t *ppos)
6998 {
6999 struct trace_option_dentry *topt = filp->private_data;
7000 char *buf;
7001
7002 if (topt->flags->val & topt->opt->bit)
7003 buf = "1\n";
7004 else
7005 buf = "0\n";
7006
7007 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7008 }
7009
7010 static ssize_t
7011 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7012 loff_t *ppos)
7013 {
7014 struct trace_option_dentry *topt = filp->private_data;
7015 unsigned long val;
7016 int ret;
7017
7018 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7019 if (ret)
7020 return ret;
7021
7022 if (val != 0 && val != 1)
7023 return -EINVAL;
7024
7025 if (!!(topt->flags->val & topt->opt->bit) != val) {
7026 mutex_lock(&trace_types_lock);
7027 ret = __set_tracer_option(topt->tr, topt->flags,
7028 topt->opt, !val);
7029 mutex_unlock(&trace_types_lock);
7030 if (ret)
7031 return ret;
7032 }
7033
7034 *ppos += cnt;
7035
7036 return cnt;
7037 }
7038
7039
7040 static const struct file_operations trace_options_fops = {
7041 .open = tracing_open_generic,
7042 .read = trace_options_read,
7043 .write = trace_options_write,
7044 .llseek = generic_file_llseek,
7045 };
7046
7047 /*
7048 * In order to pass in both the trace_array descriptor as well as the index
7049 * to the flag that the trace option file represents, the trace_array
7050 * has a character array of trace_flags_index[], which holds the index
7051 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7052 * The address of this character array is passed to the flag option file
7053 * read/write callbacks.
7054 *
7055 * In order to extract both the index and the trace_array descriptor,
7056 * get_tr_index() uses the following algorithm.
7057 *
7058 * idx = *ptr;
7059 *
7060 * As the pointer itself contains the address of the index (remember
7061 * index[1] == 1).
7062 *
7063 * Then, to get the trace_array descriptor, we subtract that index
7064 * from the ptr, which takes us to the start of the index array itself.
7065 *
7066 * ptr - idx == &index[0]
7067 *
7068 * Then a simple container_of() from that pointer gets us to the
7069 * trace_array descriptor.
7070 */
7071 static void get_tr_index(void *data, struct trace_array **ptr,
7072 unsigned int *pindex)
7073 {
7074 *pindex = *(unsigned char *)data;
7075
7076 *ptr = container_of(data - *pindex, struct trace_array,
7077 trace_flags_index);
7078 }
7079
7080 static ssize_t
7081 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7082 loff_t *ppos)
7083 {
7084 void *tr_index = filp->private_data;
7085 struct trace_array *tr;
7086 unsigned int index;
7087 char *buf;
7088
7089 get_tr_index(tr_index, &tr, &index);
7090
7091 if (tr->trace_flags & (1 << index))
7092 buf = "1\n";
7093 else
7094 buf = "0\n";
7095
7096 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7097 }
7098
7099 static ssize_t
7100 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7101 loff_t *ppos)
7102 {
7103 void *tr_index = filp->private_data;
7104 struct trace_array *tr;
7105 unsigned int index;
7106 unsigned long val;
7107 int ret;
7108
7109 get_tr_index(tr_index, &tr, &index);
7110
7111 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7112 if (ret)
7113 return ret;
7114
7115 if (val != 0 && val != 1)
7116 return -EINVAL;
7117
7118 mutex_lock(&trace_types_lock);
7119 ret = set_tracer_flag(tr, 1 << index, val);
7120 mutex_unlock(&trace_types_lock);
7121
7122 if (ret < 0)
7123 return ret;
7124
7125 *ppos += cnt;
7126
7127 return cnt;
7128 }
7129
7130 static const struct file_operations trace_options_core_fops = {
7131 .open = tracing_open_generic,
7132 .read = trace_options_core_read,
7133 .write = trace_options_core_write,
7134 .llseek = generic_file_llseek,
7135 };
7136
7137 struct dentry *trace_create_file(const char *name,
7138 umode_t mode,
7139 struct dentry *parent,
7140 void *data,
7141 const struct file_operations *fops)
7142 {
7143 struct dentry *ret;
7144
7145 ret = tracefs_create_file(name, mode, parent, data, fops);
7146 if (!ret)
7147 pr_warn("Could not create tracefs '%s' entry\n", name);
7148
7149 return ret;
7150 }
7151
7152
7153 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7154 {
7155 struct dentry *d_tracer;
7156
7157 if (tr->options)
7158 return tr->options;
7159
7160 d_tracer = tracing_get_dentry(tr);
7161 if (IS_ERR(d_tracer))
7162 return NULL;
7163
7164 tr->options = tracefs_create_dir("options", d_tracer);
7165 if (!tr->options) {
7166 pr_warn("Could not create tracefs directory 'options'\n");
7167 return NULL;
7168 }
7169
7170 return tr->options;
7171 }
7172
7173 static void
7174 create_trace_option_file(struct trace_array *tr,
7175 struct trace_option_dentry *topt,
7176 struct tracer_flags *flags,
7177 struct tracer_opt *opt)
7178 {
7179 struct dentry *t_options;
7180
7181 t_options = trace_options_init_dentry(tr);
7182 if (!t_options)
7183 return;
7184
7185 topt->flags = flags;
7186 topt->opt = opt;
7187 topt->tr = tr;
7188
7189 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7190 &trace_options_fops);
7191
7192 }
7193
7194 static void
7195 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7196 {
7197 struct trace_option_dentry *topts;
7198 struct trace_options *tr_topts;
7199 struct tracer_flags *flags;
7200 struct tracer_opt *opts;
7201 int cnt;
7202 int i;
7203
7204 if (!tracer)
7205 return;
7206
7207 flags = tracer->flags;
7208
7209 if (!flags || !flags->opts)
7210 return;
7211
7212 /*
7213 * If this is an instance, only create flags for tracers
7214 * the instance may have.
7215 */
7216 if (!trace_ok_for_array(tracer, tr))
7217 return;
7218
7219 for (i = 0; i < tr->nr_topts; i++) {
7220 /* Make sure there are no duplicate flags. */
7221 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7222 return;
7223 }
7224
7225 opts = flags->opts;
7226
7227 for (cnt = 0; opts[cnt].name; cnt++)
7228 ;
7229
7230 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7231 if (!topts)
7232 return;
7233
7234 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7235 GFP_KERNEL);
7236 if (!tr_topts) {
7237 kfree(topts);
7238 return;
7239 }
7240
7241 tr->topts = tr_topts;
7242 tr->topts[tr->nr_topts].tracer = tracer;
7243 tr->topts[tr->nr_topts].topts = topts;
7244 tr->nr_topts++;
7245
7246 for (cnt = 0; opts[cnt].name; cnt++) {
7247 create_trace_option_file(tr, &topts[cnt], flags,
7248 &opts[cnt]);
7249 WARN_ONCE(topts[cnt].entry == NULL,
7250 "Failed to create trace option: %s",
7251 opts[cnt].name);
7252 }
7253 }
7254
7255 static struct dentry *
7256 create_trace_option_core_file(struct trace_array *tr,
7257 const char *option, long index)
7258 {
7259 struct dentry *t_options;
7260
7261 t_options = trace_options_init_dentry(tr);
7262 if (!t_options)
7263 return NULL;
7264
7265 return trace_create_file(option, 0644, t_options,
7266 (void *)&tr->trace_flags_index[index],
7267 &trace_options_core_fops);
7268 }
7269
7270 static void create_trace_options_dir(struct trace_array *tr)
7271 {
7272 struct dentry *t_options;
7273 bool top_level = tr == &global_trace;
7274 int i;
7275
7276 t_options = trace_options_init_dentry(tr);
7277 if (!t_options)
7278 return;
7279
7280 for (i = 0; trace_options[i]; i++) {
7281 if (top_level ||
7282 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7283 create_trace_option_core_file(tr, trace_options[i], i);
7284 }
7285 }
7286
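/*
 * The tracing_on file: reading it reports whether the ring buffer is
 * currently recording, and writing 0 or 1 (e.g. echo 0 > tracing_on)
 * stops or restarts recording without tearing down the current tracer.
 */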
7287 static ssize_t
7288 rb_simple_read(struct file *filp, char __user *ubuf,
7289 size_t cnt, loff_t *ppos)
7290 {
7291 struct trace_array *tr = filp->private_data;
7292 char buf[64];
7293 int r;
7294
7295 r = tracer_tracing_is_on(tr);
7296 r = sprintf(buf, "%d\n", r);
7297
7298 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7299 }
7300
7301 static ssize_t
7302 rb_simple_write(struct file *filp, const char __user *ubuf,
7303 size_t cnt, loff_t *ppos)
7304 {
7305 struct trace_array *tr = filp->private_data;
7306 struct ring_buffer *buffer = tr->trace_buffer.buffer;
7307 unsigned long val;
7308 int ret;
7309
7310 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7311 if (ret)
7312 return ret;
7313
7314 if (buffer) {
7315 mutex_lock(&trace_types_lock);
7316 if (val) {
7317 tracer_tracing_on(tr);
7318 if (tr->current_trace->start)
7319 tr->current_trace->start(tr);
7320 } else {
7321 tracer_tracing_off(tr);
7322 if (tr->current_trace->stop)
7323 tr->current_trace->stop(tr);
7324 }
7325 mutex_unlock(&trace_types_lock);
7326 }
7327
7328 (*ppos)++;
7329
7330 return cnt;
7331 }
7332
7333 static const struct file_operations rb_simple_fops = {
7334 .open = tracing_open_generic_tr,
7335 .read = rb_simple_read,
7336 .write = rb_simple_write,
7337 .release = tracing_release_generic_tr,
7338 .llseek = default_llseek,
7339 };
7340
7341 struct dentry *trace_instance_dir;
7342
7343 static void
7344 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7345
7346 static int
7347 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7348 {
7349 enum ring_buffer_flags rb_flags;
7350
7351 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7352
7353 buf->tr = tr;
7354
7355 buf->buffer = ring_buffer_alloc(size, rb_flags);
7356 if (!buf->buffer)
7357 return -ENOMEM;
7358
7359 buf->data = alloc_percpu(struct trace_array_cpu);
7360 if (!buf->data) {
7361 ring_buffer_free(buf->buffer);
7362 return -ENOMEM;
7363 }
7364
7365 /* Allocate the first page for all buffers */
7366 set_buffer_entries(&tr->trace_buffer,
7367 ring_buffer_size(tr->trace_buffer.buffer, 0));
7368
7369 return 0;
7370 }
7371
7372 static int allocate_trace_buffers(struct trace_array *tr, int size)
7373 {
7374 int ret;
7375
7376 ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7377 if (ret)
7378 return ret;
7379
7380 #ifdef CONFIG_TRACER_MAX_TRACE
7381 ret = allocate_trace_buffer(tr, &tr->max_buffer,
7382 allocate_snapshot ? size : 1);
7383 if (WARN_ON(ret)) {
7384 ring_buffer_free(tr->trace_buffer.buffer);
7385 free_percpu(tr->trace_buffer.data);
7386 return -ENOMEM;
7387 }
7388 tr->allocated_snapshot = allocate_snapshot;
7389
7390 /*
7391 * Only the top level trace array gets its snapshot allocated
7392 * from the kernel command line.
7393 */
7394 allocate_snapshot = false;
7395 #endif
7396 return 0;
7397 }
7398
7399 static void free_trace_buffer(struct trace_buffer *buf)
7400 {
7401 if (buf->buffer) {
7402 ring_buffer_free(buf->buffer);
7403 buf->buffer = NULL;
7404 free_percpu(buf->data);
7405 buf->data = NULL;
7406 }
7407 }
7408
7409 static void free_trace_buffers(struct trace_array *tr)
7410 {
7411 if (!tr)
7412 return;
7413
7414 free_trace_buffer(&tr->trace_buffer);
7415
7416 #ifdef CONFIG_TRACER_MAX_TRACE
7417 free_trace_buffer(&tr->max_buffer);
7418 #endif
7419 }
7420
7421 static void init_trace_flags_index(struct trace_array *tr)
7422 {
7423 int i;
7424
7425 /* Used by the trace options files */
7426 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7427 tr->trace_flags_index[i] = i;
7428 }
7429
7430 static void __update_tracer_options(struct trace_array *tr)
7431 {
7432 struct tracer *t;
7433
7434 for (t = trace_types; t; t = t->next)
7435 add_tracer_options(tr, t);
7436 }
7437
7438 static void update_tracer_options(struct trace_array *tr)
7439 {
7440 mutex_lock(&trace_types_lock);
7441 __update_tracer_options(tr);
7442 mutex_unlock(&trace_types_lock);
7443 }
7444
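/*
 * Back end for mkdir in the tracefs "instances" directory. Creating a
 * directory there (e.g. mkdir instances/foo under the tracefs mount)
 * allocates a new trace_array with its own ring buffer, event and
 * option files, independent of the top level tracer.
 */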
7445 static int instance_mkdir(const char *name)
7446 {
7447 struct trace_array *tr;
7448 int ret;
7449
7450 mutex_lock(&trace_types_lock);
7451
7452 ret = -EEXIST;
7453 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7454 if (tr->name && strcmp(tr->name, name) == 0)
7455 goto out_unlock;
7456 }
7457
7458 ret = -ENOMEM;
7459 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7460 if (!tr)
7461 goto out_unlock;
7462
7463 tr->name = kstrdup(name, GFP_KERNEL);
7464 if (!tr->name)
7465 goto out_free_tr;
7466
7467 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7468 goto out_free_tr;
7469
7470 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7471
7472 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7473
7474 raw_spin_lock_init(&tr->start_lock);
7475
7476 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7477
7478 tr->current_trace = &nop_trace;
7479
7480 INIT_LIST_HEAD(&tr->systems);
7481 INIT_LIST_HEAD(&tr->events);
7482
7483 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7484 goto out_free_tr;
7485
7486 tr->dir = tracefs_create_dir(name, trace_instance_dir);
7487 if (!tr->dir)
7488 goto out_free_tr;
7489
7490 ret = event_trace_add_tracer(tr->dir, tr);
7491 if (ret) {
7492 tracefs_remove_recursive(tr->dir);
7493 goto out_free_tr;
7494 }
7495
7496 ftrace_init_trace_array(tr);
7497
7498 init_tracer_tracefs(tr, tr->dir);
7499 init_trace_flags_index(tr);
7500 __update_tracer_options(tr);
7501
7502 list_add(&tr->list, &ftrace_trace_arrays);
7503
7504 mutex_unlock(&trace_types_lock);
7505
7506 return 0;
7507
7508 out_free_tr:
7509 free_trace_buffers(tr);
7510 free_cpumask_var(tr->tracing_cpumask);
7511 kfree(tr->name);
7512 kfree(tr);
7513
7514 out_unlock:
7515 mutex_unlock(&trace_types_lock);
7516
7517 return ret;
7518
7519 }
7520
7521 static int instance_rmdir(const char *name)
7522 {
7523 struct trace_array *tr;
7524 int found = 0;
7525 int ret;
7526 int i;
7527
7528 mutex_lock(&trace_types_lock);
7529
7530 ret = -ENODEV;
7531 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7532 if (tr->name && strcmp(tr->name, name) == 0) {
7533 found = 1;
7534 break;
7535 }
7536 }
7537 if (!found)
7538 goto out_unlock;
7539
7540 ret = -EBUSY;
7541 if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7542 goto out_unlock;
7543
7544 list_del(&tr->list);
7545
7546 /* Disable all the flags that were enabled coming in */
7547 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7548 if ((1 << i) & ZEROED_TRACE_FLAGS)
7549 set_tracer_flag(tr, 1 << i, 0);
7550 }
7551
7552 tracing_set_nop(tr);
7553 event_trace_del_tracer(tr);
7554 ftrace_clear_pids(tr);
7555 ftrace_destroy_function_files(tr);
7556 tracefs_remove_recursive(tr->dir);
7557 free_trace_buffers(tr);
7558
7559 for (i = 0; i < tr->nr_topts; i++) {
7560 kfree(tr->topts[i].topts);
7561 }
7562 kfree(tr->topts);
7563
7564 kfree(tr->name);
7565 kfree(tr);
7566
7567 ret = 0;
7568
7569 out_unlock:
7570 mutex_unlock(&trace_types_lock);
7571
7572 return ret;
7573 }
7574
7575 static __init void create_trace_instances(struct dentry *d_tracer)
7576 {
7577 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7578 instance_mkdir,
7579 instance_rmdir);
7580 if (WARN_ON(!trace_instance_dir))
7581 return;
7582 }
7583
7584 static void
7585 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7586 {
7587 int cpu;
7588
7589 trace_create_file("available_tracers", 0444, d_tracer,
7590 tr, &show_traces_fops);
7591
7592 trace_create_file("current_tracer", 0644, d_tracer,
7593 tr, &set_tracer_fops);
7594
7595 trace_create_file("tracing_cpumask", 0644, d_tracer,
7596 tr, &tracing_cpumask_fops);
7597
7598 trace_create_file("trace_options", 0644, d_tracer,
7599 tr, &tracing_iter_fops);
7600
7601 trace_create_file("trace", 0644, d_tracer,
7602 tr, &tracing_fops);
7603
7604 trace_create_file("trace_pipe", 0444, d_tracer,
7605 tr, &tracing_pipe_fops);
7606
7607 trace_create_file("buffer_size_kb", 0644, d_tracer,
7608 tr, &tracing_entries_fops);
7609
7610 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7611 tr, &tracing_total_entries_fops);
7612
7613 trace_create_file("free_buffer", 0200, d_tracer,
7614 tr, &tracing_free_buffer_fops);
7615
7616 trace_create_file("trace_marker", 0220, d_tracer,
7617 tr, &tracing_mark_fops);
7618
7619 trace_create_file("trace_marker_raw", 0220, d_tracer,
7620 tr, &tracing_mark_raw_fops);
7621
7622 trace_create_file("trace_clock", 0644, d_tracer, tr,
7623 &trace_clock_fops);
7624
7625 trace_create_file("tracing_on", 0644, d_tracer,
7626 tr, &rb_simple_fops);
7627
7628 create_trace_options_dir(tr);
7629
7630 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7631 trace_create_file("tracing_max_latency", 0644, d_tracer,
7632 &tr->max_latency, &tracing_max_lat_fops);
7633 #endif
7634
7635 if (ftrace_create_function_files(tr, d_tracer))
7636 WARN(1, "Could not allocate function filter files");
7637
7638 #ifdef CONFIG_TRACER_SNAPSHOT
7639 trace_create_file("snapshot", 0644, d_tracer,
7640 tr, &snapshot_fops);
7641 #endif
7642
7643 for_each_tracing_cpu(cpu)
7644 tracing_init_tracefs_percpu(tr, cpu);
7645
7646 ftrace_init_tracefs(tr, d_tracer);
7647 }
7648
7649 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7650 {
7651 struct vfsmount *mnt;
7652 struct file_system_type *type;
7653
7654 /*
7655 * To maintain backward compatibility for tools that mount
7656 * debugfs to get to the tracing facility, tracefs is automatically
7657 * mounted to the debugfs/tracing directory.
7658 */
7659 type = get_fs_type("tracefs");
7660 if (!type)
7661 return NULL;
7662 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7663 put_filesystem(type);
7664 if (IS_ERR(mnt))
7665 return NULL;
7666 mntget(mnt);
7667
7668 return mnt;
7669 }
7670
7671 /**
7672 * tracing_init_dentry - initialize top level trace array
7673 *
7674 * This is called when creating files or directories in the tracing
7675 * directory. It is called via fs_initcall() by any of the boot up code
7676 * and expects to return the dentry of the top level tracing directory.
7677 */
7678 struct dentry *tracing_init_dentry(void)
7679 {
7680 struct trace_array *tr = &global_trace;
7681
7682 /* The top level trace array uses NULL as parent */
7683 if (tr->dir)
7684 return NULL;
7685
7686 if (WARN_ON(!tracefs_initialized()) ||
7687 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7688 WARN_ON(!debugfs_initialized())))
7689 return ERR_PTR(-ENODEV);
7690
7691 /*
7692 * As there may still be users that expect the tracing
7693 * files to exist in debugfs/tracing, we must automount
7694 * the tracefs file system there, so older tools still
7695 * work with the newer kernel.
7696 */
7697 tr->dir = debugfs_create_automount("tracing", NULL,
7698 trace_automount, NULL);
7699 if (!tr->dir) {
7700 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7701 return ERR_PTR(-ENOMEM);
7702 }
7703
7704 return NULL;
7705 }
7706
7707 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7708 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7709
7710 static void __init trace_enum_init(void)
7711 {
7712 int len;
7713
7714 len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7715 trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7716 }
7717
7718 #ifdef CONFIG_MODULES
7719 static void trace_module_add_enums(struct module *mod)
7720 {
7721 if (!mod->num_trace_enums)
7722 return;
7723
7724 /*
7725 * Modules with bad taint do not have events created, so do
7726 * not bother with enums either.
7727 */
7728 if (trace_module_has_bad_taint(mod))
7729 return;
7730
7731 trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7732 }
7733
7734 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7735 static void trace_module_remove_enums(struct module *mod)
7736 {
7737 union trace_enum_map_item *map;
7738 union trace_enum_map_item **last = &trace_enum_maps;
7739
7740 if (!mod->num_trace_enums)
7741 return;
7742
7743 mutex_lock(&trace_enum_mutex);
7744
7745 map = trace_enum_maps;
7746
7747 while (map) {
7748 if (map->head.mod == mod)
7749 break;
7750 map = trace_enum_jmp_to_tail(map);
7751 last = &map->tail.next;
7752 map = map->tail.next;
7753 }
7754 if (!map)
7755 goto out;
7756
7757 *last = trace_enum_jmp_to_tail(map)->tail.next;
7758 kfree(map);
7759 out:
7760 mutex_unlock(&trace_enum_mutex);
7761 }
7762 #else
7763 static inline void trace_module_remove_enums(struct module *mod) { }
7764 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7765
7766 static int trace_module_notify(struct notifier_block *self,
7767 unsigned long val, void *data)
7768 {
7769 struct module *mod = data;
7770
7771 switch (val) {
7772 case MODULE_STATE_COMING:
7773 trace_module_add_enums(mod);
7774 break;
7775 case MODULE_STATE_GOING:
7776 trace_module_remove_enums(mod);
7777 break;
7778 }
7779
7780 return 0;
7781 }
7782
7783 static struct notifier_block trace_module_nb = {
7784 .notifier_call = trace_module_notify,
7785 .priority = 0,
7786 };
7787 #endif /* CONFIG_MODULES */
7788
7789 static __init int tracer_init_tracefs(void)
7790 {
7791 struct dentry *d_tracer;
7792
7793 trace_access_lock_init();
7794
7795 d_tracer = tracing_init_dentry();
7796 if (IS_ERR(d_tracer))
7797 return 0;
7798
7799 init_tracer_tracefs(&global_trace, d_tracer);
7800 ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7801
7802 trace_create_file("tracing_thresh", 0644, d_tracer,
7803 &global_trace, &tracing_thresh_fops);
7804
7805 trace_create_file("README", 0444, d_tracer,
7806 NULL, &tracing_readme_fops);
7807
7808 trace_create_file("saved_cmdlines", 0444, d_tracer,
7809 NULL, &tracing_saved_cmdlines_fops);
7810
7811 trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7812 NULL, &tracing_saved_cmdlines_size_fops);
7813
7814 trace_enum_init();
7815
7816 trace_create_enum_file(d_tracer);
7817
7818 #ifdef CONFIG_MODULES
7819 register_module_notifier(&trace_module_nb);
7820 #endif
7821
7822 #ifdef CONFIG_DYNAMIC_FTRACE
7823 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7824 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7825 #endif
7826
7827 create_trace_instances(d_tracer);
7828
7829 update_tracer_options(&global_trace);
7830
7831 return 0;
7832 }
7833
7834 static int trace_panic_handler(struct notifier_block *this,
7835 unsigned long event, void *unused)
7836 {
7837 if (ftrace_dump_on_oops)
7838 ftrace_dump(ftrace_dump_on_oops);
7839 return NOTIFY_OK;
7840 }
7841
7842 static struct notifier_block trace_panic_notifier = {
7843 .notifier_call = trace_panic_handler,
7844 .next = NULL,
7845 .priority = 150 /* priority: INT_MAX >= x >= 0 */
7846 };
7847
7848 static int trace_die_handler(struct notifier_block *self,
7849 unsigned long val,
7850 void *data)
7851 {
7852 switch (val) {
7853 case DIE_OOPS:
7854 if (ftrace_dump_on_oops)
7855 ftrace_dump(ftrace_dump_on_oops);
7856 break;
7857 default:
7858 break;
7859 }
7860 return NOTIFY_OK;
7861 }
7862
7863 static struct notifier_block trace_die_notifier = {
7864 .notifier_call = trace_die_handler,
7865 .priority = 200
7866 };
7867
7868 /*
7869 * printk is set to a max of 1024; we really don't need it that big.
7870 * Nothing should be printing 1000 characters anyway.
7871 */
7872 #define TRACE_MAX_PRINT 1000
7873
7874 /*
7875 * Define here KERN_TRACE so that we have one place to modify
7876 * it if we decide to change what log level the ftrace dump
7877 * should be at.
7878 */
7879 #define KERN_TRACE KERN_EMERG
7880
7881 void
7882 trace_printk_seq(struct trace_seq *s)
7883 {
7884 /* Probably should print a warning here. */
7885 if (s->seq.len >= TRACE_MAX_PRINT)
7886 s->seq.len = TRACE_MAX_PRINT;
7887
7888 /*
7889 * More paranoid code. Although the buffer size is set to
7890 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7891 * an extra layer of protection.
7892 */
7893 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7894 s->seq.len = s->seq.size - 1;
7895
7896 /* should be zero ended, but we are paranoid. */
7897 s->buffer[s->seq.len] = 0;
7898
7899 printk(KERN_TRACE "%s", s->buffer);
7900
7901 trace_seq_init(s);
7902 }
7903
7904 void trace_init_global_iter(struct trace_iterator *iter)
7905 {
7906 iter->tr = &global_trace;
7907 iter->trace = iter->tr->current_trace;
7908 iter->cpu_file = RING_BUFFER_ALL_CPUS;
7909 iter->trace_buffer = &global_trace.trace_buffer;
7910
7911 if (iter->trace && iter->trace->open)
7912 iter->trace->open(iter);
7913
7914 /* Annotate start of buffers if we had overruns */
7915 if (ring_buffer_overruns(iter->trace_buffer->buffer))
7916 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7917
7918 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7919 if (trace_clocks[iter->tr->clock_id].in_ns)
7920 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7921 }
7922
7923 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7924 {
7925 /* use static because iter can be a bit big for the stack */
7926 static struct trace_iterator iter;
7927 static atomic_t dump_running;
7928 struct trace_array *tr = &global_trace;
7929 unsigned int old_userobj;
7930 unsigned long flags;
7931 int cnt = 0, cpu;
7932
7933 /* Only allow one dump user at a time. */
7934 if (atomic_inc_return(&dump_running) != 1) {
7935 atomic_dec(&dump_running);
7936 return;
7937 }
7938
7939 /*
7940 * Always turn off tracing when we dump.
7941 * We don't need to show trace output of what happens
7942 * between multiple crashes.
7943 *
7944 * If the user does a sysrq-z, then they can re-enable
7945 * tracing with echo 1 > tracing_on.
7946 */
7947 tracing_off();
7948
7949 local_irq_save(flags);
7950
7951 /* Simulate the iterator */
7952 trace_init_global_iter(&iter);
7953
7954 for_each_tracing_cpu(cpu) {
7955 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7956 }
7957
7958 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7959
7960 /* don't look at user memory in panic mode */
7961 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7962
7963 switch (oops_dump_mode) {
7964 case DUMP_ALL:
7965 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7966 break;
7967 case DUMP_ORIG:
7968 iter.cpu_file = raw_smp_processor_id();
7969 break;
7970 case DUMP_NONE:
7971 goto out_enable;
7972 default:
7973 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7974 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7975 }
7976
7977 printk(KERN_TRACE "Dumping ftrace buffer:\n");
7978
7979 /* Did function tracer already get disabled? */
7980 if (ftrace_is_dead()) {
7981 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7982 printk("# MAY BE MISSING FUNCTION EVENTS\n");
7983 }
7984
7985 /*
7986 * We need to stop all tracing on all CPUs to read
7987 * the next buffer. This is a bit expensive, but is
7988 * not done often. We fill all that we can read,
7989 * and then release the locks again.
7990 */
7991
7992 while (!trace_empty(&iter)) {
7993
7994 if (!cnt)
7995 printk(KERN_TRACE "---------------------------------\n");
7996
7997 cnt++;
7998
7999 /* reset all but tr, trace, and overruns */
8000 memset(&iter.seq, 0,
8001 sizeof(struct trace_iterator) -
8002 offsetof(struct trace_iterator, seq));
8003 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8004 iter.pos = -1;
8005
8006 if (trace_find_next_entry_inc(&iter) != NULL) {
8007 int ret;
8008
8009 ret = print_trace_line(&iter);
8010 if (ret != TRACE_TYPE_NO_CONSUME)
8011 trace_consume(&iter);
8012 }
8013 touch_nmi_watchdog();
8014
8015 trace_printk_seq(&iter.seq);
8016 }
8017
8018 if (!cnt)
8019 printk(KERN_TRACE " (ftrace buffer empty)\n");
8020 else
8021 printk(KERN_TRACE "---------------------------------\n");
8022
8023 out_enable:
8024 tr->trace_flags |= old_userobj;
8025
8026 for_each_tracing_cpu(cpu) {
8027 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8028 }
8029 atomic_dec(&dump_running);
8030 local_irq_restore(flags);
8031 }
8032 EXPORT_SYMBOL_GPL(ftrace_dump);
8033
8034 __init static int tracer_alloc_buffers(void)
8035 {
8036 int ring_buf_size;
8037 int ret = -ENOMEM;
8038
8039 /*
8040 * Make sure we don't accidentally add more trace options
8041 * than we have bits for.
8042 */
8043 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8044
8045 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8046 goto out;
8047
8048 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8049 goto out_free_buffer_mask;
8050
8051 /* Only allocate trace_printk buffers if a trace_printk exists */
8052 if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8053 /* Must be called before global_trace.buffer is allocated */
8054 trace_printk_init_buffers();
8055
8056 /* To save memory, keep the ring buffer size to its minimum */
8057 if (ring_buffer_expanded)
8058 ring_buf_size = trace_buf_size;
8059 else
8060 ring_buf_size = 1;
8061
8062 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8063 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8064
8065 raw_spin_lock_init(&global_trace.start_lock);
8066
8067 /*
8068 * The prepare callbacks allocate some memory for the ring buffer. We
8069 * don't free the buffer if the CPU goes down. If we were to free
8070 * the buffer, then the user would lose any trace that was in the
8071 * buffer. The memory will be removed once the "instance" is removed.
8072 */
8073 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8074 "trace/RB:preapre", trace_rb_cpu_prepare,
8075 NULL);
8076 if (ret < 0)
8077 goto out_free_cpumask;
8078 /* Used for event triggers */
8079 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8080 if (!temp_buffer)
8081 goto out_rm_hp_state;
8082
8083 if (trace_create_savedcmd() < 0)
8084 goto out_free_temp_buffer;
8085
8086 /* TODO: make the number of buffers hot pluggable with CPUS */
8087 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8088 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8089 WARN_ON(1);
8090 goto out_free_savedcmd;
8091 }
8092
8093 if (global_trace.buffer_disabled)
8094 tracing_off();
8095
8096 if (trace_boot_clock) {
8097 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8098 if (ret < 0)
8099 pr_warn("Trace clock %s not defined, going back to default\n",
8100 trace_boot_clock);
8101 }
8102
8103 /*
8104 * register_tracer() might reference current_trace, so it
8105 * needs to be set before we register anything. This is
8106 * just a bootstrap of current_trace anyway.
8107 */
8108 global_trace.current_trace = &nop_trace;
8109
8110 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8111
8112 ftrace_init_global_array_ops(&global_trace);
8113
8114 init_trace_flags_index(&global_trace);
8115
8116 register_tracer(&nop_trace);
8117
8118 /* Function tracing may start here (via kernel command line) */
8119 init_function_trace();
8120
8121 /* All seems OK, enable tracing */
8122 tracing_disabled = 0;
8123
8124 atomic_notifier_chain_register(&panic_notifier_list,
8125 &trace_panic_notifier);
8126
8127 register_die_notifier(&trace_die_notifier);
8128
8129 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8130
8131 INIT_LIST_HEAD(&global_trace.systems);
8132 INIT_LIST_HEAD(&global_trace.events);
8133 list_add(&global_trace.list, &ftrace_trace_arrays);
8134
8135 apply_trace_boot_options();
8136
8137 register_snapshot_cmd();
8138
8139 return 0;
8140
8141 out_free_savedcmd:
8142 free_saved_cmdlines_buffer(savedcmd);
8143 out_free_temp_buffer:
8144 ring_buffer_free(temp_buffer);
8145 out_rm_hp_state:
8146 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8147 out_free_cpumask:
8148 free_cpumask_var(global_trace.tracing_cpumask);
8149 out_free_buffer_mask:
8150 free_cpumask_var(tracing_buffer_mask);
8151 out:
8152 return ret;
8153 }
8154
8155 void __init early_trace_init(void)
8156 {
8157 if (tracepoint_printk) {
8158 tracepoint_print_iter =
8159 kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8160 if (WARN_ON(!tracepoint_print_iter))
8161 tracepoint_printk = 0;
8162 else
8163 static_key_enable(&tracepoint_printk_key.key);
8164 }
8165 tracer_alloc_buffers();
8166 }
8167
8168 void __init trace_init(void)
8169 {
8170 trace_event_init();
8171 }
8172
8173 __init static int clear_boot_tracer(void)
8174 {
8175 /*
8176 * The default bootup tracer name lives in an init section and will
8177 * be freed after boot. This function is called at late_initcall time;
8178 * if the boot tracer was never registered by then, clear it out to
8179 * prevent a later registration from accessing the buffer that is
8180 * about to be freed.
8181 */
8182 if (!default_bootup_tracer)
8183 return 0;
8184
8185 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8186 default_bootup_tracer);
8187 default_bootup_tracer = NULL;
8188
8189 return 0;
8190 }
8191
8192 fs_initcall(tracer_init_tracefs);
8193 late_initcall(clear_boot_tracer);