kernel/trace/trace.c
1 /*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally taken from the RT patch by:
8 * Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code from the latency_tracer, that is:
11 * Copyright (C) 2004-2006 Ingo Molnar
12 * Copyright (C) 2004 Nadia Yvette Chambers
13 */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45
46 #include "trace.h"
47 #include "trace_output.h"
48
49 /*
50 * On boot up, the ring buffer is set to the minimum size, so that
51 * we do not waste memory on systems that are not using tracing.
52 */
53 bool ring_buffer_expanded;
54
55 /*
56 * We need to change this state when a selftest is running.
57 * A selftest will look into the ring-buffer to count the
58 * entries inserted during the selftest, although some concurrent
59 * insertions into the ring-buffer, such as trace_printk(), could occur
60 * at the same time, giving false positive or negative results.
61 */
62 static bool __read_mostly tracing_selftest_running;
63
64 /*
65 * If a tracer is running, we do not want to run SELFTEST.
66 */
67 bool __read_mostly tracing_selftest_disabled;
68
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76 { }
77 };
78
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82 return 0;
83 }
84
85 /*
86 * To prevent the comm cache from being overwritten when no
87 * tracing is active, only save the comm when a trace event
88 * occurred.
89 */
90 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
91
92 /*
93 * Kill all tracing for good (never come back).
94 * It is initialized to 1 but will turn to zero if the initialization
95 * of the tracer is successful. But that is the only place that sets
96 * this back to zero.
97 */
98 static int tracing_disabled = 1;
99
100 cpumask_var_t __read_mostly tracing_buffer_mask;
101
102 /*
103 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104 *
105 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106 * is set, then ftrace_dump is called. This will output the contents
107 * of the ftrace buffers to the console. This is very useful for
108 * capturing traces that lead to crashes and outputting them to a
109 * serial console.
110 *
111 * It is off by default, but you can enable it either by specifying
112 * "ftrace_dump_on_oops" on the kernel command line, or by setting
113 * /proc/sys/kernel/ftrace_dump_on_oops
114 * Set 1 if you want to dump buffers of all CPUs
115 * Set 2 if you want to dump the buffer of the CPU that triggered oops
116 */
117
118 enum ftrace_dump_mode ftrace_dump_on_oops;
119
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122
123 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
124 /* Map of enums to their values, for "eval_map" file */
125 struct trace_eval_map_head {
126 struct module *mod;
127 unsigned long length;
128 };
129
130 union trace_eval_map_item;
131
132 struct trace_eval_map_tail {
133 /*
134 * "end" is first and points to NULL as it must be different
135 * than "mod" or "eval_string"
136 */
137 union trace_eval_map_item *next;
138 const char *end; /* points to NULL */
139 };
140
141 static DEFINE_MUTEX(trace_eval_mutex);
142
143 /*
144 * The trace_eval_maps are saved in an array with two extra elements,
145 * one at the beginning, and one at the end. The beginning item contains
146 * the count of the saved maps (head.length), and the module they
147 * belong to if not built in (head.mod). The ending item contains a
148 * pointer to the next array of saved eval_map items.
149 */
150 union trace_eval_map_item {
151 struct trace_eval_map map;
152 struct trace_eval_map_head head;
153 struct trace_eval_map_tail tail;
154 };
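/*
 * Illustrative sketch of one saved array as described above (indices are
 * relative to the array, not taken from the code):
 *
 *   item[0]        head  (length = N, mod = owning module or NULL)
 *   item[1..N]     map   (the N trace_eval_map entries)
 *   item[N + 1]    tail  (next = pointer to the next saved array, end = NULL)
 */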
155
156 static union trace_eval_map_item *trace_eval_maps;
157 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
158
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160
161 #define MAX_TRACER_SIZE 100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164
165 static bool allocate_snapshot;
166
167 static int __init set_cmdline_ftrace(char *str)
168 {
169 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170 default_bootup_tracer = bootup_tracer_buf;
171 /* We are using ftrace early, expand it */
172 ring_buffer_expanded = true;
173 return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179 if (*str++ != '=' || !*str) {
180 ftrace_dump_on_oops = DUMP_ALL;
181 return 1;
182 }
183
184 if (!strcmp("orig_cpu", str)) {
185 ftrace_dump_on_oops = DUMP_ORIG;
186 return 1;
187 }
188
189 return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192
193 static int __init stop_trace_on_warning(char *str)
194 {
195 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196 __disable_trace_on_warning = 1;
197 return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200
201 static int __init boot_alloc_snapshot(char *str)
202 {
203 allocate_snapshot = true;
204 /* We also need the main ring buffer expanded */
205 ring_buffer_expanded = true;
206 return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209
210
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212
213 static int __init set_trace_boot_options(char *str)
214 {
215 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216 return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222
223 static int __init set_trace_boot_clock(char *str)
224 {
225 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226 trace_boot_clock = trace_boot_clock_buf;
227 return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230
231 static int __init set_tracepoint_printk(char *str)
232 {
233 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234 tracepoint_printk = 1;
235 return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238
239 unsigned long long ns2usecs(u64 nsec)
240 {
241 nsec += 500;
242 do_div(nsec, 1000);
243 return nsec;
244 }
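/*
 * Worked example of the rounding above: the +500 before the divide rounds
 * to the nearest microsecond, so 1499 ns becomes 1 us while 1500 ns
 * becomes 2 us.
 */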
245
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS \
248 (FUNCTION_DEFAULT_FLAGS | \
249 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
250 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
251 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
252 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
256 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
261
262 /*
263 * The global_trace is the descriptor that holds the top-level tracing
264 * buffers for the live tracing.
265 */
266 static struct trace_array global_trace = {
267 .trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269
270 LIST_HEAD(ftrace_trace_arrays);
271
272 int trace_array_get(struct trace_array *this_tr)
273 {
274 struct trace_array *tr;
275 int ret = -ENODEV;
276
277 mutex_lock(&trace_types_lock);
278 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279 if (tr == this_tr) {
280 tr->ref++;
281 ret = 0;
282 break;
283 }
284 }
285 mutex_unlock(&trace_types_lock);
286
287 return ret;
288 }
289
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292 WARN_ON(!this_tr->ref);
293 this_tr->ref--;
294 }
295
296 void trace_array_put(struct trace_array *this_tr)
297 {
298 mutex_lock(&trace_types_lock);
299 __trace_array_put(this_tr);
300 mutex_unlock(&trace_types_lock);
301 }
302
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304 struct ring_buffer *buffer,
305 struct ring_buffer_event *event)
306 {
307 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308 !filter_match_preds(call->filter, rec)) {
309 __trace_event_discard_commit(buffer, event);
310 return 1;
311 }
312
313 return 0;
314 }
315
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318 vfree(pid_list->pids);
319 kfree(pid_list);
320 }
321
322 /**
323 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324 * @filtered_pids: The list of pids to check
325 * @search_pid: The PID to find in @filtered_pids
326 *
327 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
328 */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332 /*
333 * If pid_max changed after filtered_pids was created, we
334 * by default ignore all pids greater than the previous pid_max.
335 */
336 if (search_pid >= filtered_pids->pid_max)
337 return false;
338
339 return test_bit(search_pid, filtered_pids->pids);
340 }
341
342 /**
343 * trace_ignore_this_task - should a task be ignored for tracing
344 * @filtered_pids: The list of pids to check
345 * @task: The task that should be ignored if not filtered
346 *
347 * Checks if @task should be traced or not from @filtered_pids.
348 * Returns true if @task should *NOT* be traced.
349 * Returns false if @task should be traced.
350 */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354 /*
355 * Return false, because if filtered_pids does not exist,
356 * all pids are good to trace.
357 */
358 if (!filtered_pids)
359 return false;
360
361 return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
363
364 /**
365 * trace_filter_add_remove_task - Add or remove a task from a pid_list
366 * @pid_list: The list to modify
367 * @self: The current task for fork or NULL for exit
368 * @task: The task to add or remove
369 *
370 * When adding a task, if @self is defined, the task is only added if @self
371 * is also included in @pid_list. This happens on fork and tasks should
372 * only be added when the parent is listed. If @self is NULL, then the
373 * @task pid will be removed from the list, which would happen on exit
374 * of a task.
375 */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377 struct task_struct *self,
378 struct task_struct *task)
379 {
380 if (!pid_list)
381 return;
382
383 /* For forks, we only add if the forking task is listed */
384 if (self) {
385 if (!trace_find_filtered_pid(pid_list, self->pid))
386 return;
387 }
388
389 /* Sorry, but we don't support pid_max changing after setting */
390 if (task->pid >= pid_list->pid_max)
391 return;
392
393 /* "self" is set for forks, and NULL for exits */
394 if (self)
395 set_bit(task->pid, pid_list->pids);
396 else
397 clear_bit(task->pid, pid_list->pids);
398 }
399
400 /**
401 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402 * @pid_list: The pid list to show
403 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
404 * @pos: The position of the file
405 *
406 * This is used by the seq_file "next" operation to iterate the pids
407 * listed in a trace_pid_list structure.
408 *
409 * Returns the pid+1 as we want to display pid of zero, but NULL would
410 * stop the iteration.
411 */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414 unsigned long pid = (unsigned long)v;
415
416 (*pos)++;
417
418 /* pid already is +1 of the actual previous bit */
419 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420
421 /* Return pid + 1 to allow zero to be represented */
422 if (pid < pid_list->pid_max)
423 return (void *)(pid + 1);
424
425 return NULL;
426 }
427
428 /**
429 * trace_pid_start - Used for seq_file to start reading pid lists
430 * @pid_list: The pid list to show
431 * @pos: The position of the file
432 *
433 * This is used by seq_file "start" operation to start the iteration
434 * of listing pids.
435 *
436 * Returns the pid+1 as we want to display pid of zero, but NULL would
437 * stop the iteration.
438 */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441 unsigned long pid;
442 loff_t l = 0;
443
444 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445 if (pid >= pid_list->pid_max)
446 return NULL;
447
448 /* Return pid + 1 so that zero can be the exit value */
449 for (pid++; pid && l < *pos;
450 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451 ;
452 return (void *)pid;
453 }
454
455 /**
456 * trace_pid_show - show the current pid in seq_file processing
457 * @m: The seq_file structure to write into
458 * @v: A void pointer of the pid (+1) value to display
459 *
460 * Can be directly used by seq_file operations to display the current
461 * pid value.
462 */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465 unsigned long pid = (unsigned long)v - 1;
466
467 seq_printf(m, "%lu\n", pid);
468 return 0;
469 }
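/*
 * Worked example of the +1 encoding shared by the seq_file helpers above:
 * pid 0 is handed out by trace_pid_start()/trace_pid_next() as (void *)1
 * so it is not mistaken for the NULL end-of-iteration marker, and
 * trace_pid_show() subtracts 1 again before printing "0".
 */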
470
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE 127
473
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475 struct trace_pid_list **new_pid_list,
476 const char __user *ubuf, size_t cnt)
477 {
478 struct trace_pid_list *pid_list;
479 struct trace_parser parser;
480 unsigned long val;
481 int nr_pids = 0;
482 ssize_t read = 0;
483 ssize_t ret = 0;
484 loff_t pos;
485 pid_t pid;
486
487 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488 return -ENOMEM;
489
490 /*
491 * Always recreate a new array. The write is an all or nothing
492 * operation. Always create a new array when adding new pids by
493 * the user. If the operation fails, then the current list is
494 * not modified.
495 */
496 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497 if (!pid_list)
498 return -ENOMEM;
499
500 pid_list->pid_max = READ_ONCE(pid_max);
501
502 /* Only truncating will shrink pid_max */
503 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
504 pid_list->pid_max = filtered_pids->pid_max;
505
506 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
507 if (!pid_list->pids) {
508 kfree(pid_list);
509 return -ENOMEM;
510 }
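/*
 * The bitmap allocated above holds one bit per possible pid:
 * (pid_max + 7) >> 3 bytes, e.g. 4096 bytes for the default pid_max of
 * 32768 (illustrative numbers only).
 */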
511
512 if (filtered_pids) {
513 /* copy the current bits to the new max */
514 for_each_set_bit(pid, filtered_pids->pids,
515 filtered_pids->pid_max) {
516 set_bit(pid, pid_list->pids);
517 nr_pids++;
518 }
519 }
520
521 while (cnt > 0) {
522
523 pos = 0;
524
525 ret = trace_get_user(&parser, ubuf, cnt, &pos);
526 if (ret < 0 || !trace_parser_loaded(&parser))
527 break;
528
529 read += ret;
530 ubuf += ret;
531 cnt -= ret;
532
533 parser.buffer[parser.idx] = 0;
534
535 ret = -EINVAL;
536 if (kstrtoul(parser.buffer, 0, &val))
537 break;
538 if (val >= pid_list->pid_max)
539 break;
540
541 pid = (pid_t)val;
542
543 set_bit(pid, pid_list->pids);
544 nr_pids++;
545
546 trace_parser_clear(&parser);
547 ret = 0;
548 }
549 trace_parser_put(&parser);
550
551 if (ret < 0) {
552 trace_free_pid_list(pid_list);
553 return ret;
554 }
555
556 if (!nr_pids) {
557 /* Cleared the list of pids */
558 trace_free_pid_list(pid_list);
559 read = ret;
560 pid_list = NULL;
561 }
562
563 *new_pid_list = pid_list;
564
565 return read;
566 }
567
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570 u64 ts;
571
572 /* Early boot up does not have a buffer yet */
573 if (!buf->buffer)
574 return trace_clock_local();
575
576 ts = ring_buffer_time_stamp(buf->buffer, cpu);
577 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578
579 return ts;
580 }
581
582 u64 ftrace_now(int cpu)
583 {
584 return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586
587 /**
588 * tracing_is_enabled - Show if global_trace has been disabled
589 *
590 * Shows if the global trace has been enabled or not. It uses the
591 * mirror flag "buffer_disabled" to be used in fast paths such as for
592 * the irqsoff tracer. But it may be inaccurate due to races. If you
593 * need to know the accurate state, use tracing_is_on() which is a little
594 * slower, but accurate.
595 */
596 int tracing_is_enabled(void)
597 {
598 /*
599 * For quick access (irqsoff uses this in fast path), just
600 * return the mirror variable of the state of the ring buffer.
601 * It's a little racy, but we don't really care.
602 */
603 smp_rmb();
604 return !global_trace.buffer_disabled;
605 }
606
607 /*
608 * trace_buf_size is the size in bytes that is allocated
609 * for a buffer. Note, the number of bytes is always rounded
610 * to page size.
611 *
612 * This number is purposely set to a low number of 16384.
613 * If the dump on oops happens, it will be much appreciated
614 * to not have to wait for all that output. Anyway this can be
615 * boot time and run time configurable.
616 */
617 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
618
619 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
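/*
 * For scale: 1441792 bytes is 16384 entries of 88 bytes, roughly 1.4 MB,
 * before the buffer is expanded at run time or via the "trace_buf_size="
 * boot parameter (parsed by set_buf_size() below, which accepts memparse()
 * suffixes, e.g. "trace_buf_size=16M").
 */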
620
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer *trace_types __read_mostly;
623
624 /*
625 * trace_types_lock is used to protect the trace_types list.
626 */
627 DEFINE_MUTEX(trace_types_lock);
628
629 /*
630 * serialize the access of the ring buffer
631 *
632 * ring buffer serializes readers, but it is low level protection.
633 * The validity of the events (which are returned by ring_buffer_peek() etc.)
634 * is not protected by the ring buffer.
635 *
636 * The content of events may become garbage if we allow other processes to consume
637 * these events concurrently:
638 * A) the page of the consumed events may become a normal page
639 * (not reader page) in the ring buffer, and this page will be rewritten
640 * by the event producer.
641 * B) The page of the consumed events may become a page for splice_read,
642 * and this page will be returned to system.
643 *
644 * These primitives allow multiple processes to access different cpu ring buffers
645 * concurrently.
646 *
647 * These primitives don't distinguish read-only and read-consume access.
648 * Multiple read-only accesses are also serialized.
649 */
650
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654
655 static inline void trace_access_lock(int cpu)
656 {
657 if (cpu == RING_BUFFER_ALL_CPUS) {
658 /* gain it for accessing the whole ring buffer. */
659 down_write(&all_cpu_access_lock);
660 } else {
661 /* gain it for accessing a cpu ring buffer. */
662
663 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664 down_read(&all_cpu_access_lock);
665
666 /* Secondly block other access to this @cpu ring buffer. */
667 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668 }
669 }
670
671 static inline void trace_access_unlock(int cpu)
672 {
673 if (cpu == RING_BUFFER_ALL_CPUS) {
674 up_write(&all_cpu_access_lock);
675 } else {
676 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677 up_read(&all_cpu_access_lock);
678 }
679 }
680
681 static inline void trace_access_lock_init(void)
682 {
683 int cpu;
684
685 for_each_possible_cpu(cpu)
686 mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688
689 #else
690
691 static DEFINE_MUTEX(access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695 (void)cpu;
696 mutex_lock(&access_lock);
697 }
698
699 static inline void trace_access_unlock(int cpu)
700 {
701 (void)cpu;
702 mutex_unlock(&access_lock);
703 }
704
705 static inline void trace_access_lock_init(void)
706 {
707 }
708
709 #endif
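/*
 * Sketch of the intended usage of the helpers above (reader side):
 *
 *	trace_access_lock(cpu);
 *	... consume or peek at the ring buffer for @cpu ...
 *	trace_access_unlock(cpu);
 *
 * with RING_BUFFER_ALL_CPUS taking the lock exclusively for whole-buffer
 * operations.
 */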
710
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713 unsigned long flags,
714 int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716 struct ring_buffer *buffer,
717 unsigned long flags,
718 int skip, int pc, struct pt_regs *regs);
719
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722 unsigned long flags,
723 int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727 struct ring_buffer *buffer,
728 unsigned long flags,
729 int skip, int pc, struct pt_regs *regs)
730 {
731 }
732
733 #endif
734
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737 int type, unsigned long flags, int pc)
738 {
739 struct trace_entry *ent = ring_buffer_event_data(event);
740
741 tracing_generic_entry_update(ent, flags, pc);
742 ent->type = type;
743 }
744
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747 int type,
748 unsigned long len,
749 unsigned long flags, int pc)
750 {
751 struct ring_buffer_event *event;
752
753 event = ring_buffer_lock_reserve(buffer, len);
754 if (event != NULL)
755 trace_event_setup(event, type, flags, pc);
756
757 return event;
758 }
759
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762 if (tr->trace_buffer.buffer)
763 ring_buffer_record_on(tr->trace_buffer.buffer);
764 /*
765 * This flag is looked at when buffers haven't been allocated
766 * yet, or by some tracers (like irqsoff), that just want to
767 * know if the ring buffer has been disabled, but it can handle
768 * races of where it gets disabled but we still do a record.
769 * As the check is in the fast path of the tracers, it is more
770 * important to be fast than accurate.
771 */
772 tr->buffer_disabled = 0;
773 /* Make the flag seen by readers */
774 smp_wmb();
775 }
776
777 /**
778 * tracing_on - enable tracing buffers
779 *
780 * This function enables tracing buffers that may have been
781 * disabled with tracing_off.
782 */
783 void tracing_on(void)
784 {
785 tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788
789
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793 __this_cpu_write(trace_taskinfo_save, true);
794
795 /* If this is the temp buffer, we need to commit fully */
796 if (this_cpu_read(trace_buffered_event) == event) {
797 /* Length is in event->array[0] */
798 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799 /* Release the temp buffer */
800 this_cpu_dec(trace_buffered_event_cnt);
801 } else
802 ring_buffer_unlock_commit(buffer, event);
803 }
804
805 /**
806 * __trace_puts - write a constant string into the trace buffer.
807 * @ip: The address of the caller
808 * @str: The constant string to write
809 * @size: The size of the string.
810 */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813 struct ring_buffer_event *event;
814 struct ring_buffer *buffer;
815 struct print_entry *entry;
816 unsigned long irq_flags;
817 int alloc;
818 int pc;
819
820 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821 return 0;
822
823 pc = preempt_count();
824
825 if (unlikely(tracing_selftest_running || tracing_disabled))
826 return 0;
827
828 alloc = sizeof(*entry) + size + 2; /* possible \n added */
829
830 local_save_flags(irq_flags);
831 buffer = global_trace.trace_buffer.buffer;
832 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833 irq_flags, pc);
834 if (!event)
835 return 0;
836
837 entry = ring_buffer_event_data(event);
838 entry->ip = ip;
839
840 memcpy(&entry->buf, str, size);
841
842 /* Add a newline if necessary */
843 if (entry->buf[size - 1] != '\n') {
844 entry->buf[size] = '\n';
845 entry->buf[size + 1] = '\0';
846 } else
847 entry->buf[size] = '\0';
848
849 __buffer_unlock_commit(buffer, event);
850 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851
852 return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
855
856 /**
857 * __trace_bputs - write the pointer to a constant string into trace buffer
858 * @ip: The address of the caller
859 * @str: The constant string to write to the buffer to
860 */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863 struct ring_buffer_event *event;
864 struct ring_buffer *buffer;
865 struct bputs_entry *entry;
866 unsigned long irq_flags;
867 int size = sizeof(struct bputs_entry);
868 int pc;
869
870 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871 return 0;
872
873 pc = preempt_count();
874
875 if (unlikely(tracing_selftest_running || tracing_disabled))
876 return 0;
877
878 local_save_flags(irq_flags);
879 buffer = global_trace.trace_buffer.buffer;
880 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881 irq_flags, pc);
882 if (!event)
883 return 0;
884
885 entry = ring_buffer_event_data(event);
886 entry->ip = ip;
887 entry->str = str;
888
889 __buffer_unlock_commit(buffer, event);
890 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891
892 return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
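/*
 * Note: callers normally go through the trace_puts() macro rather than
 * calling these directly; it uses __builtin_constant_p() to route
 * compile-time constant strings to __trace_bputs() (which only records a
 * pointer) and everything else to __trace_puts() (which copies the string).
 */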
895
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 void tracing_snapshot_instance(struct trace_array *tr)
898 {
899 struct tracer *tracer = tr->current_trace;
900 unsigned long flags;
901
902 if (in_nmi()) {
903 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904 internal_trace_puts("*** snapshot is being ignored ***\n");
905 return;
906 }
907
908 if (!tr->allocated_snapshot) {
909 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910 internal_trace_puts("*** stopping trace here! ***\n");
911 tracing_off();
912 return;
913 }
914
915 /* Note, snapshot can not be used when the tracer uses it */
916 if (tracer->use_max_tr) {
917 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919 return;
920 }
921
922 local_irq_save(flags);
923 update_max_tr(tr, current, smp_processor_id());
924 local_irq_restore(flags);
925 }
926
927 /**
928 * tracing_snapshot - take a snapshot of the current buffer.
929 *
930 * This causes a swap between the snapshot buffer and the current live
931 * tracing buffer. You can use this to take snapshots of the live
932 * trace when some condition is triggered, but continue to trace.
933 *
934 * Note, make sure to allocate the snapshot with either
935 * a tracing_snapshot_alloc(), or by doing it manually
936 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
937 *
938 * If the snapshot buffer is not allocated, it will stop tracing.
939 * Basically making a permanent snapshot.
940 */
941 void tracing_snapshot(void)
942 {
943 struct trace_array *tr = &global_trace;
944
945 tracing_snapshot_instance(tr);
946 }
947 EXPORT_SYMBOL_GPL(tracing_snapshot);
948
949 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
950 struct trace_buffer *size_buf, int cpu_id);
951 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
952
953 int tracing_alloc_snapshot_instance(struct trace_array *tr)
954 {
955 int ret;
956
957 if (!tr->allocated_snapshot) {
958
959 /* allocate spare buffer */
960 ret = resize_buffer_duplicate_size(&tr->max_buffer,
961 &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
962 if (ret < 0)
963 return ret;
964
965 tr->allocated_snapshot = true;
966 }
967
968 return 0;
969 }
970
971 static void free_snapshot(struct trace_array *tr)
972 {
973 /*
974 * We don't free the ring buffer; instead, we resize it because
975 * the max_tr ring buffer has some state (e.g. ring->clock) and
976 * we want to preserve it.
977 */
978 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
979 set_buffer_entries(&tr->max_buffer, 1);
980 tracing_reset_online_cpus(&tr->max_buffer);
981 tr->allocated_snapshot = false;
982 }
983
984 /**
985 * tracing_alloc_snapshot - allocate snapshot buffer.
986 *
987 * This only allocates the snapshot buffer if it isn't already
988 * allocated - it doesn't also take a snapshot.
989 *
990 * This is meant to be used in cases where the snapshot buffer needs
991 * to be set up for events that can't sleep but need to be able to
992 * trigger a snapshot.
993 */
994 int tracing_alloc_snapshot(void)
995 {
996 struct trace_array *tr = &global_trace;
997 int ret;
998
999 ret = tracing_alloc_snapshot_instance(tr);
1000 WARN_ON(ret < 0);
1001
1002 return ret;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005
1006 /**
1007 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008 *
1009 * This is similar to tracing_snapshot(), but it will allocate the
1010 * snapshot buffer if it isn't already allocated. Use this only
1011 * where it is safe to sleep, as the allocation may sleep.
1012 *
1013 * This causes a swap between the snapshot buffer and the current live
1014 * tracing buffer. You can use this to take snapshots of the live
1015 * trace when some condition is triggered, but continue to trace.
1016 */
1017 void tracing_snapshot_alloc(void)
1018 {
1019 int ret;
1020
1021 ret = tracing_alloc_snapshot();
1022 if (ret < 0)
1023 return;
1024
1025 tracing_snapshot();
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1028 #else
1029 void tracing_snapshot(void)
1030 {
1031 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_snapshot);
1034 int tracing_alloc_snapshot(void)
1035 {
1036 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1037 return -ENODEV;
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040 void tracing_snapshot_alloc(void)
1041 {
1042 /* Give warning */
1043 tracing_snapshot();
1044 }
1045 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046 #endif /* CONFIG_TRACER_SNAPSHOT */
1047
1048 void tracer_tracing_off(struct trace_array *tr)
1049 {
1050 if (tr->trace_buffer.buffer)
1051 ring_buffer_record_off(tr->trace_buffer.buffer);
1052 /*
1053 * This flag is looked at when buffers haven't been allocated
1054 * yet, or by some tracers (like irqsoff), that just want to
1055 * know if the ring buffer has been disabled, but it can handle
1056 * races of where it gets disabled but we still do a record.
1057 * As the check is in the fast path of the tracers, it is more
1058 * important to be fast than accurate.
1059 */
1060 tr->buffer_disabled = 1;
1061 /* Make the flag seen by readers */
1062 smp_wmb();
1063 }
1064
1065 /**
1066 * tracing_off - turn off tracing buffers
1067 *
1068 * This function stops the tracing buffers from recording data.
1069 * It does not disable any overhead the tracers themselves may
1070 * be causing. This function simply causes all recording to
1071 * the ring buffers to fail.
1072 */
1073 void tracing_off(void)
1074 {
1075 tracer_tracing_off(&global_trace);
1076 }
1077 EXPORT_SYMBOL_GPL(tracing_off);
1078
1079 void disable_trace_on_warning(void)
1080 {
1081 if (__disable_trace_on_warning)
1082 tracing_off();
1083 }
1084
1085 /**
1086 * tracer_tracing_is_on - show real state of ring buffer enabled
1087 * @tr: the trace array to know if ring buffer is enabled
1088 *
1089 * Shows real state of the ring buffer if it is enabled or not.
1090 */
1091 int tracer_tracing_is_on(struct trace_array *tr)
1092 {
1093 if (tr->trace_buffer.buffer)
1094 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095 return !tr->buffer_disabled;
1096 }
1097
1098 /**
1099 * tracing_is_on - show state of ring buffers enabled
1100 */
1101 int tracing_is_on(void)
1102 {
1103 return tracer_tracing_is_on(&global_trace);
1104 }
1105 EXPORT_SYMBOL_GPL(tracing_is_on);
1106
1107 static int __init set_buf_size(char *str)
1108 {
1109 unsigned long buf_size;
1110
1111 if (!str)
1112 return 0;
1113 buf_size = memparse(str, &str);
1114 /* nr_entries can not be zero */
1115 if (buf_size == 0)
1116 return 0;
1117 trace_buf_size = buf_size;
1118 return 1;
1119 }
1120 __setup("trace_buf_size=", set_buf_size);
1121
1122 static int __init set_tracing_thresh(char *str)
1123 {
1124 unsigned long threshold;
1125 int ret;
1126
1127 if (!str)
1128 return 0;
1129 ret = kstrtoul(str, 0, &threshold);
1130 if (ret < 0)
1131 return 0;
1132 tracing_thresh = threshold * 1000;
1133 return 1;
1134 }
1135 __setup("tracing_thresh=", set_tracing_thresh);
1136
1137 unsigned long nsecs_to_usecs(unsigned long nsecs)
1138 {
1139 return nsecs / 1000;
1140 }
1141
1142 /*
1143 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1145 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146 * of strings in the order that the evals (enum) were defined.
1147 */
1148 #undef C
1149 #define C(a, b) b
1150
1151 /* These must match the bit positions in trace_iterator_flags */
1152 static const char *trace_options[] = {
1153 TRACE_FLAGS
1154 NULL
1155 };
1156
1157 static struct {
1158 u64 (*func)(void);
1159 const char *name;
1160 int in_ns; /* is this clock in nanoseconds? */
1161 } trace_clocks[] = {
1162 { trace_clock_local, "local", 1 },
1163 { trace_clock_global, "global", 1 },
1164 { trace_clock_counter, "counter", 0 },
1165 { trace_clock_jiffies, "uptime", 0 },
1166 { trace_clock, "perf", 1 },
1167 { ktime_get_mono_fast_ns, "mono", 1 },
1168 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1169 { ktime_get_boot_fast_ns, "boot", 1 },
1170 ARCH_TRACE_CLOCKS
1171 };
1172
1173 /*
1174 * trace_parser_get_init - gets the buffer for trace parser
1175 */
1176 int trace_parser_get_init(struct trace_parser *parser, int size)
1177 {
1178 memset(parser, 0, sizeof(*parser));
1179
1180 parser->buffer = kmalloc(size, GFP_KERNEL);
1181 if (!parser->buffer)
1182 return 1;
1183
1184 parser->size = size;
1185 return 0;
1186 }
1187
1188 /*
1189 * trace_parser_put - frees the buffer for trace parser
1190 */
1191 void trace_parser_put(struct trace_parser *parser)
1192 {
1193 kfree(parser->buffer);
1194 parser->buffer = NULL;
1195 }
1196
1197 /*
1198 * trace_get_user - reads the user input string separated by space
1199 * (matched by isspace(ch))
1200 *
1201 * For each string found the 'struct trace_parser' is updated,
1202 * and the function returns.
1203 *
1204 * Returns number of bytes read.
1205 *
1206 * See kernel/trace/trace.h for 'struct trace_parser' details.
1207 */
1208 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1209 size_t cnt, loff_t *ppos)
1210 {
1211 char ch;
1212 size_t read = 0;
1213 ssize_t ret;
1214
1215 if (!*ppos)
1216 trace_parser_clear(parser);
1217
1218 ret = get_user(ch, ubuf++);
1219 if (ret)
1220 goto out;
1221
1222 read++;
1223 cnt--;
1224
1225 /*
1226 * The parser is not finished with the last write,
1227 * continue reading the user input without skipping spaces.
1228 */
1229 if (!parser->cont) {
1230 /* skip white space */
1231 while (cnt && isspace(ch)) {
1232 ret = get_user(ch, ubuf++);
1233 if (ret)
1234 goto out;
1235 read++;
1236 cnt--;
1237 }
1238
1239 /* only spaces were written */
1240 if (isspace(ch)) {
1241 *ppos += read;
1242 ret = read;
1243 goto out;
1244 }
1245
1246 parser->idx = 0;
1247 }
1248
1249 /* read the non-space input */
1250 while (cnt && !isspace(ch)) {
1251 if (parser->idx < parser->size - 1)
1252 parser->buffer[parser->idx++] = ch;
1253 else {
1254 ret = -EINVAL;
1255 goto out;
1256 }
1257 ret = get_user(ch, ubuf++);
1258 if (ret)
1259 goto out;
1260 read++;
1261 cnt--;
1262 }
1263
1264 /* We either got finished input or we have to wait for another call. */
1265 if (isspace(ch)) {
1266 parser->buffer[parser->idx] = 0;
1267 parser->cont = false;
1268 } else if (parser->idx < parser->size - 1) {
1269 parser->cont = true;
1270 parser->buffer[parser->idx++] = ch;
1271 } else {
1272 ret = -EINVAL;
1273 goto out;
1274 }
1275
1276 *ppos += read;
1277 ret = read;
1278
1279 out:
1280 return ret;
1281 }
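/*
 * Example of the chunked parsing above: a write of "123 456\n" hands back
 * one whitespace-delimited token per call ("123", then "456"); if a token
 * is cut off by the end of the write, parser->cont is set so the next call
 * appends to it instead of starting over.
 */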
1282
1283 /* TODO add a seq_buf_to_buffer() */
1284 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1285 {
1286 int len;
1287
1288 if (trace_seq_used(s) <= s->seq.readpos)
1289 return -EBUSY;
1290
1291 len = trace_seq_used(s) - s->seq.readpos;
1292 if (cnt > len)
1293 cnt = len;
1294 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1295
1296 s->seq.readpos += cnt;
1297 return cnt;
1298 }
1299
1300 unsigned long __read_mostly tracing_thresh;
1301
1302 #ifdef CONFIG_TRACER_MAX_TRACE
1303 /*
1304 * Copy the new maximum trace into the separate maximum-trace
1305 * structure. (this way the maximum trace is permanently saved,
1306 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1307 */
1308 static void
1309 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310 {
1311 struct trace_buffer *trace_buf = &tr->trace_buffer;
1312 struct trace_buffer *max_buf = &tr->max_buffer;
1313 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1314 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1315
1316 max_buf->cpu = cpu;
1317 max_buf->time_start = data->preempt_timestamp;
1318
1319 max_data->saved_latency = tr->max_latency;
1320 max_data->critical_start = data->critical_start;
1321 max_data->critical_end = data->critical_end;
1322
1323 memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1324 max_data->pid = tsk->pid;
1325 /*
1326 * If tsk == current, then use current_uid(), as that does not use
1327 * RCU. The irq tracer can be called out of RCU scope.
1328 */
1329 if (tsk == current)
1330 max_data->uid = current_uid();
1331 else
1332 max_data->uid = task_uid(tsk);
1333
1334 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1335 max_data->policy = tsk->policy;
1336 max_data->rt_priority = tsk->rt_priority;
1337
1338 /* record this task's comm */
1339 tracing_record_cmdline(tsk);
1340 }
1341
1342 /**
1343 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1344 * @tr: tracer
1345 * @tsk: the task with the latency
1346 * @cpu: The cpu that initiated the trace.
1347 *
1348 * Flip the buffers between the @tr and the max_tr and record information
1349 * about which task was the cause of this latency.
1350 */
1351 void
1352 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1353 {
1354 struct ring_buffer *buf;
1355
1356 if (tr->stop_count)
1357 return;
1358
1359 WARN_ON_ONCE(!irqs_disabled());
1360
1361 if (!tr->allocated_snapshot) {
1362 /* Only the nop tracer should hit this when disabling */
1363 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1364 return;
1365 }
1366
1367 arch_spin_lock(&tr->max_lock);
1368
1369 /* Inherit the recordable setting from trace_buffer */
1370 if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1371 ring_buffer_record_on(tr->max_buffer.buffer);
1372 else
1373 ring_buffer_record_off(tr->max_buffer.buffer);
1374
1375 buf = tr->trace_buffer.buffer;
1376 tr->trace_buffer.buffer = tr->max_buffer.buffer;
1377 tr->max_buffer.buffer = buf;
1378
1379 __update_max_tr(tr, tsk, cpu);
1380 arch_spin_unlock(&tr->max_lock);
1381 }
1382
1383 /**
1384 * update_max_tr_single - only copy one trace over, and reset the rest
1385 * @tr: tracer
1386 * @tsk: task with the latency
1387 * @cpu: the cpu of the buffer to copy.
1388 *
1389 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1390 */
1391 void
1392 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1393 {
1394 int ret;
1395
1396 if (tr->stop_count)
1397 return;
1398
1399 WARN_ON_ONCE(!irqs_disabled());
1400 if (!tr->allocated_snapshot) {
1401 /* Only the nop tracer should hit this when disabling */
1402 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1403 return;
1404 }
1405
1406 arch_spin_lock(&tr->max_lock);
1407
1408 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1409
1410 if (ret == -EBUSY) {
1411 /*
1412 * We failed to swap the buffer due to a commit taking
1413 * place on this CPU. We fail to record, but we reset
1414 * the max trace buffer (no one writes directly to it)
1415 * and flag that it failed.
1416 */
1417 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1418 "Failed to swap buffers due to commit in progress\n");
1419 }
1420
1421 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1422
1423 __update_max_tr(tr, tsk, cpu);
1424 arch_spin_unlock(&tr->max_lock);
1425 }
1426 #endif /* CONFIG_TRACER_MAX_TRACE */
1427
1428 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1429 {
1430 /* Iterators are static, they should be filled or empty */
1431 if (trace_buffer_iter(iter, iter->cpu_file))
1432 return 0;
1433
1434 return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1435 full);
1436 }
1437
1438 #ifdef CONFIG_FTRACE_STARTUP_TEST
1439 static bool selftests_can_run;
1440
1441 struct trace_selftests {
1442 struct list_head list;
1443 struct tracer *type;
1444 };
1445
1446 static LIST_HEAD(postponed_selftests);
1447
1448 static int save_selftest(struct tracer *type)
1449 {
1450 struct trace_selftests *selftest;
1451
1452 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1453 if (!selftest)
1454 return -ENOMEM;
1455
1456 selftest->type = type;
1457 list_add(&selftest->list, &postponed_selftests);
1458 return 0;
1459 }
1460
1461 static int run_tracer_selftest(struct tracer *type)
1462 {
1463 struct trace_array *tr = &global_trace;
1464 struct tracer *saved_tracer = tr->current_trace;
1465 int ret;
1466
1467 if (!type->selftest || tracing_selftest_disabled)
1468 return 0;
1469
1470 /*
1471 * If a tracer registers early in boot up (before scheduling is
1472 * initialized and such), then do not run its selftests yet.
1473 * Instead, run it a little later in the boot process.
1474 */
1475 if (!selftests_can_run)
1476 return save_selftest(type);
1477
1478 /*
1479 * Run a selftest on this tracer.
1480 * Here we reset the trace buffer, and set the current
1481 * tracer to be this tracer. The tracer can then run some
1482 * internal tracing to verify that everything is in order.
1483 * If we fail, we do not register this tracer.
1484 */
1485 tracing_reset_online_cpus(&tr->trace_buffer);
1486
1487 tr->current_trace = type;
1488
1489 #ifdef CONFIG_TRACER_MAX_TRACE
1490 if (type->use_max_tr) {
1491 /* If we expanded the buffers, make sure the max is expanded too */
1492 if (ring_buffer_expanded)
1493 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1494 RING_BUFFER_ALL_CPUS);
1495 tr->allocated_snapshot = true;
1496 }
1497 #endif
1498
1499 /* the test is responsible for initializing and enabling */
1500 pr_info("Testing tracer %s: ", type->name);
1501 ret = type->selftest(type, tr);
1502 /* the test is responsible for resetting too */
1503 tr->current_trace = saved_tracer;
1504 if (ret) {
1505 printk(KERN_CONT "FAILED!\n");
1506 /* Add the warning after printing 'FAILED' */
1507 WARN_ON(1);
1508 return -1;
1509 }
1510 /* Only reset on passing, to avoid touching corrupted buffers */
1511 tracing_reset_online_cpus(&tr->trace_buffer);
1512
1513 #ifdef CONFIG_TRACER_MAX_TRACE
1514 if (type->use_max_tr) {
1515 tr->allocated_snapshot = false;
1516
1517 /* Shrink the max buffer again */
1518 if (ring_buffer_expanded)
1519 ring_buffer_resize(tr->max_buffer.buffer, 1,
1520 RING_BUFFER_ALL_CPUS);
1521 }
1522 #endif
1523
1524 printk(KERN_CONT "PASSED\n");
1525 return 0;
1526 }
1527
1528 static __init int init_trace_selftests(void)
1529 {
1530 struct trace_selftests *p, *n;
1531 struct tracer *t, **last;
1532 int ret;
1533
1534 selftests_can_run = true;
1535
1536 mutex_lock(&trace_types_lock);
1537
1538 if (list_empty(&postponed_selftests))
1539 goto out;
1540
1541 pr_info("Running postponed tracer tests:\n");
1542
1543 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1544 ret = run_tracer_selftest(p->type);
1545 /* If the test fails, then warn and remove from available_tracers */
1546 if (ret < 0) {
1547 WARN(1, "tracer: %s failed selftest, disabling\n",
1548 p->type->name);
1549 last = &trace_types;
1550 for (t = trace_types; t; t = t->next) {
1551 if (t == p->type) {
1552 *last = t->next;
1553 break;
1554 }
1555 last = &t->next;
1556 }
1557 }
1558 list_del(&p->list);
1559 kfree(p);
1560 }
1561
1562 out:
1563 mutex_unlock(&trace_types_lock);
1564
1565 return 0;
1566 }
1567 core_initcall(init_trace_selftests);
1568 #else
1569 static inline int run_tracer_selftest(struct tracer *type)
1570 {
1571 return 0;
1572 }
1573 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1574
1575 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1576
1577 static void __init apply_trace_boot_options(void);
1578
1579 /**
1580 * register_tracer - register a tracer with the ftrace system.
1581 * @type: the plugin for the tracer
1582 *
1583 * Register a new plugin tracer.
1584 */
1585 int __init register_tracer(struct tracer *type)
1586 {
1587 struct tracer *t;
1588 int ret = 0;
1589
1590 if (!type->name) {
1591 pr_info("Tracer must have a name\n");
1592 return -1;
1593 }
1594
1595 if (strlen(type->name) >= MAX_TRACER_SIZE) {
1596 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1597 return -1;
1598 }
1599
1600 mutex_lock(&trace_types_lock);
1601
1602 tracing_selftest_running = true;
1603
1604 for (t = trace_types; t; t = t->next) {
1605 if (strcmp(type->name, t->name) == 0) {
1606 /* already found */
1607 pr_info("Tracer %s already registered\n",
1608 type->name);
1609 ret = -1;
1610 goto out;
1611 }
1612 }
1613
1614 if (!type->set_flag)
1615 type->set_flag = &dummy_set_flag;
1616 if (!type->flags) {
1617 /* allocate a dummy tracer_flags */
1618 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1619 if (!type->flags) {
1620 ret = -ENOMEM;
1621 goto out;
1622 }
1623 type->flags->val = 0;
1624 type->flags->opts = dummy_tracer_opt;
1625 } else
1626 if (!type->flags->opts)
1627 type->flags->opts = dummy_tracer_opt;
1628
1629 /* store the tracer for __set_tracer_option */
1630 type->flags->trace = type;
1631
1632 ret = run_tracer_selftest(type);
1633 if (ret < 0)
1634 goto out;
1635
1636 type->next = trace_types;
1637 trace_types = type;
1638 add_tracer_options(&global_trace, type);
1639
1640 out:
1641 tracing_selftest_running = false;
1642 mutex_unlock(&trace_types_lock);
1643
1644 if (ret || !default_bootup_tracer)
1645 goto out_unlock;
1646
1647 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1648 goto out_unlock;
1649
1650 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1651 /* Do we want this tracer to start on bootup? */
1652 tracing_set_tracer(&global_trace, type->name);
1653 default_bootup_tracer = NULL;
1654
1655 apply_trace_boot_options();
1656
1657 /* disable other selftests, since this will break them. */
1658 tracing_selftest_disabled = true;
1659 #ifdef CONFIG_FTRACE_STARTUP_TEST
1660 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1661 type->name);
1662 #endif
1663
1664 out_unlock:
1665 return ret;
1666 }
1667
1668 void tracing_reset(struct trace_buffer *buf, int cpu)
1669 {
1670 struct ring_buffer *buffer = buf->buffer;
1671
1672 if (!buffer)
1673 return;
1674
1675 ring_buffer_record_disable(buffer);
1676
1677 /* Make sure all commits have finished */
1678 synchronize_sched();
1679 ring_buffer_reset_cpu(buffer, cpu);
1680
1681 ring_buffer_record_enable(buffer);
1682 }
1683
1684 void tracing_reset_online_cpus(struct trace_buffer *buf)
1685 {
1686 struct ring_buffer *buffer = buf->buffer;
1687 int cpu;
1688
1689 if (!buffer)
1690 return;
1691
1692 ring_buffer_record_disable(buffer);
1693
1694 /* Make sure all commits have finished */
1695 synchronize_sched();
1696
1697 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1698
1699 for_each_online_cpu(cpu)
1700 ring_buffer_reset_cpu(buffer, cpu);
1701
1702 ring_buffer_record_enable(buffer);
1703 }
1704
1705 /* Must have trace_types_lock held */
1706 void tracing_reset_all_online_cpus(void)
1707 {
1708 struct trace_array *tr;
1709
1710 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1711 if (!tr->clear_trace)
1712 continue;
1713 tr->clear_trace = false;
1714 tracing_reset_online_cpus(&tr->trace_buffer);
1715 #ifdef CONFIG_TRACER_MAX_TRACE
1716 tracing_reset_online_cpus(&tr->max_buffer);
1717 #endif
1718 }
1719 }
1720
1721 static int *tgid_map;
1722
1723 #define SAVED_CMDLINES_DEFAULT 128
1724 #define NO_CMDLINE_MAP UINT_MAX
1725 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1726 struct saved_cmdlines_buffer {
1727 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1728 unsigned *map_cmdline_to_pid;
1729 unsigned cmdline_num;
1730 int cmdline_idx;
1731 char *saved_cmdlines;
1732 };
1733 static struct saved_cmdlines_buffer *savedcmd;
1734
1735 /* temporarily disable recording */
1736 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1737
1738 static inline char *get_saved_cmdlines(int idx)
1739 {
1740 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1741 }
1742
1743 static inline void set_cmdline(int idx, const char *cmdline)
1744 {
1745 memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1746 }
1747
1748 static int allocate_cmdlines_buffer(unsigned int val,
1749 struct saved_cmdlines_buffer *s)
1750 {
1751 s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1752 GFP_KERNEL);
1753 if (!s->map_cmdline_to_pid)
1754 return -ENOMEM;
1755
1756 s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1757 if (!s->saved_cmdlines) {
1758 kfree(s->map_cmdline_to_pid);
1759 return -ENOMEM;
1760 }
1761
1762 s->cmdline_idx = 0;
1763 s->cmdline_num = val;
1764 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1765 sizeof(s->map_pid_to_cmdline));
1766 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1767 val * sizeof(*s->map_cmdline_to_pid));
1768
1769 return 0;
1770 }
1771
1772 static int trace_create_savedcmd(void)
1773 {
1774 int ret;
1775
1776 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1777 if (!savedcmd)
1778 return -ENOMEM;
1779
1780 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1781 if (ret < 0) {
1782 kfree(savedcmd);
1783 savedcmd = NULL;
1784 return -ENOMEM;
1785 }
1786
1787 return 0;
1788 }
1789
1790 int is_tracing_stopped(void)
1791 {
1792 return global_trace.stop_count;
1793 }
1794
1795 /**
1796 * tracing_start - quick start of the tracer
1797 *
1798 * If tracing is enabled but was stopped by tracing_stop,
1799 * this will start the tracer back up.
1800 */
1801 void tracing_start(void)
1802 {
1803 struct ring_buffer *buffer;
1804 unsigned long flags;
1805
1806 if (tracing_disabled)
1807 return;
1808
1809 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1810 if (--global_trace.stop_count) {
1811 if (global_trace.stop_count < 0) {
1812 /* Someone screwed up their debugging */
1813 WARN_ON_ONCE(1);
1814 global_trace.stop_count = 0;
1815 }
1816 goto out;
1817 }
1818
1819 /* Prevent the buffers from switching */
1820 arch_spin_lock(&global_trace.max_lock);
1821
1822 buffer = global_trace.trace_buffer.buffer;
1823 if (buffer)
1824 ring_buffer_record_enable(buffer);
1825
1826 #ifdef CONFIG_TRACER_MAX_TRACE
1827 buffer = global_trace.max_buffer.buffer;
1828 if (buffer)
1829 ring_buffer_record_enable(buffer);
1830 #endif
1831
1832 arch_spin_unlock(&global_trace.max_lock);
1833
1834 out:
1835 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1836 }
1837
1838 static void tracing_start_tr(struct trace_array *tr)
1839 {
1840 struct ring_buffer *buffer;
1841 unsigned long flags;
1842
1843 if (tracing_disabled)
1844 return;
1845
1846 /* If global, we need to also start the max tracer */
1847 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1848 return tracing_start();
1849
1850 raw_spin_lock_irqsave(&tr->start_lock, flags);
1851
1852 if (--tr->stop_count) {
1853 if (tr->stop_count < 0) {
1854 /* Someone screwed up their debugging */
1855 WARN_ON_ONCE(1);
1856 tr->stop_count = 0;
1857 }
1858 goto out;
1859 }
1860
1861 buffer = tr->trace_buffer.buffer;
1862 if (buffer)
1863 ring_buffer_record_enable(buffer);
1864
1865 out:
1866 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1867 }
1868
1869 /**
1870 * tracing_stop - quick stop of the tracer
1871 *
1872 * Light weight way to stop tracing. Use in conjunction with
1873 * tracing_start.
1874 */
1875 void tracing_stop(void)
1876 {
1877 struct ring_buffer *buffer;
1878 unsigned long flags;
1879
1880 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1881 if (global_trace.stop_count++)
1882 goto out;
1883
1884 /* Prevent the buffers from switching */
1885 arch_spin_lock(&global_trace.max_lock);
1886
1887 buffer = global_trace.trace_buffer.buffer;
1888 if (buffer)
1889 ring_buffer_record_disable(buffer);
1890
1891 #ifdef CONFIG_TRACER_MAX_TRACE
1892 buffer = global_trace.max_buffer.buffer;
1893 if (buffer)
1894 ring_buffer_record_disable(buffer);
1895 #endif
1896
1897 arch_spin_unlock(&global_trace.max_lock);
1898
1899 out:
1900 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1901 }
1902
1903 static void tracing_stop_tr(struct trace_array *tr)
1904 {
1905 struct ring_buffer *buffer;
1906 unsigned long flags;
1907
1908 /* If global, we need to also stop the max tracer */
1909 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1910 return tracing_stop();
1911
1912 raw_spin_lock_irqsave(&tr->start_lock, flags);
1913 if (tr->stop_count++)
1914 goto out;
1915
1916 buffer = tr->trace_buffer.buffer;
1917 if (buffer)
1918 ring_buffer_record_disable(buffer);
1919
1920 out:
1921 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1922 }
1923
1924 static int trace_save_cmdline(struct task_struct *tsk)
1925 {
1926 unsigned pid, idx;
1927
1928 /* treat recording of idle task as a success */
1929 if (!tsk->pid)
1930 return 1;
1931
1932 if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1933 return 0;
1934
1935 /*
1936 * It's not the end of the world if we don't get
1937 * the lock, but we also don't want to spin
1938 * nor do we want to disable interrupts,
1939 * so if we miss here, then better luck next time.
1940 */
1941 if (!arch_spin_trylock(&trace_cmdline_lock))
1942 return 0;
1943
1944 idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1945 if (idx == NO_CMDLINE_MAP) {
1946 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1947
1948 /*
1949 * Check whether the cmdline buffer at idx has a pid
1950 * mapped. We are going to overwrite that entry so we
1951 * need to clear the map_pid_to_cmdline. Otherwise we
1952 * would read the new comm for the old pid.
1953 */
1954 pid = savedcmd->map_cmdline_to_pid[idx];
1955 if (pid != NO_CMDLINE_MAP)
1956 savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1957
1958 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1959 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1960
1961 savedcmd->cmdline_idx = idx;
1962 }
1963
1964 set_cmdline(idx, tsk->comm);
1965
1966 arch_spin_unlock(&trace_cmdline_lock);
1967
1968 return 1;
1969 }
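/*
 * In other words, the saved cmdlines form a small ring (SAVED_CMDLINES_DEFAULT
 * slots by default): when a slot is recycled for a new pid, the old pid's
 * reverse mapping is cleared first so trace_find_cmdline() never reports a
 * stale comm for it.
 */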
1970
1971 static void __trace_find_cmdline(int pid, char comm[])
1972 {
1973 unsigned map;
1974
1975 if (!pid) {
1976 strcpy(comm, "<idle>");
1977 return;
1978 }
1979
1980 if (WARN_ON_ONCE(pid < 0)) {
1981 strcpy(comm, "<XXX>");
1982 return;
1983 }
1984
1985 if (pid > PID_MAX_DEFAULT) {
1986 strcpy(comm, "<...>");
1987 return;
1988 }
1989
1990 map = savedcmd->map_pid_to_cmdline[pid];
1991 if (map != NO_CMDLINE_MAP)
1992 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1993 else
1994 strcpy(comm, "<...>");
1995 }
1996
1997 void trace_find_cmdline(int pid, char comm[])
1998 {
1999 preempt_disable();
2000 arch_spin_lock(&trace_cmdline_lock);
2001
2002 __trace_find_cmdline(pid, comm);
2003
2004 arch_spin_unlock(&trace_cmdline_lock);
2005 preempt_enable();
2006 }
2007
2008 int trace_find_tgid(int pid)
2009 {
2010 if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2011 return 0;
2012
2013 return tgid_map[pid];
2014 }
2015
2016 static int trace_save_tgid(struct task_struct *tsk)
2017 {
2018 /* treat recording of idle task as a success */
2019 if (!tsk->pid)
2020 return 1;
2021
2022 if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2023 return 0;
2024
2025 tgid_map[tsk->pid] = tsk->tgid;
2026 return 1;
2027 }
2028
2029 static bool tracing_record_taskinfo_skip(int flags)
2030 {
2031 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2032 return true;
2033 if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2034 return true;
2035 if (!__this_cpu_read(trace_taskinfo_save))
2036 return true;
2037 return false;
2038 }
2039
2040 /**
2041 * tracing_record_taskinfo - record the task info of a task
2042 *
2043  * @task:  task to record
2044  * @flags: TRACE_RECORD_CMDLINE for recording comm
2045  *         TRACE_RECORD_TGID for recording tgid
2046 */
2047 void tracing_record_taskinfo(struct task_struct *task, int flags)
2048 {
2049 bool done;
2050
2051 if (tracing_record_taskinfo_skip(flags))
2052 return;
2053
2054 /*
2055 * Record as much task information as possible. If some fail, continue
2056 * to try to record the others.
2057 */
2058 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2059 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2060
2061 /* If recording any information failed, retry again soon. */
2062 if (!done)
2063 return;
2064
2065 __this_cpu_write(trace_taskinfo_save, false);
2066 }
2067
2068 /**
2069 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2070 *
2071  * @prev:  previous task during sched_switch
2072  * @next:  next task during sched_switch
2073  * @flags: TRACE_RECORD_CMDLINE for recording comm
2074  *         TRACE_RECORD_TGID for recording tgid
2075 */
2076 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2077 struct task_struct *next, int flags)
2078 {
2079 bool done;
2080
2081 if (tracing_record_taskinfo_skip(flags))
2082 return;
2083
2084 /*
2085 * Record as much task information as possible. If some fail, continue
2086 * to try to record the others.
2087 */
2088 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2089 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2090 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2091 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2092
2093 /* If recording any information failed, retry again soon. */
2094 if (!done)
2095 return;
2096
2097 __this_cpu_write(trace_taskinfo_save, false);
2098 }
2099
2100 /* Helpers to record a specific task information */
2101 void tracing_record_cmdline(struct task_struct *task)
2102 {
2103 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2104 }
2105
2106 void tracing_record_tgid(struct task_struct *task)
2107 {
2108 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2109 }
2110
2111 /*
2112 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2113 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2114 * simplifies those functions and keeps them in sync.
2115 */
2116 enum print_line_t trace_handle_return(struct trace_seq *s)
2117 {
2118 return trace_seq_has_overflowed(s) ?
2119 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2120 }
2121 EXPORT_SYMBOL_GPL(trace_handle_return);
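
/*
 * Illustrative sketch (not part of the build): a typical event output
 * callback writes into iter->seq and then lets trace_handle_return()
 * collapse the overflow check, just as print_raw_fmt() does further
 * below. The function name here is hypothetical.
 */
#if 0
static enum print_line_t example_trace_output(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;

	trace_seq_printf(s, "example: cpu=%d ts=%llu\n", iter->cpu,
			 (unsigned long long)iter->ts);
	return trace_handle_return(s);
}
#endif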
2122
2123 void
2124 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2125 int pc)
2126 {
2127 struct task_struct *tsk = current;
2128
2129 entry->preempt_count = pc & 0xff;
2130 entry->pid = (tsk) ? tsk->pid : 0;
2131 entry->flags =
2132 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2133 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2134 #else
2135 TRACE_FLAG_IRQS_NOSUPPORT |
2136 #endif
2137 ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
2138 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2139 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2140 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2141 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2142 }
2143 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2144
2145 struct ring_buffer_event *
2146 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2147 int type,
2148 unsigned long len,
2149 unsigned long flags, int pc)
2150 {
2151 return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2152 }
2153
2154 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2155 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2156 static int trace_buffered_event_ref;
2157
2158 /**
2159 * trace_buffered_event_enable - enable buffering events
2160 *
2161 * When events are being filtered, it is quicker to use a temporary
2162 * buffer to write the event data into if there's a likely chance
2163 * that it will not be committed. The discard of the ring buffer
2164 * is not as fast as committing, and is much slower than copying
2165 * a commit.
2166 *
2167 * When an event is to be filtered, allocate per cpu buffers to
2168  * write the event data into. If the event is filtered and discarded,
2169  * it is simply dropped; otherwise, the entire data is committed
2170  * in one shot.
2171 */
2172 void trace_buffered_event_enable(void)
2173 {
2174 struct ring_buffer_event *event;
2175 struct page *page;
2176 int cpu;
2177
2178 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2179
2180 if (trace_buffered_event_ref++)
2181 return;
2182
2183 for_each_tracing_cpu(cpu) {
2184 page = alloc_pages_node(cpu_to_node(cpu),
2185 GFP_KERNEL | __GFP_NORETRY, 0);
2186 if (!page)
2187 goto failed;
2188
2189 event = page_address(page);
2190 memset(event, 0, sizeof(*event));
2191
2192 per_cpu(trace_buffered_event, cpu) = event;
2193
2194 preempt_disable();
2195 if (cpu == smp_processor_id() &&
2196 this_cpu_read(trace_buffered_event) !=
2197 per_cpu(trace_buffered_event, cpu))
2198 WARN_ON_ONCE(1);
2199 preempt_enable();
2200 }
2201
2202 return;
2203 failed:
2204 trace_buffered_event_disable();
2205 }
2206
2207 static void enable_trace_buffered_event(void *data)
2208 {
2209 /* Probably not needed, but do it anyway */
2210 smp_rmb();
2211 this_cpu_dec(trace_buffered_event_cnt);
2212 }
2213
2214 static void disable_trace_buffered_event(void *data)
2215 {
2216 this_cpu_inc(trace_buffered_event_cnt);
2217 }
2218
2219 /**
2220 * trace_buffered_event_disable - disable buffering events
2221 *
2222 * When a filter is removed, it is faster to not use the buffered
2223 * events, and to commit directly into the ring buffer. Free up
2224 * the temp buffers when there are no more users. This requires
2225 * special synchronization with current events.
2226 */
2227 void trace_buffered_event_disable(void)
2228 {
2229 int cpu;
2230
2231 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2232
2233 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2234 return;
2235
2236 if (--trace_buffered_event_ref)
2237 return;
2238
2239 preempt_disable();
2240 /* For each CPU, set the buffer as used. */
2241 smp_call_function_many(tracing_buffer_mask,
2242 disable_trace_buffered_event, NULL, 1);
2243 preempt_enable();
2244
2245 /* Wait for all current users to finish */
2246 synchronize_sched();
2247
2248 for_each_tracing_cpu(cpu) {
2249 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2250 per_cpu(trace_buffered_event, cpu) = NULL;
2251 }
2252 /*
2253 * Make sure trace_buffered_event is NULL before clearing
2254 * trace_buffered_event_cnt.
2255 */
2256 smp_wmb();
2257
2258 preempt_disable();
2259 /* Do the work on each cpu */
2260 smp_call_function_many(tracing_buffer_mask,
2261 enable_trace_buffered_event, NULL, 1);
2262 preempt_enable();
2263 }
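
/*
 * Usage sketch (illustration only, not compiled): per the WARN_ON_ONCE()
 * checks above, the reference-counted enable/disable pair must be called
 * with event_mutex held, e.g. when an event filter is attached or removed.
 * The helper name and the "attach" flag below are hypothetical.
 */
#if 0
static void example_filter_toggle(bool attach)
{
	mutex_lock(&event_mutex);
	if (attach)
		trace_buffered_event_enable();
	else
		trace_buffered_event_disable();
	mutex_unlock(&event_mutex);
}
#endif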
2264
2265 static struct ring_buffer *temp_buffer;
2266
2267 struct ring_buffer_event *
2268 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2269 struct trace_event_file *trace_file,
2270 int type, unsigned long len,
2271 unsigned long flags, int pc)
2272 {
2273 struct ring_buffer_event *entry;
2274 int val;
2275
2276 *current_rb = trace_file->tr->trace_buffer.buffer;
2277
2278 if ((trace_file->flags &
2279 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2280 (entry = this_cpu_read(trace_buffered_event))) {
2281 /* Try to use the per cpu buffer first */
2282 val = this_cpu_inc_return(trace_buffered_event_cnt);
2283 if (val == 1) {
2284 trace_event_setup(entry, type, flags, pc);
2285 entry->array[0] = len;
2286 return entry;
2287 }
2288 this_cpu_dec(trace_buffered_event_cnt);
2289 }
2290
2291 entry = __trace_buffer_lock_reserve(*current_rb,
2292 type, len, flags, pc);
2293 /*
2294 * If tracing is off, but we have triggers enabled
2295 * we still need to look at the event data. Use the temp_buffer
2296  * to store the trace event for the trigger to use. It's recursion
2297  * safe and will not be recorded anywhere.
2298 */
2299 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2300 *current_rb = temp_buffer;
2301 entry = __trace_buffer_lock_reserve(*current_rb,
2302 type, len, flags, pc);
2303 }
2304 return entry;
2305 }
2306 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2307
2308 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2309 static DEFINE_MUTEX(tracepoint_printk_mutex);
2310
2311 static void output_printk(struct trace_event_buffer *fbuffer)
2312 {
2313 struct trace_event_call *event_call;
2314 struct trace_event *event;
2315 unsigned long flags;
2316 struct trace_iterator *iter = tracepoint_print_iter;
2317
2318 /* We should never get here if iter is NULL */
2319 if (WARN_ON_ONCE(!iter))
2320 return;
2321
2322 event_call = fbuffer->trace_file->event_call;
2323 if (!event_call || !event_call->event.funcs ||
2324 !event_call->event.funcs->trace)
2325 return;
2326
2327 event = &fbuffer->trace_file->event_call->event;
2328
2329 spin_lock_irqsave(&tracepoint_iter_lock, flags);
2330 trace_seq_init(&iter->seq);
2331 iter->ent = fbuffer->entry;
2332 event_call->event.funcs->trace(iter, 0, event);
2333 trace_seq_putc(&iter->seq, 0);
2334 printk("%s", iter->seq.buffer);
2335
2336 spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2337 }
2338
2339 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2340 void __user *buffer, size_t *lenp,
2341 loff_t *ppos)
2342 {
2343 int save_tracepoint_printk;
2344 int ret;
2345
2346 mutex_lock(&tracepoint_printk_mutex);
2347 save_tracepoint_printk = tracepoint_printk;
2348
2349 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2350
2351 /*
2352 * This will force exiting early, as tracepoint_printk
2353  * is always zero when tracepoint_print_iter is not allocated.
2354 */
2355 if (!tracepoint_print_iter)
2356 tracepoint_printk = 0;
2357
2358 if (save_tracepoint_printk == tracepoint_printk)
2359 goto out;
2360
2361 if (tracepoint_printk)
2362 static_key_enable(&tracepoint_printk_key.key);
2363 else
2364 static_key_disable(&tracepoint_printk_key.key);
2365
2366 out:
2367 mutex_unlock(&tracepoint_printk_mutex);
2368
2369 return ret;
2370 }
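
/*
 * Note: this handler is expected to back the kernel.tracepoint_printk
 * sysctl, so at run time the static key above would typically be toggled
 * with something like:
 *
 *	echo 1 > /proc/sys/kernel/tracepoint_printk
 *
 * It only takes effect when tracepoint_print_iter has been allocated
 * (the "tp_printk" boot option); otherwise tracepoint_printk is forced
 * back to zero above.
 */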
2371
2372 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2373 {
2374 if (static_key_false(&tracepoint_printk_key.key))
2375 output_printk(fbuffer);
2376
2377 event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2378 fbuffer->event, fbuffer->entry,
2379 fbuffer->flags, fbuffer->pc);
2380 }
2381 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2382
2383 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2384 struct ring_buffer *buffer,
2385 struct ring_buffer_event *event,
2386 unsigned long flags, int pc,
2387 struct pt_regs *regs)
2388 {
2389 __buffer_unlock_commit(buffer, event);
2390
2391 /*
2392 * If regs is not set, then skip the following callers:
2393 * trace_buffer_unlock_commit_regs
2394 * event_trigger_unlock_commit
2395 * trace_event_buffer_commit
2396 * trace_event_raw_event_sched_switch
2397 * Note, we can still get here via blktrace, wakeup tracer
2398 * and mmiotrace, but that's ok if they lose a function or
2399  * two. They are not that meaningful.
2400 */
2401 ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2402 ftrace_trace_userstack(buffer, flags, pc);
2403 }
2404
2405 /*
2406 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2407 */
2408 void
2409 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2410 struct ring_buffer_event *event)
2411 {
2412 __buffer_unlock_commit(buffer, event);
2413 }
2414
2415 static void
2416 trace_process_export(struct trace_export *export,
2417 struct ring_buffer_event *event)
2418 {
2419 struct trace_entry *entry;
2420 unsigned int size = 0;
2421
2422 entry = ring_buffer_event_data(event);
2423 size = ring_buffer_event_length(event);
2424 export->write(entry, size);
2425 }
2426
2427 static DEFINE_MUTEX(ftrace_export_lock);
2428
2429 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2430
2431 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2432
2433 static inline void ftrace_exports_enable(void)
2434 {
2435 static_branch_enable(&ftrace_exports_enabled);
2436 }
2437
2438 static inline void ftrace_exports_disable(void)
2439 {
2440 static_branch_disable(&ftrace_exports_enabled);
2441 }
2442
2443 void ftrace_exports(struct ring_buffer_event *event)
2444 {
2445 struct trace_export *export;
2446
2447 preempt_disable_notrace();
2448
2449 export = rcu_dereference_raw_notrace(ftrace_exports_list);
2450 while (export) {
2451 trace_process_export(export, event);
2452 export = rcu_dereference_raw_notrace(export->next);
2453 }
2454
2455 preempt_enable_notrace();
2456 }
2457
2458 static inline void
2459 add_trace_export(struct trace_export **list, struct trace_export *export)
2460 {
2461 rcu_assign_pointer(export->next, *list);
2462 /*
2463 * We are entering export into the list but another
2464 * CPU might be walking that list. We need to make sure
2465 * the export->next pointer is valid before another CPU sees
2466  * the export pointer inserted into the list.
2467 */
2468 rcu_assign_pointer(*list, export);
2469 }
2470
2471 static inline int
2472 rm_trace_export(struct trace_export **list, struct trace_export *export)
2473 {
2474 struct trace_export **p;
2475
2476 for (p = list; *p != NULL; p = &(*p)->next)
2477 if (*p == export)
2478 break;
2479
2480 if (*p != export)
2481 return -1;
2482
2483 rcu_assign_pointer(*p, (*p)->next);
2484
2485 return 0;
2486 }
2487
2488 static inline void
2489 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2490 {
2491 if (*list == NULL)
2492 ftrace_exports_enable();
2493
2494 add_trace_export(list, export);
2495 }
2496
2497 static inline int
2498 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2499 {
2500 int ret;
2501
2502 ret = rm_trace_export(list, export);
2503 if (*list == NULL)
2504 ftrace_exports_disable();
2505
2506 return ret;
2507 }
2508
2509 int register_ftrace_export(struct trace_export *export)
2510 {
2511 if (WARN_ON_ONCE(!export->write))
2512 return -1;
2513
2514 mutex_lock(&ftrace_export_lock);
2515
2516 add_ftrace_export(&ftrace_exports_list, export);
2517
2518 mutex_unlock(&ftrace_export_lock);
2519
2520 return 0;
2521 }
2522 EXPORT_SYMBOL_GPL(register_ftrace_export);
2523
2524 int unregister_ftrace_export(struct trace_export *export)
2525 {
2526 int ret;
2527
2528 mutex_lock(&ftrace_export_lock);
2529
2530 ret = rm_ftrace_export(&ftrace_exports_list, export);
2531
2532 mutex_unlock(&ftrace_export_lock);
2533
2534 return ret;
2535 }
2536 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
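
/*
 * Usage sketch (illustration only, not compiled): an exporter provides a
 * write() callback that receives each function-trace entry and its length,
 * as invoked by trace_process_export() above. The exact prototype comes
 * from struct trace_export; the names and callback body here are
 * hypothetical.
 */
#if 0
static void example_export_write(const void *entry, unsigned int size)
{
	/* push the raw trace entry out-of-band (e.g. to a hardware trace unit) */
}

static struct trace_export example_export = {
	.write	= example_export_write,
};

static int __init example_export_init(void)
{
	return register_ftrace_export(&example_export);
}

static void __exit example_export_exit(void)
{
	unregister_ftrace_export(&example_export);
}
#endif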
2537
2538 void
2539 trace_function(struct trace_array *tr,
2540 unsigned long ip, unsigned long parent_ip, unsigned long flags,
2541 int pc)
2542 {
2543 struct trace_event_call *call = &event_function;
2544 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2545 struct ring_buffer_event *event;
2546 struct ftrace_entry *entry;
2547
2548 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2549 flags, pc);
2550 if (!event)
2551 return;
2552 entry = ring_buffer_event_data(event);
2553 entry->ip = ip;
2554 entry->parent_ip = parent_ip;
2555
2556 if (!call_filter_check_discard(call, entry, buffer, event)) {
2557 if (static_branch_unlikely(&ftrace_exports_enabled))
2558 ftrace_exports(event);
2559 __buffer_unlock_commit(buffer, event);
2560 }
2561 }
2562
2563 #ifdef CONFIG_STACKTRACE
2564
2565 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2566 struct ftrace_stack {
2567 unsigned long calls[FTRACE_STACK_MAX_ENTRIES];
2568 };
2569
2570 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2571 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2572
2573 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2574 unsigned long flags,
2575 int skip, int pc, struct pt_regs *regs)
2576 {
2577 struct trace_event_call *call = &event_kernel_stack;
2578 struct ring_buffer_event *event;
2579 struct stack_entry *entry;
2580 struct stack_trace trace;
2581 int use_stack;
2582 int size = FTRACE_STACK_ENTRIES;
2583
2584 trace.nr_entries = 0;
2585 trace.skip = skip;
2586
2587 /*
2588  * Add two, for this function and the call to save_stack_trace().
2589 * If regs is set, then these functions will not be in the way.
2590 */
2591 if (!regs)
2592 trace.skip += 2;
2593
2594 /*
2595  * Since events can happen in NMIs, there's no safe way to
2596  * use the per-cpu ftrace_stacks. We reserve it, and if an interrupt
2597  * or NMI comes in, it will just have to use the default
2598  * FTRACE_STACK_ENTRIES sized stack.
2599 */
2600 preempt_disable_notrace();
2601
2602 use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2603 /*
2604 * We don't need any atomic variables, just a barrier.
2605 * If an interrupt comes in, we don't care, because it would
2606 * have exited and put the counter back to what we want.
2607 * We just need a barrier to keep gcc from moving things
2608 * around.
2609 */
2610 barrier();
2611 if (use_stack == 1) {
2612 trace.entries = this_cpu_ptr(ftrace_stack.calls);
2613 trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
2614
2615 if (regs)
2616 save_stack_trace_regs(regs, &trace);
2617 else
2618 save_stack_trace(&trace);
2619
2620 if (trace.nr_entries > size)
2621 size = trace.nr_entries;
2622 } else
2623 /* From now on, use_stack is a boolean */
2624 use_stack = 0;
2625
2626 size *= sizeof(unsigned long);
2627
2628 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2629 sizeof(*entry) + size, flags, pc);
2630 if (!event)
2631 goto out;
2632 entry = ring_buffer_event_data(event);
2633
2634 memset(&entry->caller, 0, size);
2635
2636 if (use_stack)
2637 memcpy(&entry->caller, trace.entries,
2638 trace.nr_entries * sizeof(unsigned long));
2639 else {
2640 trace.max_entries = FTRACE_STACK_ENTRIES;
2641 trace.entries = entry->caller;
2642 if (regs)
2643 save_stack_trace_regs(regs, &trace);
2644 else
2645 save_stack_trace(&trace);
2646 }
2647
2648 entry->size = trace.nr_entries;
2649
2650 if (!call_filter_check_discard(call, entry, buffer, event))
2651 __buffer_unlock_commit(buffer, event);
2652
2653 out:
2654 /* Again, don't let gcc optimize things here */
2655 barrier();
2656 __this_cpu_dec(ftrace_stack_reserve);
2657 preempt_enable_notrace();
2658
2659 }
2660
2661 static inline void ftrace_trace_stack(struct trace_array *tr,
2662 struct ring_buffer *buffer,
2663 unsigned long flags,
2664 int skip, int pc, struct pt_regs *regs)
2665 {
2666 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2667 return;
2668
2669 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2670 }
2671
2672 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2673 int pc)
2674 {
2675 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2676
2677 if (rcu_is_watching()) {
2678 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2679 return;
2680 }
2681
2682 /*
2683 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2684 * but if the above rcu_is_watching() failed, then the NMI
2685 * triggered someplace critical, and rcu_irq_enter() should
2686 * not be called from NMI.
2687 */
2688 if (unlikely(in_nmi()))
2689 return;
2690
2691 /*
2692 * It is possible that a function is being traced in a
2693 * location that RCU is not watching. A call to
2694  * rcu_irq_enter() will make sure that it is, but there are
2695  * a few internal rcu functions that could be traced
2696  * where that won't work either. In those cases, we just
2697 * do nothing.
2698 */
2699 if (unlikely(rcu_irq_enter_disabled()))
2700 return;
2701
2702 rcu_irq_enter_irqson();
2703 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2704 rcu_irq_exit_irqson();
2705 }
2706
2707 /**
2708 * trace_dump_stack - record a stack back trace in the trace buffer
2709 * @skip: Number of functions to skip (helper handlers)
2710 */
2711 void trace_dump_stack(int skip)
2712 {
2713 unsigned long flags;
2714
2715 if (tracing_disabled || tracing_selftest_running)
2716 return;
2717
2718 local_save_flags(flags);
2719
2720 /*
2721  * Skip 3 more, which seems to get us to the caller of
2722 * this function.
2723 */
2724 skip += 3;
2725 __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2726 flags, skip, preempt_count(), NULL);
2727 }
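
/*
 * Usage sketch (illustration only): kernel code that wants a backtrace
 * recorded into the ring buffer instead of the console can call this
 * directly, e.g.:
 *
 *	trace_dump_stack(0);
 *
 * passing a non-zero @skip to hide wrapper functions from the recorded
 * trace.
 */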
2728
2729 static DEFINE_PER_CPU(int, user_stack_count);
2730
2731 void
2732 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2733 {
2734 struct trace_event_call *call = &event_user_stack;
2735 struct ring_buffer_event *event;
2736 struct userstack_entry *entry;
2737 struct stack_trace trace;
2738
2739 if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2740 return;
2741
2742 /*
2743  * NMIs cannot handle page faults, even with fixups.
2744  * Saving the user stack can (and often does) fault.
2745 */
2746 if (unlikely(in_nmi()))
2747 return;
2748
2749 /*
2750 * prevent recursion, since the user stack tracing may
2751 * trigger other kernel events.
2752 */
2753 preempt_disable();
2754 if (__this_cpu_read(user_stack_count))
2755 goto out;
2756
2757 __this_cpu_inc(user_stack_count);
2758
2759 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2760 sizeof(*entry), flags, pc);
2761 if (!event)
2762 goto out_drop_count;
2763 entry = ring_buffer_event_data(event);
2764
2765 entry->tgid = current->tgid;
2766 memset(&entry->caller, 0, sizeof(entry->caller));
2767
2768 trace.nr_entries = 0;
2769 trace.max_entries = FTRACE_STACK_ENTRIES;
2770 trace.skip = 0;
2771 trace.entries = entry->caller;
2772
2773 save_stack_trace_user(&trace);
2774 if (!call_filter_check_discard(call, entry, buffer, event))
2775 __buffer_unlock_commit(buffer, event);
2776
2777 out_drop_count:
2778 __this_cpu_dec(user_stack_count);
2779 out:
2780 preempt_enable();
2781 }
2782
2783 #ifdef UNUSED
2784 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2785 {
2786 ftrace_trace_userstack(tr, flags, preempt_count());
2787 }
2788 #endif /* UNUSED */
2789
2790 #endif /* CONFIG_STACKTRACE */
2791
2792 /* created for use with alloc_percpu */
2793 struct trace_buffer_struct {
2794 int nesting;
2795 char buffer[4][TRACE_BUF_SIZE];
2796 };
2797
2798 static struct trace_buffer_struct *trace_percpu_buffer;
2799
2800 /*
2801  * This allows for lockless recording. If we're nested too deeply, then
2802 * this returns NULL.
2803 */
2804 static char *get_trace_buf(void)
2805 {
2806 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2807
2808 if (!buffer || buffer->nesting >= 4)
2809 return NULL;
2810
2811 buffer->nesting++;
2812
2813 /* Interrupts must see nesting incremented before we use the buffer */
2814 barrier();
2815 return &buffer->buffer[buffer->nesting][0];
2816 }
2817
2818 static void put_trace_buf(void)
2819 {
2820 /* Don't let the decrement of nesting leak before this */
2821 barrier();
2822 this_cpu_dec(trace_percpu_buffer->nesting);
2823 }
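
/*
 * Usage sketch (illustration only, not compiled): callers such as
 * trace_vbprintk() below bracket their use of the per-cpu buffer with
 * get_trace_buf()/put_trace_buf() while preemption is disabled, and must
 * handle the NULL return when nested more than four levels deep. The
 * helper name below is hypothetical.
 */
#if 0
static void example_use_trace_buf(void)
{
	char *tbuffer;

	preempt_disable_notrace();
	tbuffer = get_trace_buf();
	if (tbuffer) {
		/* format at most TRACE_BUF_SIZE bytes into tbuffer here */
		put_trace_buf();
	}
	preempt_enable_notrace();
}
#endif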
2824
2825 static int alloc_percpu_trace_buffer(void)
2826 {
2827 struct trace_buffer_struct *buffers;
2828
2829 buffers = alloc_percpu(struct trace_buffer_struct);
2830 if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2831 return -ENOMEM;
2832
2833 trace_percpu_buffer = buffers;
2834 return 0;
2835 }
2836
2837 static int buffers_allocated;
2838
2839 void trace_printk_init_buffers(void)
2840 {
2841 if (buffers_allocated)
2842 return;
2843
2844 if (alloc_percpu_trace_buffer())
2845 return;
2846
2847 /* trace_printk() is for debug use only. Don't use it in production. */
2848
2849 pr_warn("\n");
2850 pr_warn("**********************************************************\n");
2851 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2852 pr_warn("** **\n");
2853 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
2854 pr_warn("** **\n");
2855 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
2856 pr_warn("** unsafe for production use. **\n");
2857 pr_warn("** **\n");
2858 pr_warn("** If you see this message and you are not debugging **\n");
2859 pr_warn("** the kernel, report this immediately to your vendor! **\n");
2860 pr_warn("** **\n");
2861 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2862 pr_warn("**********************************************************\n");
2863
2864 /* Expand the buffers to set size */
2865 tracing_update_buffers();
2866
2867 buffers_allocated = 1;
2868
2869 /*
2870 * trace_printk_init_buffers() can be called by modules.
2871 * If that happens, then we need to start cmdline recording
2872  * directly here. If global_trace.trace_buffer.buffer is already
2873 * allocated here, then this was called by module code.
2874 */
2875 if (global_trace.trace_buffer.buffer)
2876 tracing_start_cmdline_record();
2877 }
2878
2879 void trace_printk_start_comm(void)
2880 {
2881 /* Start tracing comms if trace printk is set */
2882 if (!buffers_allocated)
2883 return;
2884 tracing_start_cmdline_record();
2885 }
2886
2887 static void trace_printk_start_stop_comm(int enabled)
2888 {
2889 if (!buffers_allocated)
2890 return;
2891
2892 if (enabled)
2893 tracing_start_cmdline_record();
2894 else
2895 tracing_stop_cmdline_record();
2896 }
2897
2898 /**
2899 * trace_vbprintk - write binary msg to tracing buffer
2900  * @ip:   The address of the caller
 * @fmt:  The format string to write to the buffer
 * @args: Arguments for @fmt
2901 */
2902 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2903 {
2904 struct trace_event_call *call = &event_bprint;
2905 struct ring_buffer_event *event;
2906 struct ring_buffer *buffer;
2907 struct trace_array *tr = &global_trace;
2908 struct bprint_entry *entry;
2909 unsigned long flags;
2910 char *tbuffer;
2911 int len = 0, size, pc;
2912
2913 if (unlikely(tracing_selftest_running || tracing_disabled))
2914 return 0;
2915
2916 /* Don't pollute graph traces with trace_vprintk internals */
2917 pause_graph_tracing();
2918
2919 pc = preempt_count();
2920 preempt_disable_notrace();
2921
2922 tbuffer = get_trace_buf();
2923 if (!tbuffer) {
2924 len = 0;
2925 goto out_nobuffer;
2926 }
2927
2928 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2929
2930 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2931 goto out;
2932
2933 local_save_flags(flags);
2934 size = sizeof(*entry) + sizeof(u32) * len;
2935 buffer = tr->trace_buffer.buffer;
2936 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2937 flags, pc);
2938 if (!event)
2939 goto out;
2940 entry = ring_buffer_event_data(event);
2941 entry->ip = ip;
2942 entry->fmt = fmt;
2943
2944 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2945 if (!call_filter_check_discard(call, entry, buffer, event)) {
2946 __buffer_unlock_commit(buffer, event);
2947 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2948 }
2949
2950 out:
2951 put_trace_buf();
2952
2953 out_nobuffer:
2954 preempt_enable_notrace();
2955 unpause_graph_tracing();
2956
2957 return len;
2958 }
2959 EXPORT_SYMBOL_GPL(trace_vbprintk);
2960
2961 __printf(3, 0)
2962 static int
2963 __trace_array_vprintk(struct ring_buffer *buffer,
2964 unsigned long ip, const char *fmt, va_list args)
2965 {
2966 struct trace_event_call *call = &event_print;
2967 struct ring_buffer_event *event;
2968 int len = 0, size, pc;
2969 struct print_entry *entry;
2970 unsigned long flags;
2971 char *tbuffer;
2972
2973 if (tracing_disabled || tracing_selftest_running)
2974 return 0;
2975
2976 /* Don't pollute graph traces with trace_vprintk internals */
2977 pause_graph_tracing();
2978
2979 pc = preempt_count();
2980 preempt_disable_notrace();
2981
2983 tbuffer = get_trace_buf();
2984 if (!tbuffer) {
2985 len = 0;
2986 goto out_nobuffer;
2987 }
2988
2989 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2990
2991 local_save_flags(flags);
2992 size = sizeof(*entry) + len + 1;
2993 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2994 flags, pc);
2995 if (!event)
2996 goto out;
2997 entry = ring_buffer_event_data(event);
2998 entry->ip = ip;
2999
3000 memcpy(&entry->buf, tbuffer, len + 1);
3001 if (!call_filter_check_discard(call, entry, buffer, event)) {
3002 __buffer_unlock_commit(buffer, event);
3003 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3004 }
3005
3006 out:
3007 put_trace_buf();
3008
3009 out_nobuffer:
3010 preempt_enable_notrace();
3011 unpause_graph_tracing();
3012
3013 return len;
3014 }
3015
3016 __printf(3, 0)
3017 int trace_array_vprintk(struct trace_array *tr,
3018 unsigned long ip, const char *fmt, va_list args)
3019 {
3020 return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3021 }
3022
3023 __printf(3, 0)
3024 int trace_array_printk(struct trace_array *tr,
3025 unsigned long ip, const char *fmt, ...)
3026 {
3027 int ret;
3028 va_list ap;
3029
3030 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3031 return 0;
3032
3033 va_start(ap, fmt);
3034 ret = trace_array_vprintk(tr, ip, fmt, ap);
3035 va_end(ap);
3036 return ret;
3037 }
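
/*
 * Usage sketch (illustration only, not compiled): given a trace_array for
 * an instance, a message can be written into that instance's buffer rather
 * than the global one. _THIS_IP_ records the caller's address; the helper
 * name, "tr" and "widget_id" below are hypothetical.
 */
#if 0
static void example_instance_printk(struct trace_array *tr, int widget_id)
{
	trace_array_printk(tr, _THIS_IP_, "widget %d reset\n", widget_id);
}
#endif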
3038
3039 __printf(3, 4)
3040 int trace_array_printk_buf(struct ring_buffer *buffer,
3041 unsigned long ip, const char *fmt, ...)
3042 {
3043 int ret;
3044 va_list ap;
3045
3046 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3047 return 0;
3048
3049 va_start(ap, fmt);
3050 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3051 va_end(ap);
3052 return ret;
3053 }
3054
3055 __printf(2, 0)
3056 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3057 {
3058 return trace_array_vprintk(&global_trace, ip, fmt, args);
3059 }
3060 EXPORT_SYMBOL_GPL(trace_vprintk);
3061
3062 static void trace_iterator_increment(struct trace_iterator *iter)
3063 {
3064 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3065
3066 iter->idx++;
3067 if (buf_iter)
3068 ring_buffer_read(buf_iter, NULL);
3069 }
3070
3071 static struct trace_entry *
3072 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3073 unsigned long *lost_events)
3074 {
3075 struct ring_buffer_event *event;
3076 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3077
3078 if (buf_iter)
3079 event = ring_buffer_iter_peek(buf_iter, ts);
3080 else
3081 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3082 lost_events);
3083
3084 if (event) {
3085 iter->ent_size = ring_buffer_event_length(event);
3086 return ring_buffer_event_data(event);
3087 }
3088 iter->ent_size = 0;
3089 return NULL;
3090 }
3091
3092 static struct trace_entry *
3093 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3094 unsigned long *missing_events, u64 *ent_ts)
3095 {
3096 struct ring_buffer *buffer = iter->trace_buffer->buffer;
3097 struct trace_entry *ent, *next = NULL;
3098 unsigned long lost_events = 0, next_lost = 0;
3099 int cpu_file = iter->cpu_file;
3100 u64 next_ts = 0, ts;
3101 int next_cpu = -1;
3102 int next_size = 0;
3103 int cpu;
3104
3105 /*
3106  * If we are in a per_cpu trace file, don't bother iterating over
3107  * all CPUs; peek at that CPU directly.
3108 */
3109 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3110 if (ring_buffer_empty_cpu(buffer, cpu_file))
3111 return NULL;
3112 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3113 if (ent_cpu)
3114 *ent_cpu = cpu_file;
3115
3116 return ent;
3117 }
3118
3119 for_each_tracing_cpu(cpu) {
3120
3121 if (ring_buffer_empty_cpu(buffer, cpu))
3122 continue;
3123
3124 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3125
3126 /*
3127 * Pick the entry with the smallest timestamp:
3128 */
3129 if (ent && (!next || ts < next_ts)) {
3130 next = ent;
3131 next_cpu = cpu;
3132 next_ts = ts;
3133 next_lost = lost_events;
3134 next_size = iter->ent_size;
3135 }
3136 }
3137
3138 iter->ent_size = next_size;
3139
3140 if (ent_cpu)
3141 *ent_cpu = next_cpu;
3142
3143 if (ent_ts)
3144 *ent_ts = next_ts;
3145
3146 if (missing_events)
3147 *missing_events = next_lost;
3148
3149 return next;
3150 }
3151
3152 /* Find the next real entry, without updating the iterator itself */
3153 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3154 int *ent_cpu, u64 *ent_ts)
3155 {
3156 return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3157 }
3158
3159 /* Find the next real entry, and increment the iterator to the next entry */
3160 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3161 {
3162 iter->ent = __find_next_entry(iter, &iter->cpu,
3163 &iter->lost_events, &iter->ts);
3164
3165 if (iter->ent)
3166 trace_iterator_increment(iter);
3167
3168 return iter->ent ? iter : NULL;
3169 }
3170
3171 static void trace_consume(struct trace_iterator *iter)
3172 {
3173 ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3174 &iter->lost_events);
3175 }
3176
3177 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3178 {
3179 struct trace_iterator *iter = m->private;
3180 int i = (int)*pos;
3181 void *ent;
3182
3183 WARN_ON_ONCE(iter->leftover);
3184
3185 (*pos)++;
3186
3187 /* can't go backwards */
3188 if (iter->idx > i)
3189 return NULL;
3190
3191 if (iter->idx < 0)
3192 ent = trace_find_next_entry_inc(iter);
3193 else
3194 ent = iter;
3195
3196 while (ent && iter->idx < i)
3197 ent = trace_find_next_entry_inc(iter);
3198
3199 iter->pos = *pos;
3200
3201 return ent;
3202 }
3203
3204 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3205 {
3206 struct ring_buffer_event *event;
3207 struct ring_buffer_iter *buf_iter;
3208 unsigned long entries = 0;
3209 u64 ts;
3210
3211 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3212
3213 buf_iter = trace_buffer_iter(iter, cpu);
3214 if (!buf_iter)
3215 return;
3216
3217 ring_buffer_iter_reset(buf_iter);
3218
3219 /*
3220 * We could have the case with the max latency tracers
3221  * that a reset never took place on a cpu. This is evidenced
3222  * by the timestamp being before the start of the buffer.
3223 */
3224 while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3225 if (ts >= iter->trace_buffer->time_start)
3226 break;
3227 entries++;
3228 ring_buffer_read(buf_iter, NULL);
3229 }
3230
3231 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3232 }
3233
3234 /*
3235  * The current tracer is copied to avoid taking a global lock
3236 * all around.
3237 */
3238 static void *s_start(struct seq_file *m, loff_t *pos)
3239 {
3240 struct trace_iterator *iter = m->private;
3241 struct trace_array *tr = iter->tr;
3242 int cpu_file = iter->cpu_file;
3243 void *p = NULL;
3244 loff_t l = 0;
3245 int cpu;
3246
3247 /*
3248 * copy the tracer to avoid using a global lock all around.
3249 * iter->trace is a copy of current_trace, the pointer to the
3250 * name may be used instead of a strcmp(), as iter->trace->name
3251 * will point to the same string as current_trace->name.
3252 */
3253 mutex_lock(&trace_types_lock);
3254 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3255 *iter->trace = *tr->current_trace;
3256 mutex_unlock(&trace_types_lock);
3257
3258 #ifdef CONFIG_TRACER_MAX_TRACE
3259 if (iter->snapshot && iter->trace->use_max_tr)
3260 return ERR_PTR(-EBUSY);
3261 #endif
3262
3263 if (!iter->snapshot)
3264 atomic_inc(&trace_record_taskinfo_disabled);
3265
3266 if (*pos != iter->pos) {
3267 iter->ent = NULL;
3268 iter->cpu = 0;
3269 iter->idx = -1;
3270
3271 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3272 for_each_tracing_cpu(cpu)
3273 tracing_iter_reset(iter, cpu);
3274 } else
3275 tracing_iter_reset(iter, cpu_file);
3276
3277 iter->leftover = 0;
3278 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3279 ;
3280
3281 } else {
3282 /*
3283 * If we overflowed the seq_file before, then we want
3284 * to just reuse the trace_seq buffer again.
3285 */
3286 if (iter->leftover)
3287 p = iter;
3288 else {
3289 l = *pos - 1;
3290 p = s_next(m, p, &l);
3291 }
3292 }
3293
3294 trace_event_read_lock();
3295 trace_access_lock(cpu_file);
3296 return p;
3297 }
3298
3299 static void s_stop(struct seq_file *m, void *p)
3300 {
3301 struct trace_iterator *iter = m->private;
3302
3303 #ifdef CONFIG_TRACER_MAX_TRACE
3304 if (iter->snapshot && iter->trace->use_max_tr)
3305 return;
3306 #endif
3307
3308 if (!iter->snapshot)
3309 atomic_dec(&trace_record_taskinfo_disabled);
3310
3311 trace_access_unlock(iter->cpu_file);
3312 trace_event_read_unlock();
3313 }
3314
3315 static void
3316 get_total_entries(struct trace_buffer *buf,
3317 unsigned long *total, unsigned long *entries)
3318 {
3319 unsigned long count;
3320 int cpu;
3321
3322 *total = 0;
3323 *entries = 0;
3324
3325 for_each_tracing_cpu(cpu) {
3326 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3327 /*
3328 * If this buffer has skipped entries, then we hold all
3329 * entries for the trace and we need to ignore the
3330 * ones before the time stamp.
3331 */
3332 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3333 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3334 /* total is the same as the entries */
3335 *total += count;
3336 } else
3337 *total += count +
3338 ring_buffer_overrun_cpu(buf->buffer, cpu);
3339 *entries += count;
3340 }
3341 }
3342
3343 static void print_lat_help_header(struct seq_file *m)
3344 {
3345 seq_puts(m, "# _------=> CPU# \n"
3346 "# / _-----=> irqs-off \n"
3347 "# | / _----=> need-resched \n"
3348 "# || / _---=> hardirq/softirq \n"
3349 "# ||| / _--=> preempt-depth \n"
3350 "# |||| / delay \n"
3351 "# cmd pid ||||| time | caller \n"
3352 "# \\ / ||||| \\ | / \n");
3353 }
3354
3355 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3356 {
3357 unsigned long total;
3358 unsigned long entries;
3359
3360 get_total_entries(buf, &total, &entries);
3361 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
3362 entries, total, num_online_cpus());
3363 seq_puts(m, "#\n");
3364 }
3365
3366 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3367 unsigned int flags)
3368 {
3369 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3370
3371 print_event_info(buf, m);
3372
3373 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? "TGID " : "");
3374 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
3375 }
3376
3377 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3378 unsigned int flags)
3379 {
3380 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3381 const char tgid_space[] = " ";
3382 const char space[] = " ";
3383
3384 print_event_info(buf, m);
3385
3386 seq_printf(m, "# %s _-----=> irqs-off\n",
3387 tgid ? tgid_space : space);
3388 seq_printf(m, "# %s / _----=> need-resched\n",
3389 tgid ? tgid_space : space);
3390 seq_printf(m, "# %s| / _---=> hardirq/softirq\n",
3391 tgid ? tgid_space : space);
3392 seq_printf(m, "# %s|| / _--=> preempt-depth\n",
3393 tgid ? tgid_space : space);
3394 seq_printf(m, "# %s||| / delay\n",
3395 tgid ? tgid_space : space);
3396 seq_printf(m, "# TASK-PID %sCPU# |||| TIMESTAMP FUNCTION\n",
3397 tgid ? " TGID " : space);
3398 seq_printf(m, "# | | %s | |||| | |\n",
3399 tgid ? " | " : space);
3400 }
3401
3402 void
3403 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3404 {
3405 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3406 struct trace_buffer *buf = iter->trace_buffer;
3407 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3408 struct tracer *type = iter->trace;
3409 unsigned long entries;
3410 unsigned long total;
3411 const char *name = "preemption";
3412
3413 name = type->name;
3414
3415 get_total_entries(buf, &total, &entries);
3416
3417 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3418 name, UTS_RELEASE);
3419 seq_puts(m, "# -----------------------------------"
3420 "---------------------------------\n");
3421 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3422 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3423 nsecs_to_usecs(data->saved_latency),
3424 entries,
3425 total,
3426 buf->cpu,
3427 #if defined(CONFIG_PREEMPT_NONE)
3428 "server",
3429 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3430 "desktop",
3431 #elif defined(CONFIG_PREEMPT)
3432 "preempt",
3433 #else
3434 "unknown",
3435 #endif
3436 /* These are reserved for later use */
3437 0, 0, 0, 0);
3438 #ifdef CONFIG_SMP
3439 seq_printf(m, " #P:%d)\n", num_online_cpus());
3440 #else
3441 seq_puts(m, ")\n");
3442 #endif
3443 seq_puts(m, "# -----------------\n");
3444 seq_printf(m, "# | task: %.16s-%d "
3445 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3446 data->comm, data->pid,
3447 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3448 data->policy, data->rt_priority);
3449 seq_puts(m, "# -----------------\n");
3450
3451 if (data->critical_start) {
3452 seq_puts(m, "# => started at: ");
3453 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3454 trace_print_seq(m, &iter->seq);
3455 seq_puts(m, "\n# => ended at: ");
3456 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3457 trace_print_seq(m, &iter->seq);
3458 seq_puts(m, "\n#\n");
3459 }
3460
3461 seq_puts(m, "#\n");
3462 }
3463
3464 static void test_cpu_buff_start(struct trace_iterator *iter)
3465 {
3466 struct trace_seq *s = &iter->seq;
3467 struct trace_array *tr = iter->tr;
3468
3469 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3470 return;
3471
3472 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3473 return;
3474
3475 if (cpumask_available(iter->started) &&
3476 cpumask_test_cpu(iter->cpu, iter->started))
3477 return;
3478
3479 if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3480 return;
3481
3482 if (cpumask_available(iter->started))
3483 cpumask_set_cpu(iter->cpu, iter->started);
3484
3485 /* Don't print started cpu buffer for the first entry of the trace */
3486 if (iter->idx > 1)
3487 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3488 iter->cpu);
3489 }
3490
3491 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3492 {
3493 struct trace_array *tr = iter->tr;
3494 struct trace_seq *s = &iter->seq;
3495 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3496 struct trace_entry *entry;
3497 struct trace_event *event;
3498
3499 entry = iter->ent;
3500
3501 test_cpu_buff_start(iter);
3502
3503 event = ftrace_find_event(entry->type);
3504
3505 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3506 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3507 trace_print_lat_context(iter);
3508 else
3509 trace_print_context(iter);
3510 }
3511
3512 if (trace_seq_has_overflowed(s))
3513 return TRACE_TYPE_PARTIAL_LINE;
3514
3515 if (event)
3516 return event->funcs->trace(iter, sym_flags, event);
3517
3518 trace_seq_printf(s, "Unknown type %d\n", entry->type);
3519
3520 return trace_handle_return(s);
3521 }
3522
3523 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3524 {
3525 struct trace_array *tr = iter->tr;
3526 struct trace_seq *s = &iter->seq;
3527 struct trace_entry *entry;
3528 struct trace_event *event;
3529
3530 entry = iter->ent;
3531
3532 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3533 trace_seq_printf(s, "%d %d %llu ",
3534 entry->pid, iter->cpu, iter->ts);
3535
3536 if (trace_seq_has_overflowed(s))
3537 return TRACE_TYPE_PARTIAL_LINE;
3538
3539 event = ftrace_find_event(entry->type);
3540 if (event)
3541 return event->funcs->raw(iter, 0, event);
3542
3543 trace_seq_printf(s, "%d ?\n", entry->type);
3544
3545 return trace_handle_return(s);
3546 }
3547
3548 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3549 {
3550 struct trace_array *tr = iter->tr;
3551 struct trace_seq *s = &iter->seq;
3552 unsigned char newline = '\n';
3553 struct trace_entry *entry;
3554 struct trace_event *event;
3555
3556 entry = iter->ent;
3557
3558 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3559 SEQ_PUT_HEX_FIELD(s, entry->pid);
3560 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3561 SEQ_PUT_HEX_FIELD(s, iter->ts);
3562 if (trace_seq_has_overflowed(s))
3563 return TRACE_TYPE_PARTIAL_LINE;
3564 }
3565
3566 event = ftrace_find_event(entry->type);
3567 if (event) {
3568 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3569 if (ret != TRACE_TYPE_HANDLED)
3570 return ret;
3571 }
3572
3573 SEQ_PUT_FIELD(s, newline);
3574
3575 return trace_handle_return(s);
3576 }
3577
3578 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3579 {
3580 struct trace_array *tr = iter->tr;
3581 struct trace_seq *s = &iter->seq;
3582 struct trace_entry *entry;
3583 struct trace_event *event;
3584
3585 entry = iter->ent;
3586
3587 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3588 SEQ_PUT_FIELD(s, entry->pid);
3589 SEQ_PUT_FIELD(s, iter->cpu);
3590 SEQ_PUT_FIELD(s, iter->ts);
3591 if (trace_seq_has_overflowed(s))
3592 return TRACE_TYPE_PARTIAL_LINE;
3593 }
3594
3595 event = ftrace_find_event(entry->type);
3596 return event ? event->funcs->binary(iter, 0, event) :
3597 TRACE_TYPE_HANDLED;
3598 }
3599
3600 int trace_empty(struct trace_iterator *iter)
3601 {
3602 struct ring_buffer_iter *buf_iter;
3603 int cpu;
3604
3605 /* If we are looking at one CPU buffer, only check that one */
3606 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3607 cpu = iter->cpu_file;
3608 buf_iter = trace_buffer_iter(iter, cpu);
3609 if (buf_iter) {
3610 if (!ring_buffer_iter_empty(buf_iter))
3611 return 0;
3612 } else {
3613 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3614 return 0;
3615 }
3616 return 1;
3617 }
3618
3619 for_each_tracing_cpu(cpu) {
3620 buf_iter = trace_buffer_iter(iter, cpu);
3621 if (buf_iter) {
3622 if (!ring_buffer_iter_empty(buf_iter))
3623 return 0;
3624 } else {
3625 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3626 return 0;
3627 }
3628 }
3629
3630 return 1;
3631 }
3632
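/*
 * Output-format precedence used below: a tracer's own print_line()
 * callback wins; then the printk-msg-only shortcuts for BPUTS, BPRINT
 * and PRINT entries; then the bin, hex and raw formats; and finally the
 * default human-readable format.
 */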
3633 /* Called with trace_event_read_lock() held. */
3634 enum print_line_t print_trace_line(struct trace_iterator *iter)
3635 {
3636 struct trace_array *tr = iter->tr;
3637 unsigned long trace_flags = tr->trace_flags;
3638 enum print_line_t ret;
3639
3640 if (iter->lost_events) {
3641 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3642 iter->cpu, iter->lost_events);
3643 if (trace_seq_has_overflowed(&iter->seq))
3644 return TRACE_TYPE_PARTIAL_LINE;
3645 }
3646
3647 if (iter->trace && iter->trace->print_line) {
3648 ret = iter->trace->print_line(iter);
3649 if (ret != TRACE_TYPE_UNHANDLED)
3650 return ret;
3651 }
3652
3653 if (iter->ent->type == TRACE_BPUTS &&
3654 trace_flags & TRACE_ITER_PRINTK &&
3655 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3656 return trace_print_bputs_msg_only(iter);
3657
3658 if (iter->ent->type == TRACE_BPRINT &&
3659 trace_flags & TRACE_ITER_PRINTK &&
3660 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3661 return trace_print_bprintk_msg_only(iter);
3662
3663 if (iter->ent->type == TRACE_PRINT &&
3664 trace_flags & TRACE_ITER_PRINTK &&
3665 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3666 return trace_print_printk_msg_only(iter);
3667
3668 if (trace_flags & TRACE_ITER_BIN)
3669 return print_bin_fmt(iter);
3670
3671 if (trace_flags & TRACE_ITER_HEX)
3672 return print_hex_fmt(iter);
3673
3674 if (trace_flags & TRACE_ITER_RAW)
3675 return print_raw_fmt(iter);
3676
3677 return print_trace_fmt(iter);
3678 }
3679
3680 void trace_latency_header(struct seq_file *m)
3681 {
3682 struct trace_iterator *iter = m->private;
3683 struct trace_array *tr = iter->tr;
3684
3685 /* print nothing if the buffers are empty */
3686 if (trace_empty(iter))
3687 return;
3688
3689 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3690 print_trace_header(m, iter);
3691
3692 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3693 print_lat_help_header(m);
3694 }
3695
3696 void trace_default_header(struct seq_file *m)
3697 {
3698 struct trace_iterator *iter = m->private;
3699 struct trace_array *tr = iter->tr;
3700 unsigned long trace_flags = tr->trace_flags;
3701
3702 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3703 return;
3704
3705 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3706 /* print nothing if the buffers are empty */
3707 if (trace_empty(iter))
3708 return;
3709 print_trace_header(m, iter);
3710 if (!(trace_flags & TRACE_ITER_VERBOSE))
3711 print_lat_help_header(m);
3712 } else {
3713 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3714 if (trace_flags & TRACE_ITER_IRQ_INFO)
3715 print_func_help_header_irq(iter->trace_buffer,
3716 m, trace_flags);
3717 else
3718 print_func_help_header(iter->trace_buffer, m,
3719 trace_flags);
3720 }
3721 }
3722 }
3723
3724 static void test_ftrace_alive(struct seq_file *m)
3725 {
3726 if (!ftrace_is_dead())
3727 return;
3728 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3729 "# MAY BE MISSING FUNCTION EVENTS\n");
3730 }
3731
3732 #ifdef CONFIG_TRACER_MAX_TRACE
3733 static void show_snapshot_main_help(struct seq_file *m)
3734 {
3735 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3736 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3737 "# Takes a snapshot of the main buffer.\n"
3738 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3739 "#                      (Doesn't have to be '2', works with any number that\n"
3740 "# is not a '0' or '1')\n");
3741 }
3742
3743 static void show_snapshot_percpu_help(struct seq_file *m)
3744 {
3745 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3746 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3747 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3748 "# Takes a snapshot of the main buffer for this cpu.\n");
3749 #else
3750 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3751 "# Must use main snapshot file to allocate.\n");
3752 #endif
3753 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3754 "#                      (Doesn't have to be '2', works with any number that\n"
3755 "# is not a '0' or '1')\n");
3756 }
3757
3758 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3759 {
3760 if (iter->tr->allocated_snapshot)
3761 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3762 else
3763 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3764
3765 seq_puts(m, "# Snapshot commands:\n");
3766 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3767 show_snapshot_main_help(m);
3768 else
3769 show_snapshot_percpu_help(m);
3770 }
3771 #else
3772 /* Should never be called */
3773 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3774 #endif
3775
3776 static int s_show(struct seq_file *m, void *v)
3777 {
3778 struct trace_iterator *iter = v;
3779 int ret;
3780
3781 if (iter->ent == NULL) {
3782 if (iter->tr) {
3783 seq_printf(m, "# tracer: %s\n", iter->trace->name);
3784 seq_puts(m, "#\n");
3785 test_ftrace_alive(m);
3786 }
3787 if (iter->snapshot && trace_empty(iter))
3788 print_snapshot_help(m, iter);
3789 else if (iter->trace && iter->trace->print_header)
3790 iter->trace->print_header(m);
3791 else
3792 trace_default_header(m);
3793
3794 } else if (iter->leftover) {
3795 /*
3796 * If we filled the seq_file buffer earlier, we
3797 * want to just show it now.
3798 */
3799 ret = trace_print_seq(m, &iter->seq);
3800
3801 /* ret should this time be zero, but you never know */
3802 iter->leftover = ret;
3803
3804 } else {
3805 print_trace_line(iter);
3806 ret = trace_print_seq(m, &iter->seq);
3807 /*
3808 * If we overflow the seq_file buffer, then it will
3809 * ask us for this data again at start up.
3810 * Use that instead.
3811 * ret is 0 if seq_file write succeeded.
3812 * -1 otherwise.
3813 */
3814 iter->leftover = ret;
3815 }
3816
3817 return 0;
3818 }
3819
3820 /*
3821  * Should be used after trace_array_get(); trace_types_lock
3822 * ensures that i_cdev was already initialized.
3823 */
3824 static inline int tracing_get_cpu(struct inode *inode)
3825 {
3826 if (inode->i_cdev) /* See trace_create_cpu_file() */
3827 return (long)inode->i_cdev - 1;
3828 return RING_BUFFER_ALL_CPUS;
3829 }
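
/*
 * Illustrative note: the per-cpu files are expected to stash "cpu + 1" in
 * i_cdev when they are created (see trace_create_cpu_file()), so a NULL
 * i_cdev unambiguously means "all CPUs" and the subtraction above recovers
 * the cpu number; roughly:
 *
 *	inode->i_cdev = (void *)(long)(cpu + 1);
 */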
3830
3831 static const struct seq_operations tracer_seq_ops = {
3832 .start = s_start,
3833 .next = s_next,
3834 .stop = s_stop,
3835 .show = s_show,
3836 };
3837
3838 static struct trace_iterator *
3839 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3840 {
3841 struct trace_array *tr = inode->i_private;
3842 struct trace_iterator *iter;
3843 int cpu;
3844
3845 if (tracing_disabled)
3846 return ERR_PTR(-ENODEV);
3847
3848 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3849 if (!iter)
3850 return ERR_PTR(-ENOMEM);
3851
3852 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3853 GFP_KERNEL);
3854 if (!iter->buffer_iter)
3855 goto release;
3856
3857 /*
3858 * We make a copy of the current tracer to avoid concurrent
3859 * changes on it while we are reading.
3860 */
3861 mutex_lock(&trace_types_lock);
3862 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3863 if (!iter->trace)
3864 goto fail;
3865
3866 *iter->trace = *tr->current_trace;
3867
3868 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3869 goto fail;
3870
3871 iter->tr = tr;
3872
3873 #ifdef CONFIG_TRACER_MAX_TRACE
3874 /* Currently only the top directory has a snapshot */
3875 if (tr->current_trace->print_max || snapshot)
3876 iter->trace_buffer = &tr->max_buffer;
3877 else
3878 #endif
3879 iter->trace_buffer = &tr->trace_buffer;
3880 iter->snapshot = snapshot;
3881 iter->pos = -1;
3882 iter->cpu_file = tracing_get_cpu(inode);
3883 mutex_init(&iter->mutex);
3884
3885 /* Notify the tracer early; before we stop tracing. */
3886 if (iter->trace && iter->trace->open)
3887 iter->trace->open(iter);
3888
3889 /* Annotate start of buffers if we had overruns */
3890 if (ring_buffer_overruns(iter->trace_buffer->buffer))
3891 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3892
3893 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3894 if (trace_clocks[tr->clock_id].in_ns)
3895 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3896
3897 /* stop the trace while dumping if we are not opening "snapshot" */
3898 if (!iter->snapshot)
3899 tracing_stop_tr(tr);
3900
3901 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3902 for_each_tracing_cpu(cpu) {
3903 iter->buffer_iter[cpu] =
3904 ring_buffer_read_prepare(iter->trace_buffer->buffer,
3905 cpu, GFP_KERNEL);
3906 }
3907 ring_buffer_read_prepare_sync();
3908 for_each_tracing_cpu(cpu) {
3909 ring_buffer_read_start(iter->buffer_iter[cpu]);
3910 tracing_iter_reset(iter, cpu);
3911 }
3912 } else {
3913 cpu = iter->cpu_file;
3914 iter->buffer_iter[cpu] =
3915 ring_buffer_read_prepare(iter->trace_buffer->buffer,
3916 cpu, GFP_KERNEL);
3917 ring_buffer_read_prepare_sync();
3918 ring_buffer_read_start(iter->buffer_iter[cpu]);
3919 tracing_iter_reset(iter, cpu);
3920 }
3921
3922 mutex_unlock(&trace_types_lock);
3923
3924 return iter;
3925
3926 fail:
3927 mutex_unlock(&trace_types_lock);
3928 kfree(iter->trace);
3929 kfree(iter->buffer_iter);
3930 release:
3931 seq_release_private(inode, file);
3932 return ERR_PTR(-ENOMEM);
3933 }
3934
3935 int tracing_open_generic(struct inode *inode, struct file *filp)
3936 {
3937 if (tracing_disabled)
3938 return -ENODEV;
3939
3940 filp->private_data = inode->i_private;
3941 return 0;
3942 }
3943
3944 bool tracing_is_disabled(void)
3945 {
3946 return (tracing_disabled) ? true: false;
3947 }
3948
3949 /*
3950 * Open and update trace_array ref count.
3951 * Must have the current trace_array passed to it.
3952 */
3953 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3954 {
3955 struct trace_array *tr = inode->i_private;
3956
3957 if (tracing_disabled)
3958 return -ENODEV;
3959
3960 if (trace_array_get(tr) < 0)
3961 return -ENODEV;
3962
3963 filp->private_data = inode->i_private;
3964
3965 return 0;
3966 }
3967
3968 static int tracing_release(struct inode *inode, struct file *file)
3969 {
3970 struct trace_array *tr = inode->i_private;
3971 struct seq_file *m = file->private_data;
3972 struct trace_iterator *iter;
3973 int cpu;
3974
3975 if (!(file->f_mode & FMODE_READ)) {
3976 trace_array_put(tr);
3977 return 0;
3978 }
3979
3980 /* Writes do not use seq_file */
3981 iter = m->private;
3982 mutex_lock(&trace_types_lock);
3983
3984 for_each_tracing_cpu(cpu) {
3985 if (iter->buffer_iter[cpu])
3986 ring_buffer_read_finish(iter->buffer_iter[cpu]);
3987 }
3988
3989 if (iter->trace && iter->trace->close)
3990 iter->trace->close(iter);
3991
3992 if (!iter->snapshot)
3993 /* reenable tracing if it was previously enabled */
3994 tracing_start_tr(tr);
3995
3996 __trace_array_put(tr);
3997
3998 mutex_unlock(&trace_types_lock);
3999
4000 mutex_destroy(&iter->mutex);
4001 free_cpumask_var(iter->started);
4002 kfree(iter->trace);
4003 kfree(iter->buffer_iter);
4004 seq_release_private(inode, file);
4005
4006 return 0;
4007 }
4008
4009 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4010 {
4011 struct trace_array *tr = inode->i_private;
4012
4013 trace_array_put(tr);
4014 return 0;
4015 }
4016
4017 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4018 {
4019 struct trace_array *tr = inode->i_private;
4020
4021 trace_array_put(tr);
4022
4023 return single_release(inode, file);
4024 }
4025
4026 static int tracing_open(struct inode *inode, struct file *file)
4027 {
4028 struct trace_array *tr = inode->i_private;
4029 struct trace_iterator *iter;
4030 int ret = 0;
4031
4032 if (trace_array_get(tr) < 0)
4033 return -ENODEV;
4034
4035 /* If this file was open for write, then erase contents */
4036 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4037 int cpu = tracing_get_cpu(inode);
4038 struct trace_buffer *trace_buf = &tr->trace_buffer;
4039
4040 #ifdef CONFIG_TRACER_MAX_TRACE
4041 if (tr->current_trace->print_max)
4042 trace_buf = &tr->max_buffer;
4043 #endif
4044
4045 if (cpu == RING_BUFFER_ALL_CPUS)
4046 tracing_reset_online_cpus(trace_buf);
4047 else
4048 tracing_reset(trace_buf, cpu);
4049 }
4050
4051 if (file->f_mode & FMODE_READ) {
4052 iter = __tracing_open(inode, file, false);
4053 if (IS_ERR(iter))
4054 ret = PTR_ERR(iter);
4055 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4056 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4057 }
4058
4059 if (ret < 0)
4060 trace_array_put(tr);
4061
4062 return ret;
4063 }
4064
4065 /*
4066 * Some tracers are not suitable for instance buffers.
4067 * A tracer is always available for the global array (toplevel)
4068 * or if it explicitly states that it is.
4069 */
4070 static bool
4071 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4072 {
4073 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4074 }
4075
4076 /* Find the next tracer that this trace array may use */
4077 static struct tracer *
4078 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4079 {
4080 while (t && !trace_ok_for_array(t, tr))
4081 t = t->next;
4082
4083 return t;
4084 }
4085
4086 static void *
4087 t_next(struct seq_file *m, void *v, loff_t *pos)
4088 {
4089 struct trace_array *tr = m->private;
4090 struct tracer *t = v;
4091
4092 (*pos)++;
4093
4094 if (t)
4095 t = get_tracer_for_array(tr, t->next);
4096
4097 return t;
4098 }
4099
4100 static void *t_start(struct seq_file *m, loff_t *pos)
4101 {
4102 struct trace_array *tr = m->private;
4103 struct tracer *t;
4104 loff_t l = 0;
4105
4106 mutex_lock(&trace_types_lock);
4107
4108 t = get_tracer_for_array(tr, trace_types);
4109 for (; t && l < *pos; t = t_next(m, t, &l))
4110 ;
4111
4112 return t;
4113 }
4114
4115 static void t_stop(struct seq_file *m, void *p)
4116 {
4117 mutex_unlock(&trace_types_lock);
4118 }
4119
4120 static int t_show(struct seq_file *m, void *v)
4121 {
4122 struct tracer *t = v;
4123
4124 if (!t)
4125 return 0;
4126
4127 seq_puts(m, t->name);
4128 if (t->next)
4129 seq_putc(m, ' ');
4130 else
4131 seq_putc(m, '\n');
4132
4133 return 0;
4134 }
4135
4136 static const struct seq_operations show_traces_seq_ops = {
4137 .start = t_start,
4138 .next = t_next,
4139 .stop = t_stop,
4140 .show = t_show,
4141 };
4142
4143 static int show_traces_open(struct inode *inode, struct file *file)
4144 {
4145 struct trace_array *tr = inode->i_private;
4146 struct seq_file *m;
4147 int ret;
4148
4149 if (tracing_disabled)
4150 return -ENODEV;
4151
4152 ret = seq_open(file, &show_traces_seq_ops);
4153 if (ret)
4154 return ret;
4155
4156 m = file->private_data;
4157 m->private = tr;
4158
4159 return 0;
4160 }
4161
4162 static ssize_t
4163 tracing_write_stub(struct file *filp, const char __user *ubuf,
4164 size_t count, loff_t *ppos)
4165 {
4166 return count;
4167 }
4168
4169 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4170 {
4171 int ret;
4172
4173 if (file->f_mode & FMODE_READ)
4174 ret = seq_lseek(file, offset, whence);
4175 else
4176 file->f_pos = ret = 0;
4177
4178 return ret;
4179 }
4180
4181 static const struct file_operations tracing_fops = {
4182 .open = tracing_open,
4183 .read = seq_read,
4184 .write = tracing_write_stub,
4185 .llseek = tracing_lseek,
4186 .release = tracing_release,
4187 };
4188
4189 static const struct file_operations show_traces_fops = {
4190 .open = show_traces_open,
4191 .read = seq_read,
4192 .release = seq_release,
4193 .llseek = seq_lseek,
4194 };
4195
4196 static ssize_t
4197 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4198 size_t count, loff_t *ppos)
4199 {
4200 struct trace_array *tr = file_inode(filp)->i_private;
4201 char *mask_str;
4202 int len;
4203
4204 len = snprintf(NULL, 0, "%*pb\n",
4205 cpumask_pr_args(tr->tracing_cpumask)) + 1;
4206 mask_str = kmalloc(len, GFP_KERNEL);
4207 if (!mask_str)
4208 return -ENOMEM;
4209
4210 len = snprintf(mask_str, len, "%*pb\n",
4211 cpumask_pr_args(tr->tracing_cpumask));
4212 if (len >= count) {
4213 count = -EINVAL;
4214 goto out_err;
4215 }
4216 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4217
4218 out_err:
4219 kfree(mask_str);
4220
4221 return count;
4222 }
4223
4224 static ssize_t
4225 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4226 size_t count, loff_t *ppos)
4227 {
4228 struct trace_array *tr = file_inode(filp)->i_private;
4229 cpumask_var_t tracing_cpumask_new;
4230 int err, cpu;
4231
4232 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4233 return -ENOMEM;
4234
4235 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4236 if (err)
4237 goto err_unlock;
4238
4239 local_irq_disable();
4240 arch_spin_lock(&tr->max_lock);
4241 for_each_tracing_cpu(cpu) {
4242 /*
4243 * Increase/decrease the disabled counter if we are
4244 * about to flip a bit in the cpumask:
4245 */
4246 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4247 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4248 atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4249 ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4250 }
4251 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4252 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4253 atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4254 ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4255 }
4256 }
4257 arch_spin_unlock(&tr->max_lock);
4258 local_irq_enable();
4259
4260 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4261 free_cpumask_var(tracing_cpumask_new);
4262
4263 return count;
4264
4265 err_unlock:
4266 free_cpumask_var(tracing_cpumask_new);
4267
4268 return err;
4269 }
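
/*
 * Example: tracing_cpumask takes the same hex mask format that
 * cpumask_parse_user() understands, so limiting tracing to CPUs 0-3 is a
 * single write.  Illustrative user-space sketch only; the tracefs mount
 * point and <fcntl.h>/<unistd.h> are assumed:
 *
 *	int fd = open("/sys/kernel/debug/tracing/tracing_cpumask", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "f\n", 2);	// trace CPUs 0-3; others stop recording
 *		close(fd);
 *	}
 */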
4270
4271 static const struct file_operations tracing_cpumask_fops = {
4272 .open = tracing_open_generic_tr,
4273 .read = tracing_cpumask_read,
4274 .write = tracing_cpumask_write,
4275 .release = tracing_release_generic_tr,
4276 .llseek = generic_file_llseek,
4277 };
4278
4279 static int tracing_trace_options_show(struct seq_file *m, void *v)
4280 {
4281 struct tracer_opt *trace_opts;
4282 struct trace_array *tr = m->private;
4283 u32 tracer_flags;
4284 int i;
4285
4286 mutex_lock(&trace_types_lock);
4287 tracer_flags = tr->current_trace->flags->val;
4288 trace_opts = tr->current_trace->flags->opts;
4289
4290 for (i = 0; trace_options[i]; i++) {
4291 if (tr->trace_flags & (1 << i))
4292 seq_printf(m, "%s\n", trace_options[i]);
4293 else
4294 seq_printf(m, "no%s\n", trace_options[i]);
4295 }
4296
4297 for (i = 0; trace_opts[i].name; i++) {
4298 if (tracer_flags & trace_opts[i].bit)
4299 seq_printf(m, "%s\n", trace_opts[i].name);
4300 else
4301 seq_printf(m, "no%s\n", trace_opts[i].name);
4302 }
4303 mutex_unlock(&trace_types_lock);
4304
4305 return 0;
4306 }
4307
4308 static int __set_tracer_option(struct trace_array *tr,
4309 struct tracer_flags *tracer_flags,
4310 struct tracer_opt *opts, int neg)
4311 {
4312 struct tracer *trace = tracer_flags->trace;
4313 int ret;
4314
4315 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4316 if (ret)
4317 return ret;
4318
4319 if (neg)
4320 tracer_flags->val &= ~opts->bit;
4321 else
4322 tracer_flags->val |= opts->bit;
4323 return 0;
4324 }
4325
4326 /* Try to assign a tracer specific option */
4327 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4328 {
4329 struct tracer *trace = tr->current_trace;
4330 struct tracer_flags *tracer_flags = trace->flags;
4331 struct tracer_opt *opts = NULL;
4332 int i;
4333
4334 for (i = 0; tracer_flags->opts[i].name; i++) {
4335 opts = &tracer_flags->opts[i];
4336
4337 if (strcmp(cmp, opts->name) == 0)
4338 return __set_tracer_option(tr, trace->flags, opts, neg);
4339 }
4340
4341 return -EINVAL;
4342 }
4343
4344 /* Some tracers require overwrite to stay enabled */
4345 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4346 {
4347 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4348 return -1;
4349
4350 return 0;
4351 }
4352
4353 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4354 {
4355 /* do nothing if flag is already set */
4356 if (!!(tr->trace_flags & mask) == !!enabled)
4357 return 0;
4358
4359 /* Give the tracer a chance to approve the change */
4360 if (tr->current_trace->flag_changed)
4361 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4362 return -EINVAL;
4363
4364 if (enabled)
4365 tr->trace_flags |= mask;
4366 else
4367 tr->trace_flags &= ~mask;
4368
4369 if (mask == TRACE_ITER_RECORD_CMD)
4370 trace_event_enable_cmd_record(enabled);
4371
4372 if (mask == TRACE_ITER_RECORD_TGID) {
4373 if (!tgid_map)
4374 tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
4375 GFP_KERNEL);
4376 if (!tgid_map) {
4377 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4378 return -ENOMEM;
4379 }
4380
4381 trace_event_enable_tgid_record(enabled);
4382 }
4383
4384 if (mask == TRACE_ITER_EVENT_FORK)
4385 trace_event_follow_fork(tr, enabled);
4386
4387 if (mask == TRACE_ITER_FUNC_FORK)
4388 ftrace_pid_follow_fork(tr, enabled);
4389
4390 if (mask == TRACE_ITER_OVERWRITE) {
4391 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4392 #ifdef CONFIG_TRACER_MAX_TRACE
4393 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4394 #endif
4395 }
4396
4397 if (mask == TRACE_ITER_PRINTK) {
4398 trace_printk_start_stop_comm(enabled);
4399 trace_printk_control(enabled);
4400 }
4401
4402 return 0;
4403 }
4404
4405 static int trace_set_options(struct trace_array *tr, char *option)
4406 {
4407 char *cmp;
4408 int neg = 0;
4409 int ret = -ENODEV;
4410 int i;
4411 size_t orig_len = strlen(option);
4412
4413 cmp = strstrip(option);
4414
4415 if (strncmp(cmp, "no", 2) == 0) {
4416 neg = 1;
4417 cmp += 2;
4418 }
4419
4420 mutex_lock(&trace_types_lock);
4421
4422 for (i = 0; trace_options[i]; i++) {
4423 if (strcmp(cmp, trace_options[i]) == 0) {
4424 ret = set_tracer_flag(tr, 1 << i, !neg);
4425 break;
4426 }
4427 }
4428
4429 /* If no option could be set, test the specific tracer options */
4430 if (!trace_options[i])
4431 ret = set_tracer_option(tr, cmp, neg);
4432
4433 mutex_unlock(&trace_types_lock);
4434
4435 /*
4436 * If the first trailing whitespace is replaced with '\0' by strstrip,
4437 * turn it back into a space.
4438 */
4439 if (orig_len > strlen(option))
4440 option[strlen(option)] = ' ';
4441
4442 return ret;
4443 }
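
/*
 * Example: as parsed above, a core option is cleared by writing its name
 * with a "no" prefix and set again by writing the bare name.  Illustrative
 * user-space sketch; the tracefs mount point and the "print-parent" option
 * are just convenient choices:
 *
 *	int fd = open("/sys/kernel/debug/tracing/trace_options", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "noprint-parent", 14);	// clear the flag
 *		write(fd, "print-parent", 12);	// set it again
 *		close(fd);
 *	}
 */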
4444
4445 static void __init apply_trace_boot_options(void)
4446 {
4447 char *buf = trace_boot_options_buf;
4448 char *option;
4449
4450 while (true) {
4451 option = strsep(&buf, ",");
4452
4453 if (!option)
4454 break;
4455
4456 if (*option)
4457 trace_set_options(&global_trace, option);
4458
4459 /* Put back the comma to allow this to be called again */
4460 if (buf)
4461 *(buf - 1) = ',';
4462 }
4463 }
4464
4465 static ssize_t
4466 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4467 size_t cnt, loff_t *ppos)
4468 {
4469 struct seq_file *m = filp->private_data;
4470 struct trace_array *tr = m->private;
4471 char buf[64];
4472 int ret;
4473
4474 if (cnt >= sizeof(buf))
4475 return -EINVAL;
4476
4477 if (copy_from_user(buf, ubuf, cnt))
4478 return -EFAULT;
4479
4480 buf[cnt] = 0;
4481
4482 ret = trace_set_options(tr, buf);
4483 if (ret < 0)
4484 return ret;
4485
4486 *ppos += cnt;
4487
4488 return cnt;
4489 }
4490
4491 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4492 {
4493 struct trace_array *tr = inode->i_private;
4494 int ret;
4495
4496 if (tracing_disabled)
4497 return -ENODEV;
4498
4499 if (trace_array_get(tr) < 0)
4500 return -ENODEV;
4501
4502 ret = single_open(file, tracing_trace_options_show, inode->i_private);
4503 if (ret < 0)
4504 trace_array_put(tr);
4505
4506 return ret;
4507 }
4508
4509 static const struct file_operations tracing_iter_fops = {
4510 .open = tracing_trace_options_open,
4511 .read = seq_read,
4512 .llseek = seq_lseek,
4513 .release = tracing_single_release_tr,
4514 .write = tracing_trace_options_write,
4515 };
4516
4517 static const char readme_msg[] =
4518 "tracing mini-HOWTO:\n\n"
4519 "# echo 0 > tracing_on : quick way to disable tracing\n"
4520 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4521 " Important files:\n"
4522 " trace\t\t\t- The static contents of the buffer\n"
4523 "\t\t\t To clear the buffer write into this file: echo > trace\n"
4524 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4525 " current_tracer\t- function and latency tracers\n"
4526 " available_tracers\t- list of configured tracers for current_tracer\n"
4527 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
4528 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
4529 " trace_clock\t\t-change the clock used to order events\n"
4530 " local: Per cpu clock but may not be synced across CPUs\n"
4531 " global: Synced across CPUs but slows tracing down.\n"
4532 " counter: Not a clock, but just an increment\n"
4533 " uptime: Jiffy counter from time of boot\n"
4534 " perf: Same clock that perf events use\n"
4535 #ifdef CONFIG_X86_64
4536 " x86-tsc: TSC cycle counter\n"
4537 #endif
4538 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4539 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4540 " tracing_cpumask\t- Limit which CPUs to trace\n"
4541 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4542 "\t\t\t Remove sub-buffer with rmdir\n"
4543 " trace_options\t\t- Set format or modify how tracing happens\n"
4544 "\t\t\t Disable an option by adding a suffix 'no' to the\n"
4545 "\t\t\t option name\n"
4546 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4547 #ifdef CONFIG_DYNAMIC_FTRACE
4548 "\n available_filter_functions - list of functions that can be filtered on\n"
4549 " set_ftrace_filter\t- echo function name in here to only trace these\n"
4550 "\t\t\t functions\n"
4551 "\t accepts: func_full_name or glob-matching-pattern\n"
4552 "\t modules: Can select a group via module\n"
4553 "\t Format: :mod:<module-name>\n"
4554 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
4555 "\t triggers: a command to perform when function is hit\n"
4556 "\t Format: <function>:<trigger>[:count]\n"
4557 "\t trigger: traceon, traceoff\n"
4558 "\t\t enable_event:<system>:<event>\n"
4559 "\t\t disable_event:<system>:<event>\n"
4560 #ifdef CONFIG_STACKTRACE
4561 "\t\t stacktrace\n"
4562 #endif
4563 #ifdef CONFIG_TRACER_SNAPSHOT
4564 "\t\t snapshot\n"
4565 #endif
4566 "\t\t dump\n"
4567 "\t\t cpudump\n"
4568 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
4569 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
4570 "\t The first one will disable tracing every time do_fault is hit\n"
4571 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
4572 "\t The first time do trap is hit and it disables tracing, the\n"
4573 "\t counter will decrement to 2. If tracing is already disabled,\n"
4574 "\t the counter will not decrement. It only decrements when the\n"
4575 "\t trigger did work\n"
4576 "\t To remove trigger without count:\n"
4577 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
4578 "\t To remove trigger with a count:\n"
4579 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4580 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
4581 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4582 "\t modules: Can select a group via module command :mod:\n"
4583 "\t Does not accept triggers\n"
4584 #endif /* CONFIG_DYNAMIC_FTRACE */
4585 #ifdef CONFIG_FUNCTION_TRACER
4586 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4587 "\t\t (function)\n"
4588 #endif
4589 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4590 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4591 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4592 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4593 #endif
4594 #ifdef CONFIG_TRACER_SNAPSHOT
4595 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
4596 "\t\t\t snapshot buffer. Read the contents for more\n"
4597 "\t\t\t information\n"
4598 #endif
4599 #ifdef CONFIG_STACK_TRACER
4600 " stack_trace\t\t- Shows the max stack trace when active\n"
4601 " stack_max_size\t- Shows current max stack size that was traced\n"
4602 "\t\t\t Write into this file to reset the max size (trigger a\n"
4603 "\t\t\t new trace)\n"
4604 #ifdef CONFIG_DYNAMIC_FTRACE
4605 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4606 "\t\t\t traces\n"
4607 #endif
4608 #endif /* CONFIG_STACK_TRACER */
4609 #ifdef CONFIG_KPROBE_EVENTS
4610 " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4611 "\t\t\t Write into this file to define/undefine new trace events.\n"
4612 #endif
4613 #ifdef CONFIG_UPROBE_EVENTS
4614 " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4615 "\t\t\t Write into this file to define/undefine new trace events.\n"
4616 #endif
4617 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4618 "\t accepts: event-definitions (one definition per line)\n"
4619 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
4620 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4621 "\t -:[<group>/]<event>\n"
4622 #ifdef CONFIG_KPROBE_EVENTS
4623 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4624 "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4625 #endif
4626 #ifdef CONFIG_UPROBE_EVENTS
4627 "\t place: <path>:<offset>\n"
4628 #endif
4629 "\t args: <name>=fetcharg[:type]\n"
4630 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4631 "\t $stack<index>, $stack, $retval, $comm\n"
4632 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4633 "\t b<bit-width>@<bit-offset>/<container-size>\n"
4634 #endif
4635 " events/\t\t- Directory containing all trace event subsystems:\n"
4636 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4637 " events/<system>/\t- Directory containing all trace events for <system>:\n"
4638 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4639 "\t\t\t events\n"
4640 " filter\t\t- If set, only events passing filter are traced\n"
4641 " events/<system>/<event>/\t- Directory containing control files for\n"
4642 "\t\t\t <event>:\n"
4643 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4644 " filter\t\t- If set, only events passing filter are traced\n"
4645 " trigger\t\t- If set, a command to perform when event is hit\n"
4646 "\t Format: <trigger>[:count][if <filter>]\n"
4647 "\t trigger: traceon, traceoff\n"
4648 "\t enable_event:<system>:<event>\n"
4649 "\t disable_event:<system>:<event>\n"
4650 #ifdef CONFIG_HIST_TRIGGERS
4651 "\t enable_hist:<system>:<event>\n"
4652 "\t disable_hist:<system>:<event>\n"
4653 #endif
4654 #ifdef CONFIG_STACKTRACE
4655 "\t\t stacktrace\n"
4656 #endif
4657 #ifdef CONFIG_TRACER_SNAPSHOT
4658 "\t\t snapshot\n"
4659 #endif
4660 #ifdef CONFIG_HIST_TRIGGERS
4661 "\t\t hist (see below)\n"
4662 #endif
4663 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
4664 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
4665 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4666 "\t events/block/block_unplug/trigger\n"
4667 "\t The first disables tracing every time block_unplug is hit.\n"
4668 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
4669 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
4670 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4671 "\t Like function triggers, the counter is only decremented if it\n"
4672 "\t enabled or disabled tracing.\n"
4673 "\t To remove a trigger without a count:\n"
4674 "\t echo '!<trigger> > <system>/<event>/trigger\n"
4675 "\t To remove a trigger with a count:\n"
4676 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
4677 "\t Filters can be ignored when removing a trigger.\n"
4678 #ifdef CONFIG_HIST_TRIGGERS
4679 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
4680 "\t Format: hist:keys=<field1[,field2,...]>\n"
4681 "\t [:values=<field1[,field2,...]>]\n"
4682 "\t [:sort=<field1[,field2,...]>]\n"
4683 "\t [:size=#entries]\n"
4684 "\t [:pause][:continue][:clear]\n"
4685 "\t [:name=histname1]\n"
4686 "\t [if <filter>]\n\n"
4687 "\t When a matching event is hit, an entry is added to a hash\n"
4688 "\t table using the key(s) and value(s) named, and the value of a\n"
4689 "\t sum called 'hitcount' is incremented. Keys and values\n"
4690 "\t correspond to fields in the event's format description. Keys\n"
4691 "\t can be any field, or the special string 'stacktrace'.\n"
4692 "\t Compound keys consisting of up to two fields can be specified\n"
4693 "\t by the 'keys' keyword. Values must correspond to numeric\n"
4694 "\t fields. Sort keys consisting of up to two fields can be\n"
4695 "\t specified using the 'sort' keyword. The sort direction can\n"
4696 "\t be modified by appending '.descending' or '.ascending' to a\n"
4697 "\t sort field. The 'size' parameter can be used to specify more\n"
4698 "\t or fewer than the default 2048 entries for the hashtable size.\n"
4699 "\t If a hist trigger is given a name using the 'name' parameter,\n"
4700 "\t its histogram data will be shared with other triggers of the\n"
4701 "\t same name, and trigger hits will update this common data.\n\n"
4702 "\t Reading the 'hist' file for the event will dump the hash\n"
4703 "\t table in its entirety to stdout. If there are multiple hist\n"
4704 "\t triggers attached to an event, there will be a table for each\n"
4705 "\t trigger in the output. The table displayed for a named\n"
4706 "\t trigger will be the same as any other instance having the\n"
4707 "\t same name. The default format used to display a given field\n"
4708 "\t can be modified by appending any of the following modifiers\n"
4709 "\t to the field name, as applicable:\n\n"
4710 "\t .hex display a number as a hex value\n"
4711 "\t .sym display an address as a symbol\n"
4712 "\t .sym-offset display an address as a symbol and offset\n"
4713 "\t .execname display a common_pid as a program name\n"
4714 "\t .syscall display a syscall id as a syscall name\n\n"
4715 "\t .log2 display log2 value rather than raw number\n\n"
4716 "\t The 'pause' parameter can be used to pause an existing hist\n"
4717 "\t trigger or to start a hist trigger but not log any events\n"
4718 "\t until told to do so. 'continue' can be used to start or\n"
4719 "\t restart a paused hist trigger.\n\n"
4720 "\t The 'clear' parameter will clear the contents of a running\n"
4721 "\t hist trigger and leave its current paused/active state\n"
4722 "\t unchanged.\n\n"
4723 "\t The enable_hist and disable_hist triggers can be used to\n"
4724 "\t have one event conditionally start and stop another event's\n"
4725 "\t already-attached hist trigger. The syntax is analagous to\n"
4726 "\t the enable_event and disable_event triggers.\n"
4727 #endif
4728 ;
4729
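/*
 * Example: a minimal user-space sketch of the first steps in the mini-HOWTO
 * above -- turn tracing on, then do one consuming read from trace_pipe.  The
 * tracefs mount point below is only an assumption for illustration.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	#define TRACEFS "/sys/kernel/debug/tracing/"
 *
 *	int main(void)
 *	{
 *		char buf[4096];
 *		ssize_t n;
 *		int fd;
 *
 *		fd = open(TRACEFS "tracing_on", O_WRONLY);
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "1", 1);	// same effect as "echo 1 > tracing_on"
 *		close(fd);
 *
 *		// trace_pipe is a consuming, blocking read (tracing_read_pipe()).
 *		fd = open(TRACEFS "trace_pipe", O_RDONLY);
 *		if (fd < 0)
 *			return 1;
 *		n = read(fd, buf, sizeof(buf));
 *		if (n > 0)
 *			fwrite(buf, 1, n, stdout);
 *		close(fd);
 *		return 0;
 *	}
 */
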
4730 static ssize_t
4731 tracing_readme_read(struct file *filp, char __user *ubuf,
4732 size_t cnt, loff_t *ppos)
4733 {
4734 return simple_read_from_buffer(ubuf, cnt, ppos,
4735 readme_msg, strlen(readme_msg));
4736 }
4737
4738 static const struct file_operations tracing_readme_fops = {
4739 .open = tracing_open_generic,
4740 .read = tracing_readme_read,
4741 .llseek = generic_file_llseek,
4742 };
4743
4744 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4745 {
4746 int *ptr = v;
4747
4748 if (*pos || m->count)
4749 ptr++;
4750
4751 (*pos)++;
4752
4753 for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4754 if (trace_find_tgid(*ptr))
4755 return ptr;
4756 }
4757
4758 return NULL;
4759 }
4760
4761 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4762 {
4763 void *v;
4764 loff_t l = 0;
4765
4766 if (!tgid_map)
4767 return NULL;
4768
4769 v = &tgid_map[0];
4770 while (l <= *pos) {
4771 v = saved_tgids_next(m, v, &l);
4772 if (!v)
4773 return NULL;
4774 }
4775
4776 return v;
4777 }
4778
4779 static void saved_tgids_stop(struct seq_file *m, void *v)
4780 {
4781 }
4782
4783 static int saved_tgids_show(struct seq_file *m, void *v)
4784 {
4785 int pid = (int *)v - tgid_map;
4786
4787 seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4788 return 0;
4789 }
4790
4791 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4792 .start = saved_tgids_start,
4793 .stop = saved_tgids_stop,
4794 .next = saved_tgids_next,
4795 .show = saved_tgids_show,
4796 };
4797
4798 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4799 {
4800 if (tracing_disabled)
4801 return -ENODEV;
4802
4803 return seq_open(filp, &tracing_saved_tgids_seq_ops);
4804 }
4805
4806
4807 static const struct file_operations tracing_saved_tgids_fops = {
4808 .open = tracing_saved_tgids_open,
4809 .read = seq_read,
4810 .llseek = seq_lseek,
4811 .release = seq_release,
4812 };
4813
4814 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4815 {
4816 unsigned int *ptr = v;
4817
4818 if (*pos || m->count)
4819 ptr++;
4820
4821 (*pos)++;
4822
4823 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4824 ptr++) {
4825 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4826 continue;
4827
4828 return ptr;
4829 }
4830
4831 return NULL;
4832 }
4833
4834 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4835 {
4836 void *v;
4837 loff_t l = 0;
4838
4839 preempt_disable();
4840 arch_spin_lock(&trace_cmdline_lock);
4841
4842 v = &savedcmd->map_cmdline_to_pid[0];
4843 while (l <= *pos) {
4844 v = saved_cmdlines_next(m, v, &l);
4845 if (!v)
4846 return NULL;
4847 }
4848
4849 return v;
4850 }
4851
4852 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4853 {
4854 arch_spin_unlock(&trace_cmdline_lock);
4855 preempt_enable();
4856 }
4857
4858 static int saved_cmdlines_show(struct seq_file *m, void *v)
4859 {
4860 char buf[TASK_COMM_LEN];
4861 unsigned int *pid = v;
4862
4863 __trace_find_cmdline(*pid, buf);
4864 seq_printf(m, "%d %s\n", *pid, buf);
4865 return 0;
4866 }
4867
4868 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4869 .start = saved_cmdlines_start,
4870 .next = saved_cmdlines_next,
4871 .stop = saved_cmdlines_stop,
4872 .show = saved_cmdlines_show,
4873 };
4874
4875 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4876 {
4877 if (tracing_disabled)
4878 return -ENODEV;
4879
4880 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4881 }
4882
4883 static const struct file_operations tracing_saved_cmdlines_fops = {
4884 .open = tracing_saved_cmdlines_open,
4885 .read = seq_read,
4886 .llseek = seq_lseek,
4887 .release = seq_release,
4888 };
4889
4890 static ssize_t
4891 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4892 size_t cnt, loff_t *ppos)
4893 {
4894 char buf[64];
4895 int r;
4896
4897 arch_spin_lock(&trace_cmdline_lock);
4898 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4899 arch_spin_unlock(&trace_cmdline_lock);
4900
4901 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4902 }
4903
4904 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4905 {
4906 kfree(s->saved_cmdlines);
4907 kfree(s->map_cmdline_to_pid);
4908 kfree(s);
4909 }
4910
4911 static int tracing_resize_saved_cmdlines(unsigned int val)
4912 {
4913 struct saved_cmdlines_buffer *s, *savedcmd_temp;
4914
4915 s = kmalloc(sizeof(*s), GFP_KERNEL);
4916 if (!s)
4917 return -ENOMEM;
4918
4919 if (allocate_cmdlines_buffer(val, s) < 0) {
4920 kfree(s);
4921 return -ENOMEM;
4922 }
4923
4924 arch_spin_lock(&trace_cmdline_lock);
4925 savedcmd_temp = savedcmd;
4926 savedcmd = s;
4927 arch_spin_unlock(&trace_cmdline_lock);
4928 free_saved_cmdlines_buffer(savedcmd_temp);
4929
4930 return 0;
4931 }
4932
4933 static ssize_t
4934 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4935 size_t cnt, loff_t *ppos)
4936 {
4937 unsigned long val;
4938 int ret;
4939
4940 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4941 if (ret)
4942 return ret;
4943
4944 /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4945 if (!val || val > PID_MAX_DEFAULT)
4946 return -EINVAL;
4947
4948 ret = tracing_resize_saved_cmdlines((unsigned int)val);
4949 if (ret < 0)
4950 return ret;
4951
4952 *ppos += cnt;
4953
4954 return cnt;
4955 }
4956
4957 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4958 .open = tracing_open_generic,
4959 .read = tracing_saved_cmdlines_size_read,
4960 .write = tracing_saved_cmdlines_size_write,
4961 };
4962
4963 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4964 static union trace_eval_map_item *
4965 update_eval_map(union trace_eval_map_item *ptr)
4966 {
4967 if (!ptr->map.eval_string) {
4968 if (ptr->tail.next) {
4969 ptr = ptr->tail.next;
4970 /* Set ptr to the next real item (skip head) */
4971 ptr++;
4972 } else
4973 return NULL;
4974 }
4975 return ptr;
4976 }
4977
4978 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4979 {
4980 union trace_eval_map_item *ptr = v;
4981
4982 /*
4983 * Paranoid! If ptr points to end, we don't want to increment past it.
4984 * This really should never happen.
4985 */
4986 ptr = update_eval_map(ptr);
4987 if (WARN_ON_ONCE(!ptr))
4988 return NULL;
4989
4990 ptr++;
4991
4992 (*pos)++;
4993
4994 ptr = update_eval_map(ptr);
4995
4996 return ptr;
4997 }
4998
4999 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5000 {
5001 union trace_eval_map_item *v;
5002 loff_t l = 0;
5003
5004 mutex_lock(&trace_eval_mutex);
5005
5006 v = trace_eval_maps;
5007 if (v)
5008 v++;
5009
5010 while (v && l < *pos) {
5011 v = eval_map_next(m, v, &l);
5012 }
5013
5014 return v;
5015 }
5016
5017 static void eval_map_stop(struct seq_file *m, void *v)
5018 {
5019 mutex_unlock(&trace_eval_mutex);
5020 }
5021
5022 static int eval_map_show(struct seq_file *m, void *v)
5023 {
5024 union trace_eval_map_item *ptr = v;
5025
5026 seq_printf(m, "%s %ld (%s)\n",
5027 ptr->map.eval_string, ptr->map.eval_value,
5028 ptr->map.system);
5029
5030 return 0;
5031 }
5032
5033 static const struct seq_operations tracing_eval_map_seq_ops = {
5034 .start = eval_map_start,
5035 .next = eval_map_next,
5036 .stop = eval_map_stop,
5037 .show = eval_map_show,
5038 };
5039
5040 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5041 {
5042 if (tracing_disabled)
5043 return -ENODEV;
5044
5045 return seq_open(filp, &tracing_eval_map_seq_ops);
5046 }
5047
5048 static const struct file_operations tracing_eval_map_fops = {
5049 .open = tracing_eval_map_open,
5050 .read = seq_read,
5051 .llseek = seq_lseek,
5052 .release = seq_release,
5053 };
5054
5055 static inline union trace_eval_map_item *
5056 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5057 {
5058 /* Return tail of array given the head */
5059 return ptr + ptr->head.length + 1;
5060 }
5061
5062 static void
5063 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5064 int len)
5065 {
5066 struct trace_eval_map **stop;
5067 struct trace_eval_map **map;
5068 union trace_eval_map_item *map_array;
5069 union trace_eval_map_item *ptr;
5070
5071 stop = start + len;
5072
5073 /*
5074 * The trace_eval_maps contains the map plus a head and tail item,
5075 * where the head holds the module and length of array, and the
5076 * tail holds a pointer to the next list.
5077 */
5078 map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
5079 if (!map_array) {
5080 pr_warn("Unable to allocate trace eval mapping\n");
5081 return;
5082 }
5083
5084 mutex_lock(&trace_eval_mutex);
5085
5086 if (!trace_eval_maps)
5087 trace_eval_maps = map_array;
5088 else {
5089 ptr = trace_eval_maps;
5090 for (;;) {
5091 ptr = trace_eval_jmp_to_tail(ptr);
5092 if (!ptr->tail.next)
5093 break;
5094 ptr = ptr->tail.next;
5095
5096 }
5097 ptr->tail.next = map_array;
5098 }
5099 map_array->head.mod = mod;
5100 map_array->head.length = len;
5101 map_array++;
5102
5103 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5104 map_array->map = **map;
5105 map_array++;
5106 }
5107 memset(map_array, 0, sizeof(*map_array));
5108
5109 mutex_unlock(&trace_eval_mutex);
5110 }
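
/*
 * Example: each allocation above is laid out as [head][map 0] ... [map len-1]
 * [tail], with tail.next chaining to the head of the next allocation.  A
 * sketch of a full walk (with trace_eval_mutex held), just to illustrate the
 * layout:
 *
 *	union trace_eval_map_item *ptr = trace_eval_maps;
 *
 *	while (ptr) {
 *		int i, len = ptr->head.length;
 *
 *		for (i = 0; i < len; i++)
 *			pr_info("%s -> %ld (%s)\n", ptr[i + 1].map.eval_string,
 *				ptr[i + 1].map.eval_value, ptr[i + 1].map.system);
 *		// The tail item sits right after the last map entry.
 *		ptr = trace_eval_jmp_to_tail(ptr)->tail.next;
 *	}
 */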
5111
5112 static void trace_create_eval_file(struct dentry *d_tracer)
5113 {
5114 trace_create_file("eval_map", 0444, d_tracer,
5115 NULL, &tracing_eval_map_fops);
5116 }
5117
5118 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5119 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5120 static inline void trace_insert_eval_map_file(struct module *mod,
5121 struct trace_eval_map **start, int len) { }
5122 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5123
5124 static void trace_insert_eval_map(struct module *mod,
5125 struct trace_eval_map **start, int len)
5126 {
5127 struct trace_eval_map **map;
5128
5129 if (len <= 0)
5130 return;
5131
5132 map = start;
5133
5134 trace_event_eval_update(map, len);
5135
5136 trace_insert_eval_map_file(mod, start, len);
5137 }
5138
5139 static ssize_t
5140 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5141 size_t cnt, loff_t *ppos)
5142 {
5143 struct trace_array *tr = filp->private_data;
5144 char buf[MAX_TRACER_SIZE+2];
5145 int r;
5146
5147 mutex_lock(&trace_types_lock);
5148 r = sprintf(buf, "%s\n", tr->current_trace->name);
5149 mutex_unlock(&trace_types_lock);
5150
5151 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5152 }
5153
5154 int tracer_init(struct tracer *t, struct trace_array *tr)
5155 {
5156 tracing_reset_online_cpus(&tr->trace_buffer);
5157 return t->init(tr);
5158 }
5159
5160 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5161 {
5162 int cpu;
5163
5164 for_each_tracing_cpu(cpu)
5165 per_cpu_ptr(buf->data, cpu)->entries = val;
5166 }
5167
5168 #ifdef CONFIG_TRACER_MAX_TRACE
5169 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5170 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5171 struct trace_buffer *size_buf, int cpu_id)
5172 {
5173 int cpu, ret = 0;
5174
5175 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5176 for_each_tracing_cpu(cpu) {
5177 ret = ring_buffer_resize(trace_buf->buffer,
5178 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5179 if (ret < 0)
5180 break;
5181 per_cpu_ptr(trace_buf->data, cpu)->entries =
5182 per_cpu_ptr(size_buf->data, cpu)->entries;
5183 }
5184 } else {
5185 ret = ring_buffer_resize(trace_buf->buffer,
5186 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5187 if (ret == 0)
5188 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5189 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5190 }
5191
5192 return ret;
5193 }
5194 #endif /* CONFIG_TRACER_MAX_TRACE */
5195
5196 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5197 unsigned long size, int cpu)
5198 {
5199 int ret;
5200
5201 /*
5202 * If kernel or user changes the size of the ring buffer
5203 * we use the size that was given, and we can forget about
5204 * expanding it later.
5205 */
5206 ring_buffer_expanded = true;
5207
5208 /* May be called before buffers are initialized */
5209 if (!tr->trace_buffer.buffer)
5210 return 0;
5211
5212 ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5213 if (ret < 0)
5214 return ret;
5215
5216 #ifdef CONFIG_TRACER_MAX_TRACE
5217 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5218 !tr->current_trace->use_max_tr)
5219 goto out;
5220
5221 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5222 if (ret < 0) {
5223 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5224 &tr->trace_buffer, cpu);
5225 if (r < 0) {
5226 /*
5227 * AARGH! We are left with different
5228 * size max buffer!!!!
5229 * The max buffer is our "snapshot" buffer.
5230 * When a tracer needs a snapshot (one of the
5231 * latency tracers), it swaps the max buffer
5232 * with the saved snapshot. We succeeded in updating
5233 * the size of the main buffer, but failed to
5234 * update the size of the max buffer. But when we tried
5235 * to reset the main buffer to the original size, we
5236 * failed there too. This is very unlikely to
5237 * happen, but if it does, warn and kill all
5238 * tracing.
5239 */
5240 WARN_ON(1);
5241 tracing_disabled = 1;
5242 }
5243 return ret;
5244 }
5245
5246 if (cpu == RING_BUFFER_ALL_CPUS)
5247 set_buffer_entries(&tr->max_buffer, size);
5248 else
5249 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5250
5251 out:
5252 #endif /* CONFIG_TRACER_MAX_TRACE */
5253
5254 if (cpu == RING_BUFFER_ALL_CPUS)
5255 set_buffer_entries(&tr->trace_buffer, size);
5256 else
5257 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5258
5259 return ret;
5260 }
5261
5262 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5263 unsigned long size, int cpu_id)
5264 {
5265 int ret = size;
5266
5267 mutex_lock(&trace_types_lock);
5268
5269 if (cpu_id != RING_BUFFER_ALL_CPUS) {
5270 /* make sure this cpu is enabled in the mask */
5271 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5272 ret = -EINVAL;
5273 goto out;
5274 }
5275 }
5276
5277 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5278 if (ret < 0)
5279 ret = -ENOMEM;
5280
5281 out:
5282 mutex_unlock(&trace_types_lock);
5283
5284 return ret;
5285 }
5286
5287
5288 /**
5289 * tracing_update_buffers - used by tracing facility to expand ring buffers
5290 *
5291 * To save memory when tracing is never used on a system that has it
5292 * configured in, the ring buffers are initially set to a minimum size.
5293 * Once a user starts to use the tracing facility, they need to grow
5294 * to their default size.
5295 *
5296 * This function is to be called when a tracer is about to be used.
5297 */
5298 int tracing_update_buffers(void)
5299 {
5300 int ret = 0;
5301
5302 mutex_lock(&trace_types_lock);
5303 if (!ring_buffer_expanded)
5304 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5305 RING_BUFFER_ALL_CPUS);
5306 mutex_unlock(&trace_types_lock);
5307
5308 return ret;
5309 }
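
/*
 * Example: the intended calling pattern for the helper above is to expand
 * the (possibly still minimal) buffers right before a tracer or event is
 * enabled, and to back out if that fails.  Sketch only:
 *
 *	int ret;
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *	// buffers are now at their full size; safe to start tracing
 */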
5310
5311 struct trace_option_dentry;
5312
5313 static void
5314 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5315
5316 /*
5317 * Used to clear out the tracer before deletion of an instance.
5318 * Must have trace_types_lock held.
5319 */
5320 static void tracing_set_nop(struct trace_array *tr)
5321 {
5322 if (tr->current_trace == &nop_trace)
5323 return;
5324
5325 tr->current_trace->enabled--;
5326
5327 if (tr->current_trace->reset)
5328 tr->current_trace->reset(tr);
5329
5330 tr->current_trace = &nop_trace;
5331 }
5332
5333 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5334 {
5335 /* Only enable if the directory has been created already. */
5336 if (!tr->dir)
5337 return;
5338
5339 create_trace_option_files(tr, t);
5340 }
5341
5342 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5343 {
5344 struct tracer *t;
5345 #ifdef CONFIG_TRACER_MAX_TRACE
5346 bool had_max_tr;
5347 #endif
5348 int ret = 0;
5349
5350 mutex_lock(&trace_types_lock);
5351
5352 if (!ring_buffer_expanded) {
5353 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5354 RING_BUFFER_ALL_CPUS);
5355 if (ret < 0)
5356 goto out;
5357 ret = 0;
5358 }
5359
5360 for (t = trace_types; t; t = t->next) {
5361 if (strcmp(t->name, buf) == 0)
5362 break;
5363 }
5364 if (!t) {
5365 ret = -EINVAL;
5366 goto out;
5367 }
5368 if (t == tr->current_trace)
5369 goto out;
5370
5371 /* Some tracers won't work on kernel command line */
5372 if (system_state < SYSTEM_RUNNING && t->noboot) {
5373 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5374 t->name);
5375 goto out;
5376 }
5377
5378 /* Some tracers are only allowed for the top level buffer */
5379 if (!trace_ok_for_array(t, tr)) {
5380 ret = -EINVAL;
5381 goto out;
5382 }
5383
5384 /* If trace pipe files are being read, we can't change the tracer */
5385 if (tr->current_trace->ref) {
5386 ret = -EBUSY;
5387 goto out;
5388 }
5389
5390 trace_branch_disable();
5391
5392 tr->current_trace->enabled--;
5393
5394 if (tr->current_trace->reset)
5395 tr->current_trace->reset(tr);
5396
5397 /* Current trace needs to be nop_trace before synchronize_sched */
5398 tr->current_trace = &nop_trace;
5399
5400 #ifdef CONFIG_TRACER_MAX_TRACE
5401 had_max_tr = tr->allocated_snapshot;
5402
5403 if (had_max_tr && !t->use_max_tr) {
5404 /*
5405 * We need to make sure that the update_max_tr sees that
5406 * current_trace changed to nop_trace to keep it from
5407 * swapping the buffers after we resize it.
5408 * The update_max_tr is called with interrupts disabled,
5409 * so a synchronize_sched() is sufficient.
5410 */
5411 synchronize_sched();
5412 free_snapshot(tr);
5413 }
5414 #endif
5415
5416 #ifdef CONFIG_TRACER_MAX_TRACE
5417 if (t->use_max_tr && !had_max_tr) {
5418 ret = tracing_alloc_snapshot_instance(tr);
5419 if (ret < 0)
5420 goto out;
5421 }
5422 #endif
5423
5424 if (t->init) {
5425 ret = tracer_init(t, tr);
5426 if (ret)
5427 goto out;
5428 }
5429
5430 tr->current_trace = t;
5431 tr->current_trace->enabled++;
5432 trace_branch_enable(tr);
5433 out:
5434 mutex_unlock(&trace_types_lock);
5435
5436 return ret;
5437 }
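
/*
 * Example: user space reaches the function above by writing a tracer name to
 * "current_tracer".  Per the checks above, an unknown name fails with EINVAL
 * and switching while a trace_pipe reader holds a reference fails with EBUSY.
 * Illustrative user-space sketch (tracefs path assumed):
 *
 *	int fd = open("/sys/kernel/debug/tracing/current_tracer", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		if (write(fd, "nop", 3) < 0)
 *			perror("current_tracer");	// e.g. EINVAL or EBUSY
 *		close(fd);
 *	}
 */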
5438
5439 static ssize_t
5440 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5441 size_t cnt, loff_t *ppos)
5442 {
5443 struct trace_array *tr = filp->private_data;
5444 char buf[MAX_TRACER_SIZE+1];
5445 int i;
5446 size_t ret;
5447 int err;
5448
5449 ret = cnt;
5450
5451 if (cnt > MAX_TRACER_SIZE)
5452 cnt = MAX_TRACER_SIZE;
5453
5454 if (copy_from_user(buf, ubuf, cnt))
5455 return -EFAULT;
5456
5457 buf[cnt] = 0;
5458
5459 /* strip trailing whitespace. */
5460 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5461 buf[i] = 0;
5462
5463 err = tracing_set_tracer(tr, buf);
5464 if (err)
5465 return err;
5466
5467 *ppos += ret;
5468
5469 return ret;
5470 }
5471
5472 static ssize_t
5473 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5474 size_t cnt, loff_t *ppos)
5475 {
5476 char buf[64];
5477 int r;
5478
5479 r = snprintf(buf, sizeof(buf), "%ld\n",
5480 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5481 if (r > sizeof(buf))
5482 r = sizeof(buf);
5483 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5484 }
5485
5486 static ssize_t
5487 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5488 size_t cnt, loff_t *ppos)
5489 {
5490 unsigned long val;
5491 int ret;
5492
5493 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5494 if (ret)
5495 return ret;
5496
5497 *ptr = val * 1000;
5498
5499 return cnt;
5500 }
5501
5502 static ssize_t
5503 tracing_thresh_read(struct file *filp, char __user *ubuf,
5504 size_t cnt, loff_t *ppos)
5505 {
5506 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5507 }
5508
5509 static ssize_t
5510 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5511 size_t cnt, loff_t *ppos)
5512 {
5513 struct trace_array *tr = filp->private_data;
5514 int ret;
5515
5516 mutex_lock(&trace_types_lock);
5517 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5518 if (ret < 0)
5519 goto out;
5520
5521 if (tr->current_trace->update_thresh) {
5522 ret = tr->current_trace->update_thresh(tr);
5523 if (ret < 0)
5524 goto out;
5525 }
5526
5527 ret = cnt;
5528 out:
5529 mutex_unlock(&trace_types_lock);
5530
5531 return ret;
5532 }
5533
5534 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5535
5536 static ssize_t
5537 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5538 size_t cnt, loff_t *ppos)
5539 {
5540 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5541 }
5542
5543 static ssize_t
5544 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5545 size_t cnt, loff_t *ppos)
5546 {
5547 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5548 }
5549
5550 #endif
5551
5552 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5553 {
5554 struct trace_array *tr = inode->i_private;
5555 struct trace_iterator *iter;
5556 int ret = 0;
5557
5558 if (tracing_disabled)
5559 return -ENODEV;
5560
5561 if (trace_array_get(tr) < 0)
5562 return -ENODEV;
5563
5564 mutex_lock(&trace_types_lock);
5565
5566 /* create a buffer to store the information to pass to userspace */
5567 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5568 if (!iter) {
5569 ret = -ENOMEM;
5570 __trace_array_put(tr);
5571 goto out;
5572 }
5573
5574 trace_seq_init(&iter->seq);
5575 iter->trace = tr->current_trace;
5576
5577 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5578 ret = -ENOMEM;
5579 goto fail;
5580 }
5581
5582 /* trace pipe does not show start of buffer */
5583 cpumask_setall(iter->started);
5584
5585 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5586 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5587
5588 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5589 if (trace_clocks[tr->clock_id].in_ns)
5590 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5591
5592 iter->tr = tr;
5593 iter->trace_buffer = &tr->trace_buffer;
5594 iter->cpu_file = tracing_get_cpu(inode);
5595 mutex_init(&iter->mutex);
5596 filp->private_data = iter;
5597
5598 if (iter->trace->pipe_open)
5599 iter->trace->pipe_open(iter);
5600
5601 nonseekable_open(inode, filp);
5602
5603 tr->current_trace->ref++;
5604 out:
5605 mutex_unlock(&trace_types_lock);
5606 return ret;
5607
5608 fail:
5609 kfree(iter);
5610 __trace_array_put(tr);
5611 mutex_unlock(&trace_types_lock);
5612 return ret;
5613 }
5614
5615 static int tracing_release_pipe(struct inode *inode, struct file *file)
5616 {
5617 struct trace_iterator *iter = file->private_data;
5618 struct trace_array *tr = inode->i_private;
5619
5620 mutex_lock(&trace_types_lock);
5621
5622 tr->current_trace->ref--;
5623
5624 if (iter->trace->pipe_close)
5625 iter->trace->pipe_close(iter);
5626
5627 mutex_unlock(&trace_types_lock);
5628
5629 free_cpumask_var(iter->started);
5630 mutex_destroy(&iter->mutex);
5631 kfree(iter);
5632
5633 trace_array_put(tr);
5634
5635 return 0;
5636 }
5637
5638 static unsigned int
5639 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5640 {
5641 struct trace_array *tr = iter->tr;
5642
5643 /* Iterators are static; they should be either filled or empty */
5644 if (trace_buffer_iter(iter, iter->cpu_file))
5645 return POLLIN | POLLRDNORM;
5646
5647 if (tr->trace_flags & TRACE_ITER_BLOCK)
5648 /*
5649 * Always select as readable when in blocking mode
5650 */
5651 return POLLIN | POLLRDNORM;
5652 else
5653 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5654 filp, poll_table);
5655 }
5656
5657 static unsigned int
5658 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5659 {
5660 struct trace_iterator *iter = filp->private_data;
5661
5662 return trace_poll(iter, filp, poll_table);
5663 }
5664
5665 /* Must be called with iter->mutex held. */
5666 static int tracing_wait_pipe(struct file *filp)
5667 {
5668 struct trace_iterator *iter = filp->private_data;
5669 int ret;
5670
5671 while (trace_empty(iter)) {
5672
5673 if ((filp->f_flags & O_NONBLOCK)) {
5674 return -EAGAIN;
5675 }
5676
5677 /*
5678 * We block until we read something and tracing is disabled.
5679 * We still block if tracing is disabled, but we have never
5680 * read anything. This allows a user to cat this file, and
5681 * then enable tracing. But after we have read something,
5682 * we give an EOF when tracing is again disabled.
5683 *
5684 * iter->pos will be 0 if we haven't read anything.
5685 */
5686 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5687 break;
5688
5689 mutex_unlock(&iter->mutex);
5690
5691 ret = wait_on_pipe(iter, false);
5692
5693 mutex_lock(&iter->mutex);
5694
5695 if (ret)
5696 return ret;
5697 }
5698
5699 return 1;
5700 }
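
/*
 * Example: with O_NONBLOCK the wait above is skipped and an empty buffer is
 * reported as -EAGAIN, so a reader can multiplex trace_pipe with poll().
 * Illustrative user-space sketch (tracefs path assumed):
 *
 *	#include <errno.h>
 *	#include <fcntl.h>
 *	#include <poll.h>
 *	#include <unistd.h>
 *
 *	static ssize_t read_trace_nonblock(char *buf, size_t len)
 *	{
 *		struct pollfd pfd;
 *		ssize_t n;
 *
 *		pfd.fd = open("/sys/kernel/debug/tracing/trace_pipe",
 *			      O_RDONLY | O_NONBLOCK);
 *		if (pfd.fd < 0)
 *			return -1;
 *		pfd.events = POLLIN;
 *
 *		do {
 *			n = read(pfd.fd, buf, len);
 *			// Nothing buffered yet: wait for the pipe to become
 *			// readable instead of spinning on EAGAIN.
 *		} while (n < 0 && errno == EAGAIN && poll(&pfd, 1, -1) >= 0);
 *
 *		close(pfd.fd);
 *		return n;
 *	}
 */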
5701
5702 /*
5703 * Consumer reader.
5704 */
5705 static ssize_t
5706 tracing_read_pipe(struct file *filp, char __user *ubuf,
5707 size_t cnt, loff_t *ppos)
5708 {
5709 struct trace_iterator *iter = filp->private_data;
5710 ssize_t sret;
5711
5712 /*
5713 * Avoid more than one consumer on a single file descriptor.
5714 * This is just a matter of trace coherency: the ring buffer itself
5715 * is protected.
5716 */
5717 mutex_lock(&iter->mutex);
5718
5719 /* return any leftover data */
5720 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5721 if (sret != -EBUSY)
5722 goto out;
5723
5724 trace_seq_init(&iter->seq);
5725
5726 if (iter->trace->read) {
5727 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5728 if (sret)
5729 goto out;
5730 }
5731
5732 waitagain:
5733 sret = tracing_wait_pipe(filp);
5734 if (sret <= 0)
5735 goto out;
5736
5737 /* stop when tracing is finished */
5738 if (trace_empty(iter)) {
5739 sret = 0;
5740 goto out;
5741 }
5742
5743 if (cnt >= PAGE_SIZE)
5744 cnt = PAGE_SIZE - 1;
5745
5746 /* reset all but tr, trace, and overruns */
5747 memset(&iter->seq, 0,
5748 sizeof(struct trace_iterator) -
5749 offsetof(struct trace_iterator, seq));
5750 cpumask_clear(iter->started);
5751 iter->pos = -1;
5752
5753 trace_event_read_lock();
5754 trace_access_lock(iter->cpu_file);
5755 while (trace_find_next_entry_inc(iter) != NULL) {
5756 enum print_line_t ret;
5757 int save_len = iter->seq.seq.len;
5758
5759 ret = print_trace_line(iter);
5760 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5761 /* don't print partial lines */
5762 iter->seq.seq.len = save_len;
5763 break;
5764 }
5765 if (ret != TRACE_TYPE_NO_CONSUME)
5766 trace_consume(iter);
5767
5768 if (trace_seq_used(&iter->seq) >= cnt)
5769 break;
5770
5771 /*
5772 * Setting the full flag means we reached the trace_seq buffer
5773 * size and we should have left via the partial-output check above.
5774 * One of the trace_seq_* functions is not used properly.
5775 */
5776 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5777 iter->ent->type);
5778 }
5779 trace_access_unlock(iter->cpu_file);
5780 trace_event_read_unlock();
5781
5782 /* Now copy what we have to the user */
5783 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5784 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5785 trace_seq_init(&iter->seq);
5786
5787 /*
5788 * If there was nothing to send to user, in spite of consuming trace
5789 * entries, go back to wait for more entries.
5790 */
5791 if (sret == -EBUSY)
5792 goto waitagain;
5793
5794 out:
5795 mutex_unlock(&iter->mutex);
5796
5797 return sret;
5798 }
5799
5800 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5801 unsigned int idx)
5802 {
5803 __free_page(spd->pages[idx]);
5804 }
5805
5806 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5807 .can_merge = 0,
5808 .confirm = generic_pipe_buf_confirm,
5809 .release = generic_pipe_buf_release,
5810 .steal = generic_pipe_buf_steal,
5811 .get = generic_pipe_buf_get,
5812 };
5813
5814 static size_t
5815 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5816 {
5817 size_t count;
5818 int save_len;
5819 int ret;
5820
5821 /* Seq buffer is page-sized, exactly what we need. */
5822 for (;;) {
5823 save_len = iter->seq.seq.len;
5824 ret = print_trace_line(iter);
5825
5826 if (trace_seq_has_overflowed(&iter->seq)) {
5827 iter->seq.seq.len = save_len;
5828 break;
5829 }
5830
5831 /*
5832 * This should not be hit, because it should only
5833 * be set if the iter->seq overflowed. But check it
5834 * anyway to be safe.
5835 */
5836 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5837 iter->seq.seq.len = save_len;
5838 break;
5839 }
5840
5841 count = trace_seq_used(&iter->seq) - save_len;
5842 if (rem < count) {
5843 rem = 0;
5844 iter->seq.seq.len = save_len;
5845 break;
5846 }
5847
5848 if (ret != TRACE_TYPE_NO_CONSUME)
5849 trace_consume(iter);
5850 rem -= count;
5851 if (!trace_find_next_entry_inc(iter)) {
5852 rem = 0;
5853 iter->ent = NULL;
5854 break;
5855 }
5856 }
5857
5858 return rem;
5859 }
5860
5861 static ssize_t tracing_splice_read_pipe(struct file *filp,
5862 loff_t *ppos,
5863 struct pipe_inode_info *pipe,
5864 size_t len,
5865 unsigned int flags)
5866 {
5867 struct page *pages_def[PIPE_DEF_BUFFERS];
5868 struct partial_page partial_def[PIPE_DEF_BUFFERS];
5869 struct trace_iterator *iter = filp->private_data;
5870 struct splice_pipe_desc spd = {
5871 .pages = pages_def,
5872 .partial = partial_def,
5873 .nr_pages = 0, /* This gets updated below. */
5874 .nr_pages_max = PIPE_DEF_BUFFERS,
5875 .ops = &tracing_pipe_buf_ops,
5876 .spd_release = tracing_spd_release_pipe,
5877 };
5878 ssize_t ret;
5879 size_t rem;
5880 unsigned int i;
5881
5882 if (splice_grow_spd(pipe, &spd))
5883 return -ENOMEM;
5884
5885 mutex_lock(&iter->mutex);
5886
5887 if (iter->trace->splice_read) {
5888 ret = iter->trace->splice_read(iter, filp,
5889 ppos, pipe, len, flags);
5890 if (ret)
5891 goto out_err;
5892 }
5893
5894 ret = tracing_wait_pipe(filp);
5895 if (ret <= 0)
5896 goto out_err;
5897
5898 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5899 ret = -EFAULT;
5900 goto out_err;
5901 }
5902
5903 trace_event_read_lock();
5904 trace_access_lock(iter->cpu_file);
5905
5906 /* Fill as many pages as possible. */
5907 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5908 spd.pages[i] = alloc_page(GFP_KERNEL);
5909 if (!spd.pages[i])
5910 break;
5911
5912 rem = tracing_fill_pipe_page(rem, iter);
5913
5914 /* Copy the data into the page, so we can start over. */
5915 ret = trace_seq_to_buffer(&iter->seq,
5916 page_address(spd.pages[i]),
5917 trace_seq_used(&iter->seq));
5918 if (ret < 0) {
5919 __free_page(spd.pages[i]);
5920 break;
5921 }
5922 spd.partial[i].offset = 0;
5923 spd.partial[i].len = trace_seq_used(&iter->seq);
5924
5925 trace_seq_init(&iter->seq);
5926 }
5927
5928 trace_access_unlock(iter->cpu_file);
5929 trace_event_read_unlock();
5930 mutex_unlock(&iter->mutex);
5931
5932 spd.nr_pages = i;
5933
5934 if (i)
5935 ret = splice_to_pipe(pipe, &spd);
5936 else
5937 ret = 0;
5938 out:
5939 splice_shrink_spd(&spd);
5940 return ret;
5941
5942 out_err:
5943 mutex_unlock(&iter->mutex);
5944 goto out;
5945 }
5946
5947 static ssize_t
5948 tracing_entries_read(struct file *filp, char __user *ubuf,
5949 size_t cnt, loff_t *ppos)
5950 {
5951 struct inode *inode = file_inode(filp);
5952 struct trace_array *tr = inode->i_private;
5953 int cpu = tracing_get_cpu(inode);
5954 char buf[64];
5955 int r = 0;
5956 ssize_t ret;
5957
5958 mutex_lock(&trace_types_lock);
5959
5960 if (cpu == RING_BUFFER_ALL_CPUS) {
5961 int cpu, buf_size_same;
5962 unsigned long size;
5963
5964 size = 0;
5965 buf_size_same = 1;
5966 /* check if all cpu sizes are same */
5967 for_each_tracing_cpu(cpu) {
5968 /* fill in the size from first enabled cpu */
5969 if (size == 0)
5970 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5971 if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5972 buf_size_same = 0;
5973 break;
5974 }
5975 }
5976
5977 if (buf_size_same) {
5978 if (!ring_buffer_expanded)
5979 r = sprintf(buf, "%lu (expanded: %lu)\n",
5980 size >> 10,
5981 trace_buf_size >> 10);
5982 else
5983 r = sprintf(buf, "%lu\n", size >> 10);
5984 } else
5985 r = sprintf(buf, "X\n");
5986 } else
5987 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5988
5989 mutex_unlock(&trace_types_lock);
5990
5991 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5992 return ret;
5993 }
5994
5995 static ssize_t
5996 tracing_entries_write(struct file *filp, const char __user *ubuf,
5997 size_t cnt, loff_t *ppos)
5998 {
5999 struct inode *inode = file_inode(filp);
6000 struct trace_array *tr = inode->i_private;
6001 unsigned long val;
6002 int ret;
6003
6004 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6005 if (ret)
6006 return ret;
6007
6008 /* must have at least 1 entry */
6009 if (!val)
6010 return -EINVAL;
6011
6012 /* value is in KB */
6013 val <<= 10;
6014 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6015 if (ret < 0)
6016 return ret;
6017
6018 *ppos += cnt;
6019
6020 return cnt;
6021 }
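
/*
 * Example: the value written here is in kilobytes (it is shifted up by 10
 * above).  The top-level buffer_size_kb file resizes every CPU, while the
 * per_cpu/cpuN/buffer_size_kb copies resize a single CPU.  Illustrative
 * user-space sketch (tracefs path assumed):
 *
 *	int fd = open("/sys/kernel/debug/tracing/buffer_size_kb", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "4096", 4);	// 4 MB of ring buffer per CPU
 *		close(fd);
 *	}
 */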
6022
6023 static ssize_t
6024 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6025 size_t cnt, loff_t *ppos)
6026 {
6027 struct trace_array *tr = filp->private_data;
6028 char buf[64];
6029 int r, cpu;
6030 unsigned long size = 0, expanded_size = 0;
6031
6032 mutex_lock(&trace_types_lock);
6033 for_each_tracing_cpu(cpu) {
6034 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6035 if (!ring_buffer_expanded)
6036 expanded_size += trace_buf_size >> 10;
6037 }
6038 if (ring_buffer_expanded)
6039 r = sprintf(buf, "%lu\n", size);
6040 else
6041 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6042 mutex_unlock(&trace_types_lock);
6043
6044 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6045 }
6046
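/*
 * Note (based on the handlers below): whatever is written to the
 * free_buffer file is accepted and ignored; the real work happens in
 * tracing_free_buffer_release() when the file is closed: tracing may be
 * turned off (depending on the stop-on-free option) and the ring buffer
 * is resized to zero.
 */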
6047 static ssize_t
6048 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6049 size_t cnt, loff_t *ppos)
6050 {
6051 /*
6052 * There is no need to read what the user has written; this function
6053 * only exists so that using "echo" on this file does not return an error.
6054 */
6055
6056 *ppos += cnt;
6057
6058 return cnt;
6059 }
6060
6061 static int
6062 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6063 {
6064 struct trace_array *tr = inode->i_private;
6065
6066 /* disable tracing? */
6067 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6068 tracer_tracing_off(tr);
6069 /* resize the ring buffer to 0 */
6070 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6071
6072 trace_array_put(tr);
6073
6074 return 0;
6075 }
6076
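/*
 * Backs the trace_marker file (see tracing_mark_fops below). A write
 * such as "echo hello > trace_marker" injects a TRACE_PRINT event
 * carrying the user string into the ring buffer, which is useful for
 * correlating user-space activity with kernel trace data. If copying
 * the user buffer faults in atomic context, "<faulted>" is recorded
 * instead and -EFAULT is returned.
 */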
6077 static ssize_t
6078 tracing_mark_write(struct file *filp, const char __user *ubuf,
6079 size_t cnt, loff_t *fpos)
6080 {
6081 struct trace_array *tr = filp->private_data;
6082 struct ring_buffer_event *event;
6083 struct ring_buffer *buffer;
6084 struct print_entry *entry;
6085 unsigned long irq_flags;
6086 const char faulted[] = "<faulted>";
6087 ssize_t written;
6088 int size;
6089 int len;
6090
6091 /* Used in tracing_mark_raw_write() as well */
6092 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6093
6094 if (tracing_disabled)
6095 return -EINVAL;
6096
6097 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6098 return -EINVAL;
6099
6100 if (cnt > TRACE_BUF_SIZE)
6101 cnt = TRACE_BUF_SIZE;
6102
6103 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6104
6105 local_save_flags(irq_flags);
6106 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6107
6108 /* If less than "<faulted>", then make sure we can still add that */
6109 if (cnt < FAULTED_SIZE)
6110 size += FAULTED_SIZE - cnt;
6111
6112 buffer = tr->trace_buffer.buffer;
6113 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6114 irq_flags, preempt_count());
6115 if (unlikely(!event))
6116 /* Ring buffer disabled, return as if not open for write */
6117 return -EBADF;
6118
6119 entry = ring_buffer_event_data(event);
6120 entry->ip = _THIS_IP_;
6121
6122 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6123 if (len) {
6124 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6125 cnt = FAULTED_SIZE;
6126 written = -EFAULT;
6127 } else
6128 written = cnt;
6129 len = cnt;
6130
6131 if (entry->buf[cnt - 1] != '\n') {
6132 entry->buf[cnt] = '\n';
6133 entry->buf[cnt + 1] = '\0';
6134 } else
6135 entry->buf[cnt] = '\0';
6136
6137 __buffer_unlock_commit(buffer, event);
6138
6139 if (written > 0)
6140 *fpos += written;
6141
6142 return written;
6143 }
6144
6145 /* Limit it for now to 3K (including tag) */
6146 #define RAW_DATA_MAX_SIZE (1024*3)
6147
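/*
 * Backs the trace_marker_raw file. Unlike trace_marker, this expects a
 * binary payload: the leading bytes become the tag id (entry->id) and
 * the rest is copied verbatim, so user space would typically write() a
 * packed struct rather than text. The payload must be at least
 * sizeof(unsigned int) and at most RAW_DATA_MAX_SIZE bytes.
 */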
6148 static ssize_t
6149 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6150 size_t cnt, loff_t *fpos)
6151 {
6152 struct trace_array *tr = filp->private_data;
6153 struct ring_buffer_event *event;
6154 struct ring_buffer *buffer;
6155 struct raw_data_entry *entry;
6156 const char faulted[] = "<faulted>";
6157 unsigned long irq_flags;
6158 ssize_t written;
6159 int size;
6160 int len;
6161
6162 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6163
6164 if (tracing_disabled)
6165 return -EINVAL;
6166
6167 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6168 return -EINVAL;
6169
6170 /* The marker must at least have a tag id */
6171 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6172 return -EINVAL;
6173
6174 if (cnt > TRACE_BUF_SIZE)
6175 cnt = TRACE_BUF_SIZE;
6176
6177 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6178
6179 local_save_flags(irq_flags);
6180 size = sizeof(*entry) + cnt;
6181 if (cnt < FAULT_SIZE_ID)
6182 size += FAULT_SIZE_ID - cnt;
6183
6184 buffer = tr->trace_buffer.buffer;
6185 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6186 irq_flags, preempt_count());
6187 if (!event)
6188 /* Ring buffer disabled, return as if not open for write */
6189 return -EBADF;
6190
6191 entry = ring_buffer_event_data(event);
6192
6193 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6194 if (len) {
6195 entry->id = -1;
6196 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6197 written = -EFAULT;
6198 } else
6199 written = cnt;
6200
6201 __buffer_unlock_commit(buffer, event);
6202
6203 if (written > 0)
6204 *fpos += written;
6205
6206 return written;
6207 }
6208
6209 static int tracing_clock_show(struct seq_file *m, void *v)
6210 {
6211 struct trace_array *tr = m->private;
6212 int i;
6213
6214 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6215 seq_printf(m,
6216 "%s%s%s%s", i ? " " : "",
6217 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6218 i == tr->clock_id ? "]" : "");
6219 seq_putc(m, '\n');
6220
6221 return 0;
6222 }
6223
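/*
 * Selects the clock used to timestamp ring buffer events. From user
 * space this is driven through the trace_clock file, e.g.
 * "echo global > trace_clock"; the name must match an entry in
 * trace_clocks[], and "cat trace_clock" lists the choices with the
 * current one in brackets. Switching clocks resets the buffers, since
 * timestamps taken with different clocks are not comparable.
 */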
6224 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6225 {
6226 int i;
6227
6228 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6229 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6230 break;
6231 }
6232 if (i == ARRAY_SIZE(trace_clocks))
6233 return -EINVAL;
6234
6235 mutex_lock(&trace_types_lock);
6236
6237 tr->clock_id = i;
6238
6239 ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6240
6241 /*
6242 * New clock may not be consistent with the previous clock.
6243 * Reset the buffer so that it doesn't have incomparable timestamps.
6244 */
6245 tracing_reset_online_cpus(&tr->trace_buffer);
6246
6247 #ifdef CONFIG_TRACER_MAX_TRACE
6248 if (tr->max_buffer.buffer)
6249 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6250 tracing_reset_online_cpus(&tr->max_buffer);
6251 #endif
6252
6253 mutex_unlock(&trace_types_lock);
6254
6255 return 0;
6256 }
6257
6258 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6259 size_t cnt, loff_t *fpos)
6260 {
6261 struct seq_file *m = filp->private_data;
6262 struct trace_array *tr = m->private;
6263 char buf[64];
6264 const char *clockstr;
6265 int ret;
6266
6267 if (cnt >= sizeof(buf))
6268 return -EINVAL;
6269
6270 if (copy_from_user(buf, ubuf, cnt))
6271 return -EFAULT;
6272
6273 buf[cnt] = 0;
6274
6275 clockstr = strstrip(buf);
6276
6277 ret = tracing_set_clock(tr, clockstr);
6278 if (ret)
6279 return ret;
6280
6281 *fpos += cnt;
6282
6283 return cnt;
6284 }
6285
6286 static int tracing_clock_open(struct inode *inode, struct file *file)
6287 {
6288 struct trace_array *tr = inode->i_private;
6289 int ret;
6290
6291 if (tracing_disabled)
6292 return -ENODEV;
6293
6294 if (trace_array_get(tr))
6295 return -ENODEV;
6296
6297 ret = single_open(file, tracing_clock_show, inode->i_private);
6298 if (ret < 0)
6299 trace_array_put(tr);
6300
6301 return ret;
6302 }
6303
6304 struct ftrace_buffer_info {
6305 struct trace_iterator iter;
6306 void *spare;
6307 unsigned int spare_cpu;
6308 unsigned int read;
6309 };
6310
6311 #ifdef CONFIG_TRACER_SNAPSHOT
6312 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6313 {
6314 struct trace_array *tr = inode->i_private;
6315 struct trace_iterator *iter;
6316 struct seq_file *m;
6317 int ret = 0;
6318
6319 if (trace_array_get(tr) < 0)
6320 return -ENODEV;
6321
6322 if (file->f_mode & FMODE_READ) {
6323 iter = __tracing_open(inode, file, true);
6324 if (IS_ERR(iter))
6325 ret = PTR_ERR(iter);
6326 } else {
6327 /* Writes still need the seq_file to hold the private data */
6328 ret = -ENOMEM;
6329 m = kzalloc(sizeof(*m), GFP_KERNEL);
6330 if (!m)
6331 goto out;
6332 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6333 if (!iter) {
6334 kfree(m);
6335 goto out;
6336 }
6337 ret = 0;
6338
6339 iter->tr = tr;
6340 iter->trace_buffer = &tr->max_buffer;
6341 iter->cpu_file = tracing_get_cpu(inode);
6342 m->private = iter;
6343 file->private_data = m;
6344 }
6345 out:
6346 if (ret < 0)
6347 trace_array_put(tr);
6348
6349 return ret;
6350 }
6351
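/*
 * Write handler for the snapshot file. As implemented below: writing 0
 * frees the snapshot buffer (only allowed through the top-level,
 * all-CPU file), writing 1 allocates the snapshot if needed and swaps
 * the live buffer into it, and any other value merely clears the
 * snapshot without freeing it. The write fails with -EBUSY while a
 * latency tracer is already using the max buffer.
 */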
6352 static ssize_t
6353 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6354 loff_t *ppos)
6355 {
6356 struct seq_file *m = filp->private_data;
6357 struct trace_iterator *iter = m->private;
6358 struct trace_array *tr = iter->tr;
6359 unsigned long val;
6360 int ret;
6361
6362 ret = tracing_update_buffers();
6363 if (ret < 0)
6364 return ret;
6365
6366 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6367 if (ret)
6368 return ret;
6369
6370 mutex_lock(&trace_types_lock);
6371
6372 if (tr->current_trace->use_max_tr) {
6373 ret = -EBUSY;
6374 goto out;
6375 }
6376
6377 switch (val) {
6378 case 0:
6379 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6380 ret = -EINVAL;
6381 break;
6382 }
6383 if (tr->allocated_snapshot)
6384 free_snapshot(tr);
6385 break;
6386 case 1:
6387 /* Only allow per-cpu swap if the ring buffer supports it */
6388 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6389 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6390 ret = -EINVAL;
6391 break;
6392 }
6393 #endif
6394 if (!tr->allocated_snapshot) {
6395 ret = tracing_alloc_snapshot_instance(tr);
6396 if (ret < 0)
6397 break;
6398 }
6399 local_irq_disable();
6400 /* Now, we're going to swap */
6401 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6402 update_max_tr(tr, current, smp_processor_id());
6403 else
6404 update_max_tr_single(tr, current, iter->cpu_file);
6405 local_irq_enable();
6406 break;
6407 default:
6408 if (tr->allocated_snapshot) {
6409 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6410 tracing_reset_online_cpus(&tr->max_buffer);
6411 else
6412 tracing_reset(&tr->max_buffer, iter->cpu_file);
6413 }
6414 break;
6415 }
6416
6417 if (ret >= 0) {
6418 *ppos += cnt;
6419 ret = cnt;
6420 }
6421 out:
6422 mutex_unlock(&trace_types_lock);
6423 return ret;
6424 }
6425
6426 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6427 {
6428 struct seq_file *m = file->private_data;
6429 int ret;
6430
6431 ret = tracing_release(inode, file);
6432
6433 if (file->f_mode & FMODE_READ)
6434 return ret;
6435
6436 /* If write only, the seq_file is just a stub */
6437 if (m)
6438 kfree(m->private);
6439 kfree(m);
6440
6441 return 0;
6442 }
6443
6444 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6445 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6446 size_t count, loff_t *ppos);
6447 static int tracing_buffers_release(struct inode *inode, struct file *file);
6448 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6449 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6450
6451 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6452 {
6453 struct ftrace_buffer_info *info;
6454 int ret;
6455
6456 ret = tracing_buffers_open(inode, filp);
6457 if (ret < 0)
6458 return ret;
6459
6460 info = filp->private_data;
6461
6462 if (info->iter.trace->use_max_tr) {
6463 tracing_buffers_release(inode, filp);
6464 return -EBUSY;
6465 }
6466
6467 info->iter.snapshot = true;
6468 info->iter.trace_buffer = &info->iter.tr->max_buffer;
6469
6470 return ret;
6471 }
6472
6473 #endif /* CONFIG_TRACER_SNAPSHOT */
6474
6475
6476 static const struct file_operations tracing_thresh_fops = {
6477 .open = tracing_open_generic,
6478 .read = tracing_thresh_read,
6479 .write = tracing_thresh_write,
6480 .llseek = generic_file_llseek,
6481 };
6482
6483 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6484 static const struct file_operations tracing_max_lat_fops = {
6485 .open = tracing_open_generic,
6486 .read = tracing_max_lat_read,
6487 .write = tracing_max_lat_write,
6488 .llseek = generic_file_llseek,
6489 };
6490 #endif
6491
6492 static const struct file_operations set_tracer_fops = {
6493 .open = tracing_open_generic,
6494 .read = tracing_set_trace_read,
6495 .write = tracing_set_trace_write,
6496 .llseek = generic_file_llseek,
6497 };
6498
6499 static const struct file_operations tracing_pipe_fops = {
6500 .open = tracing_open_pipe,
6501 .poll = tracing_poll_pipe,
6502 .read = tracing_read_pipe,
6503 .splice_read = tracing_splice_read_pipe,
6504 .release = tracing_release_pipe,
6505 .llseek = no_llseek,
6506 };
6507
6508 static const struct file_operations tracing_entries_fops = {
6509 .open = tracing_open_generic_tr,
6510 .read = tracing_entries_read,
6511 .write = tracing_entries_write,
6512 .llseek = generic_file_llseek,
6513 .release = tracing_release_generic_tr,
6514 };
6515
6516 static const struct file_operations tracing_total_entries_fops = {
6517 .open = tracing_open_generic_tr,
6518 .read = tracing_total_entries_read,
6519 .llseek = generic_file_llseek,
6520 .release = tracing_release_generic_tr,
6521 };
6522
6523 static const struct file_operations tracing_free_buffer_fops = {
6524 .open = tracing_open_generic_tr,
6525 .write = tracing_free_buffer_write,
6526 .release = tracing_free_buffer_release,
6527 };
6528
6529 static const struct file_operations tracing_mark_fops = {
6530 .open = tracing_open_generic_tr,
6531 .write = tracing_mark_write,
6532 .llseek = generic_file_llseek,
6533 .release = tracing_release_generic_tr,
6534 };
6535
6536 static const struct file_operations tracing_mark_raw_fops = {
6537 .open = tracing_open_generic_tr,
6538 .write = tracing_mark_raw_write,
6539 .llseek = generic_file_llseek,
6540 .release = tracing_release_generic_tr,
6541 };
6542
6543 static const struct file_operations trace_clock_fops = {
6544 .open = tracing_clock_open,
6545 .read = seq_read,
6546 .llseek = seq_lseek,
6547 .release = tracing_single_release_tr,
6548 .write = tracing_clock_write,
6549 };
6550
6551 #ifdef CONFIG_TRACER_SNAPSHOT
6552 static const struct file_operations snapshot_fops = {
6553 .open = tracing_snapshot_open,
6554 .read = seq_read,
6555 .write = tracing_snapshot_write,
6556 .llseek = tracing_lseek,
6557 .release = tracing_snapshot_release,
6558 };
6559
6560 static const struct file_operations snapshot_raw_fops = {
6561 .open = snapshot_raw_open,
6562 .read = tracing_buffers_read,
6563 .release = tracing_buffers_release,
6564 .splice_read = tracing_buffers_splice_read,
6565 .llseek = no_llseek,
6566 };
6567
6568 #endif /* CONFIG_TRACER_SNAPSHOT */
6569
6570 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6571 {
6572 struct trace_array *tr = inode->i_private;
6573 struct ftrace_buffer_info *info;
6574 int ret;
6575
6576 if (tracing_disabled)
6577 return -ENODEV;
6578
6579 if (trace_array_get(tr) < 0)
6580 return -ENODEV;
6581
6582 info = kzalloc(sizeof(*info), GFP_KERNEL);
6583 if (!info) {
6584 trace_array_put(tr);
6585 return -ENOMEM;
6586 }
6587
6588 mutex_lock(&trace_types_lock);
6589
6590 info->iter.tr = tr;
6591 info->iter.cpu_file = tracing_get_cpu(inode);
6592 info->iter.trace = tr->current_trace;
6593 info->iter.trace_buffer = &tr->trace_buffer;
6594 info->spare = NULL;
6595 /* Force reading ring buffer for first read */
6596 info->read = (unsigned int)-1;
6597
6598 filp->private_data = info;
6599
6600 tr->current_trace->ref++;
6601
6602 mutex_unlock(&trace_types_lock);
6603
6604 ret = nonseekable_open(inode, filp);
6605 if (ret < 0)
6606 trace_array_put(tr);
6607
6608 return ret;
6609 }
6610
6611 static unsigned int
6612 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6613 {
6614 struct ftrace_buffer_info *info = filp->private_data;
6615 struct trace_iterator *iter = &info->iter;
6616
6617 return trace_poll(iter, filp, poll_table);
6618 }
6619
6620 static ssize_t
6621 tracing_buffers_read(struct file *filp, char __user *ubuf,
6622 size_t count, loff_t *ppos)
6623 {
6624 struct ftrace_buffer_info *info = filp->private_data;
6625 struct trace_iterator *iter = &info->iter;
6626 ssize_t ret = 0;
6627 ssize_t size;
6628
6629 if (!count)
6630 return 0;
6631
6632 #ifdef CONFIG_TRACER_MAX_TRACE
6633 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6634 return -EBUSY;
6635 #endif
6636
6637 if (!info->spare) {
6638 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6639 iter->cpu_file);
6640 if (IS_ERR(info->spare)) {
6641 ret = PTR_ERR(info->spare);
6642 info->spare = NULL;
6643 } else {
6644 info->spare_cpu = iter->cpu_file;
6645 }
6646 }
6647 if (!info->spare)
6648 return ret;
6649
6650 /* Do we have previous read data to read? */
6651 if (info->read < PAGE_SIZE)
6652 goto read;
6653
6654 again:
6655 trace_access_lock(iter->cpu_file);
6656 ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6657 &info->spare,
6658 count,
6659 iter->cpu_file, 0);
6660 trace_access_unlock(iter->cpu_file);
6661
6662 if (ret < 0) {
6663 if (trace_empty(iter)) {
6664 if ((filp->f_flags & O_NONBLOCK))
6665 return -EAGAIN;
6666
6667 ret = wait_on_pipe(iter, false);
6668 if (ret)
6669 return ret;
6670
6671 goto again;
6672 }
6673 return 0;
6674 }
6675
6676 info->read = 0;
6677 read:
6678 size = PAGE_SIZE - info->read;
6679 if (size > count)
6680 size = count;
6681
6682 ret = copy_to_user(ubuf, info->spare + info->read, size);
6683 if (ret == size)
6684 return -EFAULT;
6685
6686 size -= ret;
6687
6688 *ppos += size;
6689 info->read += size;
6690
6691 return size;
6692 }
6693
6694 static int tracing_buffers_release(struct inode *inode, struct file *file)
6695 {
6696 struct ftrace_buffer_info *info = file->private_data;
6697 struct trace_iterator *iter = &info->iter;
6698
6699 mutex_lock(&trace_types_lock);
6700
6701 iter->tr->current_trace->ref--;
6702
6703 __trace_array_put(iter->tr);
6704
6705 if (info->spare)
6706 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6707 info->spare_cpu, info->spare);
6708 kfree(info);
6709
6710 mutex_unlock(&trace_types_lock);
6711
6712 return 0;
6713 }
6714
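/*
 * Each ring-buffer page handed to splice() below is wrapped in a
 * buffer_ref. The ref count lets the spd and the individual pipe
 * buffers share one page; the page goes back to the ring buffer only
 * when the last reference is dropped (see buffer_pipe_buf_release()
 * and buffer_spd_release()).
 */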
6715 struct buffer_ref {
6716 struct ring_buffer *buffer;
6717 void *page;
6718 int cpu;
6719 int ref;
6720 };
6721
6722 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6723 struct pipe_buffer *buf)
6724 {
6725 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6726
6727 if (--ref->ref)
6728 return;
6729
6730 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6731 kfree(ref);
6732 buf->private = 0;
6733 }
6734
6735 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6736 struct pipe_buffer *buf)
6737 {
6738 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6739
6740 ref->ref++;
6741 }
6742
6743 /* Pipe buffer operations for a buffer. */
6744 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6745 .can_merge = 0,
6746 .confirm = generic_pipe_buf_confirm,
6747 .release = buffer_pipe_buf_release,
6748 .steal = generic_pipe_buf_steal,
6749 .get = buffer_pipe_buf_get,
6750 };
6751
6752 /*
6753 * Callback from splice_to_pipe(), used to release the pages left in
6754 * the spd in case we errored out while filling the pipe.
6755 */
6756 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6757 {
6758 struct buffer_ref *ref =
6759 (struct buffer_ref *)spd->partial[i].private;
6760
6761 if (--ref->ref)
6762 return;
6763
6764 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6765 kfree(ref);
6766 spd->partial[i].private = 0;
6767 }
6768
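/*
 * splice() support for the per-cpu trace_pipe_raw files: whole
 * ring-buffer pages are passed to the pipe via ring_buffer_read_page().
 * Consequently *ppos must be page aligned and len is rounded down to a
 * multiple of PAGE_SIZE (requests smaller than a page are rejected).
 */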
6769 static ssize_t
6770 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6771 struct pipe_inode_info *pipe, size_t len,
6772 unsigned int flags)
6773 {
6774 struct ftrace_buffer_info *info = file->private_data;
6775 struct trace_iterator *iter = &info->iter;
6776 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6777 struct page *pages_def[PIPE_DEF_BUFFERS];
6778 struct splice_pipe_desc spd = {
6779 .pages = pages_def,
6780 .partial = partial_def,
6781 .nr_pages_max = PIPE_DEF_BUFFERS,
6782 .ops = &buffer_pipe_buf_ops,
6783 .spd_release = buffer_spd_release,
6784 };
6785 struct buffer_ref *ref;
6786 int entries, i;
6787 ssize_t ret = 0;
6788
6789 #ifdef CONFIG_TRACER_MAX_TRACE
6790 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6791 return -EBUSY;
6792 #endif
6793
6794 if (*ppos & (PAGE_SIZE - 1))
6795 return -EINVAL;
6796
6797 if (len & (PAGE_SIZE - 1)) {
6798 if (len < PAGE_SIZE)
6799 return -EINVAL;
6800 len &= PAGE_MASK;
6801 }
6802
6803 if (splice_grow_spd(pipe, &spd))
6804 return -ENOMEM;
6805
6806 again:
6807 trace_access_lock(iter->cpu_file);
6808 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6809
6810 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6811 struct page *page;
6812 int r;
6813
6814 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6815 if (!ref) {
6816 ret = -ENOMEM;
6817 break;
6818 }
6819
6820 ref->ref = 1;
6821 ref->buffer = iter->trace_buffer->buffer;
6822 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6823 if (IS_ERR(ref->page)) {
6824 ret = PTR_ERR(ref->page);
6825 ref->page = NULL;
6826 kfree(ref);
6827 break;
6828 }
6829 ref->cpu = iter->cpu_file;
6830
6831 r = ring_buffer_read_page(ref->buffer, &ref->page,
6832 len, iter->cpu_file, 1);
6833 if (r < 0) {
6834 ring_buffer_free_read_page(ref->buffer, ref->cpu,
6835 ref->page);
6836 kfree(ref);
6837 break;
6838 }
6839
6840 page = virt_to_page(ref->page);
6841
6842 spd.pages[i] = page;
6843 spd.partial[i].len = PAGE_SIZE;
6844 spd.partial[i].offset = 0;
6845 spd.partial[i].private = (unsigned long)ref;
6846 spd.nr_pages++;
6847 *ppos += PAGE_SIZE;
6848
6849 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6850 }
6851
6852 trace_access_unlock(iter->cpu_file);
6853 spd.nr_pages = i;
6854
6855 /* did we read anything? */
6856 if (!spd.nr_pages) {
6857 if (ret)
6858 goto out;
6859
6860 ret = -EAGAIN;
6861 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6862 goto out;
6863
6864 ret = wait_on_pipe(iter, true);
6865 if (ret)
6866 goto out;
6867
6868 goto again;
6869 }
6870
6871 ret = splice_to_pipe(pipe, &spd);
6872 out:
6873 splice_shrink_spd(&spd);
6874
6875 return ret;
6876 }
6877
6878 static const struct file_operations tracing_buffers_fops = {
6879 .open = tracing_buffers_open,
6880 .read = tracing_buffers_read,
6881 .poll = tracing_buffers_poll,
6882 .release = tracing_buffers_release,
6883 .splice_read = tracing_buffers_splice_read,
6884 .llseek = no_llseek,
6885 };
6886
6887 static ssize_t
6888 tracing_stats_read(struct file *filp, char __user *ubuf,
6889 size_t count, loff_t *ppos)
6890 {
6891 struct inode *inode = file_inode(filp);
6892 struct trace_array *tr = inode->i_private;
6893 struct trace_buffer *trace_buf = &tr->trace_buffer;
6894 int cpu = tracing_get_cpu(inode);
6895 struct trace_seq *s;
6896 unsigned long cnt;
6897 unsigned long long t;
6898 unsigned long usec_rem;
6899
6900 s = kmalloc(sizeof(*s), GFP_KERNEL);
6901 if (!s)
6902 return -ENOMEM;
6903
6904 trace_seq_init(s);
6905
6906 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6907 trace_seq_printf(s, "entries: %ld\n", cnt);
6908
6909 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6910 trace_seq_printf(s, "overrun: %ld\n", cnt);
6911
6912 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6913 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6914
6915 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6916 trace_seq_printf(s, "bytes: %ld\n", cnt);
6917
6918 if (trace_clocks[tr->clock_id].in_ns) {
6919 /* local or global for trace_clock */
6920 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6921 usec_rem = do_div(t, USEC_PER_SEC);
6922 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6923 t, usec_rem);
6924
6925 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6926 usec_rem = do_div(t, USEC_PER_SEC);
6927 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6928 } else {
6929 /* counter or tsc mode for trace_clock */
6930 trace_seq_printf(s, "oldest event ts: %llu\n",
6931 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6932
6933 trace_seq_printf(s, "now ts: %llu\n",
6934 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6935 }
6936
6937 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6938 trace_seq_printf(s, "dropped events: %ld\n", cnt);
6939
6940 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6941 trace_seq_printf(s, "read events: %ld\n", cnt);
6942
6943 count = simple_read_from_buffer(ubuf, count, ppos,
6944 s->buffer, trace_seq_used(s));
6945
6946 kfree(s);
6947
6948 return count;
6949 }
6950
6951 static const struct file_operations tracing_stats_fops = {
6952 .open = tracing_open_generic_tr,
6953 .read = tracing_stats_read,
6954 .llseek = generic_file_llseek,
6955 .release = tracing_release_generic_tr,
6956 };
6957
6958 #ifdef CONFIG_DYNAMIC_FTRACE
6959
6960 static ssize_t
6961 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6962 size_t cnt, loff_t *ppos)
6963 {
6964 unsigned long *p = filp->private_data;
6965 char buf[64]; /* Not too big for a shallow stack */
6966 int r;
6967
6968 r = scnprintf(buf, 63, "%ld", *p);
6969 buf[r++] = '\n';
6970
6971 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6972 }
6973
6974 static const struct file_operations tracing_dyn_info_fops = {
6975 .open = tracing_open_generic,
6976 .read = tracing_read_dyn_info,
6977 .llseek = generic_file_llseek,
6978 };
6979 #endif /* CONFIG_DYNAMIC_FTRACE */
6980
6981 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6982 static void
6983 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
6984 struct trace_array *tr, struct ftrace_probe_ops *ops,
6985 void *data)
6986 {
6987 tracing_snapshot_instance(tr);
6988 }
6989
6990 static void
6991 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
6992 struct trace_array *tr, struct ftrace_probe_ops *ops,
6993 void *data)
6994 {
6995 struct ftrace_func_mapper *mapper = data;
6996 long *count = NULL;
6997
6998 if (mapper)
6999 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7000
7001 if (count) {
7002
7003 if (*count <= 0)
7004 return;
7005
7006 (*count)--;
7007 }
7008
7009 tracing_snapshot_instance(tr);
7010 }
7011
7012 static int
7013 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7014 struct ftrace_probe_ops *ops, void *data)
7015 {
7016 struct ftrace_func_mapper *mapper = data;
7017 long *count = NULL;
7018
7019 seq_printf(m, "%ps:", (void *)ip);
7020
7021 seq_puts(m, "snapshot");
7022
7023 if (mapper)
7024 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7025
7026 if (count)
7027 seq_printf(m, ":count=%ld\n", *count);
7028 else
7029 seq_puts(m, ":unlimited\n");
7030
7031 return 0;
7032 }
7033
7034 static int
7035 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7036 unsigned long ip, void *init_data, void **data)
7037 {
7038 struct ftrace_func_mapper *mapper = *data;
7039
7040 if (!mapper) {
7041 mapper = allocate_ftrace_func_mapper();
7042 if (!mapper)
7043 return -ENOMEM;
7044 *data = mapper;
7045 }
7046
7047 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7048 }
7049
7050 static void
7051 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7052 unsigned long ip, void *data)
7053 {
7054 struct ftrace_func_mapper *mapper = data;
7055
7056 if (!ip) {
7057 if (!mapper)
7058 return;
7059 free_ftrace_func_mapper(mapper, NULL);
7060 return;
7061 }
7062
7063 ftrace_func_mapper_remove_ip(mapper, ip);
7064 }
7065
7066 static struct ftrace_probe_ops snapshot_probe_ops = {
7067 .func = ftrace_snapshot,
7068 .print = ftrace_snapshot_print,
7069 };
7070
7071 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7072 .func = ftrace_count_snapshot,
7073 .print = ftrace_snapshot_print,
7074 .init = ftrace_snapshot_init,
7075 .free = ftrace_snapshot_free,
7076 };
7077
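/*
 * Implements the "snapshot" command of set_ftrace_filter, e.g.
 * "echo '<function>:snapshot' > set_ftrace_filter" takes a snapshot
 * every time <function> is hit, "<function>:snapshot:3" limits it to
 * three snapshots, and prefixing the glob with '!' removes the probe.
 */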
7078 static int
7079 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7080 char *glob, char *cmd, char *param, int enable)
7081 {
7082 struct ftrace_probe_ops *ops;
7083 void *count = (void *)-1;
7084 char *number;
7085 int ret;
7086
7087 if (!tr)
7088 return -ENODEV;
7089
7090 /* hash funcs only work with set_ftrace_filter */
7091 if (!enable)
7092 return -EINVAL;
7093
7094 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7095
7096 if (glob[0] == '!')
7097 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7098
7099 if (!param)
7100 goto out_reg;
7101
7102 number = strsep(&param, ":");
7103
7104 if (!strlen(number))
7105 goto out_reg;
7106
7107 /*
7108 * We use the callback data field (which is a pointer)
7109 * as our counter.
7110 */
7111 ret = kstrtoul(number, 0, (unsigned long *)&count);
7112 if (ret)
7113 return ret;
7114
7115 out_reg:
7116 ret = tracing_alloc_snapshot_instance(tr);
7117 if (ret < 0)
7118 goto out;
7119
7120 ret = register_ftrace_function_probe(glob, tr, ops, count);
7121
7122 out:
7123 return ret < 0 ? ret : 0;
7124 }
7125
7126 static struct ftrace_func_command ftrace_snapshot_cmd = {
7127 .name = "snapshot",
7128 .func = ftrace_trace_snapshot_callback,
7129 };
7130
7131 static __init int register_snapshot_cmd(void)
7132 {
7133 return register_ftrace_command(&ftrace_snapshot_cmd);
7134 }
7135 #else
7136 static inline __init int register_snapshot_cmd(void) { return 0; }
7137 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7138
7139 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7140 {
7141 if (WARN_ON(!tr->dir))
7142 return ERR_PTR(-ENODEV);
7143
7144 /* Top directory uses NULL as the parent */
7145 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7146 return NULL;
7147
7148 /* All sub buffers have a descriptor */
7149 return tr->dir;
7150 }
7151
7152 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7153 {
7154 struct dentry *d_tracer;
7155
7156 if (tr->percpu_dir)
7157 return tr->percpu_dir;
7158
7159 d_tracer = tracing_get_dentry(tr);
7160 if (IS_ERR(d_tracer))
7161 return NULL;
7162
7163 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7164
7165 WARN_ONCE(!tr->percpu_dir,
7166 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7167
7168 return tr->percpu_dir;
7169 }
7170
7171 static struct dentry *
7172 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7173 void *data, long cpu, const struct file_operations *fops)
7174 {
7175 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7176
7177 if (ret) /* See tracing_get_cpu() */
7178 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7179 return ret;
7180 }
7181
7182 static void
7183 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7184 {
7185 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7186 struct dentry *d_cpu;
7187 char cpu_dir[30]; /* 30 characters should be more than enough */
7188
7189 if (!d_percpu)
7190 return;
7191
7192 snprintf(cpu_dir, 30, "cpu%ld", cpu);
7193 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7194 if (!d_cpu) {
7195 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7196 return;
7197 }
7198
7199 /* per cpu trace_pipe */
7200 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7201 tr, cpu, &tracing_pipe_fops);
7202
7203 /* per cpu trace */
7204 trace_create_cpu_file("trace", 0644, d_cpu,
7205 tr, cpu, &tracing_fops);
7206
7207 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7208 tr, cpu, &tracing_buffers_fops);
7209
7210 trace_create_cpu_file("stats", 0444, d_cpu,
7211 tr, cpu, &tracing_stats_fops);
7212
7213 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7214 tr, cpu, &tracing_entries_fops);
7215
7216 #ifdef CONFIG_TRACER_SNAPSHOT
7217 trace_create_cpu_file("snapshot", 0644, d_cpu,
7218 tr, cpu, &snapshot_fops);
7219
7220 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7221 tr, cpu, &snapshot_raw_fops);
7222 #endif
7223 }
7224
7225 #ifdef CONFIG_FTRACE_SELFTEST
7226 /* Let selftest have access to static functions in this file */
7227 #include "trace_selftest.c"
7228 #endif
7229
7230 static ssize_t
7231 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7232 loff_t *ppos)
7233 {
7234 struct trace_option_dentry *topt = filp->private_data;
7235 char *buf;
7236
7237 if (topt->flags->val & topt->opt->bit)
7238 buf = "1\n";
7239 else
7240 buf = "0\n";
7241
7242 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7243 }
7244
7245 static ssize_t
7246 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7247 loff_t *ppos)
7248 {
7249 struct trace_option_dentry *topt = filp->private_data;
7250 unsigned long val;
7251 int ret;
7252
7253 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7254 if (ret)
7255 return ret;
7256
7257 if (val != 0 && val != 1)
7258 return -EINVAL;
7259
7260 if (!!(topt->flags->val & topt->opt->bit) != val) {
7261 mutex_lock(&trace_types_lock);
7262 ret = __set_tracer_option(topt->tr, topt->flags,
7263 topt->opt, !val);
7264 mutex_unlock(&trace_types_lock);
7265 if (ret)
7266 return ret;
7267 }
7268
7269 *ppos += cnt;
7270
7271 return cnt;
7272 }
7273
7274
7275 static const struct file_operations trace_options_fops = {
7276 .open = tracing_open_generic,
7277 .read = trace_options_read,
7278 .write = trace_options_write,
7279 .llseek = generic_file_llseek,
7280 };
7281
7282 /*
7283 * In order to pass in both the trace_array descriptor as well as the index
7284 * to the flag that the trace option file represents, the trace_array
7285 * has a character array of trace_flags_index[], which holds the index
7286 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7287 * The address of this character array is passed to the flag option file
7288 * read/write callbacks.
7289 *
7290 * In order to extract both the index and the trace_array descriptor,
7291 * get_tr_index() uses the following algorithm.
7292 *
7293 * idx = *ptr;
7294 *
7295 * The pointer points into the index array, so dereferencing it yields
7296 * the index itself (remember index[1] == 1).
7297 *
7298 * Then, to get the trace_array descriptor, we subtract that index
7299 * from the ptr, which takes us to the start of the index array.
7300 *
7301 * ptr - idx == &index[0]
7302 *
7303 * Then a simple container_of() from that pointer gets us to the
7304 * trace_array descriptor.
7305 */
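/*
 * A concrete example of the scheme above: if data points at
 * tr->trace_flags_index[3], then *pindex becomes 3, data - 3 is
 * &tr->trace_flags_index[0], and container_of() on that address yields
 * the enclosing trace_array.
 */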
7306 static void get_tr_index(void *data, struct trace_array **ptr,
7307 unsigned int *pindex)
7308 {
7309 *pindex = *(unsigned char *)data;
7310
7311 *ptr = container_of(data - *pindex, struct trace_array,
7312 trace_flags_index);
7313 }
7314
7315 static ssize_t
7316 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7317 loff_t *ppos)
7318 {
7319 void *tr_index = filp->private_data;
7320 struct trace_array *tr;
7321 unsigned int index;
7322 char *buf;
7323
7324 get_tr_index(tr_index, &tr, &index);
7325
7326 if (tr->trace_flags & (1 << index))
7327 buf = "1\n";
7328 else
7329 buf = "0\n";
7330
7331 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7332 }
7333
7334 static ssize_t
7335 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7336 loff_t *ppos)
7337 {
7338 void *tr_index = filp->private_data;
7339 struct trace_array *tr;
7340 unsigned int index;
7341 unsigned long val;
7342 int ret;
7343
7344 get_tr_index(tr_index, &tr, &index);
7345
7346 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7347 if (ret)
7348 return ret;
7349
7350 if (val != 0 && val != 1)
7351 return -EINVAL;
7352
7353 mutex_lock(&trace_types_lock);
7354 ret = set_tracer_flag(tr, 1 << index, val);
7355 mutex_unlock(&trace_types_lock);
7356
7357 if (ret < 0)
7358 return ret;
7359
7360 *ppos += cnt;
7361
7362 return cnt;
7363 }
7364
7365 static const struct file_operations trace_options_core_fops = {
7366 .open = tracing_open_generic,
7367 .read = trace_options_core_read,
7368 .write = trace_options_core_write,
7369 .llseek = generic_file_llseek,
7370 };
7371
7372 struct dentry *trace_create_file(const char *name,
7373 umode_t mode,
7374 struct dentry *parent,
7375 void *data,
7376 const struct file_operations *fops)
7377 {
7378 struct dentry *ret;
7379
7380 ret = tracefs_create_file(name, mode, parent, data, fops);
7381 if (!ret)
7382 pr_warn("Could not create tracefs '%s' entry\n", name);
7383
7384 return ret;
7385 }
7386
7387
7388 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7389 {
7390 struct dentry *d_tracer;
7391
7392 if (tr->options)
7393 return tr->options;
7394
7395 d_tracer = tracing_get_dentry(tr);
7396 if (IS_ERR(d_tracer))
7397 return NULL;
7398
7399 tr->options = tracefs_create_dir("options", d_tracer);
7400 if (!tr->options) {
7401 pr_warn("Could not create tracefs directory 'options'\n");
7402 return NULL;
7403 }
7404
7405 return tr->options;
7406 }
7407
7408 static void
7409 create_trace_option_file(struct trace_array *tr,
7410 struct trace_option_dentry *topt,
7411 struct tracer_flags *flags,
7412 struct tracer_opt *opt)
7413 {
7414 struct dentry *t_options;
7415
7416 t_options = trace_options_init_dentry(tr);
7417 if (!t_options)
7418 return;
7419
7420 topt->flags = flags;
7421 topt->opt = opt;
7422 topt->tr = tr;
7423
7424 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7425 &trace_options_fops);
7426
7427 }
7428
7429 static void
7430 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7431 {
7432 struct trace_option_dentry *topts;
7433 struct trace_options *tr_topts;
7434 struct tracer_flags *flags;
7435 struct tracer_opt *opts;
7436 int cnt;
7437 int i;
7438
7439 if (!tracer)
7440 return;
7441
7442 flags = tracer->flags;
7443
7444 if (!flags || !flags->opts)
7445 return;
7446
7447 /*
7448 * If this is an instance, only create flags for tracers
7449 * the instance may have.
7450 */
7451 if (!trace_ok_for_array(tracer, tr))
7452 return;
7453
7454 for (i = 0; i < tr->nr_topts; i++) {
7455 /* Make sure there are no duplicate flags. */
7456 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7457 return;
7458 }
7459
7460 opts = flags->opts;
7461
7462 for (cnt = 0; opts[cnt].name; cnt++)
7463 ;
7464
7465 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7466 if (!topts)
7467 return;
7468
7469 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7470 GFP_KERNEL);
7471 if (!tr_topts) {
7472 kfree(topts);
7473 return;
7474 }
7475
7476 tr->topts = tr_topts;
7477 tr->topts[tr->nr_topts].tracer = tracer;
7478 tr->topts[tr->nr_topts].topts = topts;
7479 tr->nr_topts++;
7480
7481 for (cnt = 0; opts[cnt].name; cnt++) {
7482 create_trace_option_file(tr, &topts[cnt], flags,
7483 &opts[cnt]);
7484 WARN_ONCE(topts[cnt].entry == NULL,
7485 "Failed to create trace option: %s",
7486 opts[cnt].name);
7487 }
7488 }
7489
7490 static struct dentry *
7491 create_trace_option_core_file(struct trace_array *tr,
7492 const char *option, long index)
7493 {
7494 struct dentry *t_options;
7495
7496 t_options = trace_options_init_dentry(tr);
7497 if (!t_options)
7498 return NULL;
7499
7500 return trace_create_file(option, 0644, t_options,
7501 (void *)&tr->trace_flags_index[index],
7502 &trace_options_core_fops);
7503 }
7504
7505 static void create_trace_options_dir(struct trace_array *tr)
7506 {
7507 struct dentry *t_options;
7508 bool top_level = tr == &global_trace;
7509 int i;
7510
7511 t_options = trace_options_init_dentry(tr);
7512 if (!t_options)
7513 return;
7514
7515 for (i = 0; trace_options[i]; i++) {
7516 if (top_level ||
7517 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7518 create_trace_option_core_file(tr, trace_options[i], i);
7519 }
7520 }
7521
7522 static ssize_t
7523 rb_simple_read(struct file *filp, char __user *ubuf,
7524 size_t cnt, loff_t *ppos)
7525 {
7526 struct trace_array *tr = filp->private_data;
7527 char buf[64];
7528 int r;
7529
7530 r = tracer_tracing_is_on(tr);
7531 r = sprintf(buf, "%d\n", r);
7532
7533 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7534 }
7535
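/*
 * Write handler for tracing_on: "echo 0 > tracing_on" stops recording
 * into the ring buffer (and calls the tracer's ->stop() if it has one),
 * while "echo 1 > tracing_on" starts it again via ->start(). Writing
 * the value that is already in effect is treated as a no-op.
 */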
7536 static ssize_t
7537 rb_simple_write(struct file *filp, const char __user *ubuf,
7538 size_t cnt, loff_t *ppos)
7539 {
7540 struct trace_array *tr = filp->private_data;
7541 struct ring_buffer *buffer = tr->trace_buffer.buffer;
7542 unsigned long val;
7543 int ret;
7544
7545 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7546 if (ret)
7547 return ret;
7548
7549 if (buffer) {
7550 mutex_lock(&trace_types_lock);
7551 if (!!val == tracer_tracing_is_on(tr)) {
7552 val = 0; /* do nothing */
7553 } else if (val) {
7554 tracer_tracing_on(tr);
7555 if (tr->current_trace->start)
7556 tr->current_trace->start(tr);
7557 } else {
7558 tracer_tracing_off(tr);
7559 if (tr->current_trace->stop)
7560 tr->current_trace->stop(tr);
7561 }
7562 mutex_unlock(&trace_types_lock);
7563 }
7564
7565 (*ppos)++;
7566
7567 return cnt;
7568 }
7569
7570 static const struct file_operations rb_simple_fops = {
7571 .open = tracing_open_generic_tr,
7572 .read = rb_simple_read,
7573 .write = rb_simple_write,
7574 .release = tracing_release_generic_tr,
7575 .llseek = default_llseek,
7576 };
7577
7578 struct dentry *trace_instance_dir;
7579
7580 static void
7581 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7582
7583 static int
7584 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7585 {
7586 enum ring_buffer_flags rb_flags;
7587
7588 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7589
7590 buf->tr = tr;
7591
7592 buf->buffer = ring_buffer_alloc(size, rb_flags);
7593 if (!buf->buffer)
7594 return -ENOMEM;
7595
7596 buf->data = alloc_percpu(struct trace_array_cpu);
7597 if (!buf->data) {
7598 ring_buffer_free(buf->buffer);
7599 buf->buffer = NULL;
7600 return -ENOMEM;
7601 }
7602
7603 /* Allocate the first page for all buffers */
7604 set_buffer_entries(&tr->trace_buffer,
7605 ring_buffer_size(tr->trace_buffer.buffer, 0));
7606
7607 return 0;
7608 }
7609
7610 static int allocate_trace_buffers(struct trace_array *tr, int size)
7611 {
7612 int ret;
7613
7614 ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7615 if (ret)
7616 return ret;
7617
7618 #ifdef CONFIG_TRACER_MAX_TRACE
7619 ret = allocate_trace_buffer(tr, &tr->max_buffer,
7620 allocate_snapshot ? size : 1);
7621 if (WARN_ON(ret)) {
7622 ring_buffer_free(tr->trace_buffer.buffer);
7623 tr->trace_buffer.buffer = NULL;
7624 free_percpu(tr->trace_buffer.data);
7625 tr->trace_buffer.data = NULL;
7626 return -ENOMEM;
7627 }
7628 tr->allocated_snapshot = allocate_snapshot;
7629
7630 /*
7631 * Only the top level trace array gets its snapshot allocated
7632 * from the kernel command line.
7633 */
7634 allocate_snapshot = false;
7635 #endif
7636 return 0;
7637 }
7638
7639 static void free_trace_buffer(struct trace_buffer *buf)
7640 {
7641 if (buf->buffer) {
7642 ring_buffer_free(buf->buffer);
7643 buf->buffer = NULL;
7644 free_percpu(buf->data);
7645 buf->data = NULL;
7646 }
7647 }
7648
7649 static void free_trace_buffers(struct trace_array *tr)
7650 {
7651 if (!tr)
7652 return;
7653
7654 free_trace_buffer(&tr->trace_buffer);
7655
7656 #ifdef CONFIG_TRACER_MAX_TRACE
7657 free_trace_buffer(&tr->max_buffer);
7658 #endif
7659 }
7660
7661 static void init_trace_flags_index(struct trace_array *tr)
7662 {
7663 int i;
7664
7665 /* Used by the trace options files */
7666 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7667 tr->trace_flags_index[i] = i;
7668 }
7669
7670 static void __update_tracer_options(struct trace_array *tr)
7671 {
7672 struct tracer *t;
7673
7674 for (t = trace_types; t; t = t->next)
7675 add_tracer_options(tr, t);
7676 }
7677
7678 static void update_tracer_options(struct trace_array *tr)
7679 {
7680 mutex_lock(&trace_types_lock);
7681 __update_tracer_options(tr);
7682 mutex_unlock(&trace_types_lock);
7683 }
7684
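/*
 * Called when user space does a mkdir under the instances directory
 * (e.g. "mkdir instances/foo"): builds a new trace_array with its own
 * ring buffers, option flags and event files. Instance names must be
 * unique; a duplicate name returns -EEXIST.
 */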
7685 static int instance_mkdir(const char *name)
7686 {
7687 struct trace_array *tr;
7688 int ret;
7689
7690 mutex_lock(&trace_types_lock);
7691
7692 ret = -EEXIST;
7693 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7694 if (tr->name && strcmp(tr->name, name) == 0)
7695 goto out_unlock;
7696 }
7697
7698 ret = -ENOMEM;
7699 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7700 if (!tr)
7701 goto out_unlock;
7702
7703 tr->name = kstrdup(name, GFP_KERNEL);
7704 if (!tr->name)
7705 goto out_free_tr;
7706
7707 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7708 goto out_free_tr;
7709
7710 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7711
7712 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7713
7714 raw_spin_lock_init(&tr->start_lock);
7715
7716 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7717
7718 tr->current_trace = &nop_trace;
7719
7720 INIT_LIST_HEAD(&tr->systems);
7721 INIT_LIST_HEAD(&tr->events);
7722
7723 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7724 goto out_free_tr;
7725
7726 tr->dir = tracefs_create_dir(name, trace_instance_dir);
7727 if (!tr->dir)
7728 goto out_free_tr;
7729
7730 ret = event_trace_add_tracer(tr->dir, tr);
7731 if (ret) {
7732 tracefs_remove_recursive(tr->dir);
7733 goto out_free_tr;
7734 }
7735
7736 ftrace_init_trace_array(tr);
7737
7738 init_tracer_tracefs(tr, tr->dir);
7739 init_trace_flags_index(tr);
7740 __update_tracer_options(tr);
7741
7742 list_add(&tr->list, &ftrace_trace_arrays);
7743
7744 mutex_unlock(&trace_types_lock);
7745
7746 return 0;
7747
7748 out_free_tr:
7749 free_trace_buffers(tr);
7750 free_cpumask_var(tr->tracing_cpumask);
7751 kfree(tr->name);
7752 kfree(tr);
7753
7754 out_unlock:
7755 mutex_unlock(&trace_types_lock);
7756
7757 return ret;
7758
7759 }
7760
7761 static int instance_rmdir(const char *name)
7762 {
7763 struct trace_array *tr;
7764 int found = 0;
7765 int ret;
7766 int i;
7767
7768 mutex_lock(&trace_types_lock);
7769
7770 ret = -ENODEV;
7771 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7772 if (tr->name && strcmp(tr->name, name) == 0) {
7773 found = 1;
7774 break;
7775 }
7776 }
7777 if (!found)
7778 goto out_unlock;
7779
7780 ret = -EBUSY;
7781 if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7782 goto out_unlock;
7783
7784 list_del(&tr->list);
7785
7786 /* Disable all the flags that were enabled coming in */
7787 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7788 if ((1 << i) & ZEROED_TRACE_FLAGS)
7789 set_tracer_flag(tr, 1 << i, 0);
7790 }
7791
7792 tracing_set_nop(tr);
7793 clear_ftrace_function_probes(tr);
7794 event_trace_del_tracer(tr);
7795 ftrace_clear_pids(tr);
7796 ftrace_destroy_function_files(tr);
7797 tracefs_remove_recursive(tr->dir);
7798 free_trace_buffers(tr);
7799
7800 for (i = 0; i < tr->nr_topts; i++) {
7801 kfree(tr->topts[i].topts);
7802 }
7803 kfree(tr->topts);
7804
7805 free_cpumask_var(tr->tracing_cpumask);
7806 kfree(tr->name);
7807 kfree(tr);
7808
7809 ret = 0;
7810
7811 out_unlock:
7812 mutex_unlock(&trace_types_lock);
7813
7814 return ret;
7815 }
7816
7817 static __init void create_trace_instances(struct dentry *d_tracer)
7818 {
7819 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7820 instance_mkdir,
7821 instance_rmdir);
7822 if (WARN_ON(!trace_instance_dir))
7823 return;
7824 }
7825
7826 static void
7827 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7828 {
7829 int cpu;
7830
7831 trace_create_file("available_tracers", 0444, d_tracer,
7832 tr, &show_traces_fops);
7833
7834 trace_create_file("current_tracer", 0644, d_tracer,
7835 tr, &set_tracer_fops);
7836
7837 trace_create_file("tracing_cpumask", 0644, d_tracer,
7838 tr, &tracing_cpumask_fops);
7839
7840 trace_create_file("trace_options", 0644, d_tracer,
7841 tr, &tracing_iter_fops);
7842
7843 trace_create_file("trace", 0644, d_tracer,
7844 tr, &tracing_fops);
7845
7846 trace_create_file("trace_pipe", 0444, d_tracer,
7847 tr, &tracing_pipe_fops);
7848
7849 trace_create_file("buffer_size_kb", 0644, d_tracer,
7850 tr, &tracing_entries_fops);
7851
7852 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7853 tr, &tracing_total_entries_fops);
7854
7855 trace_create_file("free_buffer", 0200, d_tracer,
7856 tr, &tracing_free_buffer_fops);
7857
7858 trace_create_file("trace_marker", 0220, d_tracer,
7859 tr, &tracing_mark_fops);
7860
7861 trace_create_file("trace_marker_raw", 0220, d_tracer,
7862 tr, &tracing_mark_raw_fops);
7863
7864 trace_create_file("trace_clock", 0644, d_tracer, tr,
7865 &trace_clock_fops);
7866
7867 trace_create_file("tracing_on", 0644, d_tracer,
7868 tr, &rb_simple_fops);
7869
7870 create_trace_options_dir(tr);
7871
7872 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7873 trace_create_file("tracing_max_latency", 0644, d_tracer,
7874 &tr->max_latency, &tracing_max_lat_fops);
7875 #endif
7876
7877 if (ftrace_create_function_files(tr, d_tracer))
7878 WARN(1, "Could not allocate function filter files");
7879
7880 #ifdef CONFIG_TRACER_SNAPSHOT
7881 trace_create_file("snapshot", 0644, d_tracer,
7882 tr, &snapshot_fops);
7883 #endif
7884
7885 for_each_tracing_cpu(cpu)
7886 tracing_init_tracefs_percpu(tr, cpu);
7887
7888 ftrace_init_tracefs(tr, d_tracer);
7889 }
7890
7891 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7892 {
7893 struct vfsmount *mnt;
7894 struct file_system_type *type;
7895
7896 /*
7897 * To maintain backward compatibility for tools that mount
7898 * debugfs to get to the tracing facility, tracefs is automatically
7899 * mounted to the debugfs/tracing directory.
7900 */
7901 type = get_fs_type("tracefs");
7902 if (!type)
7903 return NULL;
7904 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7905 put_filesystem(type);
7906 if (IS_ERR(mnt))
7907 return NULL;
7908 mntget(mnt);
7909
7910 return mnt;
7911 }
7912
7913 /**
7914 * tracing_init_dentry - initialize top level trace array
7915 *
7916 * This is called when creating files or directories in the tracing
7917 * directory. It is called via fs_initcall() by any of the boot up code
7918 * and expects to return the dentry of the top level tracing directory.
7919 */
7920 struct dentry *tracing_init_dentry(void)
7921 {
7922 struct trace_array *tr = &global_trace;
7923
7924 /* The top level trace array uses NULL as parent */
7925 if (tr->dir)
7926 return NULL;
7927
7928 if (WARN_ON(!tracefs_initialized()) ||
7929 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7930 WARN_ON(!debugfs_initialized())))
7931 return ERR_PTR(-ENODEV);
7932
7933 /*
7934 * As there may still be users that expect the tracing
7935 * files to exist in debugfs/tracing, we must automount
7936 * the tracefs file system there, so older tools still
7937 * work with the newer kernel.
7938 */
7939 tr->dir = debugfs_create_automount("tracing", NULL,
7940 trace_automount, NULL);
7941 if (!tr->dir) {
7942 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7943 return ERR_PTR(-ENOMEM);
7944 }
7945
7946 return NULL;
7947 }
7948
7949 extern struct trace_eval_map *__start_ftrace_eval_maps[];
7950 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
7951
7952 static void __init trace_eval_init(void)
7953 {
7954 int len;
7955
7956 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
7957 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
7958 }
7959
7960 #ifdef CONFIG_MODULES
7961 static void trace_module_add_evals(struct module *mod)
7962 {
7963 if (!mod->num_trace_evals)
7964 return;
7965
7966 /*
7967 * Modules with bad taint do not have events created; do
7968 * not bother with enums either.
7969 */
7970 if (trace_module_has_bad_taint(mod))
7971 return;
7972
7973 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
7974 }
7975
7976 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
7977 static void trace_module_remove_evals(struct module *mod)
7978 {
7979 union trace_eval_map_item *map;
7980 union trace_eval_map_item **last = &trace_eval_maps;
7981
7982 if (!mod->num_trace_evals)
7983 return;
7984
7985 mutex_lock(&trace_eval_mutex);
7986
7987 map = trace_eval_maps;
7988
7989 while (map) {
7990 if (map->head.mod == mod)
7991 break;
7992 map = trace_eval_jmp_to_tail(map);
7993 last = &map->tail.next;
7994 map = map->tail.next;
7995 }
7996 if (!map)
7997 goto out;
7998
7999 *last = trace_eval_jmp_to_tail(map)->tail.next;
8000 kfree(map);
8001 out:
8002 mutex_unlock(&trace_eval_mutex);
8003 }
8004 #else
8005 static inline void trace_module_remove_evals(struct module *mod) { }
8006 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8007
8008 static int trace_module_notify(struct notifier_block *self,
8009 unsigned long val, void *data)
8010 {
8011 struct module *mod = data;
8012
8013 switch (val) {
8014 case MODULE_STATE_COMING:
8015 trace_module_add_evals(mod);
8016 break;
8017 case MODULE_STATE_GOING:
8018 trace_module_remove_evals(mod);
8019 break;
8020 }
8021
8022 return 0;
8023 }
8024
8025 static struct notifier_block trace_module_nb = {
8026 .notifier_call = trace_module_notify,
8027 .priority = 0,
8028 };
8029 #endif /* CONFIG_MODULES */
8030
8031 static __init int tracer_init_tracefs(void)
8032 {
8033 struct dentry *d_tracer;
8034
8035 trace_access_lock_init();
8036
8037 d_tracer = tracing_init_dentry();
8038 if (IS_ERR(d_tracer))
8039 return 0;
8040
8041 init_tracer_tracefs(&global_trace, d_tracer);
8042 ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8043
8044 trace_create_file("tracing_thresh", 0644, d_tracer,
8045 &global_trace, &tracing_thresh_fops);
8046
8047 trace_create_file("README", 0444, d_tracer,
8048 NULL, &tracing_readme_fops);
8049
8050 trace_create_file("saved_cmdlines", 0444, d_tracer,
8051 NULL, &tracing_saved_cmdlines_fops);
8052
8053 trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8054 NULL, &tracing_saved_cmdlines_size_fops);
8055
8056 trace_create_file("saved_tgids", 0444, d_tracer,
8057 NULL, &tracing_saved_tgids_fops);
8058
8059 trace_eval_init();
8060
8061 trace_create_eval_file(d_tracer);
8062
8063 #ifdef CONFIG_MODULES
8064 register_module_notifier(&trace_module_nb);
8065 #endif
8066
8067 #ifdef CONFIG_DYNAMIC_FTRACE
8068 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8069 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8070 #endif
8071
8072 create_trace_instances(d_tracer);
8073
8074 update_tracer_options(&global_trace);
8075
8076 return 0;
8077 }
8078
8079 static int trace_panic_handler(struct notifier_block *this,
8080 unsigned long event, void *unused)
8081 {
8082 if (ftrace_dump_on_oops)
8083 ftrace_dump(ftrace_dump_on_oops);
8084 return NOTIFY_OK;
8085 }
8086
8087 static struct notifier_block trace_panic_notifier = {
8088 .notifier_call = trace_panic_handler,
8089 .next = NULL,
8090 .priority = 150 /* priority: INT_MAX >= x >= 0 */
8091 };
8092
8093 static int trace_die_handler(struct notifier_block *self,
8094 unsigned long val,
8095 void *data)
8096 {
8097 switch (val) {
8098 case DIE_OOPS:
8099 if (ftrace_dump_on_oops)
8100 ftrace_dump(ftrace_dump_on_oops);
8101 break;
8102 default:
8103 break;
8104 }
8105 return NOTIFY_OK;
8106 }
8107
8108 static struct notifier_block trace_die_notifier = {
8109 .notifier_call = trace_die_handler,
8110 .priority = 200
8111 };
8112
8113 /*
8114 * printk is set to a max of 1024; we really don't need it that big.
8115 * Nothing should be printing 1000 characters anyway.
8116 */
8117 #define TRACE_MAX_PRINT 1000
8118
8119 /*
8120 * Define here KERN_TRACE so that we have one place to modify
8121 * it if we decide to change what log level the ftrace dump
8122 * should be at.
8123 */
8124 #define KERN_TRACE KERN_EMERG
8125
8126 void
8127 trace_printk_seq(struct trace_seq *s)
8128 {
8129 /* Probably should print a warning here. */
8130 if (s->seq.len >= TRACE_MAX_PRINT)
8131 s->seq.len = TRACE_MAX_PRINT;
8132
8133 /*
8134 * More paranoid code. Although the buffer size is set to
8135 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8136 * an extra layer of protection.
8137 */
8138 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8139 s->seq.len = s->seq.size - 1;
8140
8141 /* should already be NUL-terminated, but we are paranoid. */
8142 s->buffer[s->seq.len] = 0;
8143
8144 printk(KERN_TRACE "%s", s->buffer);
8145
8146 trace_seq_init(s);
8147 }
8148
8149 void trace_init_global_iter(struct trace_iterator *iter)
8150 {
8151 iter->tr = &global_trace;
8152 iter->trace = iter->tr->current_trace;
8153 iter->cpu_file = RING_BUFFER_ALL_CPUS;
8154 iter->trace_buffer = &global_trace.trace_buffer;
8155
8156 if (iter->trace && iter->trace->open)
8157 iter->trace->open(iter);
8158
8159 /* Annotate start of buffers if we had overruns */
8160 if (ring_buffer_overruns(iter->trace_buffer->buffer))
8161 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8162
8163 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8164 if (trace_clocks[iter->tr->clock_id].in_ns)
8165 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8166 }
8167
8168 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8169 {
8170 /* use static because iter can be a bit big for the stack */
8171 static struct trace_iterator iter;
8172 static atomic_t dump_running;
8173 struct trace_array *tr = &global_trace;
8174 unsigned int old_userobj;
8175 unsigned long flags;
8176 int cnt = 0, cpu;
8177
8178 /* Only allow one dump user at a time. */
8179 if (atomic_inc_return(&dump_running) != 1) {
8180 atomic_dec(&dump_running);
8181 return;
8182 }
8183
8184 /*
8185 * Always turn off tracing when we dump.
8186 * We don't need to show trace output of what happens
8187 * between multiple crashes.
8188 *
8189 * If the user does a sysrq-z, then they can re-enable
8190 * tracing with echo 1 > tracing_on.
8191 */
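	/* (tracing_on lives in tracefs, typically /sys/kernel/debug/tracing/tracing_on) */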
8192 tracing_off();
8193
8194 local_irq_save(flags);
8195 printk_nmi_direct_enter();
8196
8197 /* Simulate the iterator */
8198 trace_init_global_iter(&iter);
8199
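	/* Keep new events out of the buffers on every CPU while we dump them. */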
8200 for_each_tracing_cpu(cpu) {
8201 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8202 }
8203
8204 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8205
8206 /* don't look at user memory in panic mode */
8207 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8208
8209 switch (oops_dump_mode) {
8210 case DUMP_ALL:
8211 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8212 break;
8213 case DUMP_ORIG:
8214 iter.cpu_file = raw_smp_processor_id();
8215 break;
8216 case DUMP_NONE:
8217 goto out_enable;
8218 default:
8219 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8220 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8221 }
8222
8223 printk(KERN_TRACE "Dumping ftrace buffer:\n");
8224
8225 /* Did function tracer already get disabled? */
8226 if (ftrace_is_dead()) {
8227 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8228 printk("# MAY BE MISSING FUNCTION EVENTS\n");
8229 }
8230
8231 /*
8232 	 * We need to stop all tracing on all CPUs to read
8233 	 * the next buffer. This is a bit expensive, but is
8234 	 * not done often. We fill in all that we can read,
8235 * and then release the locks again.
8236 */
8237
8238 while (!trace_empty(&iter)) {
8239
8240 if (!cnt)
8241 printk(KERN_TRACE "---------------------------------\n");
8242
8243 cnt++;
8244
8245 /* reset all but tr, trace, and overruns */
8246 memset(&iter.seq, 0,
8247 sizeof(struct trace_iterator) -
8248 offsetof(struct trace_iterator, seq));
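		/* (the memset clears from ->seq to the end of the struct; fields before it stay intact) */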
8249 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8250 iter.pos = -1;
8251
8252 if (trace_find_next_entry_inc(&iter) != NULL) {
8253 int ret;
8254
8255 ret = print_trace_line(&iter);
8256 if (ret != TRACE_TYPE_NO_CONSUME)
8257 trace_consume(&iter);
8258 }
8259 touch_nmi_watchdog();
8260
8261 trace_printk_seq(&iter.seq);
8262 }
8263
8264 if (!cnt)
8265 printk(KERN_TRACE " (ftrace buffer empty)\n");
8266 else
8267 printk(KERN_TRACE "---------------------------------\n");
8268
8269 out_enable:
8270 tr->trace_flags |= old_userobj;
8271
8272 for_each_tracing_cpu(cpu) {
8273 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8274 }
8275 atomic_dec(&dump_running);
8276 printk_nmi_direct_exit();
8277 local_irq_restore(flags);
8278 }
8279 EXPORT_SYMBOL_GPL(ftrace_dump);
8280
8281 __init static int tracer_alloc_buffers(void)
8282 {
8283 int ring_buf_size;
8284 int ret = -ENOMEM;
8285
8286 /*
8287 	 * Make sure we don't accidentally add more trace options
8288 * than we have bits for.
8289 */
8290 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8291
8292 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8293 goto out;
8294
8295 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8296 goto out_free_buffer_mask;
8297
8298 /* Only allocate trace_printk buffers if a trace_printk exists */
8299 if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8300 /* Must be called before global_trace.buffer is allocated */
8301 trace_printk_init_buffers();
8302
8303 	/* To save memory, keep the ring buffer size at its minimum */
8304 if (ring_buffer_expanded)
8305 ring_buf_size = trace_buf_size;
8306 else
8307 ring_buf_size = 1;
8308
8309 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8310 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8311
8312 raw_spin_lock_init(&global_trace.start_lock);
8313
8314 /*
8315 	 * The prepare callback allocates some memory for the ring buffer. We
8316 	 * don't free the buffer if the CPU goes down. If we were to free
8317 * the buffer, then the user would lose any trace that was in the
8318 * buffer. The memory will be removed once the "instance" is removed.
8319 */
8320 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8321 "trace/RB:preapre", trace_rb_cpu_prepare,
8322 NULL);
8323 if (ret < 0)
8324 goto out_free_cpumask;
8325 /* Used for event triggers */
8326 ret = -ENOMEM;
8327 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8328 if (!temp_buffer)
8329 goto out_rm_hp_state;
8330
8331 if (trace_create_savedcmd() < 0)
8332 goto out_free_temp_buffer;
8333
8334 	/* TODO: make the number of buffers hot-pluggable with CPUs */
8335 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8336 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8337 WARN_ON(1);
8338 goto out_free_savedcmd;
8339 }
8340
8341 if (global_trace.buffer_disabled)
8342 tracing_off();
8343
8344 if (trace_boot_clock) {
8345 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8346 if (ret < 0)
8347 pr_warn("Trace clock %s not defined, going back to default\n",
8348 trace_boot_clock);
8349 }
8350
8351 /*
8352 * register_tracer() might reference current_trace, so it
8353 * needs to be set before we register anything. This is
8354 * just a bootstrap of current_trace anyway.
8355 */
8356 global_trace.current_trace = &nop_trace;
8357
8358 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8359
8360 ftrace_init_global_array_ops(&global_trace);
8361
8362 init_trace_flags_index(&global_trace);
8363
8364 register_tracer(&nop_trace);
8365
8366 /* Function tracing may start here (via kernel command line) */
8367 init_function_trace();
8368
8369 /* All seems OK, enable tracing */
8370 tracing_disabled = 0;
8371
8372 atomic_notifier_chain_register(&panic_notifier_list,
8373 &trace_panic_notifier);
8374
8375 register_die_notifier(&trace_die_notifier);
8376
8377 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8378
8379 INIT_LIST_HEAD(&global_trace.systems);
8380 INIT_LIST_HEAD(&global_trace.events);
8381 list_add(&global_trace.list, &ftrace_trace_arrays);
8382
8383 apply_trace_boot_options();
8384
8385 register_snapshot_cmd();
8386
8387 return 0;
8388
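	/* Error unwind: release resources in the reverse order they were set up. */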
8389 out_free_savedcmd:
8390 free_saved_cmdlines_buffer(savedcmd);
8391 out_free_temp_buffer:
8392 ring_buffer_free(temp_buffer);
8393 out_rm_hp_state:
8394 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8395 out_free_cpumask:
8396 free_cpumask_var(global_trace.tracing_cpumask);
8397 out_free_buffer_mask:
8398 free_cpumask_var(tracing_buffer_mask);
8399 out:
8400 return ret;
8401 }
8402
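/*
 * Called from start_kernel() early in boot: set up tracepoint_print_iter
 * when tracepoint_printk ("tp_printk" on the command line) is enabled,
 * then allocate the tracing ring buffers.
 */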
8403 void __init early_trace_init(void)
8404 {
8405 if (tracepoint_printk) {
8406 tracepoint_print_iter =
8407 kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8408 if (WARN_ON(!tracepoint_print_iter))
8409 tracepoint_printk = 0;
8410 else
8411 static_key_enable(&tracepoint_printk_key.key);
8412 }
8413 tracer_alloc_buffers();
8414 }
8415
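/*
 * Called from start_kernel(), after early_trace_init(); brings up the
 * trace event subsystem.
 */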
8416 void __init trace_init(void)
8417 {
8418 trace_event_init();
8419 }
8420
8421 __init static int clear_boot_tracer(void)
8422 {
8423 /*
8424 	 * The default bootup tracer name is stored in an init-section buffer.
8425 	 * This function is called as a late initcall. If we did not
8426 * find the boot tracer, then clear it out, to prevent
8427 * later registration from accessing the buffer that is
8428 * about to be freed.
8429 */
8430 if (!default_bootup_tracer)
8431 return 0;
8432
8433 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8434 default_bootup_tracer);
8435 default_bootup_tracer = NULL;
8436
8437 return 0;
8438 }
8439
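/*
 * tracer_init_tracefs() runs at fs_initcall time, once the tracefs
 * infrastructure can be used; clear_boot_tracer() runs as one of the very
 * last initcalls, after every builtin tracer has had a chance to register.
 */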
8440 fs_initcall(tracer_init_tracefs);
8441 late_initcall_sync(clear_boot_tracer);