Merge tag 'for-linus-20170510' of git://git.infradead.org/linux-mtd
[GitHub/LineageOS/android_kernel_motorola_exynos9610.git] / tools / perf / builtin-trace.c
CommitLineData
a598bb5e
ACM
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
4e319027 19#include <traceevent/event-parse.h>
988bdb31 20#include <api/fs/tracing_path.h>
514f1c67 21#include "builtin.h"
752fde44 22#include "util/color.h"
7c304ee0 23#include "util/debug.h"
514f1c67 24#include "util/evlist.h"
4b6ab94e 25#include <subcmd/exec-cmd.h>
752fde44 26#include "util/machine.h"
9a3993d4 27#include "util/path.h"
6810fc91 28#include "util/session.h"
752fde44 29#include "util/thread.h"
4b6ab94e 30#include <subcmd/parse-options.h>
2ae3a312 31#include "util/strlist.h"
bdc89661 32#include "util/intlist.h"
514f1c67 33#include "util/thread_map.h"
bf2575c1 34#include "util/stat.h"
fd5cead2 35#include "trace/beauty/beauty.h"
97978b3e 36#include "trace-event.h"
9aca7f17 37#include "util/parse-events.h"
ba504235 38#include "util/bpf-loader.h"
566a0885 39#include "callchain.h"
fea01392 40#include "print_binary.h"
a067558e 41#include "string2.h"
fd0db102 42#include "syscalltbl.h"
96c14451 43#include "rb_resort.h"
514f1c67 44
a43783ae 45#include <errno.h>
fd20e811 46#include <inttypes.h>
fd0db102 47#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
4208735d 48#include <poll.h>
9607ad3a 49#include <signal.h>
514f1c67 50#include <stdlib.h>
017037ff 51#include <string.h>
8dd2a131 52#include <linux/err.h>
997bba8c
ACM
53#include <linux/filter.h>
54#include <linux/audit.h>
877a7a11 55#include <linux/kernel.h>
39878d49 56#include <linux/random.h>
c6d4a494 57#include <linux/stringify.h>
bd48c63e 58#include <linux/time64.h>
514f1c67 59
3d689ed6
ACM
60#include "sane_ctype.h"
61
c188e7ac
ACM
62#ifndef O_CLOEXEC
63# define O_CLOEXEC 02000000
64#endif
65
d1d438a3
ACM
66struct trace {
67 struct perf_tool tool;
fd0db102 68 struct syscalltbl *sctbl;
d1d438a3
ACM
69 struct {
70 int max;
71 struct syscall *table;
72 struct {
73 struct perf_evsel *sys_enter,
74 *sys_exit;
75 } events;
76 } syscalls;
77 struct record_opts opts;
78 struct perf_evlist *evlist;
79 struct machine *host;
80 struct thread *current;
81 u64 base_time;
82 FILE *output;
83 unsigned long nr_events;
84 struct strlist *ev_qualifier;
85 struct {
86 size_t nr;
87 int *entries;
88 } ev_qualifier_ids;
d1d438a3
ACM
89 struct {
90 size_t nr;
91 pid_t *entries;
92 } filter_pids;
93 double duration_filter;
94 double runtime_ms;
95 struct {
96 u64 vfs_getname,
97 proc_getname;
98 } stats;
c6d4a494 99 unsigned int max_stack;
5cf9c84e 100 unsigned int min_stack;
d1d438a3
ACM
101 bool not_ev_qualifier;
102 bool live;
103 bool full_time;
104 bool sched;
105 bool multiple_threads;
106 bool summary;
107 bool summary_only;
108 bool show_comm;
109 bool show_tool_stats;
110 bool trace_syscalls;
44621819 111 bool kernel_syscallchains;
d1d438a3
ACM
112 bool force;
113 bool vfs_getname;
114 int trace_pgfaults;
fd0db102 115 int open_id;
d1d438a3 116};
a1c2552d 117
77170988
ACM
118struct tp_field {
119 int offset;
120 union {
121 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
122 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
123 };
124};
125
126#define TP_UINT_FIELD(bits) \
127static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
128{ \
55d43bca
DA
129 u##bits value; \
130 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
131 return value; \
77170988
ACM
132}
133
134TP_UINT_FIELD(8);
135TP_UINT_FIELD(16);
136TP_UINT_FIELD(32);
137TP_UINT_FIELD(64);
138
139#define TP_UINT_FIELD__SWAPPED(bits) \
140static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
141{ \
55d43bca
DA
142 u##bits value; \
143 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
77170988
ACM
144 return bswap_##bits(value);\
145}
146
147TP_UINT_FIELD__SWAPPED(16);
148TP_UINT_FIELD__SWAPPED(32);
149TP_UINT_FIELD__SWAPPED(64);
150
151static int tp_field__init_uint(struct tp_field *field,
152 struct format_field *format_field,
153 bool needs_swap)
154{
155 field->offset = format_field->offset;
156
157 switch (format_field->size) {
158 case 1:
159 field->integer = tp_field__u8;
160 break;
161 case 2:
162 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
163 break;
164 case 4:
165 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
166 break;
167 case 8:
168 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
169 break;
170 default:
171 return -1;
172 }
173
174 return 0;
175}
176
177static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
178{
179 return sample->raw_data + field->offset;
180}
181
182static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
183{
184 field->offset = format_field->offset;
185 field->pointer = tp_field__ptr;
186 return 0;
187}
188
189struct syscall_tp {
190 struct tp_field id;
191 union {
192 struct tp_field args, ret;
193 };
194};
195
196static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
197 struct tp_field *field,
198 const char *name)
199{
200 struct format_field *format_field = perf_evsel__field(evsel, name);
201
202 if (format_field == NULL)
203 return -1;
204
205 return tp_field__init_uint(field, format_field, evsel->needs_swap);
206}
207
208#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
209 ({ struct syscall_tp *sc = evsel->priv;\
210 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
211
/*
 * Look up the tracepoint field called @name in @evsel and set up @field as
 * a pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *ff = perf_evsel__field(evsel, name);

	if (ff != NULL)
		return tp_field__init_ptr(field, ff);

	return -1;
}

/*
 * Convenience wrapper: initialize the member @name of the struct syscall_tp
 * stored in evsel->priv as a pointer accessor for the tracepoint field of
 * the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
227
228static void perf_evsel__delete_priv(struct perf_evsel *evsel)
229{
04662523 230 zfree(&evsel->priv);
77170988
ACM
231 perf_evsel__delete(evsel);
232}
233
96695d44
NK
234static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
235{
236 evsel->priv = malloc(sizeof(struct syscall_tp));
237 if (evsel->priv != NULL) {
238 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
239 goto out_delete;
240
241 evsel->handler = handler;
242 return 0;
243 }
244
245 return -ENOMEM;
246
247out_delete:
04662523 248 zfree(&evsel->priv);
96695d44
NK
249 return -ENOENT;
250}
251
/*
 * Create the evsel for the "raw_syscalls:<direction>" tracepoint and prepare
 * it with perf_evsel__init_syscall_tp().
 *
 * Older kernels (e.g. RHEL6) expose these tracepoints as
 * "syscalls:<direction>", so that name is tried when the first one fails.
 *
 * Returns the new evsel, or NULL if neither tracepoint can be created or
 * the syscall_tp setup fails (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (IS_ERR(evsel)) {
		/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
		evsel = perf_evsel__newtp("syscalls", direction);
		if (IS_ERR(evsel))
			return NULL;
	}

	if (perf_evsel__init_syscall_tp(evsel, handler) != 0) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
272
/*
 * Read the integer member @name of evsel's struct syscall_tp out of @sample,
 * through the reader installed in that member.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

/*
 * Same as above for members set up as pointer accessors: evaluates to the
 * address of the field inside @sample.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
280
/*
 * Table translating an integer argument into a name.
 *
 * @offset:     value that corresponds to entries[0].
 * @nr_entries: number of slots in @entries.
 * @entries:    the names, indexed by (value - offset).
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char **entries;
};

/* Define strarray__<array> describing @array, with entries[0] == value 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

/* Same as DEFINE_STRARRAY(), but entries[0] corresponds to value @off. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
297
975b7c2f
ACM
298static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
299 const char *intfmt,
300 struct syscall_arg *arg)
1f115cb7 301{
1f115cb7 302 struct strarray *sa = arg->parm;
03e3adc9 303 int idx = arg->val - sa->offset;
1f115cb7
ACM
304
305 if (idx < 0 || idx >= sa->nr_entries)
975b7c2f 306 return scnprintf(bf, size, intfmt, arg->val);
1f115cb7
ACM
307
308 return scnprintf(bf, size, "%s", sa->entries[idx]);
309}
310
975b7c2f
ACM
/*
 * strarray formatter that prints values without a name in decimal ("%d").
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
318
844ae5b4
ACM
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * gets rewritten to support all arches.
 *
 * strarray formatter that prints values without a name in hex ("%#x").
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 332
75b757ca
ACM
333static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
334 struct syscall_arg *arg);
335
336#define SCA_FD syscall_arg__scnprintf_fd
337
48e1f91a
ACM
338#ifndef AT_FDCWD
339#define AT_FDCWD -100
340#endif
341
75b757ca
ACM
342static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
343 struct syscall_arg *arg)
344{
345 int fd = arg->val;
346
347 if (fd == AT_FDCWD)
348 return scnprintf(bf, size, "CWD");
349
350 return syscall_arg__scnprintf_fd(bf, size, arg);
351}
352
353#define SCA_FDAT syscall_arg__scnprintf_fd_at
354
355static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
356 struct syscall_arg *arg);
357
358#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
359
6e7eeb51 360static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
01533e97 361 struct syscall_arg *arg)
13d4ff3e 362{
01533e97 363 return scnprintf(bf, size, "%#lx", arg->val);
13d4ff3e
ACM
364}
365
beccb2b5
ACM
366#define SCA_HEX syscall_arg__scnprintf_hex
367
a1c2552d
ACM
368static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
369 struct syscall_arg *arg)
370{
371 return scnprintf(bf, size, "%d", arg->val);
372}
373
374#define SCA_INT syscall_arg__scnprintf_int
375
729a7841
ACM
376static const char *bpf_cmd[] = {
377 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
378 "MAP_GET_NEXT_KEY", "PROG_LOAD",
379};
380static DEFINE_STRARRAY(bpf_cmd);
381
03e3adc9
ACM
382static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
383static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
eac032c5 384
1f115cb7
ACM
385static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
386static DEFINE_STRARRAY(itimers);
387
b62bee1b
ACM
388static const char *keyctl_options[] = {
389 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
390 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
391 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
392 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
393 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
394};
395static DEFINE_STRARRAY(keyctl_options);
396
efe6b882
ACM
397static const char *whences[] = { "SET", "CUR", "END",
398#ifdef SEEK_DATA
399"DATA",
400#endif
401#ifdef SEEK_HOLE
402"HOLE",
403#endif
404};
405static DEFINE_STRARRAY(whences);
f9da0b0c 406
80f587d5
ACM
407static const char *fcntl_cmds[] = {
408 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
409 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
410 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
411 "F_GETOWNER_UIDS",
412};
413static DEFINE_STRARRAY(fcntl_cmds);
414
c045bf02
ACM
415static const char *rlimit_resources[] = {
416 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
417 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
418 "RTTIME",
419};
420static DEFINE_STRARRAY(rlimit_resources);
421
eb5b1b14
ACM
422static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
423static DEFINE_STRARRAY(sighow);
424
4f8c1b74
DA
425static const char *clockid[] = {
426 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
28ebb87c
ACM
427 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
428 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
4f8c1b74
DA
429};
430static DEFINE_STRARRAY(clockid);
431
e10bce81
ACM
432static const char *socket_families[] = {
433 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
434 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
435 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
436 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
437 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
438 "ALG", "NFC", "VSOCK",
439};
440static DEFINE_STRARRAY(socket_families);
441
51108999
ACM
442static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
443 struct syscall_arg *arg)
444{
445 size_t printed = 0;
446 int mode = arg->val;
447
448 if (mode == F_OK) /* 0 */
449 return scnprintf(bf, size, "F");
450#define P_MODE(n) \
451 if (mode & n##_OK) { \
452 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
453 mode &= ~n##_OK; \
454 }
455
456 P_MODE(R);
457 P_MODE(W);
458 P_MODE(X);
459#undef P_MODE
460
461 if (mode)
462 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
463
464 return printed;
465}
466
467#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
468
f994592d
ACM
469static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
470 struct syscall_arg *arg);
471
472#define SCA_FILENAME syscall_arg__scnprintf_filename
473
46cce19b
ACM
474static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
475 struct syscall_arg *arg)
476{
477 int printed = 0, flags = arg->val;
478
479#define P_FLAG(n) \
480 if (flags & O_##n) { \
481 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
482 flags &= ~O_##n; \
483 }
484
485 P_FLAG(CLOEXEC);
486 P_FLAG(NONBLOCK);
487#undef P_FLAG
488
489 if (flags)
490 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
491
492 return printed;
493}
494
495#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
496
844ae5b4
ACM
497#if defined(__i386__) || defined(__x86_64__)
498/*
499 * FIXME: Make this available to all arches.
500 */
78645cf3
ACM
501#define TCGETS 0x5401
502
503static const char *tioctls[] = {
504 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
505 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
506 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
507 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
508 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
509 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
510 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
511 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
512 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
513 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
514 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
515 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
516 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
517 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
518 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
519};
520
521static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
844ae5b4 522#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 523
a355a61e
ACM
524#ifndef GRND_NONBLOCK
525#define GRND_NONBLOCK 0x0001
526#endif
527#ifndef GRND_RANDOM
528#define GRND_RANDOM 0x0002
529#endif
530
39878d49
ACM
531static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
532 struct syscall_arg *arg)
533{
534 int printed = 0, flags = arg->val;
535
536#define P_FLAG(n) \
537 if (flags & GRND_##n) { \
538 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
539 flags &= ~GRND_##n; \
540 }
541
542 P_FLAG(RANDOM);
543 P_FLAG(NONBLOCK);
544#undef P_FLAG
545
546 if (flags)
547 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
548
549 return printed;
550}
551
552#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
553
453350dd
ACM
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument
 * number @arg with SCA_STRARRAY, using strarray__<array> as its table.
 * @name is not used by the expansion; it only labels the argument at the
 * place where the macro is used.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
557
ea8dc3ce 558#include "trace/beauty/eventfd.c"
8bf382ce 559#include "trace/beauty/flock.c"
d5d71e86 560#include "trace/beauty/futex_op.c"
df4cb167 561#include "trace/beauty/mmap.c"
ba2f22cf 562#include "trace/beauty/mode_t.c"
a30e6259 563#include "trace/beauty/msg_flags.c"
8f48df69 564#include "trace/beauty/open_flags.c"
62de344e 565#include "trace/beauty/perf_event_open.c"
d5d71e86 566#include "trace/beauty/pid.c"
a3bca91f 567#include "trace/beauty/sched_policy.c"
f5cd95ea 568#include "trace/beauty/seccomp.c"
12199d8e 569#include "trace/beauty/signum.c"
bbf86c43 570#include "trace/beauty/socket_type.c"
7206b900 571#include "trace/beauty/waitid_options.c"
a3bca91f 572
514f1c67
ACM
573static struct syscall_fmt {
574 const char *name;
aec1930b 575 const char *alias;
01533e97 576 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1f115cb7 577 void *arg_parm[6];
514f1c67 578 bool errmsg;
11c8e39f 579 bool errpid;
514f1c67 580 bool timeout;
04b34729 581 bool hexret;
514f1c67 582} syscall_fmts[] = {
51108999 583 { .name = "access", .errmsg = true,
12f3ca4f 584 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
aec1930b 585 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
729a7841 586 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
beccb2b5
ACM
587 { .name = "brk", .hexret = true,
588 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
12f3ca4f
ACM
589 { .name = "chdir", .errmsg = true, },
590 { .name = "chmod", .errmsg = true, },
591 { .name = "chroot", .errmsg = true, },
4f8c1b74 592 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
11c8e39f 593 { .name = "clone", .errpid = true, },
75b757ca 594 { .name = "close", .errmsg = true,
48000a1a 595 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
a14bb860 596 { .name = "connect", .errmsg = true, },
12f3ca4f 597 { .name = "creat", .errmsg = true, },
b6565c90
ACM
598 { .name = "dup", .errmsg = true, },
599 { .name = "dup2", .errmsg = true, },
600 { .name = "dup3", .errmsg = true, },
453350dd 601 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
49af9e93
ACM
602 { .name = "eventfd2", .errmsg = true,
603 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
12f3ca4f 604 { .name = "faccessat", .errmsg = true, },
b6565c90
ACM
605 { .name = "fadvise64", .errmsg = true, },
606 { .name = "fallocate", .errmsg = true, },
607 { .name = "fchdir", .errmsg = true, },
608 { .name = "fchmod", .errmsg = true, },
75b757ca 609 { .name = "fchmodat", .errmsg = true,
12f3ca4f 610 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90 611 { .name = "fchown", .errmsg = true, },
75b757ca 612 { .name = "fchownat", .errmsg = true,
12f3ca4f 613 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
75b757ca 614 { .name = "fcntl", .errmsg = true,
b6565c90 615 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
75b757ca 616 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
b6565c90 617 { .name = "fdatasync", .errmsg = true, },
5cea6ff2 618 { .name = "flock", .errmsg = true,
b6565c90
ACM
619 .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
620 { .name = "fsetxattr", .errmsg = true, },
621 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
12f3ca4f 622 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
b6565c90
ACM
623 { .name = "fstatfs", .errmsg = true, },
624 { .name = "fsync", .errmsg = true, },
625 { .name = "ftruncate", .errmsg = true, },
f9da0b0c
ACM
626 { .name = "futex", .errmsg = true,
627 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
75b757ca 628 { .name = "futimesat", .errmsg = true,
12f3ca4f 629 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90
ACM
630 { .name = "getdents", .errmsg = true, },
631 { .name = "getdents64", .errmsg = true, },
453350dd 632 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
c65f1070 633 { .name = "getpid", .errpid = true, },
d1d438a3 634 { .name = "getpgid", .errpid = true, },
c65f1070 635 { .name = "getppid", .errpid = true, },
39878d49
ACM
636 { .name = "getrandom", .errmsg = true,
637 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
453350dd 638 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f
ACM
639 { .name = "getxattr", .errmsg = true, },
640 { .name = "inotify_add_watch", .errmsg = true, },
beccb2b5 641 { .name = "ioctl", .errmsg = true,
b6565c90 642 .arg_scnprintf = {
844ae5b4
ACM
643#if defined(__i386__) || defined(__x86_64__)
644/*
645 * FIXME: Make this available to all arches.
646 */
78645cf3
ACM
647 [1] = SCA_STRHEXARRAY, /* cmd */
648 [2] = SCA_HEX, /* arg */ },
649 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
844ae5b4
ACM
650#else
651 [2] = SCA_HEX, /* arg */ }, },
652#endif
b62bee1b 653 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
8bad5b0a
ACM
654 { .name = "kill", .errmsg = true,
655 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f
ACM
656 { .name = "lchown", .errmsg = true, },
657 { .name = "lgetxattr", .errmsg = true, },
75b757ca 658 { .name = "linkat", .errmsg = true,
48000a1a 659 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
12f3ca4f
ACM
660 { .name = "listxattr", .errmsg = true, },
661 { .name = "llistxattr", .errmsg = true, },
662 { .name = "lremovexattr", .errmsg = true, },
75b757ca 663 { .name = "lseek", .errmsg = true,
b6565c90 664 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
75b757ca 665 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
12f3ca4f
ACM
666 { .name = "lsetxattr", .errmsg = true, },
667 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
668 { .name = "lsxattr", .errmsg = true, },
9e9716d1
ACM
669 { .name = "madvise", .errmsg = true,
670 .arg_scnprintf = { [0] = SCA_HEX, /* start */
671 [2] = SCA_MADV_BHV, /* behavior */ }, },
12f3ca4f 672 { .name = "mkdir", .errmsg = true, },
75b757ca 673 { .name = "mkdirat", .errmsg = true,
12f3ca4f
ACM
674 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
675 { .name = "mknod", .errmsg = true, },
75b757ca 676 { .name = "mknodat", .errmsg = true,
12f3ca4f 677 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
3d903aa7
ACM
678 { .name = "mlock", .errmsg = true,
679 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
680 { .name = "mlockall", .errmsg = true,
681 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5 682 { .name = "mmap", .hexret = true,
ae685380 683 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
941557e0 684 [2] = SCA_MMAP_PROT, /* prot */
b6565c90 685 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
beccb2b5 686 { .name = "mprotect", .errmsg = true,
ae685380
ACM
687 .arg_scnprintf = { [0] = SCA_HEX, /* start */
688 [2] = SCA_MMAP_PROT, /* prot */ }, },
090389b6
ACM
689 { .name = "mq_unlink", .errmsg = true,
690 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
ae685380
ACM
691 { .name = "mremap", .hexret = true,
692 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
86998dda 693 [3] = SCA_MREMAP_FLAGS, /* flags */
ae685380 694 [4] = SCA_HEX, /* new_addr */ }, },
3d903aa7
ACM
695 { .name = "munlock", .errmsg = true,
696 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5
ACM
697 { .name = "munmap", .errmsg = true,
698 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
75b757ca 699 { .name = "name_to_handle_at", .errmsg = true,
48000a1a 700 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
75b757ca 701 { .name = "newfstatat", .errmsg = true,
12f3ca4f 702 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
be65a89a 703 { .name = "open", .errmsg = true,
12f3ca4f 704 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 705 { .name = "open_by_handle_at", .errmsg = true,
75b757ca
ACM
706 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
707 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 708 { .name = "openat", .errmsg = true,
75b757ca
ACM
709 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
710 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
a1c2552d 711 { .name = "perf_event_open", .errmsg = true,
ccd9b2a7 712 .arg_scnprintf = { [2] = SCA_INT, /* cpu */
a1c2552d
ACM
713 [3] = SCA_FD, /* group_fd */
714 [4] = SCA_PERF_FLAGS, /* flags */ }, },
46cce19b
ACM
715 { .name = "pipe2", .errmsg = true,
716 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
aec1930b
ACM
717 { .name = "poll", .errmsg = true, .timeout = true, },
718 { .name = "ppoll", .errmsg = true, .timeout = true, },
b6565c90
ACM
719 { .name = "pread", .errmsg = true, .alias = "pread64", },
720 { .name = "preadv", .errmsg = true, .alias = "pread", },
453350dd 721 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
b6565c90
ACM
722 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
723 { .name = "pwritev", .errmsg = true, },
724 { .name = "read", .errmsg = true, },
12f3ca4f 725 { .name = "readlink", .errmsg = true, },
75b757ca 726 { .name = "readlinkat", .errmsg = true,
12f3ca4f 727 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
b6565c90 728 { .name = "readv", .errmsg = true, },
b2cc99fd 729 { .name = "recvfrom", .errmsg = true,
b6565c90 730 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 731 { .name = "recvmmsg", .errmsg = true,
b6565c90 732 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 733 { .name = "recvmsg", .errmsg = true,
b6565c90 734 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
12f3ca4f 735 { .name = "removexattr", .errmsg = true, },
75b757ca 736 { .name = "renameat", .errmsg = true,
48000a1a 737 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
12f3ca4f 738 { .name = "rmdir", .errmsg = true, },
8bad5b0a
ACM
739 { .name = "rt_sigaction", .errmsg = true,
740 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
453350dd 741 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
8bad5b0a
ACM
742 { .name = "rt_sigqueueinfo", .errmsg = true,
743 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
744 { .name = "rt_tgsigqueueinfo", .errmsg = true,
745 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
f0bbd602
ACM
746 { .name = "sched_getattr", .errmsg = true, },
747 { .name = "sched_setattr", .errmsg = true, },
a3bca91f
ACM
748 { .name = "sched_setscheduler", .errmsg = true,
749 .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
997bba8c
ACM
750 { .name = "seccomp", .errmsg = true,
751 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
752 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
aec1930b 753 { .name = "select", .errmsg = true, .timeout = true, },
b2cc99fd 754 { .name = "sendmmsg", .errmsg = true,
b6565c90 755 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 756 { .name = "sendmsg", .errmsg = true,
b6565c90 757 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 758 { .name = "sendto", .errmsg = true,
b6565c90 759 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
c65f1070 760 { .name = "set_tid_address", .errpid = true, },
453350dd 761 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
d1d438a3 762 { .name = "setpgid", .errmsg = true, },
453350dd 763 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f 764 { .name = "setxattr", .errmsg = true, },
b6565c90 765 { .name = "shutdown", .errmsg = true, },
e10bce81 766 { .name = "socket", .errmsg = true,
a28b24b2
ACM
767 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
768 [1] = SCA_SK_TYPE, /* type */ },
07120aa5
ACM
769 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
770 { .name = "socketpair", .errmsg = true,
771 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
772 [1] = SCA_SK_TYPE, /* type */ },
e10bce81 773 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
12f3ca4f
ACM
774 { .name = "stat", .errmsg = true, .alias = "newstat", },
775 { .name = "statfs", .errmsg = true, },
fd5cead2
ACM
776 { .name = "statx", .errmsg = true,
777 .arg_scnprintf = { [0] = SCA_FDAT, /* flags */
778 [2] = SCA_STATX_FLAGS, /* flags */
779 [3] = SCA_STATX_MASK, /* mask */ }, },
34221118
ACM
780 { .name = "swapoff", .errmsg = true,
781 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
782 { .name = "swapon", .errmsg = true,
783 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
75b757ca 784 { .name = "symlinkat", .errmsg = true,
48000a1a 785 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
8bad5b0a
ACM
786 { .name = "tgkill", .errmsg = true,
787 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
788 { .name = "tkill", .errmsg = true,
789 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f 790 { .name = "truncate", .errmsg = true, },
e5959683 791 { .name = "uname", .errmsg = true, .alias = "newuname", },
75b757ca 792 { .name = "unlinkat", .errmsg = true,
12f3ca4f
ACM
793 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
794 { .name = "utime", .errmsg = true, },
75b757ca 795 { .name = "utimensat", .errmsg = true,
12f3ca4f
ACM
796 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
797 { .name = "utimes", .errmsg = true, },
b6565c90 798 { .name = "vmsplice", .errmsg = true, },
11c8e39f 799 { .name = "wait4", .errpid = true,
7206b900 800 .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
11c8e39f 801 { .name = "waitid", .errpid = true,
7206b900 802 .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
b6565c90
ACM
803 { .name = "write", .errmsg = true, },
804 { .name = "writev", .errmsg = true, },
514f1c67
ACM
805};
806
807static int syscall_fmt__cmp(const void *name, const void *fmtp)
808{
809 const struct syscall_fmt *fmt = fmtp;
810 return strcmp(name, fmt->name);
811}
812
813static struct syscall_fmt *syscall_fmt__find(const char *name)
814{
815 const int nmemb = ARRAY_SIZE(syscall_fmts);
816 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
817}
818
/*
 * What is known about one syscall.
 *
 * @tp_format:     tracepoint format of the syscall's event.
 * @nr_args, @args: number of arguments and their format fields.
 * @name:          syscall name.
 * @is_exit:       NOTE(review): presumably marks exit-like syscalls that
 *                 never return; confirm against the users of this flag.
 * @fmt:           entry of syscall_fmts[] for this syscall, if any.
 * @arg_scnprintf, @arg_parm: per-argument formatters and their parameters.
 */
struct syscall {
	struct event_format *tp_format;
	int		    nr_args;
	struct format_field *args;
	const char	    *name;
	bool		    is_exit;
	struct syscall_fmt  *fmt;
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void		    **arg_parm;
};
829
fd2b2975
ACM
830/*
831 * We need to have this 'calculated' boolean because in some cases we really
832 * don't know what is the duration of a syscall, for instance, when we start
833 * a session and some threads are waiting for a syscall to finish, say 'poll',
834 * in which case all we can do is to print "( ? ) for duration and for the
835 * start timestamp.
836 */
837static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
60c907ab
ACM
838{
839 double duration = (double)t / NSEC_PER_MSEC;
840 size_t printed = fprintf(fp, "(");
841
fd2b2975
ACM
842 if (!calculated)
843 printed += fprintf(fp, " ? ");
844 else if (duration >= 1.0)
60c907ab
ACM
845 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
846 else if (duration >= 0.01)
847 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
848 else
849 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
c24ff998 850 return printed + fprintf(fp, "): ");
60c907ab
ACM
851}
852
f994592d
ACM
853/**
854 * filename.ptr: The filename char pointer that will be vfs_getname'd
855 * filename.entry_str_pos: Where to insert the string translated from
856 * filename.ptr by the vfs_getname tracepoint/kprobe.
857 */
752fde44
ACM
858struct thread_trace {
859 u64 entry_time;
752fde44 860 bool entry_pending;
efd5745e 861 unsigned long nr_events;
a2ea67d7 862 unsigned long pfmaj, pfmin;
752fde44 863 char *entry_str;
1302d88e 864 double runtime_ms;
f994592d
ACM
865 struct {
866 unsigned long ptr;
7f4f8001
ACM
867 short int entry_str_pos;
868 bool pending_open;
869 unsigned int namelen;
870 char *name;
f994592d 871 } filename;
75b757ca
ACM
872 struct {
873 int max;
874 char **table;
875 } paths;
bf2575c1
DA
876
877 struct intlist *syscall_stats;
752fde44
ACM
878};
879
880static struct thread_trace *thread_trace__new(void)
881{
75b757ca
ACM
882 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
883
884 if (ttrace)
885 ttrace->paths.max = -1;
886
bf2575c1
DA
887 ttrace->syscall_stats = intlist__new(NULL);
888
75b757ca 889 return ttrace;
752fde44
ACM
890}
891
c24ff998 892static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752fde44 893{
efd5745e
ACM
894 struct thread_trace *ttrace;
895
752fde44
ACM
896 if (thread == NULL)
897 goto fail;
898
89dceb22
NK
899 if (thread__priv(thread) == NULL)
900 thread__set_priv(thread, thread_trace__new());
48000a1a 901
89dceb22 902 if (thread__priv(thread) == NULL)
752fde44
ACM
903 goto fail;
904
89dceb22 905 ttrace = thread__priv(thread);
efd5745e
ACM
906 ++ttrace->nr_events;
907
908 return ttrace;
752fde44 909fail:
c24ff998 910 color_fprintf(fp, PERF_COLOR_RED,
752fde44
ACM
911 "WARNING: not enough memory, dropping samples!\n");
912 return NULL;
913}
914
598d02c5
SF
/* Bit flags; presumably select major/minor page fault tracing - confirm at the users. */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size, in bytes, of each thread's entry_str buffer. */
static const size_t trace__entry_str_size = 2048;
919
97119f37 920static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
75b757ca 921{
89dceb22 922 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
923
924 if (fd > ttrace->paths.max) {
925 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
926
927 if (npath == NULL)
928 return -1;
929
930 if (ttrace->paths.max != -1) {
931 memset(npath + ttrace->paths.max + 1, 0,
932 (fd - ttrace->paths.max) * sizeof(char *));
933 } else {
934 memset(npath, 0, (fd + 1) * sizeof(char *));
935 }
936
937 ttrace->paths.table = npath;
938 ttrace->paths.max = fd;
939 }
940
941 ttrace->paths.table[fd] = strdup(pathname);
942
943 return ttrace->paths.table[fd] != NULL ? 0 : -1;
944}
945
97119f37
ACM
946static int thread__read_fd_path(struct thread *thread, int fd)
947{
948 char linkname[PATH_MAX], pathname[PATH_MAX];
949 struct stat st;
950 int ret;
951
952 if (thread->pid_ == thread->tid) {
953 scnprintf(linkname, sizeof(linkname),
954 "/proc/%d/fd/%d", thread->pid_, fd);
955 } else {
956 scnprintf(linkname, sizeof(linkname),
957 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
958 }
959
960 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
961 return -1;
962
963 ret = readlink(linkname, pathname, sizeof(pathname));
964
965 if (ret < 0 || ret > st.st_size)
966 return -1;
967
968 pathname[ret] = '\0';
969 return trace__set_fd_pathname(thread, fd, pathname);
970}
971
c522739d
ACM
972static const char *thread__fd_path(struct thread *thread, int fd,
973 struct trace *trace)
75b757ca 974{
89dceb22 975 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
976
977 if (ttrace == NULL)
978 return NULL;
979
980 if (fd < 0)
981 return NULL;
982
cdcd1e6b 983 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
c522739d
ACM
984 if (!trace->live)
985 return NULL;
986 ++trace->stats.proc_getname;
cdcd1e6b 987 if (thread__read_fd_path(thread, fd))
c522739d
ACM
988 return NULL;
989 }
75b757ca
ACM
990
991 return ttrace->paths.table[fd];
992}
993
994static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
995 struct syscall_arg *arg)
996{
997 int fd = arg->val;
998 size_t printed = scnprintf(bf, size, "%d", fd);
c522739d 999 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
75b757ca
ACM
1000
1001 if (path)
1002 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1003
1004 return printed;
1005}
1006
1007static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1008 struct syscall_arg *arg)
1009{
1010 int fd = arg->val;
1011 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
89dceb22 1012 struct thread_trace *ttrace = thread__priv(arg->thread);
75b757ca 1013
04662523
ACM
1014 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1015 zfree(&ttrace->paths.table[fd]);
75b757ca
ACM
1016
1017 return printed;
1018}
1019
f994592d
ACM
1020static void thread__set_filename_pos(struct thread *thread, const char *bf,
1021 unsigned long ptr)
1022{
1023 struct thread_trace *ttrace = thread__priv(thread);
1024
1025 ttrace->filename.ptr = ptr;
1026 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1027}
1028
1029static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1030 struct syscall_arg *arg)
1031{
1032 unsigned long ptr = arg->val;
1033
1034 if (!arg->trace->vfs_getname)
1035 return scnprintf(bf, size, "%#x", ptr);
1036
1037 thread__set_filename_pos(arg->thread, bf, ptr);
1038 return 0;
1039}
1040
ae9ed035
ACM
1041static bool trace__filter_duration(struct trace *trace, double t)
1042{
1043 return t < (trace->duration_filter * NSEC_PER_MSEC);
1044}
1045
fd2b2975 1046static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
752fde44
ACM
1047{
1048 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1049
60c907ab 1050 return fprintf(fp, "%10.3f ", ts);
752fde44
ACM
1051}
1052
fd2b2975
ACM
1053/*
1054 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1055 * using ttrace->entry_time for a thread that receives a sys_exit without
1056 * first having received a sys_enter ("poll" issued before tracing session
1057 * starts, lost sys_enter exit due to ring buffer overflow).
1058 */
1059static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1060{
1061 if (tstamp > 0)
1062 return __trace__fprintf_tstamp(trace, tstamp, fp);
1063
1064 return fprintf(fp, " ? ");
1065}
1066
/*
 * Flags written from sig_handler(). They are volatile sig_atomic_t because
 * that is the only object type the C standard guarantees can be safely
 * stored to from an asynchronous signal handler.
 */
static volatile sig_atomic_t done = 0;
static volatile sig_atomic_t interrupted = 0;

/*
 * Signal handler: request termination, and remember whether the request
 * came from SIGINT.
 */
static void sig_handler(int sig)
{
	done = 1;
	interrupted = sig == SIGINT;
}
1075
752fde44 1076static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
fd2b2975 1077 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
752fde44
ACM
1078{
1079 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
fd2b2975 1080 printed += fprintf_duration(duration, duration_calculated, fp);
752fde44 1081
50c95cbd
ACM
1082 if (trace->multiple_threads) {
1083 if (trace->show_comm)
1902efe7 1084 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
38051234 1085 printed += fprintf(fp, "%d ", thread->tid);
50c95cbd 1086 }
752fde44
ACM
1087
1088 return printed;
1089}
1090
c24ff998 1091static int trace__process_event(struct trace *trace, struct machine *machine,
162f0bef 1092 union perf_event *event, struct perf_sample *sample)
752fde44
ACM
1093{
1094 int ret = 0;
1095
1096 switch (event->header.type) {
1097 case PERF_RECORD_LOST:
c24ff998 1098 color_fprintf(trace->output, PERF_COLOR_RED,
752fde44 1099 "LOST %" PRIu64 " events!\n", event->lost.lost);
162f0bef 1100 ret = machine__process_lost_event(machine, event, sample);
3ed5ca2e 1101 break;
752fde44 1102 default:
162f0bef 1103 ret = machine__process_event(machine, event, sample);
752fde44
ACM
1104 break;
1105 }
1106
1107 return ret;
1108}
1109
c24ff998 1110static int trace__tool_process(struct perf_tool *tool,
752fde44 1111 union perf_event *event,
162f0bef 1112 struct perf_sample *sample,
752fde44
ACM
1113 struct machine *machine)
1114{
c24ff998 1115 struct trace *trace = container_of(tool, struct trace, tool);
162f0bef 1116 return trace__process_event(trace, machine, event, sample);
752fde44
ACM
1117}
1118
caf8a0d0
ACM
1119static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1120{
1121 struct machine *machine = vmachine;
1122
1123 if (machine->kptr_restrict_warned)
1124 return NULL;
1125
1126 if (symbol_conf.kptr_restrict) {
1127 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1128 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1129 "Kernel samples will not be resolved.\n");
1130 machine->kptr_restrict_warned = true;
1131 return NULL;
1132 }
1133
1134 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1135}
1136
752fde44
ACM
1137static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1138{
0a7e6d1b 1139 int err = symbol__init(NULL);
752fde44
ACM
1140
1141 if (err)
1142 return err;
1143
8fb598e5
DA
1144 trace->host = machine__new_host();
1145 if (trace->host == NULL)
1146 return -ENOMEM;
752fde44 1147
caf8a0d0 1148 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
706c3da4
ACM
1149 return -errno;
1150
a33fbd56 1151 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
9d9cad76
KL
1152 evlist->threads, trace__tool_process, false,
1153 trace->opts.proc_map_timeout);
752fde44
ACM
1154 if (err)
1155 symbol__exit();
1156
1157 return err;
1158}
1159
13d4ff3e
ACM
1160static int syscall__set_arg_fmts(struct syscall *sc)
1161{
1162 struct format_field *field;
b6565c90 1163 int idx = 0, len;
13d4ff3e 1164
f208bd8d 1165 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
13d4ff3e
ACM
1166 if (sc->arg_scnprintf == NULL)
1167 return -1;
1168
1f115cb7
ACM
1169 if (sc->fmt)
1170 sc->arg_parm = sc->fmt->arg_parm;
1171
f208bd8d 1172 for (field = sc->args; field; field = field->next) {
beccb2b5
ACM
1173 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1174 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
12f3ca4f
ACM
1175 else if (strcmp(field->type, "const char *") == 0 &&
1176 (strcmp(field->name, "filename") == 0 ||
1177 strcmp(field->name, "path") == 0 ||
1178 strcmp(field->name, "pathname") == 0))
1179 sc->arg_scnprintf[idx] = SCA_FILENAME;
beccb2b5 1180 else if (field->flags & FIELD_IS_POINTER)
13d4ff3e 1181 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
d1d438a3
ACM
1182 else if (strcmp(field->type, "pid_t") == 0)
1183 sc->arg_scnprintf[idx] = SCA_PID;
ba2f22cf
ACM
1184 else if (strcmp(field->type, "umode_t") == 0)
1185 sc->arg_scnprintf[idx] = SCA_MODE_T;
b6565c90
ACM
1186 else if ((strcmp(field->type, "int") == 0 ||
1187 strcmp(field->type, "unsigned int") == 0 ||
1188 strcmp(field->type, "long") == 0) &&
1189 (len = strlen(field->name)) >= 2 &&
1190 strcmp(field->name + len - 2, "fd") == 0) {
1191 /*
1192 * /sys/kernel/tracing/events/syscalls/sys_enter*
1193 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1194 * 65 int
1195 * 23 unsigned int
1196 * 7 unsigned long
1197 */
1198 sc->arg_scnprintf[idx] = SCA_FD;
1199 }
13d4ff3e
ACM
1200 ++idx;
1201 }
1202
1203 return 0;
1204}
1205
514f1c67
ACM
1206static int trace__read_syscall_info(struct trace *trace, int id)
1207{
1208 char tp_name[128];
1209 struct syscall *sc;
fd0db102 1210 const char *name = syscalltbl__name(trace->sctbl, id);
3a531260
ACM
1211
1212 if (name == NULL)
1213 return -1;
514f1c67
ACM
1214
1215 if (id > trace->syscalls.max) {
1216 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1217
1218 if (nsyscalls == NULL)
1219 return -1;
1220
1221 if (trace->syscalls.max != -1) {
1222 memset(nsyscalls + trace->syscalls.max + 1, 0,
1223 (id - trace->syscalls.max) * sizeof(*sc));
1224 } else {
1225 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1226 }
1227
1228 trace->syscalls.table = nsyscalls;
1229 trace->syscalls.max = id;
1230 }
1231
1232 sc = trace->syscalls.table + id;
3a531260 1233 sc->name = name;
2ae3a312 1234
3a531260 1235 sc->fmt = syscall_fmt__find(sc->name);
514f1c67 1236
aec1930b 1237 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
97978b3e 1238 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1239
8dd2a131 1240 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
aec1930b 1241 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
97978b3e 1242 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1243 }
514f1c67 1244
8dd2a131 1245 if (IS_ERR(sc->tp_format))
13d4ff3e
ACM
1246 return -1;
1247
f208bd8d
ACM
1248 sc->args = sc->tp_format->format.fields;
1249 sc->nr_args = sc->tp_format->format.nr_fields;
c42de706
TS
1250 /*
1251 * We need to check and discard the first variable '__syscall_nr'
1252 * or 'nr' that mean the syscall number. It is needless here.
1253 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1254 */
1255 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
f208bd8d
ACM
1256 sc->args = sc->args->next;
1257 --sc->nr_args;
1258 }
1259
5089f20e
ACM
1260 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1261
13d4ff3e 1262 return syscall__set_arg_fmts(sc);
514f1c67
ACM
1263}
1264
d0cc439b
ACM
1265static int trace__validate_ev_qualifier(struct trace *trace)
1266{
8b3ce757 1267 int err = 0, i;
d0cc439b
ACM
1268 struct str_node *pos;
1269
8b3ce757
ACM
1270 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1271 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1272 sizeof(trace->ev_qualifier_ids.entries[0]));
1273
1274 if (trace->ev_qualifier_ids.entries == NULL) {
1275 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1276 trace->output);
1277 err = -EINVAL;
1278 goto out;
1279 }
1280
1281 i = 0;
1282
602a1f4d 1283 strlist__for_each_entry(pos, trace->ev_qualifier) {
d0cc439b 1284 const char *sc = pos->s;
fd0db102 1285 int id = syscalltbl__id(trace->sctbl, sc);
d0cc439b 1286
8b3ce757 1287 if (id < 0) {
d0cc439b
ACM
1288 if (err == 0) {
1289 fputs("Error:\tInvalid syscall ", trace->output);
1290 err = -EINVAL;
1291 } else {
1292 fputs(", ", trace->output);
1293 }
1294
1295 fputs(sc, trace->output);
1296 }
8b3ce757
ACM
1297
1298 trace->ev_qualifier_ids.entries[i++] = id;
d0cc439b
ACM
1299 }
1300
1301 if (err < 0) {
1302 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1303 "\nHint:\tand: 'man syscalls'\n", trace->output);
8b3ce757
ACM
1304 zfree(&trace->ev_qualifier_ids.entries);
1305 trace->ev_qualifier_ids.nr = 0;
d0cc439b 1306 }
8b3ce757 1307out:
d0cc439b
ACM
1308 return err;
1309}
1310
55d43bca
DA
1311/*
1312 * args is to be interpreted as a series of longs but we need to handle
1313 * 8-byte unaligned accesses. args points to raw_data within the event
1314 * and raw_data is guaranteed to be 8-byte unaligned because it is
1315 * preceded by raw_size which is a u32. So we need to copy args to a temp
1316 * variable to read it. Most notably this avoids extended load instructions
1317 * on unaligned addresses
1318 */
1319
752fde44 1320static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
55d43bca 1321 unsigned char *args, struct trace *trace,
75b757ca 1322 struct thread *thread)
514f1c67 1323{
514f1c67 1324 size_t printed = 0;
55d43bca
DA
1325 unsigned char *p;
1326 unsigned long val;
514f1c67 1327
f208bd8d 1328 if (sc->args != NULL) {
514f1c67 1329 struct format_field *field;
01533e97
ACM
1330 u8 bit = 1;
1331 struct syscall_arg arg = {
75b757ca
ACM
1332 .idx = 0,
1333 .mask = 0,
1334 .trace = trace,
1335 .thread = thread,
01533e97 1336 };
6e7eeb51 1337
f208bd8d 1338 for (field = sc->args; field;
01533e97
ACM
1339 field = field->next, ++arg.idx, bit <<= 1) {
1340 if (arg.mask & bit)
6e7eeb51 1341 continue;
55d43bca
DA
1342
1343 /* special care for unaligned accesses */
1344 p = args + sizeof(unsigned long) * arg.idx;
1345 memcpy(&val, p, sizeof(val));
1346
4aa58232
ACM
1347 /*
1348 * Suppress this argument if its value is zero and
1349 * and we don't have a string associated in an
1350 * strarray for it.
1351 */
55d43bca 1352 if (val == 0 &&
4aa58232
ACM
1353 !(sc->arg_scnprintf &&
1354 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1355 sc->arg_parm[arg.idx]))
22ae5cf1
ACM
1356 continue;
1357
752fde44 1358 printed += scnprintf(bf + printed, size - printed,
13d4ff3e 1359 "%s%s: ", printed ? ", " : "", field->name);
01533e97 1360 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
55d43bca 1361 arg.val = val;
1f115cb7
ACM
1362 if (sc->arg_parm)
1363 arg.parm = sc->arg_parm[arg.idx];
01533e97
ACM
1364 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1365 size - printed, &arg);
6e7eeb51 1366 } else {
13d4ff3e 1367 printed += scnprintf(bf + printed, size - printed,
55d43bca 1368 "%ld", val);
6e7eeb51 1369 }
514f1c67 1370 }
4c4d6e51
ACM
1371 } else if (IS_ERR(sc->tp_format)) {
1372 /*
1373 * If we managed to read the tracepoint /format file, then we
1374 * may end up not having any args, like with gettid(), so only
1375 * print the raw args when we didn't manage to read it.
1376 */
01533e97
ACM
1377 int i = 0;
1378
514f1c67 1379 while (i < 6) {
55d43bca
DA
1380 /* special care for unaligned accesses */
1381 p = args + sizeof(unsigned long) * i;
1382 memcpy(&val, p, sizeof(val));
752fde44
ACM
1383 printed += scnprintf(bf + printed, size - printed,
1384 "%sarg%d: %ld",
55d43bca 1385 printed ? ", " : "", i, val);
514f1c67
ACM
1386 ++i;
1387 }
1388 }
1389
1390 return printed;
1391}
1392
/*
 * Signature shared by the per-tracepoint sample handlers in this file
 * (trace__sys_enter(), trace__sys_exit(), ...).
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1396
1397static struct syscall *trace__syscall_info(struct trace *trace,
bf2575c1 1398 struct perf_evsel *evsel, int id)
ba3d7dee 1399{
ba3d7dee
ACM
1400
1401 if (id < 0) {
adaa18bf
ACM
1402
1403 /*
1404 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1405 * before that, leaving at a higher verbosity level till that is
1406 * explained. Reproduced with plain ftrace with:
1407 *
1408 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1409 * grep "NR -1 " /t/trace_pipe
1410 *
1411 * After generating some load on the machine.
1412 */
1413 if (verbose > 1) {
1414 static u64 n;
1415 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1416 id, perf_evsel__name(evsel), ++n);
1417 }
ba3d7dee
ACM
1418 return NULL;
1419 }
1420
1421 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1422 trace__read_syscall_info(trace, id))
1423 goto out_cant_read;
1424
1425 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1426 goto out_cant_read;
1427
1428 return &trace->syscalls.table[id];
1429
1430out_cant_read:
bb963e16 1431 if (verbose > 0) {
7c304ee0
ACM
1432 fprintf(trace->output, "Problems reading syscall %d", id);
1433 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1434 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1435 fputs(" information\n", trace->output);
1436 }
ba3d7dee
ACM
1437 return NULL;
1438}
1439
bf2575c1
DA
1440static void thread__update_stats(struct thread_trace *ttrace,
1441 int id, struct perf_sample *sample)
1442{
1443 struct int_node *inode;
1444 struct stats *stats;
1445 u64 duration = 0;
1446
1447 inode = intlist__findnew(ttrace->syscall_stats, id);
1448 if (inode == NULL)
1449 return;
1450
1451 stats = inode->priv;
1452 if (stats == NULL) {
1453 stats = malloc(sizeof(struct stats));
1454 if (stats == NULL)
1455 return;
1456 init_stats(stats);
1457 inode->priv = stats;
1458 }
1459
1460 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1461 duration = sample->time - ttrace->entry_time;
1462
1463 update_stats(stats, duration);
1464}
1465
e596663e
ACM
1466static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1467{
1468 struct thread_trace *ttrace;
1469 u64 duration;
1470 size_t printed;
1471
1472 if (trace->current == NULL)
1473 return 0;
1474
1475 ttrace = thread__priv(trace->current);
1476
1477 if (!ttrace->entry_pending)
1478 return 0;
1479
1480 duration = sample->time - ttrace->entry_time;
1481
fd2b2975 1482 printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
e596663e
ACM
1483 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1484 ttrace->entry_pending = false;
1485
1486 return printed;
1487}
1488
ba3d7dee 1489static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1490 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1491 struct perf_sample *sample)
1492{
752fde44 1493 char *msg;
ba3d7dee 1494 void *args;
752fde44 1495 size_t printed = 0;
2ae3a312 1496 struct thread *thread;
b91fc39f 1497 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
bf2575c1 1498 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1499 struct thread_trace *ttrace;
1500
1501 if (sc == NULL)
1502 return -1;
ba3d7dee 1503
8fb598e5 1504 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1505 ttrace = thread__trace(thread, trace->output);
2ae3a312 1506 if (ttrace == NULL)
b91fc39f 1507 goto out_put;
ba3d7dee 1508
77170988 1509 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
752fde44
ACM
1510
1511 if (ttrace->entry_str == NULL) {
e4d44e83 1512 ttrace->entry_str = malloc(trace__entry_str_size);
752fde44 1513 if (!ttrace->entry_str)
b91fc39f 1514 goto out_put;
752fde44
ACM
1515 }
1516
5cf9c84e 1517 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
6ebad5c1 1518 trace__printf_interrupted_entry(trace, sample);
e596663e 1519
752fde44
ACM
1520 ttrace->entry_time = sample->time;
1521 msg = ttrace->entry_str;
e4d44e83 1522 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
752fde44 1523
e4d44e83 1524 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
75b757ca 1525 args, trace, thread);
752fde44 1526
5089f20e 1527 if (sc->is_exit) {
5cf9c84e 1528 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
fd2b2975 1529 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
c008f78f 1530 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
ae9ed035 1531 }
7f4f8001 1532 } else {
752fde44 1533 ttrace->entry_pending = true;
7f4f8001
ACM
1534 /* See trace__vfs_getname & trace__sys_exit */
1535 ttrace->filename.pending_open = false;
1536 }
ba3d7dee 1537
f3b623b8
ACM
1538 if (trace->current != thread) {
1539 thread__put(trace->current);
1540 trace->current = thread__get(thread);
1541 }
b91fc39f
ACM
1542 err = 0;
1543out_put:
1544 thread__put(thread);
1545 return err;
ba3d7dee
ACM
1546}
1547
5cf9c84e
ACM
1548static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1549 struct perf_sample *sample,
1550 struct callchain_cursor *cursor)
202ff968
ACM
1551{
1552 struct addr_location al;
5cf9c84e
ACM
1553
1554 if (machine__resolve(trace->host, &al, sample) < 0 ||
1555 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1556 return -1;
1557
1558 return 0;
1559}
1560
1561static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1562{
202ff968 1563 /* TODO: user-configurable print_opts */
e20ab86e
ACM
1564 const unsigned int print_opts = EVSEL__PRINT_SYM |
1565 EVSEL__PRINT_DSO |
1566 EVSEL__PRINT_UNKNOWN_AS_ADDR;
202ff968 1567
d327e60c 1568 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
202ff968
ACM
1569}
1570
ba3d7dee 1571static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1572 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1573 struct perf_sample *sample)
1574{
2c82c3ad 1575 long ret;
60c907ab 1576 u64 duration = 0;
fd2b2975 1577 bool duration_calculated = false;
2ae3a312 1578 struct thread *thread;
5cf9c84e 1579 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
bf2575c1 1580 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1581 struct thread_trace *ttrace;
1582
1583 if (sc == NULL)
1584 return -1;
ba3d7dee 1585
8fb598e5 1586 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1587 ttrace = thread__trace(thread, trace->output);
2ae3a312 1588 if (ttrace == NULL)
b91fc39f 1589 goto out_put;
ba3d7dee 1590
bf2575c1
DA
1591 if (trace->summary)
1592 thread__update_stats(ttrace, id, sample);
1593
77170988 1594 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
ba3d7dee 1595
fd0db102 1596 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
7f4f8001
ACM
1597 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1598 ttrace->filename.pending_open = false;
c522739d
ACM
1599 ++trace->stats.vfs_getname;
1600 }
1601
ae9ed035 1602 if (ttrace->entry_time) {
60c907ab 1603 duration = sample->time - ttrace->entry_time;
ae9ed035
ACM
1604 if (trace__filter_duration(trace, duration))
1605 goto out;
fd2b2975 1606 duration_calculated = true;
ae9ed035
ACM
1607 } else if (trace->duration_filter)
1608 goto out;
60c907ab 1609
5cf9c84e
ACM
1610 if (sample->callchain) {
1611 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1612 if (callchain_ret == 0) {
1613 if (callchain_cursor.nr < trace->min_stack)
1614 goto out;
1615 callchain_ret = 1;
1616 }
1617 }
1618
fd2eabaf
DA
1619 if (trace->summary_only)
1620 goto out;
1621
fd2b2975 1622 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
752fde44
ACM
1623
1624 if (ttrace->entry_pending) {
c24ff998 1625 fprintf(trace->output, "%-70s", ttrace->entry_str);
752fde44 1626 } else {
c24ff998
ACM
1627 fprintf(trace->output, " ... [");
1628 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1629 fprintf(trace->output, "]: %s()", sc->name);
752fde44
ACM
1630 }
1631
da3c9a44
ACM
1632 if (sc->fmt == NULL) {
1633signed_print:
2c82c3ad 1634 fprintf(trace->output, ") = %ld", ret);
11c8e39f 1635 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
942a91ed 1636 char bf[STRERR_BUFSIZE];
c8b5f2c9 1637 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
ba3d7dee
ACM
1638 *e = audit_errno_to_name(-ret);
1639
c24ff998 1640 fprintf(trace->output, ") = -1 %s %s", e, emsg);
da3c9a44 1641 } else if (ret == 0 && sc->fmt->timeout)
c24ff998 1642 fprintf(trace->output, ") = 0 Timeout");
04b34729 1643 else if (sc->fmt->hexret)
2c82c3ad 1644 fprintf(trace->output, ") = %#lx", ret);
11c8e39f
ACM
1645 else if (sc->fmt->errpid) {
1646 struct thread *child = machine__find_thread(trace->host, ret, ret);
1647
1648 if (child != NULL) {
1649 fprintf(trace->output, ") = %ld", ret);
1650 if (child->comm_set)
1651 fprintf(trace->output, " (%s)", thread__comm_str(child));
1652 thread__put(child);
1653 }
1654 } else
da3c9a44 1655 goto signed_print;
ba3d7dee 1656
c24ff998 1657 fputc('\n', trace->output);
566a0885 1658
5cf9c84e
ACM
1659 if (callchain_ret > 0)
1660 trace__fprintf_callchain(trace, sample);
1661 else if (callchain_ret < 0)
1662 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
ae9ed035 1663out:
752fde44 1664 ttrace->entry_pending = false;
b91fc39f
ACM
1665 err = 0;
1666out_put:
1667 thread__put(thread);
1668 return err;
ba3d7dee
ACM
1669}
1670
c522739d 1671static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1672 union perf_event *event __maybe_unused,
c522739d
ACM
1673 struct perf_sample *sample)
1674{
f994592d
ACM
1675 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1676 struct thread_trace *ttrace;
1677 size_t filename_len, entry_str_len, to_move;
1678 ssize_t remaining_space;
1679 char *pos;
7f4f8001 1680 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
f994592d
ACM
1681
1682 if (!thread)
1683 goto out;
1684
1685 ttrace = thread__priv(thread);
1686 if (!ttrace)
ef65e96e 1687 goto out_put;
f994592d 1688
7f4f8001 1689 filename_len = strlen(filename);
39f0e7a8 1690 if (filename_len == 0)
ef65e96e 1691 goto out_put;
7f4f8001
ACM
1692
1693 if (ttrace->filename.namelen < filename_len) {
1694 char *f = realloc(ttrace->filename.name, filename_len + 1);
1695
1696 if (f == NULL)
ef65e96e 1697 goto out_put;
7f4f8001
ACM
1698
1699 ttrace->filename.namelen = filename_len;
1700 ttrace->filename.name = f;
1701 }
1702
1703 strcpy(ttrace->filename.name, filename);
1704 ttrace->filename.pending_open = true;
1705
f994592d 1706 if (!ttrace->filename.ptr)
ef65e96e 1707 goto out_put;
f994592d
ACM
1708
1709 entry_str_len = strlen(ttrace->entry_str);
1710 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1711 if (remaining_space <= 0)
ef65e96e 1712 goto out_put;
f994592d 1713
f994592d
ACM
1714 if (filename_len > (size_t)remaining_space) {
1715 filename += filename_len - remaining_space;
1716 filename_len = remaining_space;
1717 }
1718
1719 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1720 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1721 memmove(pos + filename_len, pos, to_move);
1722 memcpy(pos, filename, filename_len);
1723
1724 ttrace->filename.ptr = 0;
1725 ttrace->filename.entry_str_pos = 0;
ef65e96e
ACM
1726out_put:
1727 thread__put(thread);
f994592d 1728out:
c522739d
ACM
1729 return 0;
1730}
1731
1302d88e 1732static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1733 union perf_event *event __maybe_unused,
1302d88e
ACM
1734 struct perf_sample *sample)
1735{
1736 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1737 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
8fb598e5 1738 struct thread *thread = machine__findnew_thread(trace->host,
314add6b
AH
1739 sample->pid,
1740 sample->tid);
c24ff998 1741 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1302d88e
ACM
1742
1743 if (ttrace == NULL)
1744 goto out_dump;
1745
1746 ttrace->runtime_ms += runtime_ms;
1747 trace->runtime_ms += runtime_ms;
ef65e96e 1748out_put:
b91fc39f 1749 thread__put(thread);
1302d88e
ACM
1750 return 0;
1751
1752out_dump:
c24ff998 1753 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1302d88e
ACM
1754 evsel->name,
1755 perf_evsel__strval(evsel, sample, "comm"),
1756 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1757 runtime,
1758 perf_evsel__intval(evsel, sample, "vruntime"));
ef65e96e 1759 goto out_put;
1302d88e
ACM
1760}
1761
1d6c9407
WN
1762static void bpf_output__printer(enum binary_printer_ops op,
1763 unsigned int val, void *extra)
1764{
1765 FILE *output = extra;
1766 unsigned char ch = (unsigned char)val;
1767
1768 switch (op) {
1769 case BINARY_PRINT_CHAR_DATA:
1770 fprintf(output, "%c", isprint(ch) ? ch : '.');
1771 break;
1772 case BINARY_PRINT_DATA_BEGIN:
1773 case BINARY_PRINT_LINE_BEGIN:
1774 case BINARY_PRINT_ADDR:
1775 case BINARY_PRINT_NUM_DATA:
1776 case BINARY_PRINT_NUM_PAD:
1777 case BINARY_PRINT_SEP:
1778 case BINARY_PRINT_CHAR_PAD:
1779 case BINARY_PRINT_LINE_END:
1780 case BINARY_PRINT_DATA_END:
1781 default:
1782 break;
1783 }
1784}
1785
1786static void bpf_output__fprintf(struct trace *trace,
1787 struct perf_sample *sample)
1788{
1789 print_binary(sample->raw_data, sample->raw_size, 8,
1790 bpf_output__printer, trace->output);
1791}
1792
14a052df
ACM
1793static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1794 union perf_event *event __maybe_unused,
1795 struct perf_sample *sample)
1796{
7ad35615
ACM
1797 int callchain_ret = 0;
1798
1799 if (sample->callchain) {
1800 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1801 if (callchain_ret == 0) {
1802 if (callchain_cursor.nr < trace->min_stack)
1803 goto out;
1804 callchain_ret = 1;
1805 }
1806 }
1807
14a052df
ACM
1808 trace__printf_interrupted_entry(trace, sample);
1809 trace__fprintf_tstamp(trace, sample->time, trace->output);
0808921a
ACM
1810
1811 if (trace->trace_syscalls)
1812 fprintf(trace->output, "( ): ");
1813
1814 fprintf(trace->output, "%s:", evsel->name);
14a052df 1815
1d6c9407
WN
1816 if (perf_evsel__is_bpf_output(evsel)) {
1817 bpf_output__fprintf(trace, sample);
1818 } else if (evsel->tp_format) {
14a052df
ACM
1819 event_format__fprintf(evsel->tp_format, sample->cpu,
1820 sample->raw_data, sample->raw_size,
1821 trace->output);
1822 }
1823
1824 fprintf(trace->output, ")\n");
202ff968 1825
7ad35615
ACM
1826 if (callchain_ret > 0)
1827 trace__fprintf_callchain(trace, sample);
1828 else if (callchain_ret < 0)
1829 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1830out:
14a052df
ACM
1831 return 0;
1832}
1833
598d02c5
SF
1834static void print_location(FILE *f, struct perf_sample *sample,
1835 struct addr_location *al,
1836 bool print_dso, bool print_sym)
1837{
1838
bb963e16 1839 if ((verbose > 0 || print_dso) && al->map)
598d02c5
SF
1840 fprintf(f, "%s@", al->map->dso->long_name);
1841
bb963e16 1842 if ((verbose > 0 || print_sym) && al->sym)
4414a3c5 1843 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
598d02c5
SF
1844 al->addr - al->sym->start);
1845 else if (al->map)
4414a3c5 1846 fprintf(f, "0x%" PRIx64, al->addr);
598d02c5 1847 else
4414a3c5 1848 fprintf(f, "0x%" PRIx64, sample->addr);
598d02c5
SF
1849}
1850
1851static int trace__pgfault(struct trace *trace,
1852 struct perf_evsel *evsel,
473398a2 1853 union perf_event *event __maybe_unused,
598d02c5
SF
1854 struct perf_sample *sample)
1855{
1856 struct thread *thread;
598d02c5
SF
1857 struct addr_location al;
1858 char map_type = 'd';
a2ea67d7 1859 struct thread_trace *ttrace;
b91fc39f 1860 int err = -1;
1df54290 1861 int callchain_ret = 0;
598d02c5
SF
1862
1863 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1df54290
ACM
1864
1865 if (sample->callchain) {
1866 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1867 if (callchain_ret == 0) {
1868 if (callchain_cursor.nr < trace->min_stack)
1869 goto out_put;
1870 callchain_ret = 1;
1871 }
1872 }
1873
a2ea67d7
SF
1874 ttrace = thread__trace(thread, trace->output);
1875 if (ttrace == NULL)
b91fc39f 1876 goto out_put;
a2ea67d7
SF
1877
1878 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1879 ttrace->pfmaj++;
1880 else
1881 ttrace->pfmin++;
1882
1883 if (trace->summary_only)
b91fc39f 1884 goto out;
598d02c5 1885
473398a2 1886 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
598d02c5
SF
1887 sample->ip, &al);
1888
fd2b2975 1889 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
598d02c5
SF
1890
1891 fprintf(trace->output, "%sfault [",
1892 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1893 "maj" : "min");
1894
1895 print_location(trace->output, sample, &al, false, true);
1896
1897 fprintf(trace->output, "] => ");
1898
473398a2 1899 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
598d02c5
SF
1900 sample->addr, &al);
1901
1902 if (!al.map) {
473398a2 1903 thread__find_addr_location(thread, sample->cpumode,
598d02c5
SF
1904 MAP__FUNCTION, sample->addr, &al);
1905
1906 if (al.map)
1907 map_type = 'x';
1908 else
1909 map_type = '?';
1910 }
1911
1912 print_location(trace->output, sample, &al, true, false);
1913
1914 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
0c3a6ef4 1915
1df54290
ACM
1916 if (callchain_ret > 0)
1917 trace__fprintf_callchain(trace, sample);
1918 else if (callchain_ret < 0)
1919 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
b91fc39f
ACM
1920out:
1921 err = 0;
1922out_put:
1923 thread__put(thread);
1924 return err;
598d02c5
SF
1925}
1926
e6001980 1927static void trace__set_base_time(struct trace *trace,
8a07a809 1928 struct perf_evsel *evsel,
e6001980
ACM
1929 struct perf_sample *sample)
1930{
8a07a809
ACM
1931 /*
1932 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1933 * and don't use sample->time unconditionally, we may end up having
1934 * some other event in the future without PERF_SAMPLE_TIME for good
1935 * reason, i.e. we may not be interested in its timestamps, just in
1936 * it taking place, picking some piece of information when it
1937 * appears in our event stream (vfs_getname comes to mind).
1938 */
1939 if (trace->base_time == 0 && !trace->full_time &&
1940 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
e6001980
ACM
1941 trace->base_time = sample->time;
1942}
1943
6810fc91 1944static int trace__process_sample(struct perf_tool *tool,
0c82adcf 1945 union perf_event *event,
6810fc91
DA
1946 struct perf_sample *sample,
1947 struct perf_evsel *evsel,
1948 struct machine *machine __maybe_unused)
1949{
1950 struct trace *trace = container_of(tool, struct trace, tool);
aa07df6e 1951 struct thread *thread;
6810fc91
DA
1952 int err = 0;
1953
744a9719 1954 tracepoint_handler handler = evsel->handler;
6810fc91 1955
aa07df6e
DA
1956 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1957 if (thread && thread__is_filtered(thread))
ef65e96e 1958 goto out;
bdc89661 1959
e6001980 1960 trace__set_base_time(trace, evsel, sample);
6810fc91 1961
3160565f
DA
1962 if (handler) {
1963 ++trace->nr_events;
0c82adcf 1964 handler(trace, evsel, event, sample);
3160565f 1965 }
ef65e96e
ACM
1966out:
1967 thread__put(thread);
6810fc91
DA
1968 return err;
1969}
1970
1e28fe0a 1971static int trace__record(struct trace *trace, int argc, const char **argv)
5e2485b1
DA
1972{
1973 unsigned int rec_argc, i, j;
1974 const char **rec_argv;
1975 const char * const record_args[] = {
1976 "record",
1977 "-R",
1978 "-m", "1024",
1979 "-c", "1",
5e2485b1
DA
1980 };
1981
1e28fe0a
SF
1982 const char * const sc_args[] = { "-e", };
1983 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1984 const char * const majpf_args[] = { "-e", "major-faults" };
1985 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1986 const char * const minpf_args[] = { "-e", "minor-faults" };
1987 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1988
9aca7f17 1989 /* +1 is for the event string below */
1e28fe0a
SF
1990 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1991 majpf_args_nr + minpf_args_nr + argc;
5e2485b1
DA
1992 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1993
1994 if (rec_argv == NULL)
1995 return -ENOMEM;
1996
1e28fe0a 1997 j = 0;
5e2485b1 1998 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1e28fe0a
SF
1999 rec_argv[j++] = record_args[i];
2000
e281a960
SF
2001 if (trace->trace_syscalls) {
2002 for (i = 0; i < sc_args_nr; i++)
2003 rec_argv[j++] = sc_args[i];
2004
2005 /* event string may be different for older kernels - e.g., RHEL6 */
2006 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2007 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2008 else if (is_valid_tracepoint("syscalls:sys_enter"))
2009 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2010 else {
2011 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2012 return -1;
2013 }
9aca7f17 2014 }
9aca7f17 2015
1e28fe0a
SF
2016 if (trace->trace_pgfaults & TRACE_PFMAJ)
2017 for (i = 0; i < majpf_args_nr; i++)
2018 rec_argv[j++] = majpf_args[i];
2019
2020 if (trace->trace_pgfaults & TRACE_PFMIN)
2021 for (i = 0; i < minpf_args_nr; i++)
2022 rec_argv[j++] = minpf_args[i];
2023
2024 for (i = 0; i < (unsigned int)argc; i++)
2025 rec_argv[j++] = argv[i];
5e2485b1 2026
b0ad8ea6 2027 return cmd_record(j, rec_argv);
5e2485b1
DA
2028}
2029
bf2575c1
DA
2030static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2031
08c98776 2032static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
c522739d 2033{
ef503831 2034 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
8dd2a131
JO
2035
2036 if (IS_ERR(evsel))
08c98776 2037 return false;
c522739d
ACM
2038
2039 if (perf_evsel__field(evsel, "pathname") == NULL) {
2040 perf_evsel__delete(evsel);
08c98776 2041 return false;
c522739d
ACM
2042 }
2043
744a9719 2044 evsel->handler = trace__vfs_getname;
c522739d 2045 perf_evlist__add(evlist, evsel);
08c98776 2046 return true;
c522739d
ACM
2047}
2048
0ae537cb 2049static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
598d02c5
SF
2050{
2051 struct perf_evsel *evsel;
2052 struct perf_event_attr attr = {
2053 .type = PERF_TYPE_SOFTWARE,
2054 .mmap_data = 1,
598d02c5
SF
2055 };
2056
2057 attr.config = config;
0524798c 2058 attr.sample_period = 1;
598d02c5
SF
2059
2060 event_attr_init(&attr);
2061
2062 evsel = perf_evsel__new(&attr);
0ae537cb
ACM
2063 if (evsel)
2064 evsel->handler = trace__pgfault;
598d02c5 2065
0ae537cb 2066 return evsel;
598d02c5
SF
2067}
2068
ddbb1b13
ACM
2069static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2070{
2071 const u32 type = event->header.type;
2072 struct perf_evsel *evsel;
2073
ddbb1b13
ACM
2074 if (type != PERF_RECORD_SAMPLE) {
2075 trace__process_event(trace, trace->host, event, sample);
2076 return;
2077 }
2078
2079 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2080 if (evsel == NULL) {
2081 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2082 return;
2083 }
2084
e6001980
ACM
2085 trace__set_base_time(trace, evsel, sample);
2086
ddbb1b13
ACM
2087 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2088 sample->raw_data == NULL) {
2089 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2090 perf_evsel__name(evsel), sample->tid,
2091 sample->cpu, sample->raw_size);
2092 } else {
2093 tracepoint_handler handler = evsel->handler;
2094 handler(trace, evsel, event, sample);
2095 }
2096}
2097
c27366f0
ACM
2098static int trace__add_syscall_newtp(struct trace *trace)
2099{
2100 int ret = -1;
2101 struct perf_evlist *evlist = trace->evlist;
2102 struct perf_evsel *sys_enter, *sys_exit;
2103
2104 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2105 if (sys_enter == NULL)
2106 goto out;
2107
2108 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2109 goto out_delete_sys_enter;
2110
2111 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2112 if (sys_exit == NULL)
2113 goto out_delete_sys_enter;
2114
2115 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2116 goto out_delete_sys_exit;
2117
2118 perf_evlist__add(evlist, sys_enter);
2119 perf_evlist__add(evlist, sys_exit);
2120
2ddd5c04 2121 if (callchain_param.enabled && !trace->kernel_syscallchains) {
44621819
ACM
2122 /*
2123 * We're interested only in the user space callchain
2124 * leading to the syscall, allow overriding that for
2125 * debugging reasons using --kernel_syscall_callchains
2126 */
2127 sys_exit->attr.exclude_callchain_kernel = 1;
2128 }
2129
8b3ce757
ACM
2130 trace->syscalls.events.sys_enter = sys_enter;
2131 trace->syscalls.events.sys_exit = sys_exit;
c27366f0
ACM
2132
2133 ret = 0;
2134out:
2135 return ret;
2136
2137out_delete_sys_exit:
2138 perf_evsel__delete_priv(sys_exit);
2139out_delete_sys_enter:
2140 perf_evsel__delete_priv(sys_enter);
2141 goto out;
2142}
2143
19867b61
ACM
2144static int trace__set_ev_qualifier_filter(struct trace *trace)
2145{
2146 int err = -1;
b15d0a4c 2147 struct perf_evsel *sys_exit;
19867b61
ACM
2148 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2149 trace->ev_qualifier_ids.nr,
2150 trace->ev_qualifier_ids.entries);
2151
2152 if (filter == NULL)
2153 goto out_enomem;
2154
3541c034
MP
2155 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2156 filter)) {
b15d0a4c 2157 sys_exit = trace->syscalls.events.sys_exit;
3541c034 2158 err = perf_evsel__append_tp_filter(sys_exit, filter);
b15d0a4c 2159 }
19867b61
ACM
2160
2161 free(filter);
2162out:
2163 return err;
2164out_enomem:
2165 errno = ENOMEM;
2166 goto out;
2167}
c27366f0 2168
f15eb531 2169static int trace__run(struct trace *trace, int argc, const char **argv)
514f1c67 2170{
14a052df 2171 struct perf_evlist *evlist = trace->evlist;
0ae537cb 2172 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
efd5745e
ACM
2173 int err = -1, i;
2174 unsigned long before;
f15eb531 2175 const bool forks = argc > 0;
46fb3c21 2176 bool draining = false;
514f1c67 2177
75b757ca
ACM
2178 trace->live = true;
2179
c27366f0 2180 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
801c67b0 2181 goto out_error_raw_syscalls;
514f1c67 2182
e281a960 2183 if (trace->trace_syscalls)
08c98776 2184 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
c522739d 2185
0ae537cb
ACM
2186 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2187 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2188 if (pgfault_maj == NULL)
2189 goto out_error_mem;
2190 perf_evlist__add(evlist, pgfault_maj);
e2726d99 2191 }
598d02c5 2192
0ae537cb
ACM
2193 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2194 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2195 if (pgfault_min == NULL)
2196 goto out_error_mem;
2197 perf_evlist__add(evlist, pgfault_min);
2198 }
598d02c5 2199
1302d88e 2200 if (trace->sched &&
2cc990ba
ACM
2201 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2202 trace__sched_stat_runtime))
2203 goto out_error_sched_stat_runtime;
1302d88e 2204
514f1c67
ACM
2205 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2206 if (err < 0) {
c24ff998 2207 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
514f1c67
ACM
2208 goto out_delete_evlist;
2209 }
2210
752fde44
ACM
2211 err = trace__symbols_init(trace, evlist);
2212 if (err < 0) {
c24ff998 2213 fprintf(trace->output, "Problems initializing symbol libraries!\n");
03ad9747 2214 goto out_delete_evlist;
752fde44
ACM
2215 }
2216
fde54b78
ACM
2217 perf_evlist__config(evlist, &trace->opts, NULL);
2218
0c3a6ef4
ACM
2219 if (callchain_param.enabled) {
2220 bool use_identifier = false;
2221
2222 if (trace->syscalls.events.sys_exit) {
2223 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2224 &trace->opts, &callchain_param);
2225 use_identifier = true;
2226 }
2227
2228 if (pgfault_maj) {
2229 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2230 use_identifier = true;
2231 }
2232
2233 if (pgfault_min) {
2234 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2235 use_identifier = true;
2236 }
2237
2238 if (use_identifier) {
2239 /*
2240 * Now we have evsels with different sample_ids, use
2241 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2242 * from a fixed position in each ring buffer record.
2243 *
2244 * As of this the changeset introducing this comment, this
2245 * isn't strictly needed, as the fields that can come before
2246 * PERF_SAMPLE_ID are all used, but we'll probably disable
2247 * some of those for things like copying the payload of
2248 * pointer syscall arguments, and for vfs_getname we don't
2249 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2250 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2251 */
2252 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2253 perf_evlist__reset_sample_bit(evlist, ID);
2254 }
fde54b78 2255 }
514f1c67 2256
f15eb531
NK
2257 signal(SIGCHLD, sig_handler);
2258 signal(SIGINT, sig_handler);
2259
2260 if (forks) {
6ef73ec4 2261 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
735f7e0b 2262 argv, false, NULL);
f15eb531 2263 if (err < 0) {
c24ff998 2264 fprintf(trace->output, "Couldn't run the workload!\n");
03ad9747 2265 goto out_delete_evlist;
f15eb531
NK
2266 }
2267 }
2268
514f1c67 2269 err = perf_evlist__open(evlist);
a8f23d8f
ACM
2270 if (err < 0)
2271 goto out_error_open;
514f1c67 2272
ba504235
WN
2273 err = bpf__apply_obj_config();
2274 if (err) {
2275 char errbuf[BUFSIZ];
2276
2277 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2278 pr_err("ERROR: Apply config to BPF failed: %s\n",
2279 errbuf);
2280 goto out_error_open;
2281 }
2282
241b057c
ACM
2283 /*
2284 * Better not use !target__has_task() here because we need to cover the
2285 * case where no threads were specified in the command line, but a
2286 * workload was, and in that case we will fill in the thread_map when
2287 * we fork the workload in perf_evlist__prepare_workload.
2288 */
f078c385
ACM
2289 if (trace->filter_pids.nr > 0)
2290 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
e13798c7 2291 else if (thread_map__pid(evlist->threads, 0) == -1)
f078c385
ACM
2292 err = perf_evlist__set_filter_pid(evlist, getpid());
2293
94ad89bc
ACM
2294 if (err < 0)
2295 goto out_error_mem;
2296
19867b61
ACM
2297 if (trace->ev_qualifier_ids.nr > 0) {
2298 err = trace__set_ev_qualifier_filter(trace);
2299 if (err < 0)
2300 goto out_errno;
19867b61 2301
2e5e5f87
ACM
2302 pr_debug("event qualifier tracepoint filter: %s\n",
2303 trace->syscalls.events.sys_exit->filter);
2304 }
19867b61 2305
94ad89bc
ACM
2306 err = perf_evlist__apply_filters(evlist, &evsel);
2307 if (err < 0)
2308 goto out_error_apply_filters;
241b057c 2309
f885037e 2310 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
e09b18d4
ACM
2311 if (err < 0)
2312 goto out_error_mmap;
514f1c67 2313
e36b7821 2314 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
cb24d01d
ACM
2315 perf_evlist__enable(evlist);
2316
f15eb531
NK
2317 if (forks)
2318 perf_evlist__start_workload(evlist);
2319
e36b7821
AB
2320 if (trace->opts.initial_delay) {
2321 usleep(trace->opts.initial_delay * 1000);
2322 perf_evlist__enable(evlist);
2323 }
2324
e13798c7 2325 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
42052bea
ACM
2326 evlist->threads->nr > 1 ||
2327 perf_evlist__first(evlist)->attr.inherit;
514f1c67 2328again:
efd5745e 2329 before = trace->nr_events;
514f1c67
ACM
2330
2331 for (i = 0; i < evlist->nr_mmaps; i++) {
2332 union perf_event *event;
2333
2334 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
514f1c67 2335 struct perf_sample sample;
514f1c67 2336
efd5745e 2337 ++trace->nr_events;
514f1c67 2338
514f1c67
ACM
2339 err = perf_evlist__parse_sample(evlist, event, &sample);
2340 if (err) {
c24ff998 2341 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
8e50d384 2342 goto next_event;
514f1c67
ACM
2343 }
2344
ddbb1b13 2345 trace__handle_event(trace, event, &sample);
8e50d384
ZZ
2346next_event:
2347 perf_evlist__mmap_consume(evlist, i);
20c5f10e 2348
ba209f85
ACM
2349 if (interrupted)
2350 goto out_disable;
02ac5421
ACM
2351
2352 if (done && !draining) {
2353 perf_evlist__disable(evlist);
2354 draining = true;
2355 }
514f1c67
ACM
2356 }
2357 }
2358
efd5745e 2359 if (trace->nr_events == before) {
ba209f85 2360 int timeout = done ? 100 : -1;
f15eb531 2361
46fb3c21
ACM
2362 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2363 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2364 draining = true;
2365
ba209f85 2366 goto again;
46fb3c21 2367 }
ba209f85
ACM
2368 } else {
2369 goto again;
f15eb531
NK
2370 }
2371
ba209f85 2372out_disable:
f3b623b8
ACM
2373 thread__zput(trace->current);
2374
ba209f85 2375 perf_evlist__disable(evlist);
514f1c67 2376
c522739d
ACM
2377 if (!err) {
2378 if (trace->summary)
2379 trace__fprintf_thread_summary(trace, trace->output);
2380
2381 if (trace->show_tool_stats) {
2382 fprintf(trace->output, "Stats:\n "
2383 " vfs_getname : %" PRIu64 "\n"
2384 " proc_getname: %" PRIu64 "\n",
2385 trace->stats.vfs_getname,
2386 trace->stats.proc_getname);
2387 }
2388 }
bf2575c1 2389
514f1c67
ACM
2390out_delete_evlist:
2391 perf_evlist__delete(evlist);
14a052df 2392 trace->evlist = NULL;
75b757ca 2393 trace->live = false;
514f1c67 2394 return err;
6ef068cb
ACM
2395{
2396 char errbuf[BUFSIZ];
a8f23d8f 2397
2cc990ba 2398out_error_sched_stat_runtime:
988bdb31 2399 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2cc990ba
ACM
2400 goto out_error;
2401
801c67b0 2402out_error_raw_syscalls:
988bdb31 2403 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
a8f23d8f
ACM
2404 goto out_error;
2405
e09b18d4
ACM
2406out_error_mmap:
2407 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2408 goto out_error;
2409
a8f23d8f
ACM
2410out_error_open:
2411 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2412
2413out_error:
6ef068cb 2414 fprintf(trace->output, "%s\n", errbuf);
87f91868 2415 goto out_delete_evlist;
94ad89bc
ACM
2416
2417out_error_apply_filters:
2418 fprintf(trace->output,
2419 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2420 evsel->filter, perf_evsel__name(evsel), errno,
c8b5f2c9 2421 str_error_r(errno, errbuf, sizeof(errbuf)));
94ad89bc 2422 goto out_delete_evlist;
514f1c67 2423}
5ed08dae
ACM
2424out_error_mem:
2425 fprintf(trace->output, "Not enough memory to run!\n");
2426 goto out_delete_evlist;
19867b61
ACM
2427
2428out_errno:
2429 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2430 goto out_delete_evlist;
a8f23d8f 2431}
514f1c67 2432
6810fc91
DA
2433static int trace__replay(struct trace *trace)
2434{
2435 const struct perf_evsel_str_handler handlers[] = {
c522739d 2436 { "probe:vfs_getname", trace__vfs_getname, },
6810fc91 2437 };
f5fc1412
JO
2438 struct perf_data_file file = {
2439 .path = input_name,
2440 .mode = PERF_DATA_MODE_READ,
e366a6d8 2441 .force = trace->force,
f5fc1412 2442 };
6810fc91 2443 struct perf_session *session;
003824e8 2444 struct perf_evsel *evsel;
6810fc91
DA
2445 int err = -1;
2446
2447 trace->tool.sample = trace__process_sample;
2448 trace->tool.mmap = perf_event__process_mmap;
384c671e 2449 trace->tool.mmap2 = perf_event__process_mmap2;
6810fc91
DA
2450 trace->tool.comm = perf_event__process_comm;
2451 trace->tool.exit = perf_event__process_exit;
2452 trace->tool.fork = perf_event__process_fork;
2453 trace->tool.attr = perf_event__process_attr;
f3b3614a 2454 trace->tool.tracing_data = perf_event__process_tracing_data;
6810fc91 2455 trace->tool.build_id = perf_event__process_build_id;
f3b3614a 2456 trace->tool.namespaces = perf_event__process_namespaces;
6810fc91 2457
0a8cb85c 2458 trace->tool.ordered_events = true;
6810fc91
DA
2459 trace->tool.ordering_requires_timestamps = true;
2460
2461 /* add tid to output */
2462 trace->multiple_threads = true;
2463
f5fc1412 2464 session = perf_session__new(&file, false, &trace->tool);
6810fc91 2465 if (session == NULL)
52e02834 2466 return -1;
6810fc91 2467
aa07df6e
DA
2468 if (trace->opts.target.pid)
2469 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2470
2471 if (trace->opts.target.tid)
2472 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2473
0a7e6d1b 2474 if (symbol__init(&session->header.env) < 0)
cb2ffae2
NK
2475 goto out;
2476
8fb598e5
DA
2477 trace->host = &session->machines.host;
2478
6810fc91
DA
2479 err = perf_session__set_tracepoints_handlers(session, handlers);
2480 if (err)
2481 goto out;
2482
003824e8
NK
2483 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2484 "raw_syscalls:sys_enter");
9aca7f17
DA
2485 /* older kernels have syscalls tp versus raw_syscalls */
2486 if (evsel == NULL)
2487 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2488 "syscalls:sys_enter");
003824e8 2489
e281a960
SF
2490 if (evsel &&
2491 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2492 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
003824e8
NK
2493 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2494 goto out;
2495 }
2496
2497 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2498 "raw_syscalls:sys_exit");
9aca7f17
DA
2499 if (evsel == NULL)
2500 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2501 "syscalls:sys_exit");
e281a960
SF
2502 if (evsel &&
2503 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2504 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
003824e8 2505 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
6810fc91
DA
2506 goto out;
2507 }
2508
e5cadb93 2509 evlist__for_each_entry(session->evlist, evsel) {
1e28fe0a
SF
2510 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2511 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2512 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2513 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2514 evsel->handler = trace__pgfault;
2515 }
2516
6810fc91
DA
2517 setup_pager();
2518
b7b61cbe 2519 err = perf_session__process_events(session);
6810fc91
DA
2520 if (err)
2521 pr_err("Failed to process events, error %d", err);
2522
bf2575c1
DA
2523 else if (trace->summary)
2524 trace__fprintf_thread_summary(trace, trace->output);
2525
6810fc91
DA
2526out:
2527 perf_session__delete(session);
2528
2529 return err;
2530}
2531
1302d88e
ACM
/*
 * Print the heading of the per-thread summary to @fp.
 * Returns the value returned by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2540
b535d523
ACM
2541DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2542 struct stats *stats;
2543 double msecs;
2544 int syscall;
2545)
2546{
2547 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2548 struct stats *stats = source->priv;
2549
2550 entry->syscall = source->i;
2551 entry->stats = stats;
2552 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2553}
2554
bf2575c1
DA
2555static size_t thread__dump_stats(struct thread_trace *ttrace,
2556 struct trace *trace, FILE *fp)
2557{
bf2575c1
DA
2558 size_t printed = 0;
2559 struct syscall *sc;
b535d523
ACM
2560 struct rb_node *nd;
2561 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
bf2575c1 2562
b535d523 2563 if (syscall_stats == NULL)
bf2575c1
DA
2564 return 0;
2565
2566 printed += fprintf(fp, "\n");
2567
834fd46d
MW
2568 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2569 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2570 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
99ff7150 2571
98a91837 2572 resort_rb__for_each_entry(nd, syscall_stats) {
b535d523 2573 struct stats *stats = syscall_stats_entry->stats;
bf2575c1
DA
2574 if (stats) {
2575 double min = (double)(stats->min) / NSEC_PER_MSEC;
2576 double max = (double)(stats->max) / NSEC_PER_MSEC;
2577 double avg = avg_stats(stats);
2578 double pct;
2579 u64 n = (u64) stats->n;
2580
2581 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2582 avg /= NSEC_PER_MSEC;
2583
b535d523 2584 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
99ff7150 2585 printed += fprintf(fp, " %-15s", sc->name);
834fd46d 2586 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
b535d523 2587 n, syscall_stats_entry->msecs, min, avg);
27a778b5 2588 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
bf2575c1 2589 }
bf2575c1
DA
2590 }
2591
b535d523 2592 resort_rb__delete(syscall_stats);
bf2575c1 2593 printed += fprintf(fp, "\n\n");
1302d88e
ACM
2594
2595 return printed;
2596}
2597
96c14451 2598static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
896cbb56 2599{
96c14451 2600 size_t printed = 0;
89dceb22 2601 struct thread_trace *ttrace = thread__priv(thread);
896cbb56
DA
2602 double ratio;
2603
2604 if (ttrace == NULL)
2605 return 0;
2606
2607 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2608
15e65c69 2609 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
99ff7150 2610 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
15e65c69 2611 printed += fprintf(fp, "%.1f%%", ratio);
a2ea67d7
SF
2612 if (ttrace->pfmaj)
2613 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2614 if (ttrace->pfmin)
2615 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
03548ebf
ACM
2616 if (trace->sched)
2617 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2618 else if (fputc('\n', fp) != EOF)
2619 ++printed;
2620
bf2575c1 2621 printed += thread__dump_stats(ttrace, trace, fp);
896cbb56 2622
96c14451
ACM
2623 return printed;
2624}
896cbb56 2625
96c14451
ACM
2626static unsigned long thread__nr_events(struct thread_trace *ttrace)
2627{
2628 return ttrace ? ttrace->nr_events : 0;
2629}
2630
2631DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2632 struct thread *thread;
2633)
2634{
2635 entry->thread = rb_entry(nd, struct thread, rb_node);
896cbb56
DA
2636}
2637
1302d88e
ACM
2638static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2639{
96c14451
ACM
2640 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2641 size_t printed = trace__fprintf_threads_header(fp);
2642 struct rb_node *nd;
1302d88e 2643
96c14451
ACM
2644 if (threads == NULL) {
2645 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2646 return 0;
2647 }
2648
98a91837 2649 resort_rb__for_each_entry(nd, threads)
96c14451 2650 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
896cbb56 2651
96c14451
ACM
2652 resort_rb__delete(threads);
2653
2654 return printed;
1302d88e
ACM
2655}
2656
ae9ed035
ACM
2657static int trace__set_duration(const struct option *opt, const char *str,
2658 int unset __maybe_unused)
2659{
2660 struct trace *trace = opt->value;
2661
2662 trace->duration_filter = atof(str);
2663 return 0;
2664}
2665
f078c385
ACM
2666static int trace__set_filter_pids(const struct option *opt, const char *str,
2667 int unset __maybe_unused)
2668{
2669 int ret = -1;
2670 size_t i;
2671 struct trace *trace = opt->value;
2672 /*
2673 * FIXME: introduce a intarray class, plain parse csv and create a
2674 * { int nr, int entries[] } struct...
2675 */
2676 struct intlist *list = intlist__new(str);
2677
2678 if (list == NULL)
2679 return -1;
2680
2681 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2682 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2683
2684 if (trace->filter_pids.entries == NULL)
2685 goto out;
2686
2687 trace->filter_pids.entries[0] = getpid();
2688
2689 for (i = 1; i < trace->filter_pids.nr; ++i)
2690 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2691
2692 intlist__delete(list);
2693 ret = 0;
2694out:
2695 return ret;
2696}
2697
c24ff998
ACM
2698static int trace__open_output(struct trace *trace, const char *filename)
2699{
2700 struct stat st;
2701
2702 if (!stat(filename, &st) && st.st_size) {
2703 char oldname[PATH_MAX];
2704
2705 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2706 unlink(oldname);
2707 rename(filename, oldname);
2708 }
2709
2710 trace->output = fopen(filename, "w");
2711
2712 return trace->output == NULL ? -errno : 0;
2713}
2714
598d02c5
SF
2715static int parse_pagefaults(const struct option *opt, const char *str,
2716 int unset __maybe_unused)
2717{
2718 int *trace_pgfaults = opt->value;
2719
2720 if (strcmp(str, "all") == 0)
2721 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2722 else if (strcmp(str, "maj") == 0)
2723 *trace_pgfaults |= TRACE_PFMAJ;
2724 else if (strcmp(str, "min") == 0)
2725 *trace_pgfaults |= TRACE_PFMIN;
2726 else
2727 return -1;
2728
2729 return 0;
2730}
2731
14a052df
ACM
2732static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2733{
2734 struct perf_evsel *evsel;
2735
e5cadb93 2736 evlist__for_each_entry(evlist, evsel)
14a052df
ACM
2737 evsel->handler = handler;
2738}
2739
017037ff
ACM
2740/*
2741 * XXX: Hackish, just splitting the combined -e+--event (syscalls
2742 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
2743 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
2744 *
2745 * It'd be better to introduce a parse_options() variant that would return a
2746 * list with the terms it didn't match to an event...
2747 */
2748static int trace__parse_events_option(const struct option *opt, const char *str,
2749 int unset __maybe_unused)
2750{
2751 struct trace *trace = (struct trace *)opt->value;
2752 const char *s = str;
2753 char *sep = NULL, *lists[2] = { NULL, NULL, };
2754 int len = strlen(str), err = -1, list;
2755 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
2756 char group_name[PATH_MAX];
2757
2758 if (strace_groups_dir == NULL)
2759 return -1;
2760
2761 if (*s == '!') {
2762 ++s;
2763 trace->not_ev_qualifier = true;
2764 }
2765
2766 while (1) {
2767 if ((sep = strchr(s, ',')) != NULL)
2768 *sep = '\0';
2769
2770 list = 0;
2771 if (syscalltbl__id(trace->sctbl, s) >= 0) {
2772 list = 1;
2773 } else {
2774 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
2775 if (access(group_name, R_OK) == 0)
2776 list = 1;
2777 }
2778
2779 if (lists[list]) {
2780 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
2781 } else {
2782 lists[list] = malloc(len);
2783 if (lists[list] == NULL)
2784 goto out;
2785 strcpy(lists[list], s);
2786 }
2787
2788 if (!sep)
2789 break;
2790
2791 *sep = ',';
2792 s = sep + 1;
2793 }
2794
2795 if (lists[1] != NULL) {
2796 struct strlist_config slist_config = {
2797 .dirname = strace_groups_dir,
2798 };
2799
2800 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
2801 if (trace->ev_qualifier == NULL) {
2802 fputs("Not enough memory to parse event qualifier", trace->output);
2803 goto out;
2804 }
2805
2806 if (trace__validate_ev_qualifier(trace))
2807 goto out;
2808 }
2809
2810 err = 0;
2811
2812 if (lists[0]) {
2813 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
2814 "event selector. use 'perf list' to list available events",
2815 parse_events_option);
2816 err = parse_events_option(&o, lists[0], 0);
2817 }
2818out:
2819 if (sep)
2820 *sep = ',';
2821
2822 return err;
2823}
2824
b0ad8ea6 2825int cmd_trace(int argc, const char **argv)
514f1c67 2826{
6fdd9cb7 2827 const char *trace_usage[] = {
f15eb531
NK
2828 "perf trace [<options>] [<command>]",
2829 "perf trace [<options>] -- <command> [<options>]",
5e2485b1
DA
2830 "perf trace record [<options>] [<command>]",
2831 "perf trace record [<options>] -- <command> [<options>]",
514f1c67
ACM
2832 NULL
2833 };
2834 struct trace trace = {
514f1c67
ACM
2835 .syscalls = {
2836 . max = -1,
2837 },
2838 .opts = {
2839 .target = {
2840 .uid = UINT_MAX,
2841 .uses_mmap = true,
2842 },
2843 .user_freq = UINT_MAX,
2844 .user_interval = ULLONG_MAX,
509051ea 2845 .no_buffering = true,
38d5447d 2846 .mmap_pages = UINT_MAX,
9d9cad76 2847 .proc_map_timeout = 500,
514f1c67 2848 },
007d66a0 2849 .output = stderr,
50c95cbd 2850 .show_comm = true,
e281a960 2851 .trace_syscalls = true,
44621819 2852 .kernel_syscallchains = false,
05614993 2853 .max_stack = UINT_MAX,
514f1c67 2854 };
c24ff998 2855 const char *output_name = NULL;
514f1c67 2856 const struct option trace_options[] = {
017037ff
ACM
2857 OPT_CALLBACK('e', "event", &trace, "event",
2858 "event/syscall selector. use 'perf list' to list available events",
2859 trace__parse_events_option),
50c95cbd
ACM
2860 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2861 "show the thread COMM next to its id"),
c522739d 2862 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
017037ff
ACM
2863 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
2864 trace__parse_events_option),
c24ff998 2865 OPT_STRING('o', "output", &output_name, "file", "output file name"),
6810fc91 2866 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
514f1c67
ACM
2867 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2868 "trace events on existing process id"),
ac9be8ee 2869 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
514f1c67 2870 "trace events on existing thread id"),
fa0e4ffe
ACM
2871 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2872 "pids to filter (by the kernel)", trace__set_filter_pids),
ac9be8ee 2873 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
514f1c67 2874 "system-wide collection from all CPUs"),
ac9be8ee 2875 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
514f1c67 2876 "list of cpus to monitor"),
6810fc91 2877 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
514f1c67 2878 "child tasks do not inherit counters"),
994a1f78
JO
2879 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2880 "number of mmap data pages",
2881 perf_evlist__parse_mmap_pages),
ac9be8ee 2882 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
514f1c67 2883 "user to profile"),
ae9ed035
ACM
2884 OPT_CALLBACK(0, "duration", &trace, "float",
2885 "show only events with duration > N.M ms",
2886 trace__set_duration),
1302d88e 2887 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
7c304ee0 2888 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4bb09192
DA
2889 OPT_BOOLEAN('T', "time", &trace.full_time,
2890 "Show full timestamp, not time relative to first start"),
fd2eabaf
DA
2891 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2892 "Show only syscall summary with statistics"),
2893 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2894 "Show all syscalls and summary with statistics"),
598d02c5
SF
2895 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2896 "Trace pagefaults", parse_pagefaults, "maj"),
e281a960 2897 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
e366a6d8 2898 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
566a0885
MW
2899 OPT_CALLBACK(0, "call-graph", &trace.opts,
2900 "record_mode[,record_size]", record_callchain_help,
2901 &record_parse_callchain_opt),
44621819
ACM
2902 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2903 "Show the kernel callchains on the syscall exit path"),
5cf9c84e
ACM
2904 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2905 "Set the minimum stack depth when parsing the callchain, "
2906 "anything below the specified depth will be ignored."),
c6d4a494
ACM
2907 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2908 "Set the maximum stack depth when parsing the callchain, "
2909 "anything beyond the specified depth will be ignored. "
4cb93446 2910 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
9d9cad76
KL
2911 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2912 "per thread proc mmap processing timeout in ms"),
e36b7821
AB
2913 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
2914 "ms to wait before starting measurement after program "
2915 "start"),
514f1c67
ACM
2916 OPT_END()
2917 };
ccd62a89 2918 bool __maybe_unused max_stack_user_set = true;
f3e459d1 2919 bool mmap_pages_user_set = true;
6fdd9cb7 2920 const char * const trace_subcommands[] = { "record", NULL };
514f1c67 2921 int err;
32caf0d1 2922 char bf[BUFSIZ];
514f1c67 2923
4d08cb80
ACM
2924 signal(SIGSEGV, sighandler_dump_stack);
2925 signal(SIGFPE, sighandler_dump_stack);
2926
14a052df 2927 trace.evlist = perf_evlist__new();
fd0db102 2928 trace.sctbl = syscalltbl__new();
14a052df 2929
fd0db102 2930 if (trace.evlist == NULL || trace.sctbl == NULL) {
14a052df 2931 pr_err("Not enough memory to run!\n");
ff8f695c 2932 err = -ENOMEM;
14a052df
ACM
2933 goto out;
2934 }
2935
6fdd9cb7
YS
2936 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2937 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
fd2eabaf 2938
d7888573
WN
2939 err = bpf__setup_stdout(trace.evlist);
2940 if (err) {
2941 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2942 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2943 goto out;
2944 }
2945
59247e33
ACM
2946 err = -1;
2947
598d02c5
SF
2948 if (trace.trace_pgfaults) {
2949 trace.opts.sample_address = true;
2950 trace.opts.sample_time = true;
2951 }
2952
f3e459d1
ACM
2953 if (trace.opts.mmap_pages == UINT_MAX)
2954 mmap_pages_user_set = false;
2955
05614993 2956 if (trace.max_stack == UINT_MAX) {
fe176085 2957 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
05614993
ACM
2958 max_stack_user_set = false;
2959 }
2960
2961#ifdef HAVE_DWARF_UNWIND_SUPPORT
caa36ed7 2962 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
05614993
ACM
2963 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2964#endif
2965
2ddd5c04 2966 if (callchain_param.enabled) {
f3e459d1
ACM
2967 if (!mmap_pages_user_set && geteuid() == 0)
2968 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2969
566a0885 2970 symbol_conf.use_callchain = true;
f3e459d1 2971 }
566a0885 2972
14a052df
ACM
2973 if (trace.evlist->nr_entries > 0)
2974 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2975
1e28fe0a
SF
2976 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2977 return trace__record(&trace, argc-1, &argv[1]);
2978
2979 /* summary_only implies summary option, but don't overwrite summary if set */
2980 if (trace.summary_only)
2981 trace.summary = trace.summary_only;
2982
726f3234
ACM
2983 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2984 trace.evlist->nr_entries == 0 /* Was --events used? */) {
e281a960
SF
2985 pr_err("Please specify something to trace.\n");
2986 return -1;
2987 }
2988
017037ff 2989 if (!trace.trace_syscalls && trace.ev_qualifier) {
59247e33
ACM
2990 pr_err("The -e option can't be used with --no-syscalls.\n");
2991 goto out;
2992 }
2993
c24ff998
ACM
2994 if (output_name != NULL) {
2995 err = trace__open_output(&trace, output_name);
2996 if (err < 0) {
2997 perror("failed to create output file");
2998 goto out;
2999 }
3000 }
3001
fd0db102
ACM
3002 trace.open_id = syscalltbl__id(trace.sctbl, "open");
3003
602ad878 3004 err = target__validate(&trace.opts.target);
32caf0d1 3005 if (err) {
602ad878 3006 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3007 fprintf(trace.output, "%s", bf);
3008 goto out_close;
32caf0d1
NK
3009 }
3010
602ad878 3011 err = target__parse_uid(&trace.opts.target);
514f1c67 3012 if (err) {
602ad878 3013 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3014 fprintf(trace.output, "%s", bf);
3015 goto out_close;
514f1c67
ACM
3016 }
3017
602ad878 3018 if (!argc && target__none(&trace.opts.target))
ee76120e
NK
3019 trace.opts.target.system_wide = true;
3020
6810fc91
DA
3021 if (input_name)
3022 err = trace__replay(&trace);
3023 else
3024 err = trace__run(&trace, argc, argv);
1302d88e 3025
c24ff998
ACM
3026out_close:
3027 if (output_name != NULL)
3028 fclose(trace.output);
3029out:
1302d88e 3030 return err;
514f1c67 3031}