Merge branch 'bind_unbind' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh...
[GitHub/LineageOS/android_kernel_motorola_exynos9610.git] / tools / perf / builtin-trace.c
CommitLineData
a598bb5e
ACM
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
4e319027 19#include <traceevent/event-parse.h>
988bdb31 20#include <api/fs/tracing_path.h>
514f1c67 21#include "builtin.h"
752fde44 22#include "util/color.h"
7c304ee0 23#include "util/debug.h"
5ab8c689 24#include "util/event.h"
514f1c67 25#include "util/evlist.h"
4b6ab94e 26#include <subcmd/exec-cmd.h>
752fde44 27#include "util/machine.h"
9a3993d4 28#include "util/path.h"
6810fc91 29#include "util/session.h"
752fde44 30#include "util/thread.h"
4b6ab94e 31#include <subcmd/parse-options.h>
2ae3a312 32#include "util/strlist.h"
bdc89661 33#include "util/intlist.h"
514f1c67 34#include "util/thread_map.h"
bf2575c1 35#include "util/stat.h"
fd5cead2 36#include "trace/beauty/beauty.h"
97978b3e 37#include "trace-event.h"
9aca7f17 38#include "util/parse-events.h"
ba504235 39#include "util/bpf-loader.h"
566a0885 40#include "callchain.h"
fea01392 41#include "print_binary.h"
a067558e 42#include "string2.h"
fd0db102 43#include "syscalltbl.h"
96c14451 44#include "rb_resort.h"
514f1c67 45
a43783ae 46#include <errno.h>
fd20e811 47#include <inttypes.h>
fd0db102 48#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
4208735d 49#include <poll.h>
9607ad3a 50#include <signal.h>
514f1c67 51#include <stdlib.h>
017037ff 52#include <string.h>
8dd2a131 53#include <linux/err.h>
997bba8c
ACM
54#include <linux/filter.h>
55#include <linux/audit.h>
877a7a11 56#include <linux/kernel.h>
39878d49 57#include <linux/random.h>
c6d4a494 58#include <linux/stringify.h>
bd48c63e 59#include <linux/time64.h>
514f1c67 60
3d689ed6
ACM
61#include "sane_ctype.h"
62
c188e7ac
ACM
63#ifndef O_CLOEXEC
64# define O_CLOEXEC 02000000
65#endif
66
d1d438a3
ACM
/*
 * State of one 'trace' command invocation.
 *
 * NOTE(review): the per-member notes below are derived from the member names
 * and types only; most of the code using them lives outside this part of the
 * file, so confirm against the users before relying on them.
 */
struct trace {
	struct perf_tool	tool;
	struct syscalltbl	*sctbl;
	struct {
		int		max;	/* presumably the highest id in 'table' - confirm */
		struct syscall  *table;
		struct {
			struct perf_evsel *sys_enter,
					  *sys_exit;
		}		events;
	} syscalls;
	struct record_opts	opts;
	struct perf_evlist	*evlist;
	struct machine		*host;
	struct thread		*current;
	u64			base_time;
	FILE			*output;
	unsigned long		nr_events;
	struct strlist		*ev_qualifier;
	struct {
		size_t		nr;	/* number of elements in 'entries' (assumed) */
		int		*entries;
	}			ev_qualifier_ids;
	struct {
		size_t		nr;	/* number of elements in 'entries' (assumed) */
		pid_t		*entries;
	}			filter_pids;
	double			duration_filter;
	double			runtime_ms;
	struct {
		u64		vfs_getname,
				proc_getname;
	} stats;
	unsigned int		max_stack;
	unsigned int		min_stack;
	bool			not_ev_qualifier;
	bool			live;
	bool			full_time;
	bool			sched;
	bool			multiple_threads;
	bool			summary;
	bool			summary_only;
	bool			show_comm;
	bool			show_tool_stats;
	bool			trace_syscalls;
	bool			kernel_syscallchains;
	bool			force;
	bool			vfs_getname;
	int			trace_pgfaults;
	int			open_id;
};
a1c2552d 118
77170988
ACM
/*
 * Accessor for one tracepoint field inside a sample's raw data.
 *
 * @offset:  byte offset of the field, added to sample->raw_data by the
 *           accessors below.
 * @integer: reader returning the field as a u64; set by tp_field__init_uint().
 * @pointer: reader returning the address of the field inside the raw data;
 *           set by tp_field__init_ptr().
 *
 * The two readers share storage (anonymous union), so only the one matching
 * the init function that was used may be called.
 */
struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};
126
127#define TP_UINT_FIELD(bits) \
128static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
129{ \
55d43bca
DA
130 u##bits value; \
131 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
132 return value; \
77170988
ACM
133}
134
135TP_UINT_FIELD(8);
136TP_UINT_FIELD(16);
137TP_UINT_FIELD(32);
138TP_UINT_FIELD(64);
139
140#define TP_UINT_FIELD__SWAPPED(bits) \
141static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
142{ \
55d43bca
DA
143 u##bits value; \
144 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
77170988
ACM
145 return bswap_##bits(value);\
146}
147
148TP_UINT_FIELD__SWAPPED(16);
149TP_UINT_FIELD__SWAPPED(32);
150TP_UINT_FIELD__SWAPPED(64);
151
152static int tp_field__init_uint(struct tp_field *field,
153 struct format_field *format_field,
154 bool needs_swap)
155{
156 field->offset = format_field->offset;
157
158 switch (format_field->size) {
159 case 1:
160 field->integer = tp_field__u8;
161 break;
162 case 2:
163 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
164 break;
165 case 4:
166 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
167 break;
168 case 8:
169 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
170 break;
171 default:
172 return -1;
173 }
174
175 return 0;
176}
177
/*
 * Pointer reader: returns the address of the field inside the sample's raw
 * data (raw_data + offset). Nothing is copied, so the result points into
 * @sample.
 */
static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
{
	return sample->raw_data + field->offset;
}
182
183static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
184{
185 field->offset = format_field->offset;
186 field->pointer = tp_field__ptr;
187 return 0;
188}
189
/*
 * Field accessors stored in evsel->priv for a syscall tracepoint.
 *
 * @id:         the "id" field, set up by perf_evsel__init_syscall_tp().
 * @args, @ret: share storage (anonymous union), so a given evsel can use only
 *              one of them - presumably args for the enter tracepoint and ret
 *              for the exit one; confirm against the code that initializes
 *              them.
 */
struct syscall_tp {
	struct tp_field id;
	union {
		struct tp_field args, ret;
	};
};
196
197static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
198 struct tp_field *field,
199 const char *name)
200{
201 struct format_field *format_field = perf_evsel__field(evsel, name);
202
203 if (format_field == NULL)
204 return -1;
205
206 return tp_field__init_uint(field, format_field, evsel->needs_swap);
207}
208
/*
 * Initialize the 'name' member of the struct syscall_tp hanging off
 * evsel->priv as an unsigned integer field. 'name' is used twice: as the
 * struct member and, stringified, as the tracepoint field to look up.
 * evsel->priv must already point to a struct syscall_tp.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
212
/*
 * Look up the tracepoint field called @name in @evsel and set @field up as a
 * pointer reader for it.
 *
 * Returns 0 on success, -1 if the field does not exist.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *ff = perf_evsel__field(evsel, name);

	return ff ? tp_field__init_ptr(field, ff) : -1;
}
224
/*
 * Pointer-field counterpart of perf_evsel__init_sc_tp_uint_field(): 'name' is
 * both the struct syscall_tp member to set up and, stringified, the tracepoint
 * field to look up. evsel->priv must already point to a struct syscall_tp.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
228
/*
 * Release evsel->priv with zfree() and then delete the evsel itself.
 */
static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
	zfree(&evsel->priv);
	perf_evsel__delete(evsel);
}
234
96695d44
NK
235static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
236{
237 evsel->priv = malloc(sizeof(struct syscall_tp));
238 if (evsel->priv != NULL) {
239 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
240 goto out_delete;
241
242 evsel->handler = handler;
243 return 0;
244 }
245
246 return -ENOMEM;
247
248out_delete:
04662523 249 zfree(&evsel->priv);
96695d44
NK
250 return -ENOENT;
251}
252
/*
 * Create the evsel for the raw_syscalls:<direction> tracepoint, falling back
 * to syscalls:<direction>, and initialize it with
 * perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the initialization failed (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *tp = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(tp)) {
		tp = perf_evsel__newtp("syscalls", direction);
		if (IS_ERR(tp))
			return NULL;
	}

	if (perf_evsel__init_syscall_tp(tp, handler) != 0) {
		perf_evsel__delete_priv(tp);
		return NULL;
	}

	return tp;
}
273
/*
 * Read the 'name' member of the struct syscall_tp stored in evsel->priv from
 * 'sample' through its integer reader; evaluates to the u64 it returns.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

/*
 * Same as perf_evsel__sc_tp_uint() but goes through the pointer reader and
 * evaluates to the void pointer it returns.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
281
/*
 * Table used to print an integer syscall argument symbolically.
 *
 * @offset:     value that maps to entries[0]; it is subtracted from the
 *              argument before indexing (see
 *              __syscall_arg__scnprintf_strarray()).
 * @nr_entries: number of slots in @entries.
 * @entries:    the names.
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char **entries;
};

/* Define strarray__<array> for a table whose first entry stands for value 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

/* Define strarray__<array> for a table whose first entry stands for value 'off'. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
298
975b7c2f
ACM
299static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
300 const char *intfmt,
301 struct syscall_arg *arg)
1f115cb7 302{
1f115cb7 303 struct strarray *sa = arg->parm;
03e3adc9 304 int idx = arg->val - sa->offset;
1f115cb7
ACM
305
306 if (idx < 0 || idx >= sa->nr_entries)
975b7c2f 307 return scnprintf(bf, size, intfmt, arg->val);
1f115cb7
ACM
308
309 return scnprintf(bf, size, "%s", sa->entries[idx]);
310}
311
975b7c2f
ACM
/*
 * strarray beautifier that prints values without a name in decimal ("%d").
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}
317
1f115cb7
ACM
318#define SCA_STRARRAY syscall_arg__scnprintf_strarray
319
844ae5b4
ACM
320#if defined(__i386__) || defined(__x86_64__)
321/*
322 * FIXME: Make this available to all arches as soon as the ioctl beautifier
323 * gets rewritten to support all arches.
324 */
78645cf3
ACM
/*
 * strarray beautifier that prints values without a name in hex ("%#x").
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}
330
331#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
844ae5b4 332#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 333
75b757ca
ACM
334static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
335 struct syscall_arg *arg);
336
337#define SCA_FD syscall_arg__scnprintf_fd
338
48e1f91a
ACM
339#ifndef AT_FDCWD
340#define AT_FDCWD -100
341#endif
342
75b757ca
ACM
343static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
344 struct syscall_arg *arg)
345{
346 int fd = arg->val;
347
348 if (fd == AT_FDCWD)
349 return scnprintf(bf, size, "CWD");
350
351 return syscall_arg__scnprintf_fd(bf, size, arg);
352}
353
354#define SCA_FDAT syscall_arg__scnprintf_fd_at
355
356static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
357 struct syscall_arg *arg);
358
359#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
360
/*
 * Print the argument in hexadecimal with a 0x prefix ("%#lx").
 */
static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
					 struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%#lx", arg->val);
}
366
beccb2b5
ACM
367#define SCA_HEX syscall_arg__scnprintf_hex
368
a1c2552d
ACM
369static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
370 struct syscall_arg *arg)
371{
372 return scnprintf(bf, size, "%d", arg->val);
373}
374
375#define SCA_INT syscall_arg__scnprintf_int
376
729a7841
ACM
377static const char *bpf_cmd[] = {
378 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
379 "MAP_GET_NEXT_KEY", "PROG_LOAD",
380};
381static DEFINE_STRARRAY(bpf_cmd);
382
03e3adc9
ACM
383static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
384static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
eac032c5 385
1f115cb7
ACM
386static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
387static DEFINE_STRARRAY(itimers);
388
b62bee1b
ACM
389static const char *keyctl_options[] = {
390 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
391 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
392 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
393 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
394 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
395};
396static DEFINE_STRARRAY(keyctl_options);
397
efe6b882
ACM
398static const char *whences[] = { "SET", "CUR", "END",
399#ifdef SEEK_DATA
400"DATA",
401#endif
402#ifdef SEEK_HOLE
403"HOLE",
404#endif
405};
406static DEFINE_STRARRAY(whences);
f9da0b0c 407
80f587d5
ACM
408static const char *fcntl_cmds[] = {
409 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
410 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
411 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
412 "F_GETOWNER_UIDS",
413};
414static DEFINE_STRARRAY(fcntl_cmds);
415
c045bf02
ACM
416static const char *rlimit_resources[] = {
417 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
418 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
419 "RTTIME",
420};
421static DEFINE_STRARRAY(rlimit_resources);
422
eb5b1b14
ACM
423static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
424static DEFINE_STRARRAY(sighow);
425
4f8c1b74
DA
426static const char *clockid[] = {
427 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
28ebb87c
ACM
428 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
429 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
4f8c1b74
DA
430};
431static DEFINE_STRARRAY(clockid);
432
e10bce81
ACM
433static const char *socket_families[] = {
434 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
435 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
436 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
437 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
438 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
439 "ALG", "NFC", "VSOCK",
440};
441static DEFINE_STRARRAY(socket_families);
442
51108999
ACM
443static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
444 struct syscall_arg *arg)
445{
446 size_t printed = 0;
447 int mode = arg->val;
448
449 if (mode == F_OK) /* 0 */
450 return scnprintf(bf, size, "F");
451#define P_MODE(n) \
452 if (mode & n##_OK) { \
453 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
454 mode &= ~n##_OK; \
455 }
456
457 P_MODE(R);
458 P_MODE(W);
459 P_MODE(X);
460#undef P_MODE
461
462 if (mode)
463 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
464
465 return printed;
466}
467
468#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
469
f994592d
ACM
470static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
471 struct syscall_arg *arg);
472
473#define SCA_FILENAME syscall_arg__scnprintf_filename
474
46cce19b
ACM
475static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
476 struct syscall_arg *arg)
477{
478 int printed = 0, flags = arg->val;
479
480#define P_FLAG(n) \
481 if (flags & O_##n) { \
482 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
483 flags &= ~O_##n; \
484 }
485
486 P_FLAG(CLOEXEC);
487 P_FLAG(NONBLOCK);
488#undef P_FLAG
489
490 if (flags)
491 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
492
493 return printed;
494}
495
496#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
497
844ae5b4
ACM
498#if defined(__i386__) || defined(__x86_64__)
499/*
500 * FIXME: Make this available to all arches.
501 */
78645cf3
ACM
502#define TCGETS 0x5401
503
504static const char *tioctls[] = {
505 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
506 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
507 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
508 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
509 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
510 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
511 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
512 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
513 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
514 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
515 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
516 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
517 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
518 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
519 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
520};
521
522static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
844ae5b4 523#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 524
a355a61e
ACM
525#ifndef GRND_NONBLOCK
526#define GRND_NONBLOCK 0x0001
527#endif
528#ifndef GRND_RANDOM
529#define GRND_RANDOM 0x0002
530#endif
531
39878d49
ACM
532static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
533 struct syscall_arg *arg)
534{
535 int printed = 0, flags = arg->val;
536
537#define P_FLAG(n) \
538 if (flags & GRND_##n) { \
539 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
540 flags &= ~GRND_##n; \
541 }
542
543 P_FLAG(RANDOM);
544 P_FLAG(NONBLOCK);
545#undef P_FLAG
546
547 if (flags)
548 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
549
550 return printed;
551}
552
553#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
554
453350dd
ACM
555#define STRARRAY(arg, name, array) \
556 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
557 .arg_parm = { [arg] = &strarray__##array, }
558
ea8dc3ce 559#include "trace/beauty/eventfd.c"
8bf382ce 560#include "trace/beauty/flock.c"
d5d71e86 561#include "trace/beauty/futex_op.c"
df4cb167 562#include "trace/beauty/mmap.c"
ba2f22cf 563#include "trace/beauty/mode_t.c"
a30e6259 564#include "trace/beauty/msg_flags.c"
8f48df69 565#include "trace/beauty/open_flags.c"
62de344e 566#include "trace/beauty/perf_event_open.c"
d5d71e86 567#include "trace/beauty/pid.c"
a3bca91f 568#include "trace/beauty/sched_policy.c"
f5cd95ea 569#include "trace/beauty/seccomp.c"
12199d8e 570#include "trace/beauty/signum.c"
bbf86c43 571#include "trace/beauty/socket_type.c"
7206b900 572#include "trace/beauty/waitid_options.c"
a3bca91f 573
514f1c67
ACM
574static struct syscall_fmt {
575 const char *name;
aec1930b 576 const char *alias;
01533e97 577 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1f115cb7 578 void *arg_parm[6];
514f1c67 579 bool errmsg;
11c8e39f 580 bool errpid;
514f1c67 581 bool timeout;
04b34729 582 bool hexret;
514f1c67 583} syscall_fmts[] = {
51108999 584 { .name = "access", .errmsg = true,
12f3ca4f 585 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
aec1930b 586 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
729a7841 587 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
beccb2b5
ACM
588 { .name = "brk", .hexret = true,
589 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
12f3ca4f
ACM
590 { .name = "chdir", .errmsg = true, },
591 { .name = "chmod", .errmsg = true, },
592 { .name = "chroot", .errmsg = true, },
4f8c1b74 593 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
11c8e39f 594 { .name = "clone", .errpid = true, },
75b757ca 595 { .name = "close", .errmsg = true,
48000a1a 596 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
a14bb860 597 { .name = "connect", .errmsg = true, },
12f3ca4f 598 { .name = "creat", .errmsg = true, },
b6565c90
ACM
599 { .name = "dup", .errmsg = true, },
600 { .name = "dup2", .errmsg = true, },
601 { .name = "dup3", .errmsg = true, },
453350dd 602 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
49af9e93
ACM
603 { .name = "eventfd2", .errmsg = true,
604 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
12f3ca4f 605 { .name = "faccessat", .errmsg = true, },
b6565c90
ACM
606 { .name = "fadvise64", .errmsg = true, },
607 { .name = "fallocate", .errmsg = true, },
608 { .name = "fchdir", .errmsg = true, },
609 { .name = "fchmod", .errmsg = true, },
75b757ca 610 { .name = "fchmodat", .errmsg = true,
12f3ca4f 611 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90 612 { .name = "fchown", .errmsg = true, },
75b757ca 613 { .name = "fchownat", .errmsg = true,
12f3ca4f 614 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
75b757ca 615 { .name = "fcntl", .errmsg = true,
b6565c90 616 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
75b757ca 617 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
b6565c90 618 { .name = "fdatasync", .errmsg = true, },
5cea6ff2 619 { .name = "flock", .errmsg = true,
b6565c90
ACM
620 .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
621 { .name = "fsetxattr", .errmsg = true, },
622 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
12f3ca4f 623 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
b6565c90
ACM
624 { .name = "fstatfs", .errmsg = true, },
625 { .name = "fsync", .errmsg = true, },
626 { .name = "ftruncate", .errmsg = true, },
f9da0b0c
ACM
627 { .name = "futex", .errmsg = true,
628 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
75b757ca 629 { .name = "futimesat", .errmsg = true,
12f3ca4f 630 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90
ACM
631 { .name = "getdents", .errmsg = true, },
632 { .name = "getdents64", .errmsg = true, },
453350dd 633 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
c65f1070 634 { .name = "getpid", .errpid = true, },
d1d438a3 635 { .name = "getpgid", .errpid = true, },
c65f1070 636 { .name = "getppid", .errpid = true, },
39878d49
ACM
637 { .name = "getrandom", .errmsg = true,
638 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
453350dd 639 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f
ACM
640 { .name = "getxattr", .errmsg = true, },
641 { .name = "inotify_add_watch", .errmsg = true, },
beccb2b5 642 { .name = "ioctl", .errmsg = true,
b6565c90 643 .arg_scnprintf = {
844ae5b4
ACM
644#if defined(__i386__) || defined(__x86_64__)
645/*
646 * FIXME: Make this available to all arches.
647 */
78645cf3
ACM
648 [1] = SCA_STRHEXARRAY, /* cmd */
649 [2] = SCA_HEX, /* arg */ },
650 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
844ae5b4
ACM
651#else
652 [2] = SCA_HEX, /* arg */ }, },
653#endif
b62bee1b 654 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
8bad5b0a
ACM
655 { .name = "kill", .errmsg = true,
656 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f
ACM
657 { .name = "lchown", .errmsg = true, },
658 { .name = "lgetxattr", .errmsg = true, },
75b757ca 659 { .name = "linkat", .errmsg = true,
48000a1a 660 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
12f3ca4f
ACM
661 { .name = "listxattr", .errmsg = true, },
662 { .name = "llistxattr", .errmsg = true, },
663 { .name = "lremovexattr", .errmsg = true, },
75b757ca 664 { .name = "lseek", .errmsg = true,
b6565c90 665 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
75b757ca 666 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
12f3ca4f
ACM
667 { .name = "lsetxattr", .errmsg = true, },
668 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
669 { .name = "lsxattr", .errmsg = true, },
9e9716d1
ACM
670 { .name = "madvise", .errmsg = true,
671 .arg_scnprintf = { [0] = SCA_HEX, /* start */
672 [2] = SCA_MADV_BHV, /* behavior */ }, },
12f3ca4f 673 { .name = "mkdir", .errmsg = true, },
75b757ca 674 { .name = "mkdirat", .errmsg = true,
12f3ca4f
ACM
675 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
676 { .name = "mknod", .errmsg = true, },
75b757ca 677 { .name = "mknodat", .errmsg = true,
12f3ca4f 678 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
3d903aa7
ACM
679 { .name = "mlock", .errmsg = true,
680 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
681 { .name = "mlockall", .errmsg = true,
682 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5 683 { .name = "mmap", .hexret = true,
54265664
JO
684/* The standard mmap maps to old_mmap on s390x */
685#if defined(__s390x__)
686 .alias = "old_mmap",
687#endif
ae685380 688 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
941557e0 689 [2] = SCA_MMAP_PROT, /* prot */
b6565c90 690 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
beccb2b5 691 { .name = "mprotect", .errmsg = true,
ae685380
ACM
692 .arg_scnprintf = { [0] = SCA_HEX, /* start */
693 [2] = SCA_MMAP_PROT, /* prot */ }, },
090389b6
ACM
694 { .name = "mq_unlink", .errmsg = true,
695 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
ae685380
ACM
696 { .name = "mremap", .hexret = true,
697 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
86998dda 698 [3] = SCA_MREMAP_FLAGS, /* flags */
ae685380 699 [4] = SCA_HEX, /* new_addr */ }, },
3d903aa7
ACM
700 { .name = "munlock", .errmsg = true,
701 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5
ACM
702 { .name = "munmap", .errmsg = true,
703 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
75b757ca 704 { .name = "name_to_handle_at", .errmsg = true,
48000a1a 705 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
75b757ca 706 { .name = "newfstatat", .errmsg = true,
12f3ca4f 707 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
be65a89a 708 { .name = "open", .errmsg = true,
12f3ca4f 709 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 710 { .name = "open_by_handle_at", .errmsg = true,
75b757ca
ACM
711 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
712 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 713 { .name = "openat", .errmsg = true,
75b757ca
ACM
714 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
715 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
a1c2552d 716 { .name = "perf_event_open", .errmsg = true,
ccd9b2a7 717 .arg_scnprintf = { [2] = SCA_INT, /* cpu */
a1c2552d
ACM
718 [3] = SCA_FD, /* group_fd */
719 [4] = SCA_PERF_FLAGS, /* flags */ }, },
46cce19b
ACM
720 { .name = "pipe2", .errmsg = true,
721 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
aec1930b
ACM
722 { .name = "poll", .errmsg = true, .timeout = true, },
723 { .name = "ppoll", .errmsg = true, .timeout = true, },
b6565c90
ACM
724 { .name = "pread", .errmsg = true, .alias = "pread64", },
725 { .name = "preadv", .errmsg = true, .alias = "pread", },
453350dd 726 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
b6565c90
ACM
727 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
728 { .name = "pwritev", .errmsg = true, },
729 { .name = "read", .errmsg = true, },
12f3ca4f 730 { .name = "readlink", .errmsg = true, },
75b757ca 731 { .name = "readlinkat", .errmsg = true,
12f3ca4f 732 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
b6565c90 733 { .name = "readv", .errmsg = true, },
b2cc99fd 734 { .name = "recvfrom", .errmsg = true,
b6565c90 735 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 736 { .name = "recvmmsg", .errmsg = true,
b6565c90 737 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 738 { .name = "recvmsg", .errmsg = true,
b6565c90 739 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
12f3ca4f 740 { .name = "removexattr", .errmsg = true, },
75b757ca 741 { .name = "renameat", .errmsg = true,
48000a1a 742 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
12f3ca4f 743 { .name = "rmdir", .errmsg = true, },
8bad5b0a
ACM
744 { .name = "rt_sigaction", .errmsg = true,
745 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
453350dd 746 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
8bad5b0a
ACM
747 { .name = "rt_sigqueueinfo", .errmsg = true,
748 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
749 { .name = "rt_tgsigqueueinfo", .errmsg = true,
750 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
f0bbd602
ACM
751 { .name = "sched_getattr", .errmsg = true, },
752 { .name = "sched_setattr", .errmsg = true, },
a3bca91f
ACM
753 { .name = "sched_setscheduler", .errmsg = true,
754 .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
997bba8c
ACM
755 { .name = "seccomp", .errmsg = true,
756 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
757 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
aec1930b 758 { .name = "select", .errmsg = true, .timeout = true, },
b2cc99fd 759 { .name = "sendmmsg", .errmsg = true,
b6565c90 760 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 761 { .name = "sendmsg", .errmsg = true,
b6565c90 762 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 763 { .name = "sendto", .errmsg = true,
b6565c90 764 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
c65f1070 765 { .name = "set_tid_address", .errpid = true, },
453350dd 766 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
d1d438a3 767 { .name = "setpgid", .errmsg = true, },
453350dd 768 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f 769 { .name = "setxattr", .errmsg = true, },
b6565c90 770 { .name = "shutdown", .errmsg = true, },
e10bce81 771 { .name = "socket", .errmsg = true,
a28b24b2
ACM
772 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
773 [1] = SCA_SK_TYPE, /* type */ },
07120aa5
ACM
774 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
775 { .name = "socketpair", .errmsg = true,
776 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
777 [1] = SCA_SK_TYPE, /* type */ },
e10bce81 778 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
12f3ca4f
ACM
779 { .name = "stat", .errmsg = true, .alias = "newstat", },
780 { .name = "statfs", .errmsg = true, },
fd5cead2
ACM
781 { .name = "statx", .errmsg = true,
782 .arg_scnprintf = { [0] = SCA_FDAT, /* flags */
783 [2] = SCA_STATX_FLAGS, /* flags */
784 [3] = SCA_STATX_MASK, /* mask */ }, },
34221118
ACM
785 { .name = "swapoff", .errmsg = true,
786 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
787 { .name = "swapon", .errmsg = true,
788 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
75b757ca 789 { .name = "symlinkat", .errmsg = true,
48000a1a 790 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
8bad5b0a
ACM
791 { .name = "tgkill", .errmsg = true,
792 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
793 { .name = "tkill", .errmsg = true,
794 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f 795 { .name = "truncate", .errmsg = true, },
e5959683 796 { .name = "uname", .errmsg = true, .alias = "newuname", },
75b757ca 797 { .name = "unlinkat", .errmsg = true,
12f3ca4f
ACM
798 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
799 { .name = "utime", .errmsg = true, },
75b757ca 800 { .name = "utimensat", .errmsg = true,
12f3ca4f
ACM
801 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
802 { .name = "utimes", .errmsg = true, },
b6565c90 803 { .name = "vmsplice", .errmsg = true, },
11c8e39f 804 { .name = "wait4", .errpid = true,
7206b900 805 .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
11c8e39f 806 { .name = "waitid", .errpid = true,
7206b900 807 .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
b6565c90
ACM
808 { .name = "write", .errmsg = true, },
809 { .name = "writev", .errmsg = true, },
514f1c67
ACM
810};
811
812static int syscall_fmt__cmp(const void *name, const void *fmtp)
813{
814 const struct syscall_fmt *fmt = fmtp;
815 return strcmp(name, fmt->name);
816}
817
818static struct syscall_fmt *syscall_fmt__find(const char *name)
819{
820 const int nmemb = ARRAY_SIZE(syscall_fmts);
821 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
822}
823
/*
 * Description of one syscall.
 *
 * NOTE(review): the code filling this in is outside this part of the file;
 * the notes below follow from the member types and names only.
 *
 * @fmt:           formatting hints, a struct syscall_fmt (see syscall_fmts).
 * @arg_scnprintf: array of per-argument beautifiers, same signature as the
 *                 ones in struct syscall_fmt.
 * @arg_parm:      array of per-argument beautifier parameters.
 */
struct syscall {
	struct event_format *tp_format;
	int		    nr_args;
	struct format_field *args;
	const char	    *name;
	bool		    is_exit;
	struct syscall_fmt  *fmt;
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void		    **arg_parm;
};
834
fd2b2975
ACM
835/*
836 * We need to have this 'calculated' boolean because in some cases we really
837 * don't know what is the duration of a syscall, for instance, when we start
838 * a session and some threads are waiting for a syscall to finish, say 'poll',
839 * in which case all we can do is to print "( ? ) for duration and for the
840 * start timestamp.
841 */
842static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
60c907ab
ACM
843{
844 double duration = (double)t / NSEC_PER_MSEC;
845 size_t printed = fprintf(fp, "(");
846
fd2b2975
ACM
847 if (!calculated)
848 printed += fprintf(fp, " ? ");
849 else if (duration >= 1.0)
60c907ab
ACM
850 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
851 else if (duration >= 0.01)
852 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
853 else
854 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
c24ff998 855 return printed + fprintf(fp, "): ");
60c907ab
ACM
856}
857
f994592d
ACM
858/**
859 * filename.ptr: The filename char pointer that will be vfs_getname'd
860 * filename.entry_str_pos: Where to insert the string translated from
861 * filename.ptr by the vfs_getname tracepoint/kprobe.
862 */
752fde44
ACM
863struct thread_trace {
864 u64 entry_time;
752fde44 865 bool entry_pending;
efd5745e 866 unsigned long nr_events;
a2ea67d7 867 unsigned long pfmaj, pfmin;
752fde44 868 char *entry_str;
1302d88e 869 double runtime_ms;
f994592d
ACM
870 struct {
871 unsigned long ptr;
7f4f8001
ACM
872 short int entry_str_pos;
873 bool pending_open;
874 unsigned int namelen;
875 char *name;
f994592d 876 } filename;
75b757ca
ACM
877 struct {
878 int max;
879 char **table;
880 } paths;
bf2575c1
DA
881
882 struct intlist *syscall_stats;
752fde44
ACM
883};
884
885static struct thread_trace *thread_trace__new(void)
886{
75b757ca
ACM
887 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
888
889 if (ttrace)
890 ttrace->paths.max = -1;
891
bf2575c1
DA
892 ttrace->syscall_stats = intlist__new(NULL);
893
75b757ca 894 return ttrace;
752fde44
ACM
895}
896
c24ff998 897static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752fde44 898{
efd5745e
ACM
899 struct thread_trace *ttrace;
900
752fde44
ACM
901 if (thread == NULL)
902 goto fail;
903
89dceb22
NK
904 if (thread__priv(thread) == NULL)
905 thread__set_priv(thread, thread_trace__new());
48000a1a 906
89dceb22 907 if (thread__priv(thread) == NULL)
752fde44
ACM
908 goto fail;
909
89dceb22 910 ttrace = thread__priv(thread);
efd5745e
ACM
911 ++ttrace->nr_events;
912
913 return ttrace;
752fde44 914fail:
c24ff998 915 color_fprintf(fp, PERF_COLOR_RED,
752fde44
ACM
916 "WARNING: not enough memory, dropping samples!\n");
917 return NULL;
918}
919
598d02c5
SF
/*
 * Bit flags; NOTE(review): by their names these select major/minor page
 * fault tracing, but their users are outside this section.
 */
#define TRACE_PFMAJ		0x1
#define TRACE_PFMIN		0x2

/* Size in bytes of the per-thread buffer a sys_enter line is formatted into. */
static const size_t trace__entry_str_size = 2 * 1024;
924
97119f37 925static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
75b757ca 926{
89dceb22 927 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
928
929 if (fd > ttrace->paths.max) {
930 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
931
932 if (npath == NULL)
933 return -1;
934
935 if (ttrace->paths.max != -1) {
936 memset(npath + ttrace->paths.max + 1, 0,
937 (fd - ttrace->paths.max) * sizeof(char *));
938 } else {
939 memset(npath, 0, (fd + 1) * sizeof(char *));
940 }
941
942 ttrace->paths.table = npath;
943 ttrace->paths.max = fd;
944 }
945
946 ttrace->paths.table[fd] = strdup(pathname);
947
948 return ttrace->paths.table[fd] != NULL ? 0 : -1;
949}
950
97119f37
ACM
951static int thread__read_fd_path(struct thread *thread, int fd)
952{
953 char linkname[PATH_MAX], pathname[PATH_MAX];
954 struct stat st;
955 int ret;
956
957 if (thread->pid_ == thread->tid) {
958 scnprintf(linkname, sizeof(linkname),
959 "/proc/%d/fd/%d", thread->pid_, fd);
960 } else {
961 scnprintf(linkname, sizeof(linkname),
962 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
963 }
964
965 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
966 return -1;
967
968 ret = readlink(linkname, pathname, sizeof(pathname));
969
970 if (ret < 0 || ret > st.st_size)
971 return -1;
972
973 pathname[ret] = '\0';
974 return trace__set_fd_pathname(thread, fd, pathname);
975}
976
c522739d
ACM
977static const char *thread__fd_path(struct thread *thread, int fd,
978 struct trace *trace)
75b757ca 979{
89dceb22 980 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
981
982 if (ttrace == NULL)
983 return NULL;
984
985 if (fd < 0)
986 return NULL;
987
cdcd1e6b 988 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
c522739d
ACM
989 if (!trace->live)
990 return NULL;
991 ++trace->stats.proc_getname;
cdcd1e6b 992 if (thread__read_fd_path(thread, fd))
c522739d
ACM
993 return NULL;
994 }
75b757ca
ACM
995
996 return ttrace->paths.table[fd];
997}
998
999static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1000 struct syscall_arg *arg)
1001{
1002 int fd = arg->val;
1003 size_t printed = scnprintf(bf, size, "%d", fd);
c522739d 1004 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
75b757ca
ACM
1005
1006 if (path)
1007 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1008
1009 return printed;
1010}
1011
1012static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1013 struct syscall_arg *arg)
1014{
1015 int fd = arg->val;
1016 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
89dceb22 1017 struct thread_trace *ttrace = thread__priv(arg->thread);
75b757ca 1018
04662523
ACM
1019 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1020 zfree(&ttrace->paths.table[fd]);
75b757ca
ACM
1021
1022 return printed;
1023}
1024
f994592d
ACM
1025static void thread__set_filename_pos(struct thread *thread, const char *bf,
1026 unsigned long ptr)
1027{
1028 struct thread_trace *ttrace = thread__priv(thread);
1029
1030 ttrace->filename.ptr = ptr;
1031 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1032}
1033
1034static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1035 struct syscall_arg *arg)
1036{
1037 unsigned long ptr = arg->val;
1038
1039 if (!arg->trace->vfs_getname)
1040 return scnprintf(bf, size, "%#x", ptr);
1041
1042 thread__set_filename_pos(arg->thread, bf, ptr);
1043 return 0;
1044}
1045
ae9ed035
ACM
1046static bool trace__filter_duration(struct trace *trace, double t)
1047{
1048 return t < (trace->duration_filter * NSEC_PER_MSEC);
1049}
1050
fd2b2975 1051static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
752fde44
ACM
1052{
1053 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1054
60c907ab 1055 return fprintf(fp, "%10.3f ", ts);
752fde44
ACM
1056}
1057
fd2b2975
ACM
1058/*
1059 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1060 * using ttrace->entry_time for a thread that receives a sys_exit without
1061 * first having received a sys_enter ("poll" issued before tracing session
1062 * starts, lost sys_enter exit due to ring buffer overflow).
1063 */
1064static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1065{
1066 if (tstamp > 0)
1067 return __trace__fprintf_tstamp(trace, tstamp, fp);
1068
1069 return fprintf(fp, " ? ");
1070}
1071
/*
 * Flags written from sig_handler(). They are volatile sig_atomic_t because
 * that is the object type C guarantees can be safely assigned from an
 * asynchronous signal handler and observed by the interrupted code.
 *
 * done:        a handled signal arrived, the session should wind down
 * interrupted: the most recent handled signal was SIGINT
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/* Signal handler: only records what happened, all work is left to the main flow. */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1080
752fde44 1081static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
fd2b2975 1082 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
752fde44
ACM
1083{
1084 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
fd2b2975 1085 printed += fprintf_duration(duration, duration_calculated, fp);
752fde44 1086
50c95cbd
ACM
1087 if (trace->multiple_threads) {
1088 if (trace->show_comm)
1902efe7 1089 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
38051234 1090 printed += fprintf(fp, "%d ", thread->tid);
50c95cbd 1091 }
752fde44
ACM
1092
1093 return printed;
1094}
1095
c24ff998 1096static int trace__process_event(struct trace *trace, struct machine *machine,
162f0bef 1097 union perf_event *event, struct perf_sample *sample)
752fde44
ACM
1098{
1099 int ret = 0;
1100
1101 switch (event->header.type) {
1102 case PERF_RECORD_LOST:
c24ff998 1103 color_fprintf(trace->output, PERF_COLOR_RED,
752fde44 1104 "LOST %" PRIu64 " events!\n", event->lost.lost);
162f0bef 1105 ret = machine__process_lost_event(machine, event, sample);
3ed5ca2e 1106 break;
752fde44 1107 default:
162f0bef 1108 ret = machine__process_event(machine, event, sample);
752fde44
ACM
1109 break;
1110 }
1111
1112 return ret;
1113}
1114
c24ff998 1115static int trace__tool_process(struct perf_tool *tool,
752fde44 1116 union perf_event *event,
162f0bef 1117 struct perf_sample *sample,
752fde44
ACM
1118 struct machine *machine)
1119{
c24ff998 1120 struct trace *trace = container_of(tool, struct trace, tool);
162f0bef 1121 return trace__process_event(trace, machine, event, sample);
752fde44
ACM
1122}
1123
caf8a0d0
ACM
1124static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1125{
1126 struct machine *machine = vmachine;
1127
1128 if (machine->kptr_restrict_warned)
1129 return NULL;
1130
1131 if (symbol_conf.kptr_restrict) {
1132 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1133 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1134 "Kernel samples will not be resolved.\n");
1135 machine->kptr_restrict_warned = true;
1136 return NULL;
1137 }
1138
1139 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1140}
1141
752fde44
ACM
1142static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1143{
0a7e6d1b 1144 int err = symbol__init(NULL);
752fde44
ACM
1145
1146 if (err)
1147 return err;
1148
8fb598e5
DA
1149 trace->host = machine__new_host();
1150 if (trace->host == NULL)
1151 return -ENOMEM;
752fde44 1152
caf8a0d0 1153 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
706c3da4
ACM
1154 return -errno;
1155
a33fbd56 1156 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
9d9cad76
KL
1157 evlist->threads, trace__tool_process, false,
1158 trace->opts.proc_map_timeout);
752fde44
ACM
1159 if (err)
1160 symbol__exit();
1161
1162 return err;
1163}
1164
13d4ff3e
ACM
1165static int syscall__set_arg_fmts(struct syscall *sc)
1166{
1167 struct format_field *field;
b6565c90 1168 int idx = 0, len;
13d4ff3e 1169
f208bd8d 1170 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
13d4ff3e
ACM
1171 if (sc->arg_scnprintf == NULL)
1172 return -1;
1173
1f115cb7
ACM
1174 if (sc->fmt)
1175 sc->arg_parm = sc->fmt->arg_parm;
1176
f208bd8d 1177 for (field = sc->args; field; field = field->next) {
beccb2b5
ACM
1178 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1179 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
12f3ca4f
ACM
1180 else if (strcmp(field->type, "const char *") == 0 &&
1181 (strcmp(field->name, "filename") == 0 ||
1182 strcmp(field->name, "path") == 0 ||
1183 strcmp(field->name, "pathname") == 0))
1184 sc->arg_scnprintf[idx] = SCA_FILENAME;
beccb2b5 1185 else if (field->flags & FIELD_IS_POINTER)
13d4ff3e 1186 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
d1d438a3
ACM
1187 else if (strcmp(field->type, "pid_t") == 0)
1188 sc->arg_scnprintf[idx] = SCA_PID;
ba2f22cf
ACM
1189 else if (strcmp(field->type, "umode_t") == 0)
1190 sc->arg_scnprintf[idx] = SCA_MODE_T;
b6565c90
ACM
1191 else if ((strcmp(field->type, "int") == 0 ||
1192 strcmp(field->type, "unsigned int") == 0 ||
1193 strcmp(field->type, "long") == 0) &&
1194 (len = strlen(field->name)) >= 2 &&
1195 strcmp(field->name + len - 2, "fd") == 0) {
1196 /*
1197 * /sys/kernel/tracing/events/syscalls/sys_enter*
1198 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1199 * 65 int
1200 * 23 unsigned int
1201 * 7 unsigned long
1202 */
1203 sc->arg_scnprintf[idx] = SCA_FD;
1204 }
13d4ff3e
ACM
1205 ++idx;
1206 }
1207
1208 return 0;
1209}
1210
514f1c67
ACM
1211static int trace__read_syscall_info(struct trace *trace, int id)
1212{
1213 char tp_name[128];
1214 struct syscall *sc;
fd0db102 1215 const char *name = syscalltbl__name(trace->sctbl, id);
3a531260
ACM
1216
1217 if (name == NULL)
1218 return -1;
514f1c67
ACM
1219
1220 if (id > trace->syscalls.max) {
1221 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1222
1223 if (nsyscalls == NULL)
1224 return -1;
1225
1226 if (trace->syscalls.max != -1) {
1227 memset(nsyscalls + trace->syscalls.max + 1, 0,
1228 (id - trace->syscalls.max) * sizeof(*sc));
1229 } else {
1230 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1231 }
1232
1233 trace->syscalls.table = nsyscalls;
1234 trace->syscalls.max = id;
1235 }
1236
1237 sc = trace->syscalls.table + id;
3a531260 1238 sc->name = name;
2ae3a312 1239
3a531260 1240 sc->fmt = syscall_fmt__find(sc->name);
514f1c67 1241
aec1930b 1242 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
97978b3e 1243 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1244
8dd2a131 1245 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
aec1930b 1246 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
97978b3e 1247 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1248 }
514f1c67 1249
8dd2a131 1250 if (IS_ERR(sc->tp_format))
13d4ff3e
ACM
1251 return -1;
1252
f208bd8d
ACM
1253 sc->args = sc->tp_format->format.fields;
1254 sc->nr_args = sc->tp_format->format.nr_fields;
c42de706
TS
1255 /*
1256 * We need to check and discard the first variable '__syscall_nr'
1257 * or 'nr' that mean the syscall number. It is needless here.
1258 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1259 */
1260 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
f208bd8d
ACM
1261 sc->args = sc->args->next;
1262 --sc->nr_args;
1263 }
1264
5089f20e
ACM
1265 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1266
13d4ff3e 1267 return syscall__set_arg_fmts(sc);
514f1c67
ACM
1268}
1269
d0cc439b
ACM
1270static int trace__validate_ev_qualifier(struct trace *trace)
1271{
8b3ce757 1272 int err = 0, i;
d0cc439b
ACM
1273 struct str_node *pos;
1274
8b3ce757
ACM
1275 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1276 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1277 sizeof(trace->ev_qualifier_ids.entries[0]));
1278
1279 if (trace->ev_qualifier_ids.entries == NULL) {
1280 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1281 trace->output);
1282 err = -EINVAL;
1283 goto out;
1284 }
1285
1286 i = 0;
1287
602a1f4d 1288 strlist__for_each_entry(pos, trace->ev_qualifier) {
d0cc439b 1289 const char *sc = pos->s;
fd0db102 1290 int id = syscalltbl__id(trace->sctbl, sc);
d0cc439b 1291
8b3ce757 1292 if (id < 0) {
d0cc439b
ACM
1293 if (err == 0) {
1294 fputs("Error:\tInvalid syscall ", trace->output);
1295 err = -EINVAL;
1296 } else {
1297 fputs(", ", trace->output);
1298 }
1299
1300 fputs(sc, trace->output);
1301 }
8b3ce757
ACM
1302
1303 trace->ev_qualifier_ids.entries[i++] = id;
d0cc439b
ACM
1304 }
1305
1306 if (err < 0) {
1307 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1308 "\nHint:\tand: 'man syscalls'\n", trace->output);
8b3ce757
ACM
1309 zfree(&trace->ev_qualifier_ids.entries);
1310 trace->ev_qualifier_ids.nr = 0;
d0cc439b 1311 }
8b3ce757 1312out:
d0cc439b
ACM
1313 return err;
1314}
1315
55d43bca
DA
1316/*
1317 * args is to be interpreted as a series of longs but we need to handle
1318 * 8-byte unaligned accesses. args points to raw_data within the event
1319 * and raw_data is guaranteed to be 8-byte unaligned because it is
1320 * preceded by raw_size which is a u32. So we need to copy args to a temp
1321 * variable to read it. Most notably this avoids extended load instructions
1322 * on unaligned addresses
1323 */
1324
752fde44 1325static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
55d43bca 1326 unsigned char *args, struct trace *trace,
75b757ca 1327 struct thread *thread)
514f1c67 1328{
514f1c67 1329 size_t printed = 0;
55d43bca
DA
1330 unsigned char *p;
1331 unsigned long val;
514f1c67 1332
f208bd8d 1333 if (sc->args != NULL) {
514f1c67 1334 struct format_field *field;
01533e97
ACM
1335 u8 bit = 1;
1336 struct syscall_arg arg = {
75b757ca
ACM
1337 .idx = 0,
1338 .mask = 0,
1339 .trace = trace,
1340 .thread = thread,
01533e97 1341 };
6e7eeb51 1342
f208bd8d 1343 for (field = sc->args; field;
01533e97
ACM
1344 field = field->next, ++arg.idx, bit <<= 1) {
1345 if (arg.mask & bit)
6e7eeb51 1346 continue;
55d43bca
DA
1347
1348 /* special care for unaligned accesses */
1349 p = args + sizeof(unsigned long) * arg.idx;
1350 memcpy(&val, p, sizeof(val));
1351
4aa58232
ACM
1352 /*
1353 * Suppress this argument if its value is zero and
1354 * and we don't have a string associated in an
1355 * strarray for it.
1356 */
55d43bca 1357 if (val == 0 &&
4aa58232
ACM
1358 !(sc->arg_scnprintf &&
1359 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1360 sc->arg_parm[arg.idx]))
22ae5cf1
ACM
1361 continue;
1362
752fde44 1363 printed += scnprintf(bf + printed, size - printed,
13d4ff3e 1364 "%s%s: ", printed ? ", " : "", field->name);
01533e97 1365 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
55d43bca 1366 arg.val = val;
1f115cb7
ACM
1367 if (sc->arg_parm)
1368 arg.parm = sc->arg_parm[arg.idx];
01533e97
ACM
1369 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1370 size - printed, &arg);
6e7eeb51 1371 } else {
13d4ff3e 1372 printed += scnprintf(bf + printed, size - printed,
55d43bca 1373 "%ld", val);
6e7eeb51 1374 }
514f1c67 1375 }
4c4d6e51
ACM
1376 } else if (IS_ERR(sc->tp_format)) {
1377 /*
1378 * If we managed to read the tracepoint /format file, then we
1379 * may end up not having any args, like with gettid(), so only
1380 * print the raw args when we didn't manage to read it.
1381 */
01533e97
ACM
1382 int i = 0;
1383
514f1c67 1384 while (i < 6) {
55d43bca
DA
1385 /* special care for unaligned accesses */
1386 p = args + sizeof(unsigned long) * i;
1387 memcpy(&val, p, sizeof(val));
752fde44
ACM
1388 printed += scnprintf(bf + printed, size - printed,
1389 "%sarg%d: %ld",
55d43bca 1390 printed ? ", " : "", i, val);
514f1c67
ACM
1391 ++i;
1392 }
1393 }
1394
1395 return printed;
1396}
1397
/*
 * Signature shared by the per-tracepoint sample handlers in this file
 * (trace__sys_enter(), trace__sys_exit(), trace__vfs_getname(), ...).
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1401
1402static struct syscall *trace__syscall_info(struct trace *trace,
bf2575c1 1403 struct perf_evsel *evsel, int id)
ba3d7dee 1404{
ba3d7dee
ACM
1405
1406 if (id < 0) {
adaa18bf
ACM
1407
1408 /*
1409 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1410 * before that, leaving at a higher verbosity level till that is
1411 * explained. Reproduced with plain ftrace with:
1412 *
1413 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1414 * grep "NR -1 " /t/trace_pipe
1415 *
1416 * After generating some load on the machine.
1417 */
1418 if (verbose > 1) {
1419 static u64 n;
1420 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1421 id, perf_evsel__name(evsel), ++n);
1422 }
ba3d7dee
ACM
1423 return NULL;
1424 }
1425
1426 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1427 trace__read_syscall_info(trace, id))
1428 goto out_cant_read;
1429
1430 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1431 goto out_cant_read;
1432
1433 return &trace->syscalls.table[id];
1434
1435out_cant_read:
bb963e16 1436 if (verbose > 0) {
7c304ee0
ACM
1437 fprintf(trace->output, "Problems reading syscall %d", id);
1438 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1439 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1440 fputs(" information\n", trace->output);
1441 }
ba3d7dee
ACM
1442 return NULL;
1443}
1444
bf2575c1
DA
1445static void thread__update_stats(struct thread_trace *ttrace,
1446 int id, struct perf_sample *sample)
1447{
1448 struct int_node *inode;
1449 struct stats *stats;
1450 u64 duration = 0;
1451
1452 inode = intlist__findnew(ttrace->syscall_stats, id);
1453 if (inode == NULL)
1454 return;
1455
1456 stats = inode->priv;
1457 if (stats == NULL) {
1458 stats = malloc(sizeof(struct stats));
1459 if (stats == NULL)
1460 return;
1461 init_stats(stats);
1462 inode->priv = stats;
1463 }
1464
1465 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1466 duration = sample->time - ttrace->entry_time;
1467
1468 update_stats(stats, duration);
1469}
1470
e596663e
ACM
1471static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1472{
1473 struct thread_trace *ttrace;
1474 u64 duration;
1475 size_t printed;
1476
1477 if (trace->current == NULL)
1478 return 0;
1479
1480 ttrace = thread__priv(trace->current);
1481
1482 if (!ttrace->entry_pending)
1483 return 0;
1484
1485 duration = sample->time - ttrace->entry_time;
1486
fd2b2975 1487 printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
e596663e
ACM
1488 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1489 ttrace->entry_pending = false;
1490
1491 return printed;
1492}
1493
ba3d7dee 1494static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1495 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1496 struct perf_sample *sample)
1497{
752fde44 1498 char *msg;
ba3d7dee 1499 void *args;
752fde44 1500 size_t printed = 0;
2ae3a312 1501 struct thread *thread;
b91fc39f 1502 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
bf2575c1 1503 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1504 struct thread_trace *ttrace;
1505
1506 if (sc == NULL)
1507 return -1;
ba3d7dee 1508
8fb598e5 1509 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1510 ttrace = thread__trace(thread, trace->output);
2ae3a312 1511 if (ttrace == NULL)
b91fc39f 1512 goto out_put;
ba3d7dee 1513
77170988 1514 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
752fde44
ACM
1515
1516 if (ttrace->entry_str == NULL) {
e4d44e83 1517 ttrace->entry_str = malloc(trace__entry_str_size);
752fde44 1518 if (!ttrace->entry_str)
b91fc39f 1519 goto out_put;
752fde44
ACM
1520 }
1521
5cf9c84e 1522 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
6ebad5c1 1523 trace__printf_interrupted_entry(trace, sample);
e596663e 1524
752fde44
ACM
1525 ttrace->entry_time = sample->time;
1526 msg = ttrace->entry_str;
e4d44e83 1527 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
752fde44 1528
e4d44e83 1529 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
75b757ca 1530 args, trace, thread);
752fde44 1531
5089f20e 1532 if (sc->is_exit) {
5cf9c84e 1533 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
fd2b2975 1534 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
c008f78f 1535 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
ae9ed035 1536 }
7f4f8001 1537 } else {
752fde44 1538 ttrace->entry_pending = true;
7f4f8001
ACM
1539 /* See trace__vfs_getname & trace__sys_exit */
1540 ttrace->filename.pending_open = false;
1541 }
ba3d7dee 1542
f3b623b8
ACM
1543 if (trace->current != thread) {
1544 thread__put(trace->current);
1545 trace->current = thread__get(thread);
1546 }
b91fc39f
ACM
1547 err = 0;
1548out_put:
1549 thread__put(thread);
1550 return err;
ba3d7dee
ACM
1551}
1552
5cf9c84e
ACM
1553static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1554 struct perf_sample *sample,
1555 struct callchain_cursor *cursor)
202ff968
ACM
1556{
1557 struct addr_location al;
5cf9c84e
ACM
1558
1559 if (machine__resolve(trace->host, &al, sample) < 0 ||
1560 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1561 return -1;
1562
1563 return 0;
1564}
1565
1566static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1567{
202ff968 1568 /* TODO: user-configurable print_opts */
e20ab86e
ACM
1569 const unsigned int print_opts = EVSEL__PRINT_SYM |
1570 EVSEL__PRINT_DSO |
1571 EVSEL__PRINT_UNKNOWN_AS_ADDR;
202ff968 1572
d327e60c 1573 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
202ff968
ACM
1574}
1575
ba3d7dee 1576static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1577 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1578 struct perf_sample *sample)
1579{
2c82c3ad 1580 long ret;
60c907ab 1581 u64 duration = 0;
fd2b2975 1582 bool duration_calculated = false;
2ae3a312 1583 struct thread *thread;
5cf9c84e 1584 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
bf2575c1 1585 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1586 struct thread_trace *ttrace;
1587
1588 if (sc == NULL)
1589 return -1;
ba3d7dee 1590
8fb598e5 1591 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1592 ttrace = thread__trace(thread, trace->output);
2ae3a312 1593 if (ttrace == NULL)
b91fc39f 1594 goto out_put;
ba3d7dee 1595
bf2575c1
DA
1596 if (trace->summary)
1597 thread__update_stats(ttrace, id, sample);
1598
77170988 1599 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
ba3d7dee 1600
fd0db102 1601 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
7f4f8001
ACM
1602 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1603 ttrace->filename.pending_open = false;
c522739d
ACM
1604 ++trace->stats.vfs_getname;
1605 }
1606
ae9ed035 1607 if (ttrace->entry_time) {
60c907ab 1608 duration = sample->time - ttrace->entry_time;
ae9ed035
ACM
1609 if (trace__filter_duration(trace, duration))
1610 goto out;
fd2b2975 1611 duration_calculated = true;
ae9ed035
ACM
1612 } else if (trace->duration_filter)
1613 goto out;
60c907ab 1614
5cf9c84e
ACM
1615 if (sample->callchain) {
1616 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1617 if (callchain_ret == 0) {
1618 if (callchain_cursor.nr < trace->min_stack)
1619 goto out;
1620 callchain_ret = 1;
1621 }
1622 }
1623
fd2eabaf
DA
1624 if (trace->summary_only)
1625 goto out;
1626
fd2b2975 1627 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
752fde44
ACM
1628
1629 if (ttrace->entry_pending) {
c24ff998 1630 fprintf(trace->output, "%-70s", ttrace->entry_str);
752fde44 1631 } else {
c24ff998
ACM
1632 fprintf(trace->output, " ... [");
1633 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1634 fprintf(trace->output, "]: %s()", sc->name);
752fde44
ACM
1635 }
1636
da3c9a44
ACM
1637 if (sc->fmt == NULL) {
1638signed_print:
2c82c3ad 1639 fprintf(trace->output, ") = %ld", ret);
11c8e39f 1640 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
942a91ed 1641 char bf[STRERR_BUFSIZE];
c8b5f2c9 1642 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
ba3d7dee
ACM
1643 *e = audit_errno_to_name(-ret);
1644
c24ff998 1645 fprintf(trace->output, ") = -1 %s %s", e, emsg);
da3c9a44 1646 } else if (ret == 0 && sc->fmt->timeout)
c24ff998 1647 fprintf(trace->output, ") = 0 Timeout");
04b34729 1648 else if (sc->fmt->hexret)
2c82c3ad 1649 fprintf(trace->output, ") = %#lx", ret);
11c8e39f
ACM
1650 else if (sc->fmt->errpid) {
1651 struct thread *child = machine__find_thread(trace->host, ret, ret);
1652
1653 if (child != NULL) {
1654 fprintf(trace->output, ") = %ld", ret);
1655 if (child->comm_set)
1656 fprintf(trace->output, " (%s)", thread__comm_str(child));
1657 thread__put(child);
1658 }
1659 } else
da3c9a44 1660 goto signed_print;
ba3d7dee 1661
c24ff998 1662 fputc('\n', trace->output);
566a0885 1663
5cf9c84e
ACM
1664 if (callchain_ret > 0)
1665 trace__fprintf_callchain(trace, sample);
1666 else if (callchain_ret < 0)
1667 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
ae9ed035 1668out:
752fde44 1669 ttrace->entry_pending = false;
b91fc39f
ACM
1670 err = 0;
1671out_put:
1672 thread__put(thread);
1673 return err;
ba3d7dee
ACM
1674}
1675
c522739d 1676static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1677 union perf_event *event __maybe_unused,
c522739d
ACM
1678 struct perf_sample *sample)
1679{
f994592d
ACM
1680 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1681 struct thread_trace *ttrace;
1682 size_t filename_len, entry_str_len, to_move;
1683 ssize_t remaining_space;
1684 char *pos;
7f4f8001 1685 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
f994592d
ACM
1686
1687 if (!thread)
1688 goto out;
1689
1690 ttrace = thread__priv(thread);
1691 if (!ttrace)
ef65e96e 1692 goto out_put;
f994592d 1693
7f4f8001 1694 filename_len = strlen(filename);
39f0e7a8 1695 if (filename_len == 0)
ef65e96e 1696 goto out_put;
7f4f8001
ACM
1697
1698 if (ttrace->filename.namelen < filename_len) {
1699 char *f = realloc(ttrace->filename.name, filename_len + 1);
1700
1701 if (f == NULL)
ef65e96e 1702 goto out_put;
7f4f8001
ACM
1703
1704 ttrace->filename.namelen = filename_len;
1705 ttrace->filename.name = f;
1706 }
1707
1708 strcpy(ttrace->filename.name, filename);
1709 ttrace->filename.pending_open = true;
1710
f994592d 1711 if (!ttrace->filename.ptr)
ef65e96e 1712 goto out_put;
f994592d
ACM
1713
1714 entry_str_len = strlen(ttrace->entry_str);
1715 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1716 if (remaining_space <= 0)
ef65e96e 1717 goto out_put;
f994592d 1718
f994592d
ACM
1719 if (filename_len > (size_t)remaining_space) {
1720 filename += filename_len - remaining_space;
1721 filename_len = remaining_space;
1722 }
1723
1724 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1725 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1726 memmove(pos + filename_len, pos, to_move);
1727 memcpy(pos, filename, filename_len);
1728
1729 ttrace->filename.ptr = 0;
1730 ttrace->filename.entry_str_pos = 0;
ef65e96e
ACM
1731out_put:
1732 thread__put(thread);
f994592d 1733out:
c522739d
ACM
1734 return 0;
1735}
1736
1302d88e 1737static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1738 union perf_event *event __maybe_unused,
1302d88e
ACM
1739 struct perf_sample *sample)
1740{
1741 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1742 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
8fb598e5 1743 struct thread *thread = machine__findnew_thread(trace->host,
314add6b
AH
1744 sample->pid,
1745 sample->tid);
c24ff998 1746 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1302d88e
ACM
1747
1748 if (ttrace == NULL)
1749 goto out_dump;
1750
1751 ttrace->runtime_ms += runtime_ms;
1752 trace->runtime_ms += runtime_ms;
ef65e96e 1753out_put:
b91fc39f 1754 thread__put(thread);
1302d88e
ACM
1755 return 0;
1756
1757out_dump:
c24ff998 1758 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1302d88e
ACM
1759 evsel->name,
1760 perf_evsel__strval(evsel, sample, "comm"),
1761 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1762 runtime,
1763 perf_evsel__intval(evsel, sample, "vruntime"));
ef65e96e 1764 goto out_put;
1302d88e
ACM
1765}
1766
1d6c9407
WN
1767static void bpf_output__printer(enum binary_printer_ops op,
1768 unsigned int val, void *extra)
1769{
1770 FILE *output = extra;
1771 unsigned char ch = (unsigned char)val;
1772
1773 switch (op) {
1774 case BINARY_PRINT_CHAR_DATA:
1775 fprintf(output, "%c", isprint(ch) ? ch : '.');
1776 break;
1777 case BINARY_PRINT_DATA_BEGIN:
1778 case BINARY_PRINT_LINE_BEGIN:
1779 case BINARY_PRINT_ADDR:
1780 case BINARY_PRINT_NUM_DATA:
1781 case BINARY_PRINT_NUM_PAD:
1782 case BINARY_PRINT_SEP:
1783 case BINARY_PRINT_CHAR_PAD:
1784 case BINARY_PRINT_LINE_END:
1785 case BINARY_PRINT_DATA_END:
1786 default:
1787 break;
1788 }
1789}
1790
1791static void bpf_output__fprintf(struct trace *trace,
1792 struct perf_sample *sample)
1793{
1794 print_binary(sample->raw_data, sample->raw_size, 8,
1795 bpf_output__printer, trace->output);
1796}
1797
14a052df
ACM
1798static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1799 union perf_event *event __maybe_unused,
1800 struct perf_sample *sample)
1801{
7ad35615
ACM
1802 int callchain_ret = 0;
1803
1804 if (sample->callchain) {
1805 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1806 if (callchain_ret == 0) {
1807 if (callchain_cursor.nr < trace->min_stack)
1808 goto out;
1809 callchain_ret = 1;
1810 }
1811 }
1812
14a052df
ACM
1813 trace__printf_interrupted_entry(trace, sample);
1814 trace__fprintf_tstamp(trace, sample->time, trace->output);
0808921a
ACM
1815
1816 if (trace->trace_syscalls)
1817 fprintf(trace->output, "( ): ");
1818
1819 fprintf(trace->output, "%s:", evsel->name);
14a052df 1820
1d6c9407
WN
1821 if (perf_evsel__is_bpf_output(evsel)) {
1822 bpf_output__fprintf(trace, sample);
1823 } else if (evsel->tp_format) {
14a052df
ACM
1824 event_format__fprintf(evsel->tp_format, sample->cpu,
1825 sample->raw_data, sample->raw_size,
1826 trace->output);
1827 }
1828
1829 fprintf(trace->output, ")\n");
202ff968 1830
7ad35615
ACM
1831 if (callchain_ret > 0)
1832 trace__fprintf_callchain(trace, sample);
1833 else if (callchain_ret < 0)
1834 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1835out:
14a052df
ACM
1836 return 0;
1837}
1838
598d02c5
SF
1839static void print_location(FILE *f, struct perf_sample *sample,
1840 struct addr_location *al,
1841 bool print_dso, bool print_sym)
1842{
1843
bb963e16 1844 if ((verbose > 0 || print_dso) && al->map)
598d02c5
SF
1845 fprintf(f, "%s@", al->map->dso->long_name);
1846
bb963e16 1847 if ((verbose > 0 || print_sym) && al->sym)
4414a3c5 1848 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
598d02c5
SF
1849 al->addr - al->sym->start);
1850 else if (al->map)
4414a3c5 1851 fprintf(f, "0x%" PRIx64, al->addr);
598d02c5 1852 else
4414a3c5 1853 fprintf(f, "0x%" PRIx64, sample->addr);
598d02c5
SF
1854}
1855
1856static int trace__pgfault(struct trace *trace,
1857 struct perf_evsel *evsel,
473398a2 1858 union perf_event *event __maybe_unused,
598d02c5
SF
1859 struct perf_sample *sample)
1860{
1861 struct thread *thread;
598d02c5
SF
1862 struct addr_location al;
1863 char map_type = 'd';
a2ea67d7 1864 struct thread_trace *ttrace;
b91fc39f 1865 int err = -1;
1df54290 1866 int callchain_ret = 0;
598d02c5
SF
1867
1868 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1df54290
ACM
1869
1870 if (sample->callchain) {
1871 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1872 if (callchain_ret == 0) {
1873 if (callchain_cursor.nr < trace->min_stack)
1874 goto out_put;
1875 callchain_ret = 1;
1876 }
1877 }
1878
a2ea67d7
SF
1879 ttrace = thread__trace(thread, trace->output);
1880 if (ttrace == NULL)
b91fc39f 1881 goto out_put;
a2ea67d7
SF
1882
1883 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1884 ttrace->pfmaj++;
1885 else
1886 ttrace->pfmin++;
1887
1888 if (trace->summary_only)
b91fc39f 1889 goto out;
598d02c5 1890
473398a2 1891 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
598d02c5
SF
1892 sample->ip, &al);
1893
fd2b2975 1894 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
598d02c5
SF
1895
1896 fprintf(trace->output, "%sfault [",
1897 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1898 "maj" : "min");
1899
1900 print_location(trace->output, sample, &al, false, true);
1901
1902 fprintf(trace->output, "] => ");
1903
473398a2 1904 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
598d02c5
SF
1905 sample->addr, &al);
1906
1907 if (!al.map) {
473398a2 1908 thread__find_addr_location(thread, sample->cpumode,
598d02c5
SF
1909 MAP__FUNCTION, sample->addr, &al);
1910
1911 if (al.map)
1912 map_type = 'x';
1913 else
1914 map_type = '?';
1915 }
1916
1917 print_location(trace->output, sample, &al, true, false);
1918
1919 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
0c3a6ef4 1920
1df54290
ACM
1921 if (callchain_ret > 0)
1922 trace__fprintf_callchain(trace, sample);
1923 else if (callchain_ret < 0)
1924 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
b91fc39f
ACM
1925out:
1926 err = 0;
1927out_put:
1928 thread__put(thread);
1929 return err;
598d02c5
SF
1930}
1931
e6001980 1932static void trace__set_base_time(struct trace *trace,
8a07a809 1933 struct perf_evsel *evsel,
e6001980
ACM
1934 struct perf_sample *sample)
1935{
8a07a809
ACM
1936 /*
1937 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1938 * and don't use sample->time unconditionally, we may end up having
1939 * some other event in the future without PERF_SAMPLE_TIME for good
1940 * reason, i.e. we may not be interested in its timestamps, just in
1941 * it taking place, picking some piece of information when it
1942 * appears in our event stream (vfs_getname comes to mind).
1943 */
1944 if (trace->base_time == 0 && !trace->full_time &&
1945 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
e6001980
ACM
1946 trace->base_time = sample->time;
1947}
1948
6810fc91 1949static int trace__process_sample(struct perf_tool *tool,
0c82adcf 1950 union perf_event *event,
6810fc91
DA
1951 struct perf_sample *sample,
1952 struct perf_evsel *evsel,
1953 struct machine *machine __maybe_unused)
1954{
1955 struct trace *trace = container_of(tool, struct trace, tool);
aa07df6e 1956 struct thread *thread;
6810fc91
DA
1957 int err = 0;
1958
744a9719 1959 tracepoint_handler handler = evsel->handler;
6810fc91 1960
aa07df6e
DA
1961 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1962 if (thread && thread__is_filtered(thread))
ef65e96e 1963 goto out;
bdc89661 1964
e6001980 1965 trace__set_base_time(trace, evsel, sample);
6810fc91 1966
3160565f
DA
1967 if (handler) {
1968 ++trace->nr_events;
0c82adcf 1969 handler(trace, evsel, event, sample);
3160565f 1970 }
ef65e96e
ACM
1971out:
1972 thread__put(thread);
6810fc91
DA
1973 return err;
1974}
1975
1e28fe0a 1976static int trace__record(struct trace *trace, int argc, const char **argv)
5e2485b1
DA
1977{
1978 unsigned int rec_argc, i, j;
1979 const char **rec_argv;
1980 const char * const record_args[] = {
1981 "record",
1982 "-R",
1983 "-m", "1024",
1984 "-c", "1",
5e2485b1
DA
1985 };
1986
1e28fe0a
SF
1987 const char * const sc_args[] = { "-e", };
1988 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1989 const char * const majpf_args[] = { "-e", "major-faults" };
1990 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1991 const char * const minpf_args[] = { "-e", "minor-faults" };
1992 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1993
9aca7f17 1994 /* +1 is for the event string below */
1e28fe0a
SF
1995 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1996 majpf_args_nr + minpf_args_nr + argc;
5e2485b1
DA
1997 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1998
1999 if (rec_argv == NULL)
2000 return -ENOMEM;
2001
1e28fe0a 2002 j = 0;
5e2485b1 2003 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1e28fe0a
SF
2004 rec_argv[j++] = record_args[i];
2005
e281a960
SF
2006 if (trace->trace_syscalls) {
2007 for (i = 0; i < sc_args_nr; i++)
2008 rec_argv[j++] = sc_args[i];
2009
2010 /* event string may be different for older kernels - e.g., RHEL6 */
2011 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2012 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2013 else if (is_valid_tracepoint("syscalls:sys_enter"))
2014 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2015 else {
2016 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2017 return -1;
2018 }
9aca7f17 2019 }
9aca7f17 2020
1e28fe0a
SF
2021 if (trace->trace_pgfaults & TRACE_PFMAJ)
2022 for (i = 0; i < majpf_args_nr; i++)
2023 rec_argv[j++] = majpf_args[i];
2024
2025 if (trace->trace_pgfaults & TRACE_PFMIN)
2026 for (i = 0; i < minpf_args_nr; i++)
2027 rec_argv[j++] = minpf_args[i];
2028
2029 for (i = 0; i < (unsigned int)argc; i++)
2030 rec_argv[j++] = argv[i];
5e2485b1 2031
b0ad8ea6 2032 return cmd_record(j, rec_argv);
5e2485b1
DA
2033}
2034
bf2575c1
DA
2035static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2036
08c98776 2037static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
c522739d 2038{
ef503831 2039 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
8dd2a131
JO
2040
2041 if (IS_ERR(evsel))
08c98776 2042 return false;
c522739d
ACM
2043
2044 if (perf_evsel__field(evsel, "pathname") == NULL) {
2045 perf_evsel__delete(evsel);
08c98776 2046 return false;
c522739d
ACM
2047 }
2048
744a9719 2049 evsel->handler = trace__vfs_getname;
c522739d 2050 perf_evlist__add(evlist, evsel);
08c98776 2051 return true;
c522739d
ACM
2052}
2053
0ae537cb 2054static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
598d02c5
SF
2055{
2056 struct perf_evsel *evsel;
2057 struct perf_event_attr attr = {
2058 .type = PERF_TYPE_SOFTWARE,
2059 .mmap_data = 1,
598d02c5
SF
2060 };
2061
2062 attr.config = config;
0524798c 2063 attr.sample_period = 1;
598d02c5
SF
2064
2065 event_attr_init(&attr);
2066
2067 evsel = perf_evsel__new(&attr);
0ae537cb
ACM
2068 if (evsel)
2069 evsel->handler = trace__pgfault;
598d02c5 2070
0ae537cb 2071 return evsel;
598d02c5
SF
2072}
2073
ddbb1b13
ACM
2074static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2075{
2076 const u32 type = event->header.type;
2077 struct perf_evsel *evsel;
2078
ddbb1b13
ACM
2079 if (type != PERF_RECORD_SAMPLE) {
2080 trace__process_event(trace, trace->host, event, sample);
2081 return;
2082 }
2083
2084 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2085 if (evsel == NULL) {
2086 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2087 return;
2088 }
2089
e6001980
ACM
2090 trace__set_base_time(trace, evsel, sample);
2091
ddbb1b13
ACM
2092 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2093 sample->raw_data == NULL) {
2094 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2095 perf_evsel__name(evsel), sample->tid,
2096 sample->cpu, sample->raw_size);
2097 } else {
2098 tracepoint_handler handler = evsel->handler;
2099 handler(trace, evsel, event, sample);
2100 }
2101}
2102
c27366f0
ACM
2103static int trace__add_syscall_newtp(struct trace *trace)
2104{
2105 int ret = -1;
2106 struct perf_evlist *evlist = trace->evlist;
2107 struct perf_evsel *sys_enter, *sys_exit;
2108
2109 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2110 if (sys_enter == NULL)
2111 goto out;
2112
2113 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2114 goto out_delete_sys_enter;
2115
2116 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2117 if (sys_exit == NULL)
2118 goto out_delete_sys_enter;
2119
2120 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2121 goto out_delete_sys_exit;
2122
2123 perf_evlist__add(evlist, sys_enter);
2124 perf_evlist__add(evlist, sys_exit);
2125
2ddd5c04 2126 if (callchain_param.enabled && !trace->kernel_syscallchains) {
44621819
ACM
2127 /*
2128 * We're interested only in the user space callchain
2129 * leading to the syscall, allow overriding that for
2130 * debugging reasons using --kernel_syscall_callchains
2131 */
2132 sys_exit->attr.exclude_callchain_kernel = 1;
2133 }
2134
8b3ce757
ACM
2135 trace->syscalls.events.sys_enter = sys_enter;
2136 trace->syscalls.events.sys_exit = sys_exit;
c27366f0
ACM
2137
2138 ret = 0;
2139out:
2140 return ret;
2141
2142out_delete_sys_exit:
2143 perf_evsel__delete_priv(sys_exit);
2144out_delete_sys_enter:
2145 perf_evsel__delete_priv(sys_enter);
2146 goto out;
2147}
2148
19867b61
ACM
2149static int trace__set_ev_qualifier_filter(struct trace *trace)
2150{
2151 int err = -1;
b15d0a4c 2152 struct perf_evsel *sys_exit;
19867b61
ACM
2153 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2154 trace->ev_qualifier_ids.nr,
2155 trace->ev_qualifier_ids.entries);
2156
2157 if (filter == NULL)
2158 goto out_enomem;
2159
3541c034
MP
2160 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2161 filter)) {
b15d0a4c 2162 sys_exit = trace->syscalls.events.sys_exit;
3541c034 2163 err = perf_evsel__append_tp_filter(sys_exit, filter);
b15d0a4c 2164 }
19867b61
ACM
2165
2166 free(filter);
2167out:
2168 return err;
2169out_enomem:
2170 errno = ENOMEM;
2171 goto out;
2172}
c27366f0 2173
/*
 * Live-mode driver for 'perf trace'.
 *
 * Marks the trace as live, adds the requested events (syscall tracepoints,
 * vfs_getname probe, page faults, sched_stat_runtime) to trace->evlist,
 * configures/opens/mmaps them, optionally prepares and starts the workload
 * given in argv (when argc > 0), and then loops reading events from the
 * ring buffers until interrupted or until draining finishes.
 *
 * On every exit path that reaches out_delete_evlist the evlist is deleted,
 * trace->evlist is cleared and trace->live is reset.  Returns err: 0 on a
 * clean run, a negative value when one of the setup steps fails.
 */
f15eb531 2174static int trace__run(struct trace *trace, int argc, const char **argv)
514f1c67 2175{
14a052df 2176 struct perf_evlist *evlist = trace->evlist;
0ae537cb 2177 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
efd5745e
ACM
2178 int err = -1, i;
2179 unsigned long before;
	/* A workload is forked only when a command line was supplied. */
f15eb531 2180 const bool forks = argc > 0;
	/* Set once no more new events are expected; stops further polling. */
46fb3c21 2181 bool draining = false;
514f1c67 2182
75b757ca
ACM
2183 trace->live = true;
2184
	/* --- Event setup: each block adds evsels to the evlist. --- */
c27366f0 2185 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
801c67b0 2186 goto out_error_raw_syscalls;
514f1c67 2187
	/* vfs_getname is optional: the result only records whether it is available. */
e281a960 2188 if (trace->trace_syscalls)
08c98776 2189 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
c522739d 2190
0ae537cb
ACM
2191 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2192 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2193 if (pgfault_maj == NULL)
2194 goto out_error_mem;
2195 perf_evlist__add(evlist, pgfault_maj);
e2726d99 2196 }
598d02c5 2197
0ae537cb
ACM
2198 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2199 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2200 if (pgfault_min == NULL)
2201 goto out_error_mem;
2202 perf_evlist__add(evlist, pgfault_min);
2203 }
598d02c5 2204
1302d88e 2205 if (trace->sched &&
2cc990ba
ACM
2206 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2207 trace__sched_stat_runtime))
2208 goto out_error_sched_stat_runtime;
1302d88e 2209
514f1c67
ACM
2210 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2211 if (err < 0) {
c24ff998 2212 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
514f1c67
ACM
2213 goto out_delete_evlist;
2214 }
2215
752fde44
ACM
2216 err = trace__symbols_init(trace, evlist);
2217 if (err < 0) {
c24ff998 2218 fprintf(trace->output, "Problems initializing symbol libraries!\n");
03ad9747 2219 goto out_delete_evlist;
752fde44
ACM
2220 }
2221
fde54b78
ACM
2222 perf_evlist__config(evlist, &trace->opts, NULL);
2223
	/*
	 * Callchains are configured only on sys_exit and the page fault
	 * events; use_identifier notes whether any of them was configured.
	 */
0c3a6ef4
ACM
2224 if (callchain_param.enabled) {
2225 bool use_identifier = false;
2226
2227 if (trace->syscalls.events.sys_exit) {
2228 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2229 &trace->opts, &callchain_param);
2230 use_identifier = true;
2231 }
2232
2233 if (pgfault_maj) {
2234 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2235 use_identifier = true;
2236 }
2237
2238 if (pgfault_min) {
2239 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2240 use_identifier = true;
2241 }
2242
2243 if (use_identifier) {
2244 /*
2245 * Now we have evsels with different sample_ids, use
2246 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2247 * from a fixed position in each ring buffer record.
2248 *
2249 * As of this the changeset introducing this comment, this
2250 * isn't strictly needed, as the fields that can come before
2251 * PERF_SAMPLE_ID are all used, but we'll probably disable
2252 * some of those for things like copying the payload of
2253 * pointer syscall arguments, and for vfs_getname we don't
2254 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2255 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2256 */
2257 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2258 perf_evlist__reset_sample_bit(evlist, ID);
2259 }
fde54b78 2260 }
514f1c67 2261
f15eb531
NK
2262 signal(SIGCHLD, sig_handler);
2263 signal(SIGINT, sig_handler);
2264
	/* The workload is only prepared here; it is started further down, after mmap. */
2265 if (forks) {
6ef73ec4 2266 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
735f7e0b 2267 argv, false, NULL);
f15eb531 2268 if (err < 0) {
c24ff998 2269 fprintf(trace->output, "Couldn't run the workload!\n");
03ad9747 2270 goto out_delete_evlist;
f15eb531
NK
2271 }
2272 }
2273
514f1c67 2274 err = perf_evlist__open(evlist);
a8f23d8f
ACM
2275 if (err < 0)
2276 goto out_error_open;
514f1c67 2277
ba504235
WN
2278 err = bpf__apply_obj_config();
2279 if (err) {
2280 char errbuf[BUFSIZ];
2281
2282 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2283 pr_err("ERROR: Apply config to BPF failed: %s\n",
2284 errbuf);
2285 goto out_error_open;
2286 }
2287
241b057c
ACM
2288 /*
2289 * Better not use !target__has_task() here because we need to cover the
2290 * case where no threads were specified in the command line, but a
2291 * workload was, and in that case we will fill in the thread_map when
2292 * we fork the workload in perf_evlist__prepare_workload.
2293 */
	/*
	 * Either apply the user supplied pid filter list, or, when the first
	 * thread map entry is -1, filter out this process's own pid.
	 */
f078c385
ACM
2294 if (trace->filter_pids.nr > 0)
2295 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
e13798c7 2296 else if (thread_map__pid(evlist->threads, 0) == -1)
f078c385
ACM
2297 err = perf_evlist__set_filter_pid(evlist, getpid());
2298
94ad89bc
ACM
2299 if (err < 0)
2300 goto out_error_mem;
2301
19867b61
ACM
2302 if (trace->ev_qualifier_ids.nr > 0) {
2303 err = trace__set_ev_qualifier_filter(trace);
2304 if (err < 0)
2305 goto out_errno;
19867b61 2306
2e5e5f87
ACM
2307 pr_debug("event qualifier tracepoint filter: %s\n",
2308 trace->syscalls.events.sys_exit->filter);
2309 }
19867b61 2310
	/*
	 * evsel is an out parameter here; the out_error_apply_filters handler
	 * reports it (presumably the evsel whose filter was rejected).
	 */
94ad89bc
ACM
2311 err = perf_evlist__apply_filters(evlist, &evsel);
2312 if (err < 0)
2313 goto out_error_apply_filters;
241b057c 2314
f885037e 2315 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
e09b18d4
ACM
2316 if (err < 0)
2317 goto out_error_mmap;
514f1c67 2318
	/* With an initial delay the events are enabled later, after the sleep below. */
e36b7821 2319 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
cb24d01d
ACM
2320 perf_evlist__enable(evlist);
2321
f15eb531
NK
2322 if (forks)
2323 perf_evlist__start_workload(evlist);
2324
	/* initial_delay is multiplied by 1000 for usleep(), i.e. it is in milliseconds. */
e36b7821
AB
2325 if (trace->opts.initial_delay) {
2326 usleep(trace->opts.initial_delay * 1000);
2327 perf_evlist__enable(evlist);
2328 }
2329
e13798c7 2330 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
42052bea
ACM
2331 evlist->threads->nr > 1 ||
2332 perf_evlist__first(evlist)->attr.inherit;
	/*
	 * --- Main loop ---
	 * One pass over every mmap'ed ring buffer; 'before' lets the code
	 * below detect whether this pass consumed any event.
	 */
514f1c67 2333again:
efd5745e 2334 before = trace->nr_events;
514f1c67
ACM
2335
2336 for (i = 0; i < evlist->nr_mmaps; i++) {
2337 union perf_event *event;
2338
2339 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
514f1c67 2340 struct perf_sample sample;
514f1c67 2341
efd5745e 2342 ++trace->nr_events;
514f1c67 2343
514f1c67
ACM
2344 err = perf_evlist__parse_sample(evlist, event, &sample);
2345 if (err) {
c24ff998 2346 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
8e50d384 2347 goto next_event;
514f1c67
ACM
2348 }
2349
ddbb1b13 2350 trace__handle_event(trace, event, &sample);
	/* The event is consumed even when it could not be parsed. */
8e50d384
ZZ
2351next_event:
2352 perf_evlist__mmap_consume(evlist, i);
20c5f10e 2353
ba209f85
ACM
2354 if (interrupted)
2355 goto out_disable;
02ac5421
ACM
2356
	/* Once 'done' is set: stop producing new events, keep consuming what is buffered. */
2357 if (done && !draining) {
2358 perf_evlist__disable(evlist);
2359 draining = true;
2360 }
514f1c67
ACM
2361 }
2362 }
2363
	/*
	 * Nothing new was read in this pass: unless already draining, poll
	 * for more (timeout 100 once 'done' is set, -1 otherwise -- presumably
	 * poll(2) semantics, i.e. block) and go around again.  If events were
	 * read, go around again right away.
	 */
efd5745e 2364 if (trace->nr_events == before) {
ba209f85 2365 int timeout = done ? 100 : -1;
f15eb531 2366
46fb3c21
ACM
2367 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2368 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2369 draining = true;
2370
ba209f85 2371 goto again;
46fb3c21 2372 }
ba209f85
ACM
2373 } else {
2374 goto again;
f15eb531
NK
2375 }
2376
ba209f85 2377out_disable:
f3b623b8
ACM
2378 thread__zput(trace->current);
2379
ba209f85 2380 perf_evlist__disable(evlist);
514f1c67 2381
	/* The summary and tool statistics are printed only when err is 0. */
c522739d
ACM
2382 if (!err) {
2383 if (trace->summary)
2384 trace__fprintf_thread_summary(trace, trace->output);
2385
2386 if (trace->show_tool_stats) {
2387 fprintf(trace->output, "Stats:\n "
2388 " vfs_getname : %" PRIu64 "\n"
2389 " proc_getname: %" PRIu64 "\n",
2390 trace->stats.vfs_getname,
2391 trace->stats.proc_getname);
2392 }
2393 }
bf2575c1 2394
	/* Common exit: every path, success or failure, ends up here. */
514f1c67
ACM
2395out_delete_evlist:
2396 perf_evlist__delete(evlist);
14a052df 2397 trace->evlist = NULL;
75b757ca 2398 trace->live = false;
514f1c67 2399 return err;
	/*
	 * Error handlers.  This nested block follows the return above, so it
	 * is entered only via goto to one of its labels; the braces merely
	 * scope errbuf to the handlers that need it.
	 */
6ef068cb
ACM
2400{
2401 char errbuf[BUFSIZ];
a8f23d8f 2402
2cc990ba 2403out_error_sched_stat_runtime:
988bdb31 2404 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2cc990ba
ACM
2405 goto out_error;
2406
801c67b0 2407out_error_raw_syscalls:
988bdb31 2408 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
a8f23d8f
ACM
2409 goto out_error;
2410
e09b18d4
ACM
2411out_error_mmap:
2412 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2413 goto out_error;
2414
	/* out_error_open falls through into out_error. */
a8f23d8f
ACM
2415out_error_open:
2416 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2417
2418out_error:
6ef068cb 2419 fprintf(trace->output, "%s\n", errbuf);
87f91868 2420 goto out_delete_evlist;
94ad89bc
ACM
2421
2422out_error_apply_filters:
2423 fprintf(trace->output,
2424 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2425 evsel->filter, perf_evsel__name(evsel), errno,
c8b5f2c9 2426 str_error_r(errno, errbuf, sizeof(errbuf)));
94ad89bc 2427 goto out_delete_evlist;
514f1c67 2428}
	/* These handlers do not need errbuf, so they live outside that block. */
5ed08dae
ACM
2429out_error_mem:
2430 fprintf(trace->output, "Not enough memory to run!\n");
2431 goto out_delete_evlist;
19867b61
ACM
2432
2433out_errno:
2434 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2435 goto out_delete_evlist;
a8f23d8f 2436}
514f1c67 2437
6810fc91
DA
2438static int trace__replay(struct trace *trace)
2439{
2440 const struct perf_evsel_str_handler handlers[] = {
c522739d 2441 { "probe:vfs_getname", trace__vfs_getname, },
6810fc91 2442 };
f5fc1412
JO
2443 struct perf_data_file file = {
2444 .path = input_name,
2445 .mode = PERF_DATA_MODE_READ,
e366a6d8 2446 .force = trace->force,
f5fc1412 2447 };
6810fc91 2448 struct perf_session *session;
003824e8 2449 struct perf_evsel *evsel;
6810fc91
DA
2450 int err = -1;
2451
2452 trace->tool.sample = trace__process_sample;
2453 trace->tool.mmap = perf_event__process_mmap;
384c671e 2454 trace->tool.mmap2 = perf_event__process_mmap2;
6810fc91
DA
2455 trace->tool.comm = perf_event__process_comm;
2456 trace->tool.exit = perf_event__process_exit;
2457 trace->tool.fork = perf_event__process_fork;
2458 trace->tool.attr = perf_event__process_attr;
f3b3614a 2459 trace->tool.tracing_data = perf_event__process_tracing_data;
6810fc91 2460 trace->tool.build_id = perf_event__process_build_id;
f3b3614a 2461 trace->tool.namespaces = perf_event__process_namespaces;
6810fc91 2462
0a8cb85c 2463 trace->tool.ordered_events = true;
6810fc91
DA
2464 trace->tool.ordering_requires_timestamps = true;
2465
2466 /* add tid to output */
2467 trace->multiple_threads = true;
2468
f5fc1412 2469 session = perf_session__new(&file, false, &trace->tool);
6810fc91 2470 if (session == NULL)
52e02834 2471 return -1;
6810fc91 2472
aa07df6e
DA
2473 if (trace->opts.target.pid)
2474 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2475
2476 if (trace->opts.target.tid)
2477 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2478
0a7e6d1b 2479 if (symbol__init(&session->header.env) < 0)
cb2ffae2
NK
2480 goto out;
2481
8fb598e5
DA
2482 trace->host = &session->machines.host;
2483
6810fc91
DA
2484 err = perf_session__set_tracepoints_handlers(session, handlers);
2485 if (err)
2486 goto out;
2487
003824e8
NK
2488 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2489 "raw_syscalls:sys_enter");
9aca7f17
DA
2490 /* older kernels have syscalls tp versus raw_syscalls */
2491 if (evsel == NULL)
2492 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2493 "syscalls:sys_enter");
003824e8 2494
e281a960
SF
2495 if (evsel &&
2496 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2497 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
003824e8
NK
2498 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2499 goto out;
2500 }
2501
2502 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2503 "raw_syscalls:sys_exit");
9aca7f17
DA
2504 if (evsel == NULL)
2505 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2506 "syscalls:sys_exit");
e281a960
SF
2507 if (evsel &&
2508 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2509 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
003824e8 2510 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
6810fc91
DA
2511 goto out;
2512 }
2513
e5cadb93 2514 evlist__for_each_entry(session->evlist, evsel) {
1e28fe0a
SF
2515 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2516 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2517 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2518 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2519 evsel->handler = trace__pgfault;
2520 }
2521
6810fc91
DA
2522 setup_pager();
2523
b7b61cbe 2524 err = perf_session__process_events(session);
6810fc91
DA
2525 if (err)
2526 pr_err("Failed to process events, error %d", err);
2527
bf2575c1
DA
2528 else if (trace->summary)
2529 trace__fprintf_thread_summary(trace, trace->output);
2530
6810fc91
DA
2531out:
2532 perf_session__delete(session);
2533
2534 return err;
2535}
2536
1302d88e
ACM
/*
 * Print the heading of the per-thread summary to @fp.
 *
 * Returns the value reported by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2545
b535d523
ACM
2546DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2547 struct stats *stats;
2548 double msecs;
2549 int syscall;
2550)
2551{
2552 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2553 struct stats *stats = source->priv;
2554
2555 entry->syscall = source->i;
2556 entry->stats = stats;
2557 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2558}
2559
bf2575c1
DA
2560static size_t thread__dump_stats(struct thread_trace *ttrace,
2561 struct trace *trace, FILE *fp)
2562{
bf2575c1
DA
2563 size_t printed = 0;
2564 struct syscall *sc;
b535d523
ACM
2565 struct rb_node *nd;
2566 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
bf2575c1 2567
b535d523 2568 if (syscall_stats == NULL)
bf2575c1
DA
2569 return 0;
2570
2571 printed += fprintf(fp, "\n");
2572
834fd46d
MW
2573 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2574 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2575 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
99ff7150 2576
98a91837 2577 resort_rb__for_each_entry(nd, syscall_stats) {
b535d523 2578 struct stats *stats = syscall_stats_entry->stats;
bf2575c1
DA
2579 if (stats) {
2580 double min = (double)(stats->min) / NSEC_PER_MSEC;
2581 double max = (double)(stats->max) / NSEC_PER_MSEC;
2582 double avg = avg_stats(stats);
2583 double pct;
2584 u64 n = (u64) stats->n;
2585
2586 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2587 avg /= NSEC_PER_MSEC;
2588
b535d523 2589 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
99ff7150 2590 printed += fprintf(fp, " %-15s", sc->name);
834fd46d 2591 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
b535d523 2592 n, syscall_stats_entry->msecs, min, avg);
27a778b5 2593 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
bf2575c1 2594 }
bf2575c1
DA
2595 }
2596
b535d523 2597 resort_rb__delete(syscall_stats);
bf2575c1 2598 printed += fprintf(fp, "\n\n");
1302d88e
ACM
2599
2600 return printed;
2601}
2602
96c14451 2603static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
896cbb56 2604{
96c14451 2605 size_t printed = 0;
89dceb22 2606 struct thread_trace *ttrace = thread__priv(thread);
896cbb56
DA
2607 double ratio;
2608
2609 if (ttrace == NULL)
2610 return 0;
2611
2612 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2613
15e65c69 2614 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
99ff7150 2615 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
15e65c69 2616 printed += fprintf(fp, "%.1f%%", ratio);
a2ea67d7
SF
2617 if (ttrace->pfmaj)
2618 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2619 if (ttrace->pfmin)
2620 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
03548ebf
ACM
2621 if (trace->sched)
2622 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2623 else if (fputc('\n', fp) != EOF)
2624 ++printed;
2625
bf2575c1 2626 printed += thread__dump_stats(ttrace, trace, fp);
896cbb56 2627
96c14451
ACM
2628 return printed;
2629}
896cbb56 2630
96c14451
ACM
2631static unsigned long thread__nr_events(struct thread_trace *ttrace)
2632{
2633 return ttrace ? ttrace->nr_events : 0;
2634}
2635
2636DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2637 struct thread *thread;
2638)
2639{
2640 entry->thread = rb_entry(nd, struct thread, rb_node);
896cbb56
DA
2641}
2642
1302d88e
ACM
2643static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2644{
96c14451
ACM
2645 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2646 size_t printed = trace__fprintf_threads_header(fp);
2647 struct rb_node *nd;
1302d88e 2648
96c14451
ACM
2649 if (threads == NULL) {
2650 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2651 return 0;
2652 }
2653
98a91837 2654 resort_rb__for_each_entry(nd, threads)
96c14451 2655 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
896cbb56 2656
96c14451
ACM
2657 resort_rb__delete(threads);
2658
2659 return printed;
1302d88e
ACM
2660}
2661
ae9ed035
ACM
2662static int trace__set_duration(const struct option *opt, const char *str,
2663 int unset __maybe_unused)
2664{
2665 struct trace *trace = opt->value;
2666
2667 trace->duration_filter = atof(str);
2668 return 0;
2669}
2670
f078c385
ACM
2671static int trace__set_filter_pids(const struct option *opt, const char *str,
2672 int unset __maybe_unused)
2673{
2674 int ret = -1;
2675 size_t i;
2676 struct trace *trace = opt->value;
2677 /*
2678 * FIXME: introduce a intarray class, plain parse csv and create a
2679 * { int nr, int entries[] } struct...
2680 */
2681 struct intlist *list = intlist__new(str);
2682
2683 if (list == NULL)
2684 return -1;
2685
2686 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2687 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2688
2689 if (trace->filter_pids.entries == NULL)
2690 goto out;
2691
2692 trace->filter_pids.entries[0] = getpid();
2693
2694 for (i = 1; i < trace->filter_pids.nr; ++i)
2695 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2696
2697 intlist__delete(list);
2698 ret = 0;
2699out:
2700 return ret;
2701}
2702
c24ff998
ACM
2703static int trace__open_output(struct trace *trace, const char *filename)
2704{
2705 struct stat st;
2706
2707 if (!stat(filename, &st) && st.st_size) {
2708 char oldname[PATH_MAX];
2709
2710 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2711 unlink(oldname);
2712 rename(filename, oldname);
2713 }
2714
2715 trace->output = fopen(filename, "w");
2716
2717 return trace->output == NULL ? -errno : 0;
2718}
2719
598d02c5
SF
2720static int parse_pagefaults(const struct option *opt, const char *str,
2721 int unset __maybe_unused)
2722{
2723 int *trace_pgfaults = opt->value;
2724
2725 if (strcmp(str, "all") == 0)
2726 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2727 else if (strcmp(str, "maj") == 0)
2728 *trace_pgfaults |= TRACE_PFMAJ;
2729 else if (strcmp(str, "min") == 0)
2730 *trace_pgfaults |= TRACE_PFMIN;
2731 else
2732 return -1;
2733
2734 return 0;
2735}
2736
14a052df
ACM
2737static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2738{
2739 struct perf_evsel *evsel;
2740
e5cadb93 2741 evlist__for_each_entry(evlist, evsel)
14a052df
ACM
2742 evsel->handler = handler;
2743}
2744
017037ff
ACM
2745/*
2746 * XXX: Hackish, just splitting the combined -e+--event (syscalls
2747 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
2748 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
2749 *
2750 * It'd be better to introduce a parse_options() variant that would return a
2751 * list with the terms it didn't match to an event...
2752 */
2753static int trace__parse_events_option(const struct option *opt, const char *str,
2754 int unset __maybe_unused)
2755{
2756 struct trace *trace = (struct trace *)opt->value;
2757 const char *s = str;
2758 char *sep = NULL, *lists[2] = { NULL, NULL, };
2759 int len = strlen(str), err = -1, list;
2760 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
2761 char group_name[PATH_MAX];
2762
2763 if (strace_groups_dir == NULL)
2764 return -1;
2765
2766 if (*s == '!') {
2767 ++s;
2768 trace->not_ev_qualifier = true;
2769 }
2770
2771 while (1) {
2772 if ((sep = strchr(s, ',')) != NULL)
2773 *sep = '\0';
2774
2775 list = 0;
2776 if (syscalltbl__id(trace->sctbl, s) >= 0) {
2777 list = 1;
2778 } else {
2779 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
2780 if (access(group_name, R_OK) == 0)
2781 list = 1;
2782 }
2783
2784 if (lists[list]) {
2785 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
2786 } else {
2787 lists[list] = malloc(len);
2788 if (lists[list] == NULL)
2789 goto out;
2790 strcpy(lists[list], s);
2791 }
2792
2793 if (!sep)
2794 break;
2795
2796 *sep = ',';
2797 s = sep + 1;
2798 }
2799
2800 if (lists[1] != NULL) {
2801 struct strlist_config slist_config = {
2802 .dirname = strace_groups_dir,
2803 };
2804
2805 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
2806 if (trace->ev_qualifier == NULL) {
2807 fputs("Not enough memory to parse event qualifier", trace->output);
2808 goto out;
2809 }
2810
2811 if (trace__validate_ev_qualifier(trace))
2812 goto out;
2813 }
2814
2815 err = 0;
2816
2817 if (lists[0]) {
2818 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
2819 "event selector. use 'perf list' to list available events",
2820 parse_events_option);
2821 err = parse_events_option(&o, lists[0], 0);
2822 }
2823out:
2824 if (sep)
2825 *sep = ',';
2826
2827 return err;
2828}
2829
b0ad8ea6 2830int cmd_trace(int argc, const char **argv)
514f1c67 2831{
6fdd9cb7 2832 const char *trace_usage[] = {
f15eb531
NK
2833 "perf trace [<options>] [<command>]",
2834 "perf trace [<options>] -- <command> [<options>]",
5e2485b1
DA
2835 "perf trace record [<options>] [<command>]",
2836 "perf trace record [<options>] -- <command> [<options>]",
514f1c67
ACM
2837 NULL
2838 };
2839 struct trace trace = {
514f1c67
ACM
2840 .syscalls = {
2841 . max = -1,
2842 },
2843 .opts = {
2844 .target = {
2845 .uid = UINT_MAX,
2846 .uses_mmap = true,
2847 },
2848 .user_freq = UINT_MAX,
2849 .user_interval = ULLONG_MAX,
509051ea 2850 .no_buffering = true,
38d5447d 2851 .mmap_pages = UINT_MAX,
9d9cad76 2852 .proc_map_timeout = 500,
514f1c67 2853 },
007d66a0 2854 .output = stderr,
50c95cbd 2855 .show_comm = true,
e281a960 2856 .trace_syscalls = true,
44621819 2857 .kernel_syscallchains = false,
05614993 2858 .max_stack = UINT_MAX,
514f1c67 2859 };
c24ff998 2860 const char *output_name = NULL;
514f1c67 2861 const struct option trace_options[] = {
017037ff
ACM
2862 OPT_CALLBACK('e', "event", &trace, "event",
2863 "event/syscall selector. use 'perf list' to list available events",
2864 trace__parse_events_option),
50c95cbd
ACM
2865 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2866 "show the thread COMM next to its id"),
c522739d 2867 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
017037ff
ACM
2868 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
2869 trace__parse_events_option),
c24ff998 2870 OPT_STRING('o', "output", &output_name, "file", "output file name"),
6810fc91 2871 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
514f1c67
ACM
2872 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2873 "trace events on existing process id"),
ac9be8ee 2874 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
514f1c67 2875 "trace events on existing thread id"),
fa0e4ffe
ACM
2876 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2877 "pids to filter (by the kernel)", trace__set_filter_pids),
ac9be8ee 2878 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
514f1c67 2879 "system-wide collection from all CPUs"),
ac9be8ee 2880 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
514f1c67 2881 "list of cpus to monitor"),
6810fc91 2882 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
514f1c67 2883 "child tasks do not inherit counters"),
994a1f78
JO
2884 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2885 "number of mmap data pages",
2886 perf_evlist__parse_mmap_pages),
ac9be8ee 2887 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
514f1c67 2888 "user to profile"),
ae9ed035
ACM
2889 OPT_CALLBACK(0, "duration", &trace, "float",
2890 "show only events with duration > N.M ms",
2891 trace__set_duration),
1302d88e 2892 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
7c304ee0 2893 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4bb09192
DA
2894 OPT_BOOLEAN('T', "time", &trace.full_time,
2895 "Show full timestamp, not time relative to first start"),
fd2eabaf
DA
2896 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2897 "Show only syscall summary with statistics"),
2898 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2899 "Show all syscalls and summary with statistics"),
598d02c5
SF
2900 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2901 "Trace pagefaults", parse_pagefaults, "maj"),
e281a960 2902 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
e366a6d8 2903 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
566a0885
MW
2904 OPT_CALLBACK(0, "call-graph", &trace.opts,
2905 "record_mode[,record_size]", record_callchain_help,
2906 &record_parse_callchain_opt),
44621819
ACM
2907 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2908 "Show the kernel callchains on the syscall exit path"),
5cf9c84e
ACM
2909 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2910 "Set the minimum stack depth when parsing the callchain, "
2911 "anything below the specified depth will be ignored."),
c6d4a494
ACM
2912 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2913 "Set the maximum stack depth when parsing the callchain, "
2914 "anything beyond the specified depth will be ignored. "
4cb93446 2915 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
9d9cad76
KL
2916 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2917 "per thread proc mmap processing timeout in ms"),
e36b7821
AB
2918 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
2919 "ms to wait before starting measurement after program "
2920 "start"),
514f1c67
ACM
2921 OPT_END()
2922 };
ccd62a89 2923 bool __maybe_unused max_stack_user_set = true;
f3e459d1 2924 bool mmap_pages_user_set = true;
6fdd9cb7 2925 const char * const trace_subcommands[] = { "record", NULL };
514f1c67 2926 int err;
32caf0d1 2927 char bf[BUFSIZ];
514f1c67 2928
4d08cb80
ACM
2929 signal(SIGSEGV, sighandler_dump_stack);
2930 signal(SIGFPE, sighandler_dump_stack);
2931
14a052df 2932 trace.evlist = perf_evlist__new();
fd0db102 2933 trace.sctbl = syscalltbl__new();
14a052df 2934
fd0db102 2935 if (trace.evlist == NULL || trace.sctbl == NULL) {
14a052df 2936 pr_err("Not enough memory to run!\n");
ff8f695c 2937 err = -ENOMEM;
14a052df
ACM
2938 goto out;
2939 }
2940
6fdd9cb7
YS
2941 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2942 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
fd2eabaf 2943
d7888573
WN
2944 err = bpf__setup_stdout(trace.evlist);
2945 if (err) {
2946 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2947 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2948 goto out;
2949 }
2950
59247e33
ACM
2951 err = -1;
2952
598d02c5
SF
2953 if (trace.trace_pgfaults) {
2954 trace.opts.sample_address = true;
2955 trace.opts.sample_time = true;
2956 }
2957
f3e459d1
ACM
2958 if (trace.opts.mmap_pages == UINT_MAX)
2959 mmap_pages_user_set = false;
2960
05614993 2961 if (trace.max_stack == UINT_MAX) {
fe176085 2962 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
05614993
ACM
2963 max_stack_user_set = false;
2964 }
2965
2966#ifdef HAVE_DWARF_UNWIND_SUPPORT
caa36ed7 2967 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
05614993
ACM
2968 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2969#endif
2970
2ddd5c04 2971 if (callchain_param.enabled) {
f3e459d1
ACM
2972 if (!mmap_pages_user_set && geteuid() == 0)
2973 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2974
566a0885 2975 symbol_conf.use_callchain = true;
f3e459d1 2976 }
566a0885 2977
14a052df
ACM
2978 if (trace.evlist->nr_entries > 0)
2979 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2980
1e28fe0a
SF
2981 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2982 return trace__record(&trace, argc-1, &argv[1]);
2983
2984 /* summary_only implies summary option, but don't overwrite summary if set */
2985 if (trace.summary_only)
2986 trace.summary = trace.summary_only;
2987
726f3234
ACM
2988 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2989 trace.evlist->nr_entries == 0 /* Was --events used? */) {
e281a960
SF
2990 pr_err("Please specify something to trace.\n");
2991 return -1;
2992 }
2993
017037ff 2994 if (!trace.trace_syscalls && trace.ev_qualifier) {
59247e33
ACM
2995 pr_err("The -e option can't be used with --no-syscalls.\n");
2996 goto out;
2997 }
2998
c24ff998
ACM
2999 if (output_name != NULL) {
3000 err = trace__open_output(&trace, output_name);
3001 if (err < 0) {
3002 perror("failed to create output file");
3003 goto out;
3004 }
3005 }
3006
fd0db102
ACM
3007 trace.open_id = syscalltbl__id(trace.sctbl, "open");
3008
602ad878 3009 err = target__validate(&trace.opts.target);
32caf0d1 3010 if (err) {
602ad878 3011 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3012 fprintf(trace.output, "%s", bf);
3013 goto out_close;
32caf0d1
NK
3014 }
3015
602ad878 3016 err = target__parse_uid(&trace.opts.target);
514f1c67 3017 if (err) {
602ad878 3018 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3019 fprintf(trace.output, "%s", bf);
3020 goto out_close;
514f1c67
ACM
3021 }
3022
602ad878 3023 if (!argc && target__none(&trace.opts.target))
ee76120e
NK
3024 trace.opts.target.system_wide = true;
3025
6810fc91
DA
3026 if (input_name)
3027 err = trace__replay(&trace);
3028 else
3029 err = trace__run(&trace, argc, argv);
1302d88e 3030
c24ff998
ACM
3031out_close:
3032 if (output_name != NULL)
3033 fclose(trace.output);
3034out:
1302d88e 3035 return err;
514f1c67 3036}