perf tools: Use __maybe_unused for unused variables
tools/perf/builtin-kmem.c
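/*
 * builtin-kmem.c: analyze kernel slab allocation tracepoints recorded
 * by 'perf kmem record' and report per-callsite and per-allocation
 * statistics: bytes requested vs. allocated, hit counts, cross-CPU
 * ("ping-pong") frees and internal fragmentation.
 */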
#include "builtin.h"
#include "perf.h"

#include "util/evsel.h"
#include "util/util.h"
#include "util/cache.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/session.h"
#include "util/tool.h"

#include "util/parse-options.h"
#include "util/trace-event.h"

#include "util/debug.h"

#include <linux/rbtree.h>

struct alloc_stat;
typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);

static const char *input_name;

static int alloc_flag;
static int caller_flag;

static int alloc_lines = -1;
static int caller_lines = -1;

static bool raw_ip;

static char default_sort_order[] = "frag,hit,bytes";

static int *cpunode_map;
static int max_cpu_num;

struct alloc_stat {
	u64	call_site;
	u64	ptr;
	u64	bytes_req;
	u64	bytes_alloc;
	u32	hit;
	u32	pingpong;

	short	alloc_cpu;

	struct rb_node node;
};

static struct rb_root root_alloc_stat;
static struct rb_root root_alloc_sorted;
static struct rb_root root_caller_stat;
static struct rb_root root_caller_sorted;

static unsigned long total_requested, total_allocated;
static unsigned long nr_allocs, nr_cross_allocs;

#define PATH_SYS_NODE	"/sys/devices/system/node"

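/*
 * Size and initialize the cpu -> NUMA node map. The upper bound on CPU
 * ids comes from /sys/devices/system/cpu/kernel_max (the highest
 * possible CPU id); if that file is unavailable, assume a generous
 * default. Every slot starts at -1, i.e. "node unknown".
 */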
static int init_cpunode_map(void)
{
	FILE *fp;
	int i;

	fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
	if (!fp) {
		max_cpu_num = 4096;
	} else {
		if (fscanf(fp, "%d", &max_cpu_num) < 1) {
			pr_err("Failed to read 'kernel_max' from sysfs\n");
			fclose(fp);
			return -1;
		}
		max_cpu_num++;
		fclose(fp);
	}

	/* Allocate the map in the fallback path too, so that
	 * setup_cpunode_map() never writes through a NULL pointer. */
	cpunode_map = calloc(max_cpu_num, sizeof(int));
	if (!cpunode_map) {
		pr_err("%s: calloc failed\n", __func__);
		return -1;
	}

	for (i = 0; i < max_cpu_num; i++)
		cpunode_map[i] = -1;

	return 0;
}

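/*
 * Walk /sys/devices/system/node/node<N>/cpu<M> symlinks to record
 * which NUMA node each CPU belongs to. The map is used later to count
 * allocations satisfied from a remote node.
 */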
static int setup_cpunode_map(void)
{
	struct dirent *dent1, *dent2;
	DIR *dir1, *dir2;
	unsigned int cpu, mem;
	char buf[PATH_MAX];

	if (init_cpunode_map())
		return -1;

	dir1 = opendir(PATH_SYS_NODE);
	if (!dir1)
		return -1;

	while ((dent1 = readdir(dir1)) != NULL) {
		if (dent1->d_type != DT_DIR ||
		    sscanf(dent1->d_name, "node%u", &mem) < 1)
			continue;

		snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
		dir2 = opendir(buf);
		if (!dir2)
			continue;
		while ((dent2 = readdir(dir2)) != NULL) {
			if (dent2->d_type != DT_LNK ||
			    sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
				continue;
			cpunode_map[cpu] = mem;
		}
		closedir(dir2);
	}
	closedir(dir1);
	return 0;
}

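/*
 * Account one allocation in the rbtree keyed by object pointer. An
 * existing node with the same ptr (the object was freed and its
 * address reused) is updated in place; otherwise a new node is linked
 * in at the insertion point found during the descent.
 */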
static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
			     int bytes_req, int bytes_alloc, int cpu)
{
	struct rb_node **node = &root_alloc_stat.rb_node;
	struct rb_node *parent = NULL;
	struct alloc_stat *data = NULL;

	while (*node) {
		parent = *node;
		data = rb_entry(*node, struct alloc_stat, node);

		if (ptr > data->ptr)
			node = &(*node)->rb_right;
		else if (ptr < data->ptr)
			node = &(*node)->rb_left;
		else
			break;
	}

	if (data && data->ptr == ptr) {
		data->hit++;
		data->bytes_req += bytes_req;
		data->bytes_alloc += bytes_alloc;
	} else {
		data = malloc(sizeof(*data));
		if (!data) {
			pr_err("%s: malloc failed\n", __func__);
			return -1;
		}
		data->ptr = ptr;
		data->pingpong = 0;
		data->hit = 1;
		data->bytes_req = bytes_req;
		data->bytes_alloc = bytes_alloc;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &root_alloc_stat);
	}
	data->call_site = call_site;
	data->alloc_cpu = cpu;
	return 0;
}

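/*
 * Same accounting as insert_alloc_stat(), but aggregated per call site
 * in a separate rbtree keyed by call_site.
 */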
static int insert_caller_stat(unsigned long call_site,
			      int bytes_req, int bytes_alloc)
{
	struct rb_node **node = &root_caller_stat.rb_node;
	struct rb_node *parent = NULL;
	struct alloc_stat *data = NULL;

	while (*node) {
		parent = *node;
		data = rb_entry(*node, struct alloc_stat, node);

		if (call_site > data->call_site)
			node = &(*node)->rb_right;
		else if (call_site < data->call_site)
			node = &(*node)->rb_left;
		else
			break;
	}

	if (data && data->call_site == call_site) {
		data->hit++;
		data->bytes_req += bytes_req;
		data->bytes_alloc += bytes_alloc;
	} else {
		data = malloc(sizeof(*data));
		if (!data) {
			pr_err("%s: malloc failed\n", __func__);
			return -1;
		}
		data->call_site = call_site;
		data->pingpong = 0;
		data->hit = 1;
		data->bytes_req = bytes_req;
		data->bytes_alloc = bytes_alloc;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &root_caller_stat);
	}

	return 0;
}

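/*
 * Handle kmalloc/kmem_cache_alloc (and their _node variants, when
 * 'node' is set). The raw tracepoint payload carries ptr, call_site,
 * bytes_req and bytes_alloc; the _node events additionally carry the
 * target node, which lets us spot allocations that landed on a node
 * other than the allocating CPU's.
 */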
static int perf_evsel__process_alloc_event(struct perf_evsel *evsel,
					   struct perf_sample *sample, int node)
{
	struct event_format *event = evsel->tp_format;
	void *data = sample->raw_data;
	unsigned long call_site;
	unsigned long ptr;
	int bytes_req, cpu = sample->cpu;
	int bytes_alloc;
	int node1, node2;

	ptr = raw_field_value(event, "ptr", data);
	call_site = raw_field_value(event, "call_site", data);
	bytes_req = raw_field_value(event, "bytes_req", data);
	bytes_alloc = raw_field_value(event, "bytes_alloc", data);

	if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu) ||
	    insert_caller_stat(call_site, bytes_req, bytes_alloc))
		return -1;

	total_requested += bytes_req;
	total_allocated += bytes_alloc;

	if (node) {
		node1 = cpunode_map[cpu];
		node2 = raw_field_value(event, "node", data);
		if (node1 != node2)
			nr_cross_allocs++;
	}
	nr_allocs++;
	return 0;
}

static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
static int callsite_cmp(struct alloc_stat *, struct alloc_stat *);

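/*
 * Look up a node in a stat tree using the same comparison function
 * that ordered it; only the key field relevant to sort_fn needs to be
 * set in the on-stack key.
 */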
static struct alloc_stat *search_alloc_stat(unsigned long ptr,
					    unsigned long call_site,
					    struct rb_root *root,
					    sort_fn_t sort_fn)
{
	struct rb_node *node = root->rb_node;
	struct alloc_stat key = { .ptr = ptr, .call_site = call_site };

	while (node) {
		struct alloc_stat *data;
		int cmp;

		data = rb_entry(node, struct alloc_stat, node);

		cmp = sort_fn(&key, data);
		if (cmp < 0)
			node = node->rb_left;
		else if (cmp > 0)
			node = node->rb_right;
		else
			return data;
	}
	return NULL;
}

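/*
 * Handle kfree/kmem_cache_free. A free on a different CPU than the one
 * that allocated the object counts as a "ping-pong" against both the
 * allocation and its call site.
 */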
static int perf_evsel__process_free_event(struct perf_evsel *evsel,
					  struct perf_sample *sample)
{
	unsigned long ptr = raw_field_value(evsel->tp_format, "ptr",
					    sample->raw_data);
	struct alloc_stat *s_alloc, *s_caller;

	s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
	if (!s_alloc)
		return 0;

	if ((short)sample->cpu != s_alloc->alloc_cpu) {
		s_alloc->pingpong++;

		s_caller = search_alloc_stat(0, s_alloc->call_site,
					     &root_caller_stat, callsite_cmp);
		if (!s_caller)
			return -1;
		s_caller->pingpong++;
	}
	s_alloc->alloc_cpu = -1;

	return 0;
}

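/* Dispatch on tracepoint name to the alloc or free handler. */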
static int perf_evsel__process_kmem_event(struct perf_evsel *evsel,
					  struct perf_sample *sample)
{
	struct event_format *event = evsel->tp_format;

	if (!strcmp(event->name, "kmalloc") ||
	    !strcmp(event->name, "kmem_cache_alloc")) {
		return perf_evsel__process_alloc_event(evsel, sample, 0);
	}

	if (!strcmp(event->name, "kmalloc_node") ||
	    !strcmp(event->name, "kmem_cache_alloc_node")) {
		return perf_evsel__process_alloc_event(evsel, sample, 1);
	}

	if (!strcmp(event->name, "kfree") ||
	    !strcmp(event->name, "kmem_cache_free")) {
		return perf_evsel__process_free_event(evsel, sample);
	}

	return 0;
}

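/*
 * Per-sample tool callback: resolve the thread for debug output, then
 * hand the sample to the kmem dispatcher above. The tool struct below
 * requests ordered (timestamp-sorted) delivery, so a free is always
 * seen after its matching allocation.
 */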
static int process_sample_event(struct perf_tool *tool __maybe_unused,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct thread *thread = machine__findnew_thread(machine, event->ip.pid);

	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			 event->header.type);
		return -1;
	}

	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);

	return perf_evsel__process_kmem_event(evsel, sample);
}

static struct perf_tool perf_kmem = {
	.sample		 = process_sample_event,
	.comm		 = perf_event__process_comm,
	.ordered_samples = true,
};

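/*
 * Internal fragmentation as a percentage:
 *
 *   frag = 100 * (n_alloc - n_req) / n_alloc
 *
 * e.g. a 33-byte request served from a 64-byte slab object wastes
 * 100 * (64 - 33) / 64 ~= 48.4%.
 */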
static double fragmentation(unsigned long n_req, unsigned long n_alloc)
{
	if (n_alloc == 0)
		return 0.0;
	else
		return 100.0 - (100.0 * n_req / n_alloc);
}

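/*
 * Print one result table. Columns: call site (or raw alloc pointer),
 * total/average bytes allocated, total/average bytes requested, hit
 * count, ping-pong count and fragmentation. Call-site addresses are
 * symbolized unless --raw-ip was given.
 */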
static void __print_result(struct rb_root *root, struct perf_session *session,
			   int n_lines, int is_caller)
{
	struct rb_node *next;
	struct machine *machine;

	printf("%.102s\n", graph_dotted_line);
	printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr");
	printf(" Total_alloc/Per | Total_req/Per   | Hit      | Ping-pong | Frag\n");
	printf("%.102s\n", graph_dotted_line);

	next = rb_first(root);

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("__print_result: couldn't find kernel information\n");
		return;
	}
	while (next && n_lines--) {
		struct alloc_stat *data = rb_entry(next, struct alloc_stat,
						   node);
		struct symbol *sym = NULL;
		struct map *map;
		char buf[BUFSIZ];
		u64 addr;

		if (is_caller) {
			addr = data->call_site;
			if (!raw_ip)
				sym = machine__find_kernel_function(machine, addr, &map, NULL);
		} else
			addr = data->ptr;

		if (sym != NULL)
			snprintf(buf, sizeof(buf), "%s+%" PRIx64 "", sym->name,
				 addr - map->unmap_ip(map, sym->start));
		else
			snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr);
		printf(" %-34s |", buf);

		printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %8lu | %6.3f%%\n",
		       (unsigned long long)data->bytes_alloc,
		       (unsigned long)data->bytes_alloc / data->hit,
		       (unsigned long long)data->bytes_req,
		       (unsigned long)data->bytes_req / data->hit,
		       (unsigned long)data->hit,
		       (unsigned long)data->pingpong,
		       fragmentation(data->bytes_req, data->bytes_alloc));

		next = rb_next(next);
	}

	if (n_lines == -1)
		printf(" ...                                | ...             | ...             | ...      | ...       | ...\n");

	printf("%.102s\n", graph_dotted_line);
}

static void print_summary(void)
{
	printf("\nSUMMARY\n=======\n");
	printf("Total bytes requested: %lu\n", total_requested);
	printf("Total bytes allocated: %lu\n", total_allocated);
	printf("Total bytes wasted on internal fragmentation: %lu\n",
	       total_allocated - total_requested);
	printf("Internal fragmentation: %f%%\n",
	       fragmentation(total_requested, total_allocated));
	printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs);
}

static void print_result(struct perf_session *session)
{
	if (caller_flag)
		__print_result(&root_caller_sorted, session, caller_lines, 1);
	if (alloc_flag)
		__print_result(&root_alloc_sorted, session, alloc_lines, 0);
	print_summary();
}

struct sort_dimension {
	const char	name[20];
	sort_fn_t	cmp;
	struct list_head list;
};

static LIST_HEAD(caller_sort);
static LIST_HEAD(alloc_sort);

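/*
 * Insert into the sorted tree, comparing on each sort key in turn and
 * falling through to the next key on a tie. Larger entries go left,
 * so rb_first() yields the largest and results print in descending
 * order.
 */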
static void sort_insert(struct rb_root *root, struct alloc_stat *data,
			struct list_head *sort_list)
{
	struct rb_node **new = &(root->rb_node);
	struct rb_node *parent = NULL;
	struct sort_dimension *sort;

	while (*new) {
		struct alloc_stat *this;
		int cmp = 0;

		this = rb_entry(*new, struct alloc_stat, node);
		parent = *new;

		list_for_each_entry(sort, sort_list, list) {
			cmp = sort->cmp(data, this);
			if (cmp)
				break;
		}

		if (cmp > 0)
			new = &((*new)->rb_left);
		else
			new = &((*new)->rb_right);
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

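/*
 * Drain the accumulation tree, re-inserting every node into a second
 * tree ordered by the user-selected sort keys.
 */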
static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,
			  struct list_head *sort_list)
{
	struct rb_node *node;
	struct alloc_stat *data;

	for (;;) {
		node = rb_first(root);
		if (!node)
			break;

		rb_erase(node, root);
		data = rb_entry(node, struct alloc_stat, node);
		sort_insert(root_sorted, data, sort_list);
	}
}

static void sort_result(void)
{
	__sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort);
	__sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort);
}

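/*
 * 'perf kmem stat' pipeline: open the recorded perf.data, replay its
 * events through the perf_kmem tool callbacks, then sort and print.
 */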
static int __cmd_kmem(void)
{
	int err = -EINVAL;
	struct perf_session *session;

	session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_kmem);
	if (session == NULL)
		return -ENOMEM;

	if (perf_session__create_kernel_maps(session) < 0)
		goto out_delete;

	if (!perf_session__has_traces(session, "kmem record"))
		goto out_delete;

	setup_pager();
	err = perf_session__process_events(session, &perf_kmem);
	if (err != 0)
		goto out_delete;
	sort_result();
	print_result(session);
out_delete:
	perf_session__delete(session);
	return err;
}

static const char * const kmem_usage[] = {
	"perf kmem [<options>] {record|stat}",
	NULL
};

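/*
 * Sort key implementations. Each cmp returns <0, 0 or >0, strcmp
 * style; ptr_cmp and callsite_cmp double as the lookup comparators
 * for search_alloc_stat().
 */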
static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->ptr < r->ptr)
		return -1;
	else if (l->ptr > r->ptr)
		return 1;
	return 0;
}

static struct sort_dimension ptr_sort_dimension = {
	.name	= "ptr",
	.cmp	= ptr_cmp,
};

static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->call_site < r->call_site)
		return -1;
	else if (l->call_site > r->call_site)
		return 1;
	return 0;
}

static struct sort_dimension callsite_sort_dimension = {
	.name	= "callsite",
	.cmp	= callsite_cmp,
};

static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->hit < r->hit)
		return -1;
	else if (l->hit > r->hit)
		return 1;
	return 0;
}

static struct sort_dimension hit_sort_dimension = {
	.name	= "hit",
	.cmp	= hit_cmp,
};

static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->bytes_alloc < r->bytes_alloc)
		return -1;
	else if (l->bytes_alloc > r->bytes_alloc)
		return 1;
	return 0;
}

static struct sort_dimension bytes_sort_dimension = {
	.name	= "bytes",
	.cmp	= bytes_cmp,
};

static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	double x, y;

	x = fragmentation(l->bytes_req, l->bytes_alloc);
	y = fragmentation(r->bytes_req, r->bytes_alloc);

	if (x < y)
		return -1;
	else if (x > y)
		return 1;
	return 0;
}

static struct sort_dimension frag_sort_dimension = {
	.name	= "frag",
	.cmp	= frag_cmp,
};

static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->pingpong < r->pingpong)
		return -1;
	else if (l->pingpong > r->pingpong)
		return 1;
	return 0;
}

static struct sort_dimension pingpong_sort_dimension = {
	.name	= "pingpong",
	.cmp	= pingpong_cmp,
};

static struct sort_dimension *avail_sorts[] = {
	&ptr_sort_dimension,
	&callsite_sort_dimension,
	&hit_sort_dimension,
	&bytes_sort_dimension,
	&frag_sort_dimension,
	&pingpong_sort_dimension,
};

#define NUM_AVAIL_SORTS	\
	(int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *))

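/*
 * Resolve one --sort token against the table above. The matching
 * dimension is copied, since the embedded list_head means one template
 * cannot sit on both the caller and the alloc sort lists at once.
 */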
static int sort_dimension__add(const char *tok, struct list_head *list)
{
	struct sort_dimension *sort;
	int i;

	for (i = 0; i < NUM_AVAIL_SORTS; i++) {
		if (!strcmp(avail_sorts[i]->name, tok)) {
			sort = malloc(sizeof(*sort));
			if (!sort) {
				pr_err("%s: malloc failed\n", __func__);
				return -1;
			}
			memcpy(sort, avail_sorts[i], sizeof(*sort));
			list_add_tail(&sort->list, list);
			return 0;
		}
	}

	return -1;
}

static int setup_sorting(struct list_head *sort_list, const char *arg)
{
	char *tok;
	char *str = strdup(arg);
	char *pos = str;	/* strsep() advances pos; keep str to free */

	if (!str) {
		pr_err("%s: strdup failed\n", __func__);
		return -1;
	}

	while (true) {
		tok = strsep(&pos, ",");
		if (!tok)
			break;
		if (sort_dimension__add(tok, sort_list) < 0) {
			error("Unknown --sort key: '%s'", tok);
			free(str);
			return -1;
		}
	}

	free(str);
	return 0;
}

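/*
 * --caller and --alloc each set their flag to one more than the
 * other's, so whichever appeared most recently on the command line
 * "wins": later --sort and --line options apply to that view.
 */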
static int parse_sort_opt(const struct option *opt __maybe_unused,
			  const char *arg, int unset __maybe_unused)
{
	if (!arg)
		return -1;

	if (caller_flag > alloc_flag)
		return setup_sorting(&caller_sort, arg);
	else
		return setup_sorting(&alloc_sort, arg);
}

static int parse_caller_opt(const struct option *opt __maybe_unused,
			    const char *arg __maybe_unused,
			    int unset __maybe_unused)
{
	caller_flag = (alloc_flag + 1);
	return 0;
}

static int parse_alloc_opt(const struct option *opt __maybe_unused,
			   const char *arg __maybe_unused,
			   int unset __maybe_unused)
{
	alloc_flag = (caller_flag + 1);
	return 0;
}

static int parse_line_opt(const struct option *opt __maybe_unused,
			  const char *arg, int unset __maybe_unused)
{
	int lines;

	if (!arg)
		return -1;

	lines = strtoul(arg, NULL, 10);

	if (caller_flag > alloc_flag)
		caller_lines = lines;
	else
		alloc_lines = lines;

	return 0;
}

static const struct option kmem_options[] = {
	OPT_STRING('i', "input", &input_name, "file",
		   "input file name"),
	OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
			   "show per-callsite statistics",
			   parse_caller_opt),
	OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
			   "show per-allocation statistics",
			   parse_alloc_opt),
	OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
		     "sort by keys: ptr, callsite, bytes, hit, pingpong, frag",
		     parse_sort_opt),
	OPT_CALLBACK('l', "line", NULL, "num",
		     "show n lines",
		     parse_line_opt),
	OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
	OPT_END()
};

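/*
 * 'perf kmem record <cmd>' is sugar for 'perf record' with the slab
 * tracepoints pre-selected, roughly:
 *
 *   perf record -a -R -f -c 1 \
 *	-e kmem:kmalloc -e kmem:kmalloc_node -e kmem:kfree \
 *	-e kmem:kmem_cache_alloc -e kmem:kmem_cache_alloc_node \
 *	-e kmem:kmem_cache_free <cmd>
 */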
static const char *record_args[] = {
	"record",
	"-a",
	"-R",
	"-f",
	"-c", "1",
	"-e", "kmem:kmalloc",
	"-e", "kmem:kmalloc_node",
	"-e", "kmem:kfree",
	"-e", "kmem:kmem_cache_alloc",
	"-e", "kmem:kmem_cache_alloc_node",
	"-e", "kmem:kmem_cache_free",
};

static int __cmd_record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;

	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	return cmd_record(i, rec_argv, NULL);
}

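/*
 * Entry point. Typical usage, assuming the kmem tracepoints are
 * available on the running kernel:
 *
 *   perf kmem record -- sleep 1	# capture system-wide for 1s
 *   perf kmem stat --caller -l 20	# top 20 call sites
 */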
int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
{
	argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);

	if (!argc)
		usage_with_options(kmem_usage, kmem_options);

	symbol__init();

	if (!strncmp(argv[0], "rec", 3)) {
		return __cmd_record(argc, argv);
	} else if (!strcmp(argv[0], "stat")) {
		if (setup_cpunode_map())
			return -1;

		if (list_empty(&caller_sort))
			setup_sorting(&caller_sort, default_sort_order);
		if (list_empty(&alloc_sort))
			setup_sorting(&alloc_sort, default_sort_order);

		return __cmd_kmem();
	} else
		usage_with_options(kmem_usage, kmem_options);

	return 0;
}