Commit | Line | Data |
---|---|---|
ba77c9e1 LZ |
1 | #include "builtin.h" |
2 | #include "perf.h" | |
3 | ||
4 | #include "util/util.h" | |
5 | #include "util/cache.h" | |
6 | #include "util/symbol.h" | |
7 | #include "util/thread.h" | |
8 | #include "util/header.h" | |
9 | ||
10 | #include "util/parse-options.h" | |
11 | #include "util/trace-event.h" | |
12 | ||
13 | #include "util/debug.h" | |
14 | #include "util/data_map.h" | |
15 | ||
16 | #include <linux/rbtree.h> | |
17 | ||
/*
 * Comparator signature used when ordering alloc_stat entries for output.
 * Only a forward declaration of struct alloc_stat is needed here because
 * the comparator takes pointers.
 */
struct alloc_stat;
typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
20 | ||
21 | static char const *input_name = "perf.data"; | |
22 | ||
23 | static struct perf_header *header; | |
24 | static u64 sample_type; | |
25 | ||
26 | static int alloc_flag; | |
27 | static int caller_flag; | |
28 | ||
29 | sort_fn_t alloc_sort_fn; | |
30 | sort_fn_t caller_sort_fn; | |
31 | ||
32 | static int alloc_lines = -1; | |
33 | static int caller_lines = -1; | |
34 | ||
35 | static char *cwd; | |
36 | static int cwdlen; | |
37 | ||
38 | struct alloc_stat { | |
39 | union { | |
40 | struct { | |
41 | char *name; | |
42 | u64 call_site; | |
43 | }; | |
44 | u64 ptr; | |
45 | }; | |
46 | u64 bytes_req; | |
47 | u64 bytes_alloc; | |
48 | u32 hit; | |
49 | ||
50 | struct rb_node node; | |
51 | }; | |
52 | ||
53 | static struct rb_root root_alloc_stat; | |
54 | static struct rb_root root_alloc_sorted; | |
55 | static struct rb_root root_caller_stat; | |
56 | static struct rb_root root_caller_sorted; | |
57 | ||
58 | static unsigned long total_requested, total_allocated; | |
59 | ||
60 | struct raw_event_sample { | |
61 | u32 size; | |
62 | char data[0]; | |
63 | }; | |
64 | ||
65 | static int | |
66 | process_comm_event(event_t *event, unsigned long offset, unsigned long head) | |
67 | { | |
68 | struct thread *thread = threads__findnew(event->comm.pid); | |
69 | ||
70 | dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", | |
71 | (void *)(offset + head), | |
72 | (void *)(long)(event->header.size), | |
73 | event->comm.comm, event->comm.pid); | |
74 | ||
75 | if (thread == NULL || | |
76 | thread__set_comm(thread, event->comm.comm)) { | |
77 | dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); | |
78 | return -1; | |
79 | } | |
80 | ||
81 | return 0; | |
82 | } | |
83 | ||
84 | static void insert_alloc_stat(unsigned long ptr, | |
85 | int bytes_req, int bytes_alloc) | |
86 | { | |
87 | struct rb_node **node = &root_alloc_stat.rb_node; | |
88 | struct rb_node *parent = NULL; | |
89 | struct alloc_stat *data = NULL; | |
90 | ||
91 | if (!alloc_flag) | |
92 | return; | |
93 | ||
94 | while (*node) { | |
95 | parent = *node; | |
96 | data = rb_entry(*node, struct alloc_stat, node); | |
97 | ||
98 | if (ptr > data->ptr) | |
99 | node = &(*node)->rb_right; | |
100 | else if (ptr < data->ptr) | |
101 | node = &(*node)->rb_left; | |
102 | else | |
103 | break; | |
104 | } | |
105 | ||
106 | if (data && data->ptr == ptr) { | |
107 | data->hit++; | |
108 | data->bytes_req += bytes_req; | |
109 | data->bytes_alloc += bytes_req; | |
110 | } else { | |
111 | data = malloc(sizeof(*data)); | |
112 | data->ptr = ptr; | |
113 | data->hit = 1; | |
114 | data->bytes_req = bytes_req; | |
115 | data->bytes_alloc = bytes_alloc; | |
116 | ||
117 | rb_link_node(&data->node, parent, node); | |
118 | rb_insert_color(&data->node, &root_alloc_stat); | |
119 | } | |
120 | } | |
121 | ||
122 | static void insert_caller_stat(unsigned long call_site, | |
123 | int bytes_req, int bytes_alloc) | |
124 | { | |
125 | struct rb_node **node = &root_caller_stat.rb_node; | |
126 | struct rb_node *parent = NULL; | |
127 | struct alloc_stat *data = NULL; | |
128 | ||
129 | if (!caller_flag) | |
130 | return; | |
131 | ||
132 | while (*node) { | |
133 | parent = *node; | |
134 | data = rb_entry(*node, struct alloc_stat, node); | |
135 | ||
136 | if (call_site > data->call_site) | |
137 | node = &(*node)->rb_right; | |
138 | else if (call_site < data->call_site) | |
139 | node = &(*node)->rb_left; | |
140 | else | |
141 | break; | |
142 | } | |
143 | ||
144 | if (data && data->call_site == call_site) { | |
145 | data->hit++; | |
146 | data->bytes_req += bytes_req; | |
147 | data->bytes_alloc += bytes_req; | |
148 | } else { | |
149 | data = malloc(sizeof(*data)); | |
150 | data->call_site = call_site; | |
151 | data->hit = 1; | |
152 | data->bytes_req = bytes_req; | |
153 | data->bytes_alloc = bytes_alloc; | |
154 | ||
155 | rb_link_node(&data->node, parent, node); | |
156 | rb_insert_color(&data->node, &root_caller_stat); | |
157 | } | |
158 | } | |
159 | ||
160 | static void process_alloc_event(struct raw_event_sample *raw, | |
161 | struct event *event, | |
162 | int cpu __used, | |
163 | u64 timestamp __used, | |
164 | struct thread *thread __used, | |
165 | int node __used) | |
166 | { | |
167 | unsigned long call_site; | |
168 | unsigned long ptr; | |
169 | int bytes_req; | |
170 | int bytes_alloc; | |
171 | ||
172 | ptr = raw_field_value(event, "ptr", raw->data); | |
173 | call_site = raw_field_value(event, "call_site", raw->data); | |
174 | bytes_req = raw_field_value(event, "bytes_req", raw->data); | |
175 | bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data); | |
176 | ||
177 | insert_alloc_stat(ptr, bytes_req, bytes_alloc); | |
178 | insert_caller_stat(call_site, bytes_req, bytes_alloc); | |
179 | ||
180 | total_requested += bytes_req; | |
181 | total_allocated += bytes_alloc; | |
182 | } | |
183 | ||
184 | static void process_free_event(struct raw_event_sample *raw __used, | |
185 | struct event *event __used, | |
186 | int cpu __used, | |
187 | u64 timestamp __used, | |
188 | struct thread *thread __used) | |
189 | { | |
190 | } | |
191 | ||
192 | static void | |
193 | process_raw_event(event_t *raw_event __used, void *more_data, | |
194 | int cpu, u64 timestamp, struct thread *thread) | |
195 | { | |
196 | struct raw_event_sample *raw = more_data; | |
197 | struct event *event; | |
198 | int type; | |
199 | ||
200 | type = trace_parse_common_type(raw->data); | |
201 | event = trace_find_event(type); | |
202 | ||
203 | if (!strcmp(event->name, "kmalloc") || | |
204 | !strcmp(event->name, "kmem_cache_alloc")) { | |
205 | process_alloc_event(raw, event, cpu, timestamp, thread, 0); | |
206 | return; | |
207 | } | |
208 | ||
209 | if (!strcmp(event->name, "kmalloc_node") || | |
210 | !strcmp(event->name, "kmem_cache_alloc_node")) { | |
211 | process_alloc_event(raw, event, cpu, timestamp, thread, 1); | |
212 | return; | |
213 | } | |
214 | ||
215 | if (!strcmp(event->name, "kfree") || | |
216 | !strcmp(event->name, "kmem_cache_free")) { | |
217 | process_free_event(raw, event, cpu, timestamp, thread); | |
218 | return; | |
219 | } | |
220 | } | |
221 | ||
222 | static int | |
223 | process_sample_event(event_t *event, unsigned long offset, unsigned long head) | |
224 | { | |
225 | u64 ip = event->ip.ip; | |
226 | u64 timestamp = -1; | |
227 | u32 cpu = -1; | |
228 | u64 period = 1; | |
229 | void *more_data = event->ip.__more_data; | |
230 | struct thread *thread = threads__findnew(event->ip.pid); | |
231 | ||
232 | if (sample_type & PERF_SAMPLE_TIME) { | |
233 | timestamp = *(u64 *)more_data; | |
234 | more_data += sizeof(u64); | |
235 | } | |
236 | ||
237 | if (sample_type & PERF_SAMPLE_CPU) { | |
238 | cpu = *(u32 *)more_data; | |
239 | more_data += sizeof(u32); | |
240 | more_data += sizeof(u32); /* reserved */ | |
241 | } | |
242 | ||
243 | if (sample_type & PERF_SAMPLE_PERIOD) { | |
244 | period = *(u64 *)more_data; | |
245 | more_data += sizeof(u64); | |
246 | } | |
247 | ||
248 | dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", | |
249 | (void *)(offset + head), | |
250 | (void *)(long)(event->header.size), | |
251 | event->header.misc, | |
252 | event->ip.pid, event->ip.tid, | |
253 | (void *)(long)ip, | |
254 | (long long)period); | |
255 | ||
256 | if (thread == NULL) { | |
257 | pr_debug("problem processing %d event, skipping it.\n", | |
258 | event->header.type); | |
259 | return -1; | |
260 | } | |
261 | ||
262 | dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); | |
263 | ||
264 | process_raw_event(event, more_data, cpu, timestamp, thread); | |
265 | ||
266 | return 0; | |
267 | } | |
268 | ||
269 | static int sample_type_check(u64 type) | |
270 | { | |
271 | sample_type = type; | |
272 | ||
273 | if (!(sample_type & PERF_SAMPLE_RAW)) { | |
274 | fprintf(stderr, | |
275 | "No trace sample to read. Did you call perf record " | |
276 | "without -R?"); | |
277 | return -1; | |
278 | } | |
279 | ||
280 | return 0; | |
281 | } | |
282 | ||
283 | static struct perf_file_handler file_handler = { | |
284 | .process_sample_event = process_sample_event, | |
285 | .process_comm_event = process_comm_event, | |
286 | .sample_type_check = sample_type_check, | |
287 | }; | |
288 | ||
289 | static int read_events(void) | |
290 | { | |
291 | register_idle_thread(); | |
292 | register_perf_file_handler(&file_handler); | |
293 | ||
cc612d81 | 294 | return mmap_dispatch_perf_file(&header, input_name, NULL, false, 0, 0, |
ba77c9e1 LZ |
295 | &cwdlen, &cwd); |
296 | } | |
297 | ||
/*
 * Internal fragmentation as a percentage: the share of allocated bytes that
 * was not requested.
 *
 * @n_req:   bytes requested
 * @n_alloc: bytes actually allocated
 *
 * Returns 0.0 when nothing was allocated.  The parameters are 64 bits wide
 * so that the u64 byte counters kept in struct alloc_stat are not truncated
 * where unsigned long is only 32 bits.
 */
static double fragmentation(unsigned long long n_req,
			    unsigned long long n_alloc)
{
	if (n_alloc == 0)
		return 0.0;

	return 100.0 - (100.0 * n_req / n_alloc);
}
305 | ||
306 | static void __print_result(struct rb_root *root, int n_lines, int is_caller) | |
307 | { | |
308 | struct rb_node *next; | |
309 | ||
1b145ae5 ACM |
310 | printf("%.78s\n", graph_dotted_line); |
311 | printf("%-28s|", is_caller ? "Callsite": "Alloc Ptr"); | |
312 | printf("Total_alloc/Per | Total_req/Per | Hit | Frag\n"); | |
313 | printf("%.78s\n", graph_dotted_line); | |
ba77c9e1 LZ |
314 | |
315 | next = rb_first(root); | |
316 | ||
317 | while (next && n_lines--) { | |
1b145ae5 ACM |
318 | struct alloc_stat *data = rb_entry(next, struct alloc_stat, |
319 | node); | |
320 | struct symbol *sym = NULL; | |
321 | char bf[BUFSIZ]; | |
322 | u64 addr; | |
323 | ||
324 | if (is_caller) { | |
325 | addr = data->call_site; | |
326 | sym = kernel_maps__find_symbol(addr, NULL, NULL); | |
327 | } else | |
328 | addr = data->ptr; | |
329 | ||
330 | if (sym != NULL) | |
331 | snprintf(bf, sizeof(bf), "%s/%Lx", sym->name, | |
332 | addr - sym->start); | |
333 | else | |
334 | snprintf(bf, sizeof(bf), "%#Lx", addr); | |
ba77c9e1 | 335 | |
1b145ae5 ACM |
336 | printf("%-28s|%8llu/%-6lu |%8llu/%-6lu|%6lu|%8.3f%%\n", |
337 | bf, (unsigned long long)data->bytes_alloc, | |
ba77c9e1 LZ |
338 | (unsigned long)data->bytes_alloc / data->hit, |
339 | (unsigned long long)data->bytes_req, | |
340 | (unsigned long)data->bytes_req / data->hit, | |
341 | (unsigned long)data->hit, | |
342 | fragmentation(data->bytes_req, data->bytes_alloc)); | |
343 | ||
344 | next = rb_next(next); | |
345 | } | |
346 | ||
347 | if (n_lines == -1) | |
348 | printf(" ... | ... | ... | ... | ... \n"); | |
349 | ||
350 | printf(" ------------------------------------------------------------------------------\n"); | |
351 | } | |
352 | ||
353 | static void print_summary(void) | |
354 | { | |
355 | printf("\nSUMMARY\n=======\n"); | |
356 | printf("Total bytes requested: %lu\n", total_requested); | |
357 | printf("Total bytes allocated: %lu\n", total_allocated); | |
358 | printf("Total bytes wasted on internal fragmentation: %lu\n", | |
359 | total_allocated - total_requested); | |
360 | printf("Internal fragmentation: %f%%\n", | |
361 | fragmentation(total_requested, total_allocated)); | |
362 | } | |
363 | ||
364 | static void print_result(void) | |
365 | { | |
366 | if (caller_flag) | |
367 | __print_result(&root_caller_sorted, caller_lines, 1); | |
368 | if (alloc_flag) | |
369 | __print_result(&root_alloc_sorted, alloc_lines, 0); | |
370 | print_summary(); | |
371 | } | |
372 | ||
373 | static void sort_insert(struct rb_root *root, struct alloc_stat *data, | |
374 | sort_fn_t sort_fn) | |
375 | { | |
376 | struct rb_node **new = &(root->rb_node); | |
377 | struct rb_node *parent = NULL; | |
378 | ||
379 | while (*new) { | |
380 | struct alloc_stat *this; | |
381 | int cmp; | |
382 | ||
383 | this = rb_entry(*new, struct alloc_stat, node); | |
384 | parent = *new; | |
385 | ||
386 | cmp = sort_fn(data, this); | |
387 | ||
388 | if (cmp > 0) | |
389 | new = &((*new)->rb_left); | |
390 | else | |
391 | new = &((*new)->rb_right); | |
392 | } | |
393 | ||
394 | rb_link_node(&data->node, parent, new); | |
395 | rb_insert_color(&data->node, root); | |
396 | } | |
397 | ||
398 | static void __sort_result(struct rb_root *root, struct rb_root *root_sorted, | |
399 | sort_fn_t sort_fn) | |
400 | { | |
401 | struct rb_node *node; | |
402 | struct alloc_stat *data; | |
403 | ||
404 | for (;;) { | |
405 | node = rb_first(root); | |
406 | if (!node) | |
407 | break; | |
408 | ||
409 | rb_erase(node, root); | |
410 | data = rb_entry(node, struct alloc_stat, node); | |
411 | sort_insert(root_sorted, data, sort_fn); | |
412 | } | |
413 | } | |
414 | ||
415 | static void sort_result(void) | |
416 | { | |
417 | __sort_result(&root_alloc_stat, &root_alloc_sorted, alloc_sort_fn); | |
418 | __sort_result(&root_caller_stat, &root_caller_sorted, caller_sort_fn); | |
419 | } | |
420 | ||
/*
 * Run the report: set up the pager, read the input, sort the statistics
 * and print them.
 *
 * Returns 0 on success.  If reading the input fails, the non-zero value
 * from read_events() is returned and nothing is printed.
 */
static int __cmd_kmem(void)
{
	int err;

	setup_pager();

	err = read_events();
	if (err)
		return err;

	sort_result();
	print_result();

	return 0;
}
430 | ||
431 | static const char * const kmem_usage[] = { | |
432 | "perf kmem [<options>] {record}", | |
433 | NULL | |
434 | }; | |
435 | ||
436 | ||
437 | static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r) | |
438 | { | |
439 | if (l->ptr < r->ptr) | |
440 | return -1; | |
441 | else if (l->ptr > r->ptr) | |
442 | return 1; | |
443 | return 0; | |
444 | } | |
445 | ||
446 | static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r) | |
447 | { | |
448 | if (l->call_site < r->call_site) | |
449 | return -1; | |
450 | else if (l->call_site > r->call_site) | |
451 | return 1; | |
452 | return 0; | |
453 | } | |
454 | ||
f3ced7cd PE |
455 | static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r) |
456 | { | |
457 | if (l->hit < r->hit) | |
458 | return -1; | |
459 | else if (l->hit > r->hit) | |
460 | return 1; | |
461 | return 0; | |
462 | } | |
463 | ||
ba77c9e1 LZ |
464 | static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r) |
465 | { | |
466 | if (l->bytes_alloc < r->bytes_alloc) | |
467 | return -1; | |
468 | else if (l->bytes_alloc > r->bytes_alloc) | |
469 | return 1; | |
470 | return 0; | |
471 | } | |
472 | ||
f3ced7cd PE |
473 | static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r) |
474 | { | |
475 | double x, y; | |
476 | ||
477 | x = fragmentation(l->bytes_req, l->bytes_alloc); | |
478 | y = fragmentation(r->bytes_req, r->bytes_alloc); | |
479 | ||
480 | if (x < y) | |
481 | return -1; | |
482 | else if (x > y) | |
483 | return 1; | |
484 | return 0; | |
485 | } | |
486 | ||
ba77c9e1 LZ |
487 | static int parse_sort_opt(const struct option *opt __used, |
488 | const char *arg, int unset __used) | |
489 | { | |
490 | sort_fn_t sort_fn; | |
491 | ||
492 | if (!arg) | |
493 | return -1; | |
494 | ||
495 | if (strcmp(arg, "ptr") == 0) | |
496 | sort_fn = ptr_cmp; | |
497 | else if (strcmp(arg, "call_site") == 0) | |
498 | sort_fn = callsite_cmp; | |
f3ced7cd PE |
499 | else if (strcmp(arg, "hit") == 0) |
500 | sort_fn = hit_cmp; | |
ba77c9e1 LZ |
501 | else if (strcmp(arg, "bytes") == 0) |
502 | sort_fn = bytes_cmp; | |
f3ced7cd PE |
503 | else if (strcmp(arg, "frag") == 0) |
504 | sort_fn = frag_cmp; | |
ba77c9e1 LZ |
505 | else |
506 | return -1; | |
507 | ||
508 | if (caller_flag > alloc_flag) | |
509 | caller_sort_fn = sort_fn; | |
510 | else | |
511 | alloc_sort_fn = sort_fn; | |
512 | ||
513 | return 0; | |
514 | } | |
515 | ||
516 | static int parse_stat_opt(const struct option *opt __used, | |
517 | const char *arg, int unset __used) | |
518 | { | |
519 | if (!arg) | |
520 | return -1; | |
521 | ||
522 | if (strcmp(arg, "alloc") == 0) | |
523 | alloc_flag = (caller_flag + 1); | |
524 | else if (strcmp(arg, "caller") == 0) | |
525 | caller_flag = (alloc_flag + 1); | |
526 | else | |
527 | return -1; | |
528 | return 0; | |
529 | } | |
530 | ||
531 | static int parse_line_opt(const struct option *opt __used, | |
532 | const char *arg, int unset __used) | |
533 | { | |
534 | int lines; | |
535 | ||
536 | if (!arg) | |
537 | return -1; | |
538 | ||
539 | lines = strtoul(arg, NULL, 10); | |
540 | ||
541 | if (caller_flag > alloc_flag) | |
542 | caller_lines = lines; | |
543 | else | |
544 | alloc_lines = lines; | |
545 | ||
546 | return 0; | |
547 | } | |
548 | ||
549 | static const struct option kmem_options[] = { | |
550 | OPT_STRING('i', "input", &input_name, "file", | |
551 | "input file name"), | |
552 | OPT_CALLBACK(0, "stat", NULL, "<alloc>|<caller>", | |
553 | "stat selector, Pass 'alloc' or 'caller'.", | |
554 | parse_stat_opt), | |
555 | OPT_CALLBACK('s', "sort", NULL, "key", | |
f3ced7cd | 556 | "sort by key: ptr, call_site, hit, bytes, frag", |
ba77c9e1 LZ |
557 | parse_sort_opt), |
558 | OPT_CALLBACK('l', "line", NULL, "num", | |
559 | "show n lins", | |
560 | parse_line_opt), | |
561 | OPT_END() | |
562 | }; | |
563 | ||
/*
 * Fixed leading arguments for the 'record' sub-command: the recording
 * flags followed by one -e entry per kmem tracepoint this tool handles.
 */
static const char *record_args[] = {
	"record",
	"-a", "-R", "-M", "-f",
	"-c", "1",
	"-e", "kmem:kmalloc",
	"-e", "kmem:kmalloc_node",
	"-e", "kmem:kfree",
	"-e", "kmem:kmem_cache_alloc",
	"-e", "kmem:kmem_cache_alloc_node",
	"-e", "kmem:kmem_cache_free",
};
578 | ||
579 | static int __cmd_record(int argc, const char **argv) | |
580 | { | |
581 | unsigned int rec_argc, i, j; | |
582 | const char **rec_argv; | |
583 | ||
584 | rec_argc = ARRAY_SIZE(record_args) + argc - 1; | |
585 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); | |
586 | ||
587 | for (i = 0; i < ARRAY_SIZE(record_args); i++) | |
588 | rec_argv[i] = strdup(record_args[i]); | |
589 | ||
590 | for (j = 1; j < (unsigned int)argc; j++, i++) | |
591 | rec_argv[i] = argv[j]; | |
592 | ||
593 | return cmd_record(i, rec_argv, NULL); | |
594 | } | |
595 | ||
596 | int cmd_kmem(int argc, const char **argv, const char *prefix __used) | |
597 | { | |
598 | symbol__init(0); | |
599 | ||
600 | argc = parse_options(argc, argv, kmem_options, kmem_usage, 0); | |
601 | ||
602 | if (argc && !strncmp(argv[0], "rec", 3)) | |
603 | return __cmd_record(argc, argv); | |
604 | else if (argc) | |
605 | usage_with_options(kmem_usage, kmem_options); | |
606 | ||
607 | if (!alloc_sort_fn) | |
608 | alloc_sort_fn = bytes_cmp; | |
609 | if (!caller_sort_fn) | |
610 | caller_sort_fn = bytes_cmp; | |
611 | ||
612 | return __cmd_kmem(); | |
613 | } | |
614 |