perf bench: Add memcpy() benchmark
authorHitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Tue, 17 Nov 2009 15:20:09 +0000 (00:20 +0900)
committerIngo Molnar <mingo@elte.hu>
Thu, 19 Nov 2009 05:21:48 +0000 (06:21 +0100)
'perf bench mem memcpy' is a benchmark suite for measuring memcpy()
performance.

Example on an Intel(R) Core(TM)2 Duo CPU E6850 @ 3.00GHz:

| % perf bench mem memcpy -l 1MB
| # Running mem/memcpy benchmark...
| # Copying 1MB Bytes from 0xb7d98008 to 0xb7e99008 ...
|
|     726.216412 MB/Sec

Signed-off-by: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1258471212-30281-1-git-send-email-mitake@dcl.info.waseda.ac.jp>
[ v2: updated changelog, clarified history of builtin-bench.c ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
tools/perf/Makefile
tools/perf/bench/bench.h
tools/perf/bench/mem-memcpy.c [new file with mode: 0644]
tools/perf/builtin-bench.c

index 3f0666af93de7e0cac32b4bb51b17f7a61bf3b0a..53e663a5fa2ff6f3d75a48eb2511ef8ef838f913 100644 (file)
@@ -432,6 +432,7 @@ BUILTIN_OBJS += builtin-bench.o
 # Benchmark modules
 BUILTIN_OBJS += bench/sched-messaging.o
 BUILTIN_OBJS += bench/sched-pipe.o
+BUILTIN_OBJS += bench/mem-memcpy.o
 
 BUILTIN_OBJS += builtin-help.o
 BUILTIN_OBJS += builtin-sched.o
index 9fbd8d745fa1ee933ecbf14028bd199f4874d92d..f7781c6267c06098fa2ad1376e60c69fe2497ffb 100644 (file)
@@ -3,6 +3,7 @@
 
 extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
 extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
+extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used);
 
 #define BENCH_FORMAT_DEFAULT_STR       "default"
 #define BENCH_FORMAT_DEFAULT           0
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
new file mode 100644 (file)
index 0000000..d4f4f98
--- /dev/null
@@ -0,0 +1,186 @@
+/*
+ * mem-memcpy.c
+ *
+ * memcpy: Simple memory copy in various ways
+ *
+ * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ */
+#include <ctype.h>
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../util/string.h"
+#include "../util/header.h"
+#include "bench.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <errno.h>
+
+#define K 1024   /* step between the B, KB, MB and GB output thresholds */
+
+static const char *length_str = "1MB";   /* value of -l/--length */
+static const char *routine    = "default";   /* value of -r/--routine */
+static int use_clock = 0;   /* set by -c/--clock */
+
+static const struct option options[] = {   /* option table handed to parse_options() */
+       OPT_STRING('l', "length", &length_str, "1MB",
+                   "Specify length of memory to copy. "
+                   "available unit: B, MB, GB (upper and lower)"),
+       OPT_STRING('r', "routine", &routine, "default",
+                   "Specify routine to copy"),
+       OPT_BOOLEAN('c', "clock", &use_clock,
+                   "Use CPU clock for measuring"),
+       OPT_END()
+};
+
+struct routine {   /* one selectable copy implementation */
+       const char *name;   /* compared against the -r/--routine value */
+       const char *desc;   /* printed in the "Available routines" list */
+       void * (*fn)(void *dst, const void *src, size_t len);   /* copy function, memcpy() signature */
+};
+
+struct routine routines[] = {   /* lookup table; the entry with a NULL name ends it */
+       { "default",
+         "Default memcpy() provided by glibc",
+         memcpy },
+       { NULL,
+         NULL,
+         NULL   }
+};
+
+static const char * const bench_mem_memcpy_usage[] = {   /* usage strings handed to parse_options() */
+       "perf bench mem memcpy <options>",
+       NULL
+};
+
+static int clock_fd;   /* descriptor returned by sys_perf_event_open() in init_clock() */
+
+static struct perf_event_attr clock_attr = {   /* event description: hardware CPU cycles */
+       .type = PERF_TYPE_HARDWARE,
+       .config = PERF_COUNT_HW_CPU_CYCLES
+};
+
+/*
+ * Open the perf event described by clock_attr for the current process
+ * and remember its descriptor in clock_fd. Failure to open the event
+ * trips BUG_ON().
+ */
+static void init_clock(void)
+{
+       int fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
+
+       clock_fd = fd;
+       BUG_ON(fd < 0);
+}
+
+/*
+ * Read the current value of the counter behind clock_fd.
+ * A failed or short read trips BUG_ON().
+ */
+static u64 get_clock(void)
+{
+       u64 cycles;
+       int nread = read(clock_fd, &cycles, sizeof(cycles));
+
+       BUG_ON(nread != sizeof(cycles));
+
+       return cycles;
+}
+
+/* Convert a struct timeval into seconds expressed as a double. */
+static double timeval2double(struct timeval *ts)
+{
+       double whole = (double)ts->tv_sec;
+       double frac = (double)ts->tv_usec / 1000000.0;
+
+       return whole + frac;
+}
+
+/*
+ * Entry point of 'perf bench mem memcpy'.
+ *
+ * Parses the command line, allocates zero-filled source and destination
+ * buffers of the requested length, runs the selected copy routine once
+ * and prints the result according to bench_format: bytes per second
+ * (measured with gettimeofday()) or, with -c/--clock, clocks per byte
+ * (measured with the CPU cycle counter).
+ *
+ * Returns 0 on success and 1 on an invalid length, an unknown routine
+ * name or an allocation failure. An unknown bench_format terminates the
+ * process with exit(1).
+ */
+int bench_mem_memcpy(int argc, const char **argv,
+                    const char *prefix __used)
+{
+       int i;
+       void *dst, *src;
+       long long len;
+       size_t length;
+       double bps = 0.0;
+       struct timeval tv_start, tv_end, tv_diff;
+       u64 clock_start, clock_end, clock_diff;
+
+       clock_start = clock_end = clock_diff = 0ULL;
+       argc = parse_options(argc, argv, options,
+                            bench_mem_memcpy_usage, 0);
+
+       tv_diff.tv_sec = 0;
+       tv_diff.tv_usec = 0;
+
+       /*
+        * Validate the parsed value before narrowing it to size_t: where
+        * size_t is narrower than long long, a negative or oversized
+        * value would otherwise be truncated into a plausible length.
+        */
+       len = (long long)perf_atoll((char *)length_str);
+       length = (size_t)len;
+       if (len <= 0 ||
+           (unsigned long long)length != (unsigned long long)len) {
+               fprintf(stderr, "Invalid length:%s\n", length_str);
+               return 1;
+       }
+
+       /* look the requested routine up; the NULL name ends the table */
+       for (i = 0; routines[i].name; i++) {
+               if (!strcmp(routines[i].name, routine))
+                       break;
+       }
+       if (!routines[i].name) {
+               printf("Unknown routine:%s\n", routine);
+               printf("Available routines...\n");
+               for (i = 0; routines[i].name; i++) {
+                       printf("\t%s ... %s\n",
+                              routines[i].name, routines[i].desc);
+               }
+               return 1;
+       }
+
+       /*
+        * Check the allocations explicitly instead of with assert(),
+        * which is compiled out under NDEBUG and would let a NULL
+        * buffer reach the copy routine.
+        */
+       dst = calloc(length, sizeof(char));
+       src = calloc(length, sizeof(char));
+       if (!dst || !src) {
+               fprintf(stderr, "Failed to allocate memory for length:%s\n",
+                       length_str);
+               free(src);
+               free(dst);
+               return 1;
+       }
+
+       if (bench_format == BENCH_FORMAT_DEFAULT) {
+               printf("# Copying %s Bytes from %p to %p ...\n\n",
+                      length_str, src, dst);
+       }
+
+       if (use_clock) {
+               init_clock();
+               clock_start = get_clock();
+       } else
+               BUG_ON(gettimeofday(&tv_start, NULL));
+
+       routines[i].fn(dst, src, length);
+
+       if (use_clock) {
+               clock_end = get_clock();
+               clock_diff = clock_end - clock_start;
+       } else {
+               BUG_ON(gettimeofday(&tv_end, NULL));
+               timersub(&tv_end, &tv_start, &tv_diff);
+               bps = (double)((double)length / timeval2double(&tv_diff));
+       }
+
+       switch (bench_format) {
+       case BENCH_FORMAT_DEFAULT:
+               if (use_clock) {
+                       printf(" %14lf Clock/Byte\n",
+                              (double)clock_diff / (double)length);
+               } else {
+                       /* pick the largest unit that keeps the value >= 1 */
+                       if (bps < K)
+                               printf(" %14lf B/Sec\n", bps);
+                       else if (bps < K * K)
+                               printf(" %14lf KB/Sec\n", bps / 1024);
+                       else if (bps < K * K * K)
+                               printf(" %14lf MB/Sec\n", bps / 1024 / 1024);
+                       else {
+                               printf(" %14lf GB/Sec\n",
+                                      bps / 1024 / 1024 / 1024);
+                       }
+               }
+               break;
+       case BENCH_FORMAT_SIMPLE:
+               if (use_clock) {
+                       printf("%14lf\n",
+                              (double)clock_diff / (double)length);
+               } else
+                       printf("%lf\n", bps);
+               break;
+       default:
+               /* should never happen: bench_format holds an unknown value */
+               fprintf(stderr, "Unknown format:%d\n", bench_format);
+               exit(1);
+               break;
+       }
+
+       free(src);
+       free(dst);
+
+       return 0;
+}
index 90c39baae0de0bf9fe58c2ddb94caca8f8ab1556..e043eb83092aa3576a89016f70e8698e36c97e49 100644 (file)
@@ -12,6 +12,7 @@
  *
  * Available subsystem list:
  *  sched ... scheduler and IPC mechanism
+ *  mem   ... memory access performance
  *
  */
 
@@ -43,6 +44,15 @@ static struct bench_suite sched_suites[] = {
          NULL                  }
 };
 
+static struct bench_suite mem_suites[] = {   /* suites registered under the "mem" subsystem */
+       { "memcpy",
+         "Simple memory copy in various ways",
+         bench_mem_memcpy },
+       { NULL,
+         NULL,
+         NULL             }
+};
+
 struct bench_subsys {
        const char *name;
        const char *summary;
@@ -53,9 +63,12 @@ static struct bench_subsys subsystems[] = {
        { "sched",
          "scheduler and IPC mechanism",
          sched_suites },
+       { "mem",
+         "memory access performance",
+         mem_suites },
        { NULL,
          NULL,
-         NULL         }
+         NULL       }
 };
 
 static void dump_suites(int subsys_index)