bpf: Add lru_hash_lookup performance test
author Martin KaFai Lau <kafai@fb.com>
Fri, 1 Sep 2017 06:27:11 +0000 (23:27 -0700)
committer David S. Miller <davem@davemloft.net>
Fri, 1 Sep 2017 16:57:38 +0000 (09:57 -0700)
Create a new case to test the LRU lookup performance.

At the beginning, the LRU map is fully loaded (i.e. the number of keys
is equal to map->max_entries).  The lookups then walk the keys from 0
up to num_map_entries and wrap around to 0 again.

This patch also creates an anonymous struct to properly
name the test params in stress_lru_hmap_alloc() in map_perf_test_kern.c.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
samples/bpf/map_perf_test_kern.c
samples/bpf/map_perf_test_user.c

index ca3b22ed577a3f016b861272f9a3166e5909347d..098c857f1eda6396317ea59a5bdfb9a52d6eb8ef 100644 (file)
@@ -88,6 +88,13 @@ struct bpf_map_def SEC("maps") array_map = {
        .max_entries = MAX_ENTRIES,
 };
 
+struct bpf_map_def SEC("maps") lru_hash_lookup_map = {
+       .type = BPF_MAP_TYPE_LRU_HASH,
+       .key_size = sizeof(u32),
+       .value_size = sizeof(long),
+       .max_entries = MAX_ENTRIES,
+};
+
 SEC("kprobe/sys_getuid")
 int stress_hmap(struct pt_regs *ctx)
 {
@@ -148,12 +155,23 @@ int stress_percpu_hmap_alloc(struct pt_regs *ctx)
 SEC("kprobe/sys_connect")
 int stress_lru_hmap_alloc(struct pt_regs *ctx)
 {
+       char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%d\n";
+       union {
+               u16 dst6[8];
+               struct {
+                       u16 magic0;
+                       u16 magic1;
+                       u16 tcase;
+                       u16 unused16;
+                       u32 unused32;
+                       u32 key;
+               };
+       } test_params;
        struct sockaddr_in6 *in6;
-       u16 test_case, dst6[8];
+       u16 test_case;
        int addrlen, ret;
-       char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%d\n";
        long val = 1;
-       u32 key = bpf_get_prandom_u32();
+       u32 key = 0;
 
        in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx);
        addrlen = (int)PT_REGS_PARM3(ctx);
@@ -161,14 +179,18 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx)
        if (addrlen != sizeof(*in6))
                return 0;
 
-       ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr);
+       ret = bpf_probe_read(test_params.dst6, sizeof(test_params.dst6),
+                            &in6->sin6_addr);
        if (ret)
                goto done;
 
-       if (dst6[0] != 0xdead || dst6[1] != 0xbeef)
+       if (test_params.magic0 != 0xdead ||
+           test_params.magic1 != 0xbeef)
                return 0;
 
-       test_case = dst6[7];
+       test_case = test_params.tcase;
+       if (test_case != 3)
+               key = bpf_get_prandom_u32();
 
        if (test_case == 0) {
                ret = bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY);
@@ -188,6 +210,16 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx)
 
                ret = bpf_map_update_elem(nolocal_lru_map, &key, &val,
                                          BPF_ANY);
+       } else if (test_case == 3) {
+               u32 i;
+
+               key = test_params.key;
+
+#pragma clang loop unroll(full)
+               for (i = 0; i < 32; i++) {
+                       bpf_map_lookup_elem(&lru_hash_lookup_map, &key);
+                       key++;
+               }
        } else {
                ret = -EINVAL;
        }
index bccbf8478e43d15b51e8ac6b6844d05cca18403c..f388254896f67b7120bcf20661691288f72e028e 100644 (file)
@@ -46,6 +46,7 @@ enum test_type {
        HASH_LOOKUP,
        ARRAY_LOOKUP,
        INNER_LRU_HASH_PREALLOC,
+       LRU_HASH_LOOKUP,
        NR_TESTS,
 };
 
@@ -60,6 +61,7 @@ const char *test_map_names[NR_TESTS] = {
        [HASH_LOOKUP] = "hash_map",
        [ARRAY_LOOKUP] = "array_map",
        [INNER_LRU_HASH_PREALLOC] = "inner_lru_hash_map",
+       [LRU_HASH_LOOKUP] = "lru_hash_lookup_map",
 };
 
 static int test_flags = ~0;
@@ -67,6 +69,8 @@ static uint32_t num_map_entries;
 static uint32_t inner_lru_hash_size;
 static int inner_lru_hash_idx = -1;
 static int array_of_lru_hashs_idx = -1;
+static int lru_hash_lookup_idx = -1;
+static int lru_hash_lookup_test_entries = 32;
 static uint32_t max_cnt = 1000000;
 
 static int check_test_flags(enum test_type t)
@@ -86,6 +90,32 @@ static void test_hash_prealloc(int cpu)
               cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time));
 }
 
+static int pre_test_lru_hash_lookup(int tasks)
+{
+       int fd = map_fd[lru_hash_lookup_idx];
+       uint32_t key;
+       long val = 1;
+       int ret;
+
+       if (num_map_entries > lru_hash_lookup_test_entries)
+               lru_hash_lookup_test_entries = num_map_entries;
+
+       /* Populate lru_hash_lookup_map for the LRU_HASH_LOOKUP perf test.
+        *
+        * It is fine if the user requests a map with
+        * num_map_entries < 32, in which case some of the later LRU
+        * hash lookups may return not found.  For LRU maps, we are not
+        * interested in the performance of such small maps.
+        */
+       for (key = 0; key < lru_hash_lookup_test_entries; key++) {
+               ret = bpf_map_update_elem(fd, &key, &val, BPF_NOEXIST);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
 static void do_test_lru(enum test_type test, int cpu)
 {
        static int inner_lru_map_fds[MAX_NR_CPUS];
@@ -135,13 +165,17 @@ static void do_test_lru(enum test_type test, int cpu)
 
        if (test == LRU_HASH_PREALLOC) {
                test_name = "lru_hash_map_perf";
-               in6.sin6_addr.s6_addr16[7] = 0;
+               in6.sin6_addr.s6_addr16[2] = 0;
        } else if (test == NOCOMMON_LRU_HASH_PREALLOC) {
                test_name = "nocommon_lru_hash_map_perf";
-               in6.sin6_addr.s6_addr16[7] = 1;
+               in6.sin6_addr.s6_addr16[2] = 1;
        } else if (test == INNER_LRU_HASH_PREALLOC) {
                test_name = "inner_lru_hash_map_perf";
-               in6.sin6_addr.s6_addr16[7] = 2;
+               in6.sin6_addr.s6_addr16[2] = 2;
+       } else if (test == LRU_HASH_LOOKUP) {
+               test_name = "lru_hash_lookup_perf";
+               in6.sin6_addr.s6_addr16[2] = 3;
+               in6.sin6_addr.s6_addr32[3] = 0;
        } else {
                assert(0);
        }
@@ -150,6 +184,11 @@ static void do_test_lru(enum test_type test, int cpu)
        for (i = 0; i < max_cnt; i++) {
                ret = connect(-1, (const struct sockaddr *)&in6, sizeof(in6));
                assert(ret == -1 && errno == EBADF);
+               if (in6.sin6_addr.s6_addr32[3] <
+                   lru_hash_lookup_test_entries - 32)
+                       in6.sin6_addr.s6_addr32[3] += 32;
+               else
+                       in6.sin6_addr.s6_addr32[3] = 0;
        }
        printf("%d:%s pre-alloc %lld events per sec\n",
               cpu, test_name,
@@ -171,6 +210,11 @@ static void test_inner_lru_hash_prealloc(int cpu)
        do_test_lru(INNER_LRU_HASH_PREALLOC, cpu);
 }
 
+static void test_lru_hash_lookup(int cpu)
+{
+       do_test_lru(LRU_HASH_LOOKUP, cpu);
+}
+
 static void test_percpu_hash_prealloc(int cpu)
 {
        __u64 start_time;
@@ -243,6 +287,11 @@ static void test_array_lookup(int cpu)
               cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time));
 }
 
+typedef int (*pre_test_func)(int tasks);
+const pre_test_func pre_test_funcs[] = {
+       [LRU_HASH_LOOKUP] = pre_test_lru_hash_lookup,
+};
+
 typedef void (*test_func)(int cpu);
 const test_func test_funcs[] = {
        [HASH_PREALLOC] = test_hash_prealloc,
@@ -255,8 +304,25 @@ const test_func test_funcs[] = {
        [HASH_LOOKUP] = test_hash_lookup,
        [ARRAY_LOOKUP] = test_array_lookup,
        [INNER_LRU_HASH_PREALLOC] = test_inner_lru_hash_prealloc,
+       [LRU_HASH_LOOKUP] = test_lru_hash_lookup,
 };
 
+static int pre_test(int tasks)
+{
+       int i;
+
+       for (i = 0; i < NR_TESTS; i++) {
+               if (pre_test_funcs[i] && check_test_flags(i)) {
+                       int ret = pre_test_funcs[i](tasks);
+
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       return 0;
+}
+
 static void loop(int cpu)
 {
        cpu_set_t cpuset;
@@ -277,6 +343,8 @@ static void run_perf_test(int tasks)
        pid_t pid[tasks];
        int i;
 
+       assert(!pre_test(tasks));
+
        for (i = 0; i < tasks; i++) {
                pid[i] = fork();
                if (pid[i] == 0) {
@@ -344,6 +412,9 @@ static void fixup_map(struct bpf_map_data *map, int idx)
                array_of_lru_hashs_idx = idx;
        }
 
+       if (!strcmp("lru_hash_lookup_map", map->name))
+               lru_hash_lookup_idx = idx;
+
        if (num_map_entries <= 0)
                return;