bpf: Add percpu LRU list
authorMartin KaFai Lau <kafai@fb.com>
Fri, 11 Nov 2016 18:55:07 +0000 (10:55 -0800)
committerDavid S. Miller <davem@davemloft.net>
Tue, 15 Nov 2016 16:50:20 +0000 (11:50 -0500)
Instead of having a common LRU list, this patch allows a
percpu LRU list which can be selected by specifying a map
attribute.  The map attribute will be added in the later
patch.

While the common use case for LRU is #reads >> #updates,
percpu LRU list allows bpf prog to absorb unusual #updates
under pathological case (e.g. external traffic facing machine which
could be under attack).

Each percpu LRU is isolated from each other.  The LRU nodes (including
free nodes) cannot be moved across different LRU Lists.

Here are the update performance comparison between
common LRU list and percpu LRU list (the test code is
at the last patch):

[root@kerneltest003.31.prn1 ~]# for i in 1 4 8; do echo -n "$i cpus: "; \
./map_perf_test 16 $i | awk '{r += $3}END{print r " updates"}'; done
 1 cpus: 2934082 updates
 4 cpus: 7391434 updates
 8 cpus: 6500576 updates

[root@kerneltest003.31.prn1 ~]# for i in 1 4 8; do echo -n "$i cpus: "; \
./map_perf_test 32 $i | awk '{r += $3}END{printr " updates"}'; done
  1 cpus: 2896553 updates
  4 cpus: 9766395 updates
  8 cpus: 17460553 updates

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
kernel/bpf/bpf_lru_list.c
kernel/bpf/bpf_lru_list.h

index 73f67094f93a09e0b86913cb9042bb92f6809bf8..bfebff010ba90c5e43e1e2fbbbc67292226ef90d 100644 (file)
@@ -13,6 +13,9 @@
 #define LOCAL_FREE_TARGET              (128)
 #define LOCAL_NR_SCANS                 LOCAL_FREE_TARGET
 
+#define PERCPU_FREE_TARGET             (16)
+#define PERCPU_NR_SCANS                        PERCPU_FREE_TARGET
+
 /* Helpers to get the local list index */
 #define LOCAL_LIST_IDX(t)      ((t) - BPF_LOCAL_LIST_T_OFFSET)
 #define LOCAL_FREE_LIST_IDX    LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_FREE)
@@ -396,7 +399,40 @@ ignore_ref:
        return NULL;
 }
 
-struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru, u32 hash)
+static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru,
+                                                   u32 hash)
+{
+       struct list_head *free_list;
+       struct bpf_lru_node *node = NULL;
+       struct bpf_lru_list *l;
+       unsigned long flags;
+       int cpu = raw_smp_processor_id();
+
+       l = per_cpu_ptr(lru->percpu_lru, cpu);
+
+       raw_spin_lock_irqsave(&l->lock, flags);
+
+       __bpf_lru_list_rotate(lru, l);
+
+       free_list = &l->lists[BPF_LRU_LIST_T_FREE];
+       if (list_empty(free_list))
+               __bpf_lru_list_shrink(lru, l, PERCPU_FREE_TARGET, free_list,
+                                     BPF_LRU_LIST_T_FREE);
+
+       if (!list_empty(free_list)) {
+               node = list_first_entry(free_list, struct bpf_lru_node, list);
+               *(u32 *)((void *)node + lru->hash_offset) = hash;
+               node->ref = 0;
+               __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE);
+       }
+
+       raw_spin_unlock_irqrestore(&l->lock, flags);
+
+       return node;
+}
+
+static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru,
+                                                   u32 hash)
 {
        struct bpf_lru_locallist *loc_l, *steal_loc_l;
        struct bpf_common_lru *clru = &lru->common_lru;
@@ -458,7 +494,16 @@ struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru, u32 hash)
        return node;
 }
 
-void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node)
+struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru, u32 hash)
+{
+       if (lru->percpu)
+               return bpf_percpu_lru_pop_free(lru, hash);
+       else
+               return bpf_common_lru_pop_free(lru, hash);
+}
+
+static void bpf_common_lru_push_free(struct bpf_lru *lru,
+                                    struct bpf_lru_node *node)
 {
        unsigned long flags;
 
@@ -490,8 +535,31 @@ check_lru_list:
        bpf_lru_list_push_free(&lru->common_lru.lru_list, node);
 }
 
-void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset,
-                     u32 elem_size, u32 nr_elems)
+static void bpf_percpu_lru_push_free(struct bpf_lru *lru,
+                                    struct bpf_lru_node *node)
+{
+       struct bpf_lru_list *l;
+       unsigned long flags;
+
+       l = per_cpu_ptr(lru->percpu_lru, node->cpu);
+
+       raw_spin_lock_irqsave(&l->lock, flags);
+
+       __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE);
+
+       raw_spin_unlock_irqrestore(&l->lock, flags);
+}
+
+void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node)
+{
+       if (lru->percpu)
+               bpf_percpu_lru_push_free(lru, node);
+       else
+               bpf_common_lru_push_free(lru, node);
+}
+
+void bpf_common_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset,
+                            u32 elem_size, u32 nr_elems)
 {
        struct bpf_lru_list *l = &lru->common_lru.lru_list;
        u32 i;
@@ -507,6 +575,47 @@ void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset,
        }
 }
 
+void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset,
+                            u32 elem_size, u32 nr_elems)
+{
+       u32 i, pcpu_entries;
+       int cpu;
+       struct bpf_lru_list *l;
+
+       pcpu_entries = nr_elems / num_possible_cpus();
+
+       i = 0;
+
+       for_each_possible_cpu(cpu) {
+               struct bpf_lru_node *node;
+
+               l = per_cpu_ptr(lru->percpu_lru, cpu);
+again:
+               node = (struct bpf_lru_node *)(buf + node_offset);
+               node->cpu = cpu;
+               node->type = BPF_LRU_LIST_T_FREE;
+               node->ref = 0;
+               list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
+               i++;
+               buf += elem_size;
+               if (i == nr_elems)
+                       break;
+               if (i % pcpu_entries)
+                       goto again;
+       }
+}
+
+void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset,
+                     u32 elem_size, u32 nr_elems)
+{
+       if (lru->percpu)
+               bpf_percpu_lru_populate(lru, buf, node_offset, elem_size,
+                                       nr_elems);
+       else
+               bpf_common_lru_populate(lru, buf, node_offset, elem_size,
+                                       nr_elems);
+}
+
 static void bpf_lru_locallist_init(struct bpf_lru_locallist *loc_l, int cpu)
 {
        int i;
@@ -534,26 +643,42 @@ static void bpf_lru_list_init(struct bpf_lru_list *l)
        raw_spin_lock_init(&l->lock);
 }
 
-int bpf_lru_init(struct bpf_lru *lru, u32 hash_offset,
+int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
                 del_from_htab_func del_from_htab, void *del_arg)
 {
        int cpu;
-       struct bpf_common_lru *clru = &lru->common_lru;
 
-       clru->local_list = alloc_percpu(struct bpf_lru_locallist);
-       if (!clru->local_list)
-               return -ENOMEM;
+       if (percpu) {
+               lru->percpu_lru = alloc_percpu(struct bpf_lru_list);
+               if (!lru->percpu_lru)
+                       return -ENOMEM;
 
-       for_each_possible_cpu(cpu) {
-               struct bpf_lru_locallist *loc_l;
+               for_each_possible_cpu(cpu) {
+                       struct bpf_lru_list *l;
 
-               loc_l = per_cpu_ptr(clru->local_list, cpu);
-               bpf_lru_locallist_init(loc_l, cpu);
-       }
+                       l = per_cpu_ptr(lru->percpu_lru, cpu);
+                       bpf_lru_list_init(l);
+               }
+               lru->nr_scans = PERCPU_NR_SCANS;
+       } else {
+               struct bpf_common_lru *clru = &lru->common_lru;
 
-       bpf_lru_list_init(&clru->lru_list);
-       lru->nr_scans = LOCAL_NR_SCANS;
+               clru->local_list = alloc_percpu(struct bpf_lru_locallist);
+               if (!clru->local_list)
+                       return -ENOMEM;
 
+               for_each_possible_cpu(cpu) {
+                       struct bpf_lru_locallist *loc_l;
+
+                       loc_l = per_cpu_ptr(clru->local_list, cpu);
+                       bpf_lru_locallist_init(loc_l, cpu);
+               }
+
+               bpf_lru_list_init(&clru->lru_list);
+               lru->nr_scans = LOCAL_NR_SCANS;
+       }
+
+       lru->percpu = percpu;
        lru->del_from_htab = del_from_htab;
        lru->del_arg = del_arg;
        lru->hash_offset = hash_offset;
@@ -563,5 +688,8 @@ int bpf_lru_init(struct bpf_lru *lru, u32 hash_offset,
 
 void bpf_lru_destroy(struct bpf_lru *lru)
 {
-       free_percpu(lru->common_lru.local_list);
+       if (lru->percpu)
+               free_percpu(lru->percpu_lru);
+       else
+               free_percpu(lru->common_lru.local_list);
 }
index aaa2445ed1cadea6f3569a7360d392c9d657868e..5c35a98d02bf281d0d9527248c37aca0b9876fb1 100644 (file)
@@ -53,11 +53,15 @@ struct bpf_common_lru {
 typedef bool (*del_from_htab_func)(void *arg, struct bpf_lru_node *node);
 
 struct bpf_lru {
-       struct bpf_common_lru common_lru;
+       union {
+               struct bpf_common_lru common_lru;
+               struct bpf_lru_list __percpu *percpu_lru;
+       };
        del_from_htab_func del_from_htab;
        void *del_arg;
        unsigned int hash_offset;
        unsigned int nr_scans;
+       bool percpu;
 };
 
 static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node)
@@ -68,7 +72,7 @@ static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node)
        node->ref = 1;
 }
 
-int bpf_lru_init(struct bpf_lru *lru, u32 hash_offset,
+int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
                 del_from_htab_func del_from_htab, void *delete_arg);
 void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset,
                      u32 elem_size, u32 nr_elems);