ipv4: percpu nh_rth_output cache
authorEric Dumazet <edumazet@google.com>
Tue, 31 Jul 2012 05:45:30 +0000 (05:45 +0000)
committerDavid S. Miller <davem@davemloft.net>
Tue, 31 Jul 2012 21:41:39 +0000 (14:41 -0700)
The input path mostly runs under RCU and doesn't touch the dst refcnt.

But the output path, on forwarding or UDP workloads, hits the dst refcount
hard, and we get a lot of false sharing, for example
in ipv4_mtu() when reading rt->rt_pmtu.

Using a percpu cache for nh_rth_output gives a nice performance
increase at a small cost.

24 udpflood test on my 24 cpu machine (dummy0 output device)
(each process sends 1.000.000 udp frames, 24 processes are started)

before : 5.24 s
after : 2.06 s
For reference, time on linux-3.5 : 6.60 s

Signed-off-by: Eric Dumazet <edumazet@google.com>
Tested-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/ip_fib.h
net/ipv4/fib_semantics.c
net/ipv4/route.c

index e521a03515b170cbff8c94d65e08f8cd69d88627..e331746029b42c1a92037b33afef10e3f54ae754 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/rcupdate.h>
 #include <net/fib_rules.h>
 #include <net/inetpeer.h>
+#include <linux/percpu.h>
 
 struct fib_config {
        u8                      fc_dst_len;
@@ -81,7 +82,7 @@ struct fib_nh {
        __be32                  nh_gw;
        __be32                  nh_saddr;
        int                     nh_saddr_genid;
-       struct rtable __rcu     *nh_rth_output;
+       struct rtable __rcu * __percpu *nh_pcpu_rth_output;
        struct rtable __rcu     *nh_rth_input;
        struct fnhe_hash_bucket *nh_exceptions;
 };
index 625cf185c489b4ad871f152f802bbb0027652c5a..fe2ca02a1979bbec4ffd842c9957771b6b3cd8a9 100644 (file)
@@ -176,6 +176,23 @@ static void rt_nexthop_free(struct rtable __rcu **rtp)
        dst_free(&rt->dst);
 }
 
+static void rt_nexthop_free_cpus(struct rtable __rcu * __percpu *rtp)
+{
+       int cpu;
+
+       if (!rtp)
+               return;
+
+       for_each_possible_cpu(cpu) {
+               struct rtable *rt;
+
+               rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1);
+               if (rt)
+                       dst_free(&rt->dst);
+       }
+       free_percpu(rtp);
+}
+
 /* Release a nexthop info record */
 static void free_fib_info_rcu(struct rcu_head *head)
 {
@@ -186,7 +203,7 @@ static void free_fib_info_rcu(struct rcu_head *head)
                        dev_put(nexthop_nh->nh_dev);
                if (nexthop_nh->nh_exceptions)
                        free_nh_exceptions(nexthop_nh);
-               rt_nexthop_free(&nexthop_nh->nh_rth_output);
+               rt_nexthop_free_cpus(nexthop_nh->nh_pcpu_rth_output);
                rt_nexthop_free(&nexthop_nh->nh_rth_input);
        } endfor_nexthops(fi);
 
@@ -817,6 +834,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
        fi->fib_nhs = nhs;
        change_nexthops(fi) {
                nexthop_nh->nh_parent = fi;
+               nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
        } endfor_nexthops(fi)
 
        if (cfg->fc_mx) {
index 2bd1074774692e670e6a07c1e3da46f8c2d4af48..4f6276ce0af317a4cf159b780bd8a1718126fb73 100644 (file)
@@ -1206,11 +1206,15 @@ static inline void rt_free(struct rtable *rt)
 
 static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
 {
-       struct rtable *orig, *prev, **p = (struct rtable **)&nh->nh_rth_output;
+       struct rtable *orig, *prev, **p;
 
-       if (rt_is_input_route(rt))
+       if (rt_is_input_route(rt)) {
                p = (struct rtable **)&nh->nh_rth_input;
-
+       } else {
+               if (!nh->nh_pcpu_rth_output)
+                       goto nocache;
+               p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
+       }
        orig = *p;
 
        prev = cmpxchg(p, orig, rt);
@@ -1223,6 +1227,7 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
                 * unsuccessful at storing this route into the cache
                 * we really need to set it.
                 */
+nocache:
                rt->dst.flags |= DST_NOCACHE;
        }
 }
@@ -1749,8 +1754,11 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
        fnhe = NULL;
        if (fi) {
                fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
-               if (!fnhe) {
-                       rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_output);
+               if (!fnhe && FIB_RES_NH(*res).nh_pcpu_rth_output) {
+                       struct rtable __rcu **prth;
+
+                       prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output);
+                       rth = rcu_dereference(*prth);
                        if (rt_cache_valid(rth)) {
                                dst_hold(&rth->dst);
                                return rth;