ipv4: Flush per-ns routing cache more sanely.
author David S. Miller <davem@davemloft.net>
Mon, 20 Dec 2010 05:11:20 +0000 (21:11 -0800)
committer David S. Miller <davem@davemloft.net>
Mon, 20 Dec 2010 18:37:19 +0000 (10:37 -0800)
Flush the routing cache only of entries that match the
network namespace in which the purge event occurred.

Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
include/net/route.h
net/ipv4/fib_frontend.c
net/ipv4/route.c

index 27002362944a036617c337cdd25d31c471c5efea..93e10c453f6b8a08cc12dd3a70cb86d6473a4b19 100644 (file)
@@ -114,7 +114,7 @@ extern int          ip_rt_init(void);
 extern void            ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw,
                                       __be32 src, struct net_device *dev);
 extern void            rt_cache_flush(struct net *net, int how);
-extern void            rt_cache_flush_batch(void);
+extern void            rt_cache_flush_batch(struct net *net);
 extern int             __ip_route_output_key(struct net *, struct rtable **, const struct flowi *flp);
 extern int             ip_route_output_key(struct net *, struct rtable **, struct flowi *flp);
 extern int             ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
index d3a1112b9d9c4f12f488d1e00b1b54ebd84263df..9f8bb68911e40caa6acacbec1657c1f155b5fcdc 100644 (file)
@@ -987,7 +987,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
                rt_cache_flush(dev_net(dev), 0);
                break;
        case NETDEV_UNREGISTER_BATCH:
-               rt_cache_flush_batch();
+               /* The batch unregister is only called on the first
+                * device in the list of devices being unregistered.
+                * Therefore we should not pass dev_net(dev) in here.
+                */
+               rt_cache_flush_batch(NULL);
                break;
        }
        return NOTIFY_DONE;
index ae520963540f9db886513f509bf9c1e7cc7afae0..d8b4f4d0d66e5da7f1859ae36d5dcc3bd69c2554 100644 (file)
@@ -717,13 +717,15 @@ static inline int rt_is_expired(struct rtable *rth)
  * Can be called by a softirq or a process.
  * In the later case, we want to be reschedule if necessary
  */
-static void rt_do_flush(int process_context)
+static void rt_do_flush(struct net *net, int process_context)
 {
        unsigned int i;
        struct rtable *rth, *next;
-       struct rtable * tail;
 
        for (i = 0; i <= rt_hash_mask; i++) {
+               struct rtable __rcu **pprev;
+               struct rtable *list;
+
                if (process_context && need_resched())
                        cond_resched();
                rth = rcu_dereference_raw(rt_hash_table[i].chain);
@@ -731,50 +733,32 @@ static void rt_do_flush(int process_context)
                        continue;
 
                spin_lock_bh(rt_hash_lock_addr(i));
-#ifdef CONFIG_NET_NS
-               {
-               struct rtable __rcu **prev;
-               struct rtable *p;
 
-               rth = rcu_dereference_protected(rt_hash_table[i].chain,
+               list = NULL;
+               pprev = &rt_hash_table[i].chain;
+               rth = rcu_dereference_protected(*pprev,
                        lockdep_is_held(rt_hash_lock_addr(i)));
 
-               /* defer releasing the head of the list after spin_unlock */
-               for (tail = rth; tail;
-                    tail = rcu_dereference_protected(tail->dst.rt_next,
-                               lockdep_is_held(rt_hash_lock_addr(i))))
-                       if (!rt_is_expired(tail))
-                               break;
-               if (rth != tail)
-                       rt_hash_table[i].chain = tail;
-
-               /* call rt_free on entries after the tail requiring flush */
-               prev = &rt_hash_table[i].chain;
-               for (p = rcu_dereference_protected(*prev,
+               while (rth) {
+                       next = rcu_dereference_protected(rth->dst.rt_next,
                                lockdep_is_held(rt_hash_lock_addr(i)));
-                    p != NULL;
-                    p = next) {
-                       next = rcu_dereference_protected(p->dst.rt_next,
-                               lockdep_is_held(rt_hash_lock_addr(i)));
-                       if (!rt_is_expired(p)) {
-                               prev = &p->dst.rt_next;
+
+                       if (!net ||
+                           net_eq(dev_net(rth->dst.dev), net)) {
+                               rcu_assign_pointer(*pprev, next);
+                               rcu_assign_pointer(rth->dst.rt_next, list);
+                               list = rth;
                        } else {
-                               *prev = next;
-                               rt_free(p);
+                               pprev = &rth->dst.rt_next;
                        }
+                       rth = next;
                }
-               }
-#else
-               rth = rcu_dereference_protected(rt_hash_table[i].chain,
-                       lockdep_is_held(rt_hash_lock_addr(i)));
-               rcu_assign_pointer(rt_hash_table[i].chain, NULL);
-               tail = NULL;
-#endif
+
                spin_unlock_bh(rt_hash_lock_addr(i));
 
-               for (; rth != tail; rth = next) {
-                       next = rcu_dereference_protected(rth->dst.rt_next, 1);
-                       rt_free(rth);
+               for (; list; list = next) {
+                       next = rcu_dereference_protected(list->dst.rt_next, 1);
+                       rt_free(list);
                }
        }
 }
@@ -922,13 +906,13 @@ void rt_cache_flush(struct net *net, int delay)
 {
        rt_cache_invalidate(net);
        if (delay >= 0)
-               rt_do_flush(!in_softirq());
+               rt_do_flush(net, !in_softirq());
 }
 
 /* Flush previous cache invalidated entries from the cache */
-void rt_cache_flush_batch(void)
+void rt_cache_flush_batch(struct net *net)
 {
-       rt_do_flush(!in_softirq());
+       rt_do_flush(net, !in_softirq());
 }
 
 static void rt_emergency_hash_rebuild(struct net *net)