net: Batch network namespace destruction.
author Eric W. Biederman <eric@conroxe.ebiederm.org>
Sun, 29 Nov 2009 22:25:27 +0000 (22:25 +0000)
committer David S. Miller <davem@davemloft.net>
Wed, 2 Dec 2009 00:15:51 +0000 (16:15 -0800)
It is fairly common to kill several network namespaces at once, either
because they are nested one inside the other or because they are cooperating
in multiple machine networking experiments.  As the network stack control logic
does not parallelize easily, batch up multiple network namespaces exiting
together.

To get the full benefit of batching, the virtual network devices to be
removed must all be removed in one batch.  For that purpose I have added
a loop, after the last network device operations have run, that batches
up all remaining network devices and deletes them.

An extra benefit is that the reorganization slightly shrinks the size
of the per network namespace data structures, replacing a work_struct
with a list_head.

In a trivial test with 4K namespaces this change reduced the cost of
destroying 4K namespaces from 7+ minutes (at 12% cpu) to 44 seconds
(at 60% cpu).  The bulk of that 44s was spent in inet_twsk_purge.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/net_namespace.h
net/core/net_namespace.c

index 0addd45038ac59027565f7c5a57f86ec1be88d2b..d69b4796030feec0aac850e74892021c0c7ba47b 100644 (file)
@@ -42,7 +42,7 @@ struct net {
                                                 */
 #endif
        struct list_head        list;           /* list of network namespaces */
-       struct work_struct      work;           /* work struct for freeing */
+       struct list_head        cleanup_list;   /* namespaces on death row */
 
        struct proc_dir_entry   *proc_net;
        struct proc_dir_entry   *proc_net_stat;
index 86ed7f44d0832251c66220fe345f0a96564b15c1..a42caa2b909b33447aaa0a53d18510a4285d0e56 100644 (file)
@@ -8,8 +8,10 @@
 #include <linux/idr.h>
 #include <linux/rculist.h>
 #include <linux/nsproxy.h>
+#include <linux/netdevice.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <net/rtnetlink.h>
 
 /*
  *     Our network namespace constructor/destructor lists
@@ -27,6 +29,20 @@ EXPORT_SYMBOL(init_net);
 
 #define INITIAL_NET_GEN_PTRS   13 /* +1 for len +2 for rcu_head */
 
+static void unregister_netdevices(struct net *net, struct list_head *list)
+{
+       struct net_device *dev;
+       /* At exit all network devices must be removed from a network
+        * namespace.  Do this in the reverse order of registration.
+        */
+       for_each_netdev_reverse(net, dev) {
+               if (dev->rtnl_link_ops)
+                       dev->rtnl_link_ops->dellink(dev, list);
+               else
+                       unregister_netdevice_queue(dev, list);
+       }
+}
+
 /*
  * setup_net runs the initializers for the network namespace object.
  */
@@ -59,6 +75,13 @@ out_undo:
        list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
                if (ops->exit)
                        ops->exit(net);
+               if (&ops->list == first_device) {
+                       LIST_HEAD(dev_kill_list);
+                       rtnl_lock();
+                       unregister_netdevices(net, &dev_kill_list);
+                       unregister_netdevice_many(&dev_kill_list);
+                       rtnl_unlock();
+               }
        }
 
        rcu_barrier();
@@ -147,18 +170,26 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
        return net_create();
 }
 
+static DEFINE_SPINLOCK(cleanup_list_lock);
+static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */
+
 static void cleanup_net(struct work_struct *work)
 {
        struct pernet_operations *ops;
-       struct net *net;
+       struct net *net, *tmp;
+       LIST_HEAD(net_kill_list);
 
-       net = container_of(work, struct net, work);
+       /* Atomically snapshot the list of namespaces to cleanup */
+       spin_lock_irq(&cleanup_list_lock);
+       list_replace_init(&cleanup_list, &net_kill_list);
+       spin_unlock_irq(&cleanup_list_lock);
 
        mutex_lock(&net_mutex);
 
        /* Don't let anyone else find us. */
        rtnl_lock();
-       list_del_rcu(&net->list);
+       list_for_each_entry(net, &net_kill_list, cleanup_list)
+               list_del_rcu(&net->list);
        rtnl_unlock();
 
        /*
@@ -170,8 +201,18 @@ static void cleanup_net(struct work_struct *work)
 
        /* Run all of the network namespace exit methods */
        list_for_each_entry_reverse(ops, &pernet_list, list) {
-               if (ops->exit)
-                       ops->exit(net);
+               if (ops->exit) {
+                       list_for_each_entry(net, &net_kill_list, cleanup_list)
+                               ops->exit(net);
+               }
+               if (&ops->list == first_device) {
+                       LIST_HEAD(dev_kill_list);
+                       rtnl_lock();
+                       list_for_each_entry(net, &net_kill_list, cleanup_list)
+                               unregister_netdevices(net, &dev_kill_list);
+                       unregister_netdevice_many(&dev_kill_list);
+                       rtnl_unlock();
+               }
        }
 
        mutex_unlock(&net_mutex);
@@ -182,14 +223,23 @@ static void cleanup_net(struct work_struct *work)
        rcu_barrier();
 
        /* Finally it is safe to free my network namespace structure */
-       net_free(net);
+       list_for_each_entry_safe(net, tmp, &net_kill_list, cleanup_list) {
+               list_del_init(&net->cleanup_list);
+               net_free(net);
+       }
 }
+static DECLARE_WORK(net_cleanup_work, cleanup_net);
 
 void __put_net(struct net *net)
 {
        /* Cleanup the network namespace in process context */
-       INIT_WORK(&net->work, cleanup_net);
-       queue_work(netns_wq, &net->work);
+       unsigned long flags;
+
+       spin_lock_irqsave(&cleanup_list_lock, flags);
+       list_add(&net->cleanup_list, &cleanup_list);
+       spin_unlock_irqrestore(&cleanup_list_lock, flags);
+
+       queue_work(netns_wq, &net_cleanup_work);
 }
 EXPORT_SYMBOL_GPL(__put_net);