netfilter: conntrack: replace notify chain by function pointer
author Pablo Neira Ayuso <pablo@netfilter.org>
Wed, 3 Jun 2009 08:32:06 +0000 (10:32 +0200)
committer pablo <pablo@1984.(none)>
Wed, 3 Jun 2009 08:32:06 +0000 (10:32 +0200)
This patch removes the notify chain infrastructure and replaces it
with a simple function pointer. This issue has been mentioned on the
mailing list several times: the use of the notify chain adds
too much overhead for something that is only used by ctnetlink.

This patch also changes nfnetlink_send(). It seems that gfp_any()
returns GFP_KERNEL for user-context requests, like those via
ctnetlink, inside the RCU read-side section, which is not valid.
Using GFP_KERNEL is also evil since netlink may schedule();
this leads to "scheduling while atomic" bug reports.
nfnetlink_send() now takes the allocation flags from its caller
instead of calling gfp_any(), and ctnetlink passes GFP_ATOMIC.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
include/linux/netfilter/nfnetlink.h
include/net/netfilter/nf_conntrack_ecache.h
net/netfilter/nf_conntrack_ecache.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nfnetlink.c

index c600083cbdf50ccc29774d2bd8c8f8cb1ca551ba..2214e516146105104668e4b6d72bb6d03013cc23 100644 (file)
@@ -75,7 +75,7 @@ extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n);
 
 extern int nfnetlink_has_listeners(unsigned int group);
 extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, 
-                         int echo);
+                         int echo, gfp_t flags);
 extern void nfnetlink_set_err(u32 pid, u32 group, int error);
 extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags);
 
index 2e17a2d0eb3bd48819aef72ab450a2c8d10f0744..1afb907e015a98315f414602459ae4870a544934 100644 (file)
@@ -6,7 +6,6 @@
 #define _NF_CONNTRACK_ECACHE_H
 #include <net/netfilter/nf_conntrack.h>
 
-#include <linux/notifier.h>
 #include <linux/interrupt.h>
 #include <net/net_namespace.h>
 #include <net/netfilter/nf_conntrack_expect.h>
@@ -69,9 +68,13 @@ struct nf_ct_event {
        int report;
 };
 
-extern struct atomic_notifier_head nf_conntrack_chain;
-extern int nf_conntrack_register_notifier(struct notifier_block *nb);
-extern int nf_conntrack_unregister_notifier(struct notifier_block *nb);
+struct nf_ct_event_notifier {
+       int (*fcn)(unsigned int events, struct nf_ct_event *item);
+};
+
+extern struct nf_ct_event_notifier *nf_conntrack_event_cb;
+extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb);
+extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb);
 
 extern void nf_ct_deliver_cached_events(const struct nf_conn *ct);
 extern void __nf_ct_event_cache_init(struct nf_conn *ct);
@@ -97,13 +100,23 @@ nf_conntrack_event_report(enum ip_conntrack_events event,
                          u32 pid,
                          int report)
 {
-       struct nf_ct_event item = {
-               .ct     = ct,
-               .pid    = pid,
-               .report = report
-       };
-       if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct))
-               atomic_notifier_call_chain(&nf_conntrack_chain, event, &item);
+       struct nf_ct_event_notifier *notify;
+
+       rcu_read_lock();
+       notify = rcu_dereference(nf_conntrack_event_cb);
+       if (notify == NULL)
+               goto out_unlock;
+
+       if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) {
+               struct nf_ct_event item = {
+                       .ct     = ct,
+                       .pid    = pid,
+                       .report = report
+               };
+               notify->fcn(event, &item);
+       }
+out_unlock:
+       rcu_read_unlock();
 }
 
 static inline void
@@ -118,9 +131,13 @@ struct nf_exp_event {
        int report;
 };
 
-extern struct atomic_notifier_head nf_ct_expect_chain;
-extern int nf_ct_expect_register_notifier(struct notifier_block *nb);
-extern int nf_ct_expect_unregister_notifier(struct notifier_block *nb);
+struct nf_exp_event_notifier {
+       int (*fcn)(unsigned int events, struct nf_exp_event *item);
+};
+
+extern struct nf_exp_event_notifier *nf_expect_event_cb;
+extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb);
+extern void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb);
 
 static inline void
 nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
@@ -128,12 +145,23 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
                          u32 pid,
                          int report)
 {
-       struct nf_exp_event item = {
-               .exp    = exp,
-               .pid    = pid,
-               .report = report
-       };
-       atomic_notifier_call_chain(&nf_ct_expect_chain, event, &item);
+       struct nf_exp_event_notifier *notify;
+
+       rcu_read_lock();
+       notify = rcu_dereference(nf_expect_event_cb);
+       if (notify == NULL)
+               goto out_unlock;
+
+       {
+               struct nf_exp_event item = {
+                       .exp    = exp,
+                       .pid    = pid,
+                       .report = report
+               };
+               notify->fcn(event, &item);
+       }
+out_unlock:
+       rcu_read_unlock();
 }
 
 static inline void
index dee4190209ccc2420709bcbcab29527270625730..5516b3e64b4330baa7ec14778d762cf760de1df1 100644 (file)
 #include <linux/stddef.h>
 #include <linux/err.h>
 #include <linux/percpu.h>
-#include <linux/notifier.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
 
-ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain);
-EXPORT_SYMBOL_GPL(nf_conntrack_chain);
+static DEFINE_MUTEX(nf_ct_ecache_mutex);
 
-ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain);
-EXPORT_SYMBOL_GPL(nf_ct_expect_chain);
+struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_event_cb);
+
+struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly;
+EXPORT_SYMBOL_GPL(nf_expect_event_cb);
 
 /* deliver cached events and clear cache entry - must be called with locally
  * disabled softirqs */
 static inline void
 __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
 {
+       struct nf_ct_event_notifier *notify;
+
+       rcu_read_lock();
+       notify = rcu_dereference(nf_conntrack_event_cb);
+       if (notify == NULL)
+               goto out_unlock;
+
        if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
            && ecache->events) {
                struct nf_ct_event item = {
@@ -42,14 +50,15 @@ __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
                        .report = 0
                };
 
-               atomic_notifier_call_chain(&nf_conntrack_chain,
-                                          ecache->events,
-                                          &item);
+               notify->fcn(ecache->events, &item);
        }
 
        ecache->events = 0;
        nf_ct_put(ecache->ct);
        ecache->ct = NULL;
+
+out_unlock:
+       rcu_read_unlock();
 }
 
 /* Deliver all cached events for a particular conntrack. This is called
@@ -111,26 +120,68 @@ void nf_conntrack_ecache_fini(struct net *net)
        free_percpu(net->ct.ecache);
 }
 
-int nf_conntrack_register_notifier(struct notifier_block *nb)
+int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
 {
-       return atomic_notifier_chain_register(&nf_conntrack_chain, nb);
+       int ret = 0;
+       struct nf_ct_event_notifier *notify;
+
+       mutex_lock(&nf_ct_ecache_mutex);
+       notify = rcu_dereference(nf_conntrack_event_cb);
+       if (notify != NULL) {
+               ret = -EBUSY;
+               goto out_unlock;
+       }
+       rcu_assign_pointer(nf_conntrack_event_cb, new);
+       mutex_unlock(&nf_ct_ecache_mutex);
+       return ret;
+
+out_unlock:
+       mutex_unlock(&nf_ct_ecache_mutex);
+       return ret;
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
 
-int nf_conntrack_unregister_notifier(struct notifier_block *nb)
+void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new)
 {
-       return atomic_notifier_chain_unregister(&nf_conntrack_chain, nb);
+       struct nf_ct_event_notifier *notify;
+
+       mutex_lock(&nf_ct_ecache_mutex);
+       notify = rcu_dereference(nf_conntrack_event_cb);
+       BUG_ON(notify != new);
+       rcu_assign_pointer(nf_conntrack_event_cb, NULL);
+       mutex_unlock(&nf_ct_ecache_mutex);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
 
-int nf_ct_expect_register_notifier(struct notifier_block *nb)
+int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new)
 {
-       return atomic_notifier_chain_register(&nf_ct_expect_chain, nb);
+       int ret = 0;
+       struct nf_exp_event_notifier *notify;
+
+       mutex_lock(&nf_ct_ecache_mutex);
+       notify = rcu_dereference(nf_expect_event_cb);
+       if (notify != NULL) {
+               ret = -EBUSY;
+               goto out_unlock;
+       }
+       rcu_assign_pointer(nf_expect_event_cb, new);
+       mutex_unlock(&nf_ct_ecache_mutex);
+       return ret;
+
+out_unlock:
+       mutex_unlock(&nf_ct_ecache_mutex);
+       return ret;
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);
 
-int nf_ct_expect_unregister_notifier(struct notifier_block *nb)
+void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new)
 {
-       return atomic_notifier_chain_unregister(&nf_ct_expect_chain, nb);
+       struct nf_exp_event_notifier *notify;
+
+       mutex_lock(&nf_ct_ecache_mutex);
+       notify = rcu_dereference(nf_expect_event_cb);
+       BUG_ON(notify != new);
+       rcu_assign_pointer(nf_expect_event_cb, NULL);
+       mutex_unlock(&nf_ct_ecache_mutex);
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
index b1b9e4fb7dedb53356f922809db9cc58dad83ea7..4448b062de0c4b722e70162ea6c4af1d719d5f51 100644 (file)
@@ -27,7 +27,6 @@
 #include <linux/netlink.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
-#include <linux/notifier.h>
 
 #include <linux/netfilter.h>
 #include <net/netlink.h>
@@ -454,13 +453,12 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
               ;
 }
 
-static int ctnetlink_conntrack_event(struct notifier_block *this,
-                                    unsigned long events, void *ptr)
+static int
+ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 {
        struct nlmsghdr *nlh;
        struct nfgenmsg *nfmsg;
        struct nlattr *nest_parms;
-       struct nf_ct_event *item = (struct nf_ct_event *)ptr;
        struct nf_conn *ct = item->ct;
        struct sk_buff *skb;
        unsigned int type;
@@ -468,7 +466,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 
        /* ignore our fake conntrack entry */
        if (ct == &nf_conntrack_untracked)
-               return NOTIFY_DONE;
+               return 0;
 
        if (events & IPCT_DESTROY) {
                type = IPCTNL_MSG_CT_DELETE;
@@ -481,10 +479,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
                type = IPCTNL_MSG_CT_NEW;
                group = NFNLGRP_CONNTRACK_UPDATE;
        } else
-               return NOTIFY_DONE;
+               return 0;
 
        if (!item->report && !nfnetlink_has_listeners(group))
-               return NOTIFY_DONE;
+               return 0;
 
        skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC);
        if (skb == NULL)
@@ -560,8 +558,8 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
        rcu_read_unlock();
 
        nlmsg_end(skb, nlh);
-       nfnetlink_send(skb, item->pid, group, item->report);
-       return NOTIFY_DONE;
+       nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC);
+       return 0;
 
 nla_put_failure:
        rcu_read_unlock();
@@ -570,7 +568,7 @@ nlmsg_failure:
        kfree_skb(skb);
 errout:
        nfnetlink_set_err(0, group, -ENOBUFS);
-       return NOTIFY_DONE;
+       return 0;
 }
 #endif /* CONFIG_NF_CONNTRACK_EVENTS */
 
@@ -1507,12 +1505,11 @@ nla_put_failure:
 }
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
-static int ctnetlink_expect_event(struct notifier_block *this,
-                                 unsigned long events, void *ptr)
+static int
+ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
 {
        struct nlmsghdr *nlh;
        struct nfgenmsg *nfmsg;
-       struct nf_exp_event *item = (struct nf_exp_event *)ptr;
        struct nf_conntrack_expect *exp = item->exp;
        struct sk_buff *skb;
        unsigned int type;
@@ -1522,11 +1519,11 @@ static int ctnetlink_expect_event(struct notifier_block *this,
                type = IPCTNL_MSG_EXP_NEW;
                flags = NLM_F_CREATE|NLM_F_EXCL;
        } else
-               return NOTIFY_DONE;
+               return 0;
 
        if (!item->report &&
            !nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
-               return NOTIFY_DONE;
+               return 0;
 
        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
        if (skb == NULL)
@@ -1548,8 +1545,9 @@ static int ctnetlink_expect_event(struct notifier_block *this,
        rcu_read_unlock();
 
        nlmsg_end(skb, nlh);
-       nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report);
-       return NOTIFY_DONE;
+       nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW,
+                      item->report, GFP_ATOMIC);
+       return 0;
 
 nla_put_failure:
        rcu_read_unlock();
@@ -1558,7 +1556,7 @@ nlmsg_failure:
        kfree_skb(skb);
 errout:
        nfnetlink_set_err(0, 0, -ENOBUFS);
-       return NOTIFY_DONE;
+       return 0;
 }
 #endif
 static int ctnetlink_exp_done(struct netlink_callback *cb)
@@ -1864,12 +1862,12 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
 }
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
-static struct notifier_block ctnl_notifier = {
-       .notifier_call  = ctnetlink_conntrack_event,
+static struct nf_ct_event_notifier ctnl_notifier = {
+       .fcn = ctnetlink_conntrack_event,
 };
 
-static struct notifier_block ctnl_notifier_exp = {
-       .notifier_call  = ctnetlink_expect_event,
+static struct nf_exp_event_notifier ctnl_notifier_exp = {
+       .fcn = ctnetlink_expect_event,
 };
 #endif
 
index 9dbd5709aad745b8bd09a3ebc08b5bd963a0fdbf..92761a9883751ce2f266c5c8881c7074a94a07c0 100644 (file)
@@ -107,9 +107,10 @@ int nfnetlink_has_listeners(unsigned int group)
 }
 EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
 
-int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
+int nfnetlink_send(struct sk_buff *skb, u32 pid,
+                  unsigned group, int echo, gfp_t flags)
 {
-       return nlmsg_notify(nfnl, skb, pid, group, echo, gfp_any());
+       return nlmsg_notify(nfnl, skb, pid, group, echo, flags);
 }
 EXPORT_SYMBOL_GPL(nfnetlink_send);