netfilter: conntrack: simplify early_drop
authorFlorian Westphal <fw@strlen.de>
Sun, 3 Jul 2016 18:44:01 +0000 (20:44 +0200)
committerPablo Neira Ayuso <pablo@netfilter.org>
Mon, 11 Jul 2016 09:46:22 +0000 (11:46 +0200)
We don't need to acquire the bucket lock during early drop, we can
use lockless traversal just like ____nf_conntrack_find.

The timer deletion serves as the synchronization point: if another cpu
attempts to evict the same entry, only one will succeed with the timer deletion.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
include/net/netfilter/nf_conntrack.h
net/netfilter/nf_conntrack_core.c

index 5d3397f3458364347644210742c4b0dfa92c143a..2a5133e214c946e514067db236a33aa4d228e458 100644 (file)
@@ -301,6 +301,7 @@ void nf_ct_tmpl_free(struct nf_conn *tmpl);
 
 #define NF_CT_STAT_INC(net, count)       __this_cpu_inc((net)->ct.stat->count)
 #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
+#define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v))
 
 #define MODULE_ALIAS_NFCT_HELPER(helper) \
         MODULE_ALIAS("nfct-helper-" helper)
index 1289e7e5e0deb939cea99b5514fa7130f4840213..e0e9c9a0f5ba323a1346849a6d6c82df6b87985b 100644 (file)
@@ -834,67 +834,66 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
 
 /* There's a small race here where we may free a just-assured
    connection.  Too bad: we're in trouble anyway. */
-static noinline int early_drop(struct net *net, unsigned int _hash)
+static unsigned int early_drop_list(struct net *net,
+                                   struct hlist_nulls_head *head)
 {
-       /* Use oldest entry, which is roughly LRU */
        struct nf_conntrack_tuple_hash *h;
-       struct nf_conn *tmp;
        struct hlist_nulls_node *n;
-       unsigned int i, hash, sequence;
-       struct nf_conn *ct = NULL;
-       spinlock_t *lockp;
-       bool ret = false;
+       unsigned int drops = 0;
+       struct nf_conn *tmp;
 
-       i = 0;
+       hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
+               tmp = nf_ct_tuplehash_to_ctrack(h);
 
-       local_bh_disable();
-restart:
-       sequence = read_seqcount_begin(&nf_conntrack_generation);
-       for (; i < NF_CT_EVICTION_RANGE; i++) {
-               hash = scale_hash(_hash++);
-               lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
-               nf_conntrack_lock(lockp);
-               if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
-                       spin_unlock(lockp);
-                       goto restart;
-               }
-               hlist_nulls_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash],
-                                              hnnode) {
-                       tmp = nf_ct_tuplehash_to_ctrack(h);
-
-                       if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
-                           !net_eq(nf_ct_net(tmp), net) ||
-                           nf_ct_is_dying(tmp))
-                               continue;
-
-                       if (atomic_inc_not_zero(&tmp->ct_general.use)) {
-                               ct = tmp;
-                               break;
-                       }
-               }
+               if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
+                   !net_eq(nf_ct_net(tmp), net) ||
+                   nf_ct_is_dying(tmp))
+                       continue;
 
-               spin_unlock(lockp);
-               if (ct)
-                       break;
+               if (!atomic_inc_not_zero(&tmp->ct_general.use))
+                       continue;
+
+               /* kill only if still in same netns -- might have moved due to
+                * SLAB_DESTROY_BY_RCU rules.
+                *
+                * We steal the timer reference.  If that fails timer has
+                * already fired or someone else deleted it. Just drop ref
+                * and move to next entry.
+                */
+               if (net_eq(nf_ct_net(tmp), net) &&
+                   nf_ct_is_confirmed(tmp) &&
+                   del_timer(&tmp->timeout) &&
+                   nf_ct_delete(tmp, 0, 0))
+                       drops++;
+
+               nf_ct_put(tmp);
        }
 
-       local_bh_enable();
+       return drops;
+}
 
-       if (!ct)
-               return false;
+static noinline int early_drop(struct net *net, unsigned int _hash)
+{
+       unsigned int i;
 
-       /* kill only if in same netns -- might have moved due to
-        * SLAB_DESTROY_BY_RCU rules
-        */
-       if (net_eq(nf_ct_net(ct), net) && del_timer(&ct->timeout)) {
-               if (nf_ct_delete(ct, 0, 0)) {
-                       NF_CT_STAT_INC_ATOMIC(net, early_drop);
-                       ret = true;
+       for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
+               struct hlist_nulls_head *ct_hash;
+               unsigned hash, sequence, drops;
+
+               do {
+                       sequence = read_seqcount_begin(&nf_conntrack_generation);
+                       hash = scale_hash(_hash++);
+                       ct_hash = nf_conntrack_hash;
+               } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+               drops = early_drop_list(net, &ct_hash[hash]);
+               if (drops) {
+                       NF_CT_STAT_ADD_ATOMIC(net, early_drop, drops);
+                       return true;
                }
        }
 
-       nf_ct_put(ct);
-       return ret;
+       return false;
 }
 
 static struct nf_conn *