netfilter: ipset: Prepare the ipset core to use RCU at set level
authorJozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Sat, 13 Jun 2015 12:22:25 +0000 (14:22 +0200)
committerJozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Sun, 14 Jun 2015 08:40:16 +0000 (10:40 +0200)
Replace rwlock_t with spinlock_t in "struct ip_set" and change the locking
accordingly. Convert the comment extension into an rcu-avare object. Also,
simplify the timeout routines.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
include/linux/netfilter/ipset/ip_set.h
include/linux/netfilter/ipset/ip_set_comment.h
include/linux/netfilter/ipset/ip_set_timeout.h
net/netfilter/ipset/ip_set_core.c

index 5674b6ac6646e4f1b75688b62416e79211487d40..19b4969a25fe2e91bcfc72597f6ace3b5a107f48 100644 (file)
@@ -108,8 +108,13 @@ struct ip_set_counter {
        atomic64_t packets;
 };
 
+struct ip_set_comment_rcu {
+       struct rcu_head rcu;
+       char str[0];
+};
+
 struct ip_set_comment {
-       char *str;
+       struct ip_set_comment_rcu __rcu *c;
 };
 
 struct ip_set_skbinfo {
@@ -226,7 +231,7 @@ struct ip_set {
        /* The name of the set */
        char name[IPSET_MAXNAMELEN];
        /* Lock protecting the set data */
-       rwlock_t lock;
+       spinlock_t lock;
        /* References to the set */
        u32 ref;
        /* The core set type */
index 21217ea008d79611051c17eb7aa0900c3f3e6663..8d024852595704fa67b0d5588c76fcca3ef67b9d 100644 (file)
@@ -16,41 +16,57 @@ ip_set_comment_uget(struct nlattr *tb)
        return nla_data(tb);
 }
 
+/* Called from uadd only, protected by the set spinlock.
+ * The kadt functions don't use the comment extensions in any way.
+ */
 static inline void
 ip_set_init_comment(struct ip_set_comment *comment,
                    const struct ip_set_ext *ext)
 {
+       struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
        size_t len = ext->comment ? strlen(ext->comment) : 0;
 
-       if (unlikely(comment->str)) {
-               kfree(comment->str);
-               comment->str = NULL;
+       if (unlikely(c)) {
+               kfree_rcu(c, rcu);
+               rcu_assign_pointer(comment->c, NULL);
        }
        if (!len)
                return;
        if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
                len = IPSET_MAX_COMMENT_SIZE;
-       comment->str = kzalloc(len + 1, GFP_ATOMIC);
-       if (unlikely(!comment->str))
+       c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
+       if (unlikely(!c))
                return;
-       strlcpy(comment->str, ext->comment, len + 1);
+       strlcpy(c->str, ext->comment, len + 1);
+       rcu_assign_pointer(comment->c, c);
 }
 
+/* Used only when dumping a set, protected by rcu_read_lock_bh() */
 static inline int
 ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment)
 {
-       if (!comment->str)
+       struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c);
+
+       if (!c)
                return 0;
-       return nla_put_string(skb, IPSET_ATTR_COMMENT, comment->str);
+       return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
 }
 
+/* Called from uadd/udel, flush or the garbage collectors protected
+ * by the set spinlock.
+ * Called when the set is destroyed and when there can't be any user
+ * of the set data anymore.
+ */
 static inline void
 ip_set_comment_free(struct ip_set_comment *comment)
 {
-       if (unlikely(!comment->str))
+       struct ip_set_comment_rcu *c;
+
+       c = rcu_dereference_protected(comment->c, 1);
+       if (unlikely(!c))
                return;
-       kfree(comment->str);
-       comment->str = NULL;
+       kfree_rcu(c, rcu);
+       rcu_assign_pointer(comment->c, NULL);
 }
 
 #endif
index 3c8842bcedaae8eac05576749292c5b488998e7c..1d6a935c1ac5f4becf782394a4c8e03f7a913eb8 100644 (file)
@@ -40,31 +40,26 @@ ip_set_timeout_uget(struct nlattr *tb)
 }
 
 static inline bool
-ip_set_timeout_test(unsigned long timeout)
+ip_set_timeout_expired(unsigned long *t)
 {
-       return timeout == IPSET_ELEM_PERMANENT ||
-              time_is_after_jiffies(timeout);
-}
-
-static inline bool
-ip_set_timeout_expired(unsigned long *timeout)
-{
-       return *timeout != IPSET_ELEM_PERMANENT &&
-              time_is_before_jiffies(*timeout);
+       return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t);
 }
 
 static inline void
-ip_set_timeout_set(unsigned long *timeout, u32 t)
+ip_set_timeout_set(unsigned long *timeout, u32 value)
 {
-       if (!t) {
+       unsigned long t;
+
+       if (!value) {
                *timeout = IPSET_ELEM_PERMANENT;
                return;
        }
 
-       *timeout = msecs_to_jiffies(t * MSEC_PER_SEC) + jiffies;
-       if (*timeout == IPSET_ELEM_PERMANENT)
+       t = msecs_to_jiffies(value * MSEC_PER_SEC) + jiffies;
+       if (t == IPSET_ELEM_PERMANENT)
                /* Bingo! :-) */
-               (*timeout)--;
+               t--;
+       *timeout = t;
 }
 
 static inline u32
index 87b4182660baa23def9623e4fc07f4ffb2d86cd3..2b21a1983a98a7a140073a8c48b53ed4d6882daf 100644 (file)
@@ -209,15 +209,15 @@ ip_set_type_register(struct ip_set_type *type)
                pr_warn("ip_set type %s, family %s with revision min %u already registered!\n",
                        type->name, family_name(type->family),
                        type->revision_min);
-               ret = -EINVAL;
-               goto unlock;
+               ip_set_type_unlock();
+               return -EINVAL;
        }
        list_add_rcu(&type->list, &ip_set_type_list);
        pr_debug("type %s, family %s, revision %u:%u registered.\n",
                 type->name, family_name(type->family),
                 type->revision_min, type->revision_max);
-unlock:
        ip_set_type_unlock();
+
        return ret;
 }
 EXPORT_SYMBOL_GPL(ip_set_type_register);
@@ -231,12 +231,12 @@ ip_set_type_unregister(struct ip_set_type *type)
                pr_warn("ip_set type %s, family %s with revision min %u not registered\n",
                        type->name, family_name(type->family),
                        type->revision_min);
-               goto unlock;
+               ip_set_type_unlock();
+               return;
        }
        list_del_rcu(&type->list);
        pr_debug("type %s, family %s with revision min %u unregistered.\n",
                 type->name, family_name(type->family), type->revision_min);
-unlock:
        ip_set_type_unlock();
 
        synchronize_rcu();
@@ -531,16 +531,16 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
            !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
                return 0;
 
-       read_lock_bh(&set->lock);
+       rcu_read_lock_bh();
        ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
-       read_unlock_bh(&set->lock);
+       rcu_read_unlock_bh();
 
        if (ret == -EAGAIN) {
                /* Type requests element to be completed */
                pr_debug("element must be completed, ADD is triggered\n");
-               write_lock_bh(&set->lock);
+               spin_lock_bh(&set->lock);
                set->variant->kadt(set, skb, par, IPSET_ADD, opt);
-               write_unlock_bh(&set->lock);
+               spin_unlock_bh(&set->lock);
                ret = 1;
        } else {
                /* --return-nomatch: invert matched element */
@@ -570,9 +570,9 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
            !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
                return -IPSET_ERR_TYPE_MISMATCH;
 
-       write_lock_bh(&set->lock);
+       spin_lock_bh(&set->lock);
        ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
-       write_unlock_bh(&set->lock);
+       spin_unlock_bh(&set->lock);
 
        return ret;
 }
@@ -593,9 +593,9 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
            !(opt->family == set->family || set->family == NFPROTO_UNSPEC))
                return -IPSET_ERR_TYPE_MISMATCH;
 
-       write_lock_bh(&set->lock);
+       spin_lock_bh(&set->lock);
        ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
-       write_unlock_bh(&set->lock);
+       spin_unlock_bh(&set->lock);
 
        return ret;
 }
@@ -880,7 +880,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
        set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
        if (!set)
                return -ENOMEM;
-       rwlock_init(&set->lock);
+       spin_lock_init(&set->lock);
        strlcpy(set->name, name, IPSET_MAXNAMELEN);
        set->family = family;
        set->revision = revision;
@@ -1062,9 +1062,9 @@ ip_set_flush_set(struct ip_set *set)
 {
        pr_debug("set: %s\n",  set->name);
 
-       write_lock_bh(&set->lock);
+       spin_lock_bh(&set->lock);
        set->variant->flush(set);
-       write_unlock_bh(&set->lock);
+       spin_unlock_bh(&set->lock);
 }
 
 static int
@@ -1377,9 +1377,9 @@ dump_last:
                                set->variant->uref(set, cb, true);
                        /* Fall through and add elements */
                default:
-                       read_lock_bh(&set->lock);
+                       rcu_read_lock_bh();
                        ret = set->variant->list(set, skb, cb);
-                       read_unlock_bh(&set->lock);
+                       rcu_read_unlock_bh();
                        if (!cb->args[IPSET_CB_ARG0])
                                /* Set is done, proceed with next one */
                                goto next_set;
@@ -1462,9 +1462,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
        bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
 
        do {
-               write_lock_bh(&set->lock);
+               spin_lock_bh(&set->lock);
                ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
-               write_unlock_bh(&set->lock);
+               spin_unlock_bh(&set->lock);
                retried = true;
        } while (ret == -EAGAIN &&
                 set->variant->resize &&
@@ -1644,9 +1644,9 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
                             set->type->adt_policy))
                return -IPSET_ERR_PROTOCOL;
 
-       read_lock_bh(&set->lock);
+       rcu_read_lock_bh();
        ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0);
-       read_unlock_bh(&set->lock);
+       rcu_read_unlock_bh();
        /* Userspace can't trigger element to be re-added */
        if (ret == -EAGAIN)
                ret = 1;