neigh: Protect neigh->ha[] with a seqlock
authorEric Dumazet <eric.dumazet@gmail.com>
Thu, 7 Oct 2010 10:44:07 +0000 (10:44 +0000)
committerDavid S. Miller <davem@davemloft.net>
Mon, 11 Oct 2010 19:54:04 +0000 (12:54 -0700)
Add a seqlock in struct neighbour to protect neigh->ha[], and avoid
dirtying neighbour in stress situation (many different flows / dsts)

Dirtying takes place because of read_lock(&n->lock) and n->used writes.

Switching to a seqlock, and writing n->used only on jiffies changes
permits less dirtying.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/neighbour.h
net/core/neighbour.c
net/ipv4/arp.c
net/sched/sch_teql.c

index a4538d5537047d1bf92e461a6ad89c8f4c90305e..f04e7a2522c51557ce9d10dca421fad7a9e123e3 100644 (file)
@@ -105,6 +105,7 @@ struct neighbour {
        atomic_t                refcnt;
        atomic_t                probes;
        rwlock_t                lock;
+       seqlock_t               ha_lock;
        unsigned char           ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))];
        struct hh_cache         *hh;
        int                     (*output)(struct sk_buff *skb);
@@ -302,7 +303,10 @@ static inline void neigh_confirm(struct neighbour *neigh)
 
 static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 {
-       neigh->used = jiffies;
+       unsigned long now = ACCESS_ONCE(jiffies);
+       
+       if (neigh->used != now)
+               neigh->used = now;
        if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE)))
                return __neigh_event_send(neigh, skb);
        return 0;
@@ -373,4 +377,14 @@ struct neighbour_cb {
 
 #define NEIGH_CB(skb)  ((struct neighbour_cb *)(skb)->cb)
 
+static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
+                                    const struct net_device *dev)
+{
+       unsigned int seq;
+
+       do {
+               seq = read_seqbegin(&n->ha_lock);
+               memcpy(dst, n->ha, dev->addr_len);
+       } while (read_seqretry(&n->ha_lock, seq));
+}
 #endif
index 2044906ecd1a2d906a670b614c8e41d776db8f9d..b165b96355bfc05245b43fae754f8358942e217f 100644 (file)
@@ -294,6 +294,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
 
        skb_queue_head_init(&n->arp_queue);
        rwlock_init(&n->lock);
+       seqlock_init(&n->ha_lock);
        n->updated        = n->used = now;
        n->nud_state      = NUD_NONE;
        n->output         = neigh_blackhole;
@@ -1015,7 +1016,7 @@ out_unlock_bh:
 }
 EXPORT_SYMBOL(__neigh_event_send);
 
-static void neigh_update_hhs(struct neighbour *neigh)
+static void neigh_update_hhs(const struct neighbour *neigh)
 {
        struct hh_cache *hh;
        void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
@@ -1151,7 +1152,9 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
        }
 
        if (lladdr != neigh->ha) {
+               write_seqlock(&neigh->ha_lock);
                memcpy(&neigh->ha, lladdr, dev->addr_len);
+               write_sequnlock(&neigh->ha_lock);
                neigh_update_hhs(neigh);
                if (!(new & NUD_CONNECTED))
                        neigh->confirmed = jiffies -
@@ -1214,6 +1217,7 @@ static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst,
 {
        struct hh_cache *hh;
 
+       smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
        for (hh = n->hh; hh; hh = hh->hh_next) {
                if (hh->hh_type == protocol) {
                        atomic_inc(&hh->hh_refcnt);
@@ -1248,8 +1252,8 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
                kfree(hh);
                return;
        }
-       read_unlock(&n->lock);
-       write_lock(&n->lock);
+
+       write_lock_bh(&n->lock);
 
        /* must check if another thread already did the insert */
        if (neigh_hh_lookup(n, dst, protocol)) {
@@ -1263,13 +1267,13 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
                hh->hh_output = n->ops->output;
 
        hh->hh_next = n->hh;
+       smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
        n->hh       = hh;
 
        if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
                hh_cache_put(hh);
 end:
-       write_unlock(&n->lock);
-       read_lock(&n->lock);
+       write_unlock_bh(&n->lock);
 }
 
 /* This function can be used in contexts, where only old dev_queue_xmit
@@ -1308,16 +1312,18 @@ int neigh_resolve_output(struct sk_buff *skb)
        if (!neigh_event_send(neigh, skb)) {
                int err;
                struct net_device *dev = neigh->dev;
+               unsigned int seq;
 
-               read_lock_bh(&neigh->lock);
                if (dev->header_ops->cache &&
                    !dst->hh &&
                    !(dst->flags & DST_NOCACHE))
                        neigh_hh_init(neigh, dst, dst->ops->protocol);
 
-               err = dev_hard_header(skb, dev, ntohs(skb->protocol),
-                                     neigh->ha, NULL, skb->len);
-               read_unlock_bh(&neigh->lock);
+               do {
+                       seq = read_seqbegin(&neigh->ha_lock);
+                       err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+                                             neigh->ha, NULL, skb->len);
+               } while (read_seqretry(&neigh->ha_lock, seq));
 
                if (err >= 0)
                        rc = neigh->ops->queue_xmit(skb);
@@ -1344,13 +1350,16 @@ int neigh_connected_output(struct sk_buff *skb)
        struct dst_entry *dst = skb_dst(skb);
        struct neighbour *neigh = dst->neighbour;
        struct net_device *dev = neigh->dev;
+       unsigned int seq;
 
        __skb_pull(skb, skb_network_offset(skb));
 
-       read_lock_bh(&neigh->lock);
-       err = dev_hard_header(skb, dev, ntohs(skb->protocol),
-                             neigh->ha, NULL, skb->len);
-       read_unlock_bh(&neigh->lock);
+       do {
+               seq = read_seqbegin(&neigh->ha_lock);
+               err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+                                     neigh->ha, NULL, skb->len);
+       } while (read_seqretry(&neigh->ha_lock, seq));
+
        if (err >= 0)
                err = neigh->ops->queue_xmit(skb);
        else {
@@ -2148,10 +2157,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
 
        read_lock_bh(&neigh->lock);
        ndm->ndm_state   = neigh->nud_state;
-       if ((neigh->nud_state & NUD_VALID) &&
-           nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) {
-               read_unlock_bh(&neigh->lock);
-               goto nla_put_failure;
+       if (neigh->nud_state & NUD_VALID) {
+               char haddr[MAX_ADDR_LEN];
+
+               neigh_ha_snapshot(haddr, neigh, neigh->dev);
+               if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
+                       read_unlock_bh(&neigh->lock);
+                       goto nla_put_failure;
+               }
        }
 
        ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
index f35309578170960bd8f0f73cac1eeebd8bb4c78d..d8e540c5b0710327fd44c8f269eb51e6fcfedfc5 100644 (file)
@@ -502,10 +502,8 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
 
        if (n) {
                n->used = jiffies;
-               if (n->nud_state&NUD_VALID || neigh_event_send(n, skb) == 0) {
-                       read_lock_bh(&n->lock);
-                       memcpy(haddr, n->ha, dev->addr_len);
-                       read_unlock_bh(&n->lock);
+               if (n->nud_state & NUD_VALID || neigh_event_send(n, skb) == 0) {
+                       neigh_ha_snapshot(haddr, n, dev);
                        neigh_release(n);
                        return 0;
                }
index feaabc103ce6a061e350ddb07e1a2faf5d1ee4bd..401af95967092c218b499b064df39c4b1d272216 100644 (file)
@@ -241,11 +241,11 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *
        }
        if (neigh_event_send(n, skb_res) == 0) {
                int err;
+               char haddr[MAX_ADDR_LEN];
 
-               read_lock(&n->lock);
-               err = dev_hard_header(skb, dev, ntohs(skb->protocol),
-                                     n->ha, NULL, skb->len);
-               read_unlock(&n->lock);
+               neigh_ha_snapshot(haddr, n, dev);
+               err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
+                                     NULL, skb->len);
 
                if (err < 0) {
                        neigh_release(n);