packet: use percpu mmap tx frame pending refcount

author Daniel Borkmann <dborkman@redhat.com>

Wed, 15 Jan 2014 15:25:36 +0000 (16:25 +0100)

committer David S. Miller <davem@davemloft.net>

Fri, 17 Jan 2014 00:17:12 +0000 (16:17 -0800)
author Daniel Borkmann <dborkman@redhat.com>
Wed, 15 Jan 2014 15:25:36 +0000 (16:25 +0100)
committer David S. Miller <davem@davemloft.net>
Fri, 17 Jan 2014 00:17:12 +0000 (16:17 -0800)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c

index d5495d87f399db7b9e0b3e7c12457832fd1bdf5d..12f2f725a945c55c48605ed09e5d666369721233 100644 (file)
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -89,6 +89,7 @@
  #include <linux/errqueue.h>
  #include <linux/net_tstamp.h>
  #include <linux/reciprocal_div.h>
+#include <linux/percpu.h>
  #ifdef CONFIG_INET
  #include <net/inet_common.h>
  #endif
@@ -1168,6 +1169,47 @@ static void packet_increment_head(struct packet_ring_buffer *buff)
         buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
  }
  
+static void packet_inc_pending(struct packet_ring_buffer *rb)
+{
+       this_cpu_inc(*rb->pending_refcnt); /* this CPU's slot only */
+}
+
+static void packet_dec_pending(struct packet_ring_buffer *rb)
+{
+       this_cpu_dec(*rb->pending_refcnt); /* pairs with packet_inc_pending() */
+}
+
+static unsigned int packet_read_pending(const struct packet_ring_buffer *rb)
+{
+       unsigned int total = 0;
+       int i;
+
+       /* rx_ring has no counter (NULL), so nothing is ever pending there. */
+       if (!rb->pending_refcnt)
+               return 0;
+
+       for_each_possible_cpu(i)
+               total += *per_cpu_ptr(rb->pending_refcnt, i);
+
+       return total;
+}
+
+static int packet_alloc_pending(struct packet_sock *po)
+{
+       unsigned int __percpu *refcnt = alloc_percpu(unsigned int);
+
+       /* Only the tx ring gets a counter; rx keeps a NULL pointer. */
+       po->rx_ring.pending_refcnt = NULL;
+       po->tx_ring.pending_refcnt = refcnt;
+
+       return likely(refcnt) ? 0 : -ENOBUFS;
+}
+
+static void packet_free_pending(struct packet_sock *po)
+{
+       free_percpu(po->tx_ring.pending_refcnt); /* tx only; rx is NULL */
+}
+
  static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
  {
         struct sock *sk = &po->sk;
@@ -2014,8 +2056,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
                 __u32 ts;
  
                 ph = skb_shinfo(skb)->destructor_arg;
-               BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
-               atomic_dec(&po->tx_ring.pending);
+               packet_dec_pending(&po->tx_ring);
  
                 ts = __packet_set_timestamp(po, ph, skb);
                 __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
@@ -2236,7 +2277,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
                 skb_set_queue_mapping(skb, packet_pick_tx_queue(dev));
                 skb->destructor = tpacket_destruct_skb;
                 __packet_set_status(po, ph, TP_STATUS_SENDING);
-               atomic_inc(&po->tx_ring.pending);
+               packet_inc_pending(&po->tx_ring);
  
                 status = TP_STATUS_SEND_REQUEST;
                 err = po->xmit(skb);
@@ -2256,8 +2297,14 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
                 }
                 packet_increment_head(&po->tx_ring);
                 len_sum += tp_len;
-       } while (likely((ph != NULL) || (need_wait &&
-                                        atomic_read(&po->tx_ring.pending))));
+       } while (likely((ph != NULL) ||
+               /* Note: packet_read_pending() may be slow, since it has
+                * to sum a per-CPU variable over all CPUs. In the fast
+                * path, however, the first condition already
+                * short-circuits the loop, so we normally never have
+                * to take that path.
+                */
+                (need_wait && packet_read_pending(&po->tx_ring))));
  
         err = len_sum;
         goto out_put;
@@ -2556,6 +2603,7 @@ static int packet_release(struct socket *sock)
         /* Purge queues */
  
         skb_queue_purge(&sk->sk_receive_queue);
+       packet_free_pending(po);
         sk_refcnt_debug_release(sk);
  
         sock_put(sk);
@@ -2717,6 +2765,10 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
         po->num = proto;
         po->xmit = dev_queue_xmit;
  
+       err = packet_alloc_pending(po);
+       if (err)
+               goto out2;
+
         packet_cached_dev_reset(po);
  
         sk->sk_destruct = packet_sock_destruct;
@@ -2749,6 +2801,8 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
         preempt_enable();
  
         return 0;
+out2:
+       sk_free(sk);
  out:
         return err;
  }
@@ -3676,7 +3730,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
         if (!closing) {
                 if (atomic_read(&po->mapped))
                         goto out;
-               if (atomic_read(&rb->pending))
+               if (packet_read_pending(rb))
                         goto out;
         }
  
diff --git a/net/packet/diag.c b/net/packet/diag.c

index a9584a2f6d6948cd74a5179fcf3e11bc6403b45a..533ce4ff108ad94ff0a1e5205bc17f9c91c0b3ce 100644 (file)
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -3,6 +3,7 @@
  #include <linux/net.h>
  #include <linux/netdevice.h>
  #include <linux/packet_diag.h>
+#include <linux/percpu.h>
  #include <net/net_namespace.h>
  #include <net/sock.h>
  
diff --git a/net/packet/internal.h b/net/packet/internal.h

index 0a87d7b36c9e2470836a3d00f0dc261e8983d41b..eb9580a6b25ff4474a7af54900efbe2579325d00 100644 (file)
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -64,7 +64,7 @@ struct packet_ring_buffer {
         unsigned int            pg_vec_pages;
         unsigned int            pg_vec_len;
  
-       atomic_t                pending;
+       unsigned int __percpu   *pending_refcnt;
  
         struct tpacket_kbdq_core        prb_bdqc;
  };
author	Daniel Borkmann <dborkman@redhat.com>
	Wed, 15 Jan 2014 15:25:36 +0000 (16:25 +0100)
committer	David S. Miller <davem@davemloft.net>
	Fri, 17 Jan 2014 00:17:12 +0000 (16:17 -0800)
net/packet/af_packet.c		patch \| blob \| blame \| history
net/packet/diag.c		patch \| blob \| blame \| history
net/packet/internal.h		patch \| blob \| blame \| history