packet: tx_ring: allow the user to choose tx data offset
author Paul Chavent <Paul.Chavent@onera.fr>
Tue, 6 Nov 2012 23:10:47 +0000 (23:10 +0000)
committer David S. Miller <davem@davemloft.net>
Wed, 7 Nov 2012 23:54:30 +0000 (18:54 -0500)
The tx data offset of packet mmap tx ring used to be :
(TPACKET2_HDRLEN - sizeof(struct sockaddr_ll))

The problem is that, with a SOCK_RAW socket, the payload (14 bytes after
the beginning of the user data) is misaligned.

This patch lets the user give an offset for their tx data if
desired.

Set sock option PACKET_TX_HAS_OFF to 1, then specify in each frame of
your tx ring tp_net for SOCK_DGRAM, or tp_mac for SOCK_RAW.

Signed-off-by: Paul Chavent <paul.chavent@onera.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
Documentation/networking/packet_mmap.txt
include/uapi/linux/if_packet.h
net/packet/af_packet.c
net/packet/internal.h

index 1c08a4b0981fb7f648cee60ffea8b3c45bb319c4..7cd879eba5dcdfb6432f3b69dd0d74e69d796a8a 100644 (file)
@@ -163,6 +163,19 @@ As capture, each frame contains two parts:
 
  A complete tutorial is available at: http://wiki.gnu-log.net/
 
+By default, the user should put data at :
+ frame base + TPACKET_HDRLEN - sizeof(struct sockaddr_ll)
+
+So, whatever you choose for the socket mode (SOCK_DGRAM or SOCK_RAW),
+the beginning of the user data will be at :
+ frame base + TPACKET_ALIGN(sizeof(struct tpacket_hdr))
+
+If you wish to put user data at a custom offset from the beginning of
+the frame (for payload alignment with SOCK_RAW mode for instance) you
+can set tp_net (with SOCK_DGRAM) or tp_mac (with SOCK_RAW). In order
+to make this work it must be enabled previously with setsockopt()
+and the PACKET_TX_HAS_OFF option.
+
 --------------------------------------------------------------------------------
 + PACKET_MMAP settings
 --------------------------------------------------------------------------------
index f3799295d231c7d34a114cbb75b8950e6a61da3c..f9a60375f0d0121855af7f3a157afccc909bf547 100644 (file)
@@ -50,6 +50,7 @@ struct sockaddr_ll {
 #define PACKET_TX_TIMESTAMP            16
 #define PACKET_TIMESTAMP               17
 #define PACKET_FANOUT                  18
+#define PACKET_TX_HAS_OFF              19
 
 #define PACKET_FANOUT_HASH             0
 #define PACKET_FANOUT_LB               1
index 9034f52659b560f79a1a6d3e2dc3b6081f661d98..f262dbfc7f0684c34f44bc39ea7615aa8e82e3f7 100644 (file)
@@ -1881,7 +1881,35 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
        skb_reserve(skb, hlen);
        skb_reset_network_header(skb);
 
-       data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
+       if (po->tp_tx_has_off) {
+               int off_min, off_max, off;
+               off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
+               off_max = po->tx_ring.frame_size - tp_len;
+               if (sock->type == SOCK_DGRAM) {
+                       switch (po->tp_version) {
+                       case TPACKET_V2:
+                               off = ph.h2->tp_net;
+                               break;
+                       default:
+                               off = ph.h1->tp_net;
+                               break;
+                       }
+               } else {
+                       switch (po->tp_version) {
+                       case TPACKET_V2:
+                               off = ph.h2->tp_mac;
+                               break;
+                       default:
+                               off = ph.h1->tp_mac;
+                               break;
+                       }
+               }
+               if (unlikely((off < off_min) || (off_max < off)))
+                       return -EINVAL;
+               data = ph.raw + off;
+       } else {
+               data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
+       }
        to_write = tp_len;
 
        if (sock->type == SOCK_DGRAM) {
@@ -3109,6 +3137,19 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 
                return fanout_add(sk, val & 0xffff, val >> 16);
        }
+       case PACKET_TX_HAS_OFF:
+       {
+               unsigned int val;
+
+               if (optlen != sizeof(val))
+                       return -EINVAL;
+               if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
+                       return -EBUSY;
+               if (copy_from_user(&val, optval, sizeof(val)))
+                       return -EFAULT;
+               po->tp_tx_has_off = !!val;
+               return 0;
+       }
        default:
                return -ENOPROTOOPT;
        }
@@ -3200,6 +3241,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
                        ((u32)po->fanout->type << 16)) :
                       0);
                break;
+       case PACKET_TX_HAS_OFF:
+               val = po->tp_tx_has_off;
+               break;
        default:
                return -ENOPROTOOPT;
        }
index 44945f6b7252c5c42e3fddf265a610824b6a646c..e84cab8cb7a99a6a07fe26daa9f33c03f8bb3bda 100644 (file)
@@ -109,6 +109,7 @@ struct packet_sock {
        unsigned int            tp_hdrlen;
        unsigned int            tp_reserve;
        unsigned int            tp_loss:1;
+       unsigned int            tp_tx_has_off:1;
        unsigned int            tp_tstamp;
        struct packet_type      prot_hook ____cacheline_aligned_in_smp;
 };