v4 GRE: Add TCP segmentation offload for GRE
authorPravin B Shelar <pshelar@nicira.com>
Thu, 14 Feb 2013 14:02:41 +0000 (14:02 +0000)
committerDavid S. Miller <davem@davemloft.net>
Fri, 15 Feb 2013 20:17:11 +0000 (15:17 -0500)
Following patch adds GRE protocol offload handler so that
skb_gso_segment() can segment GRE packets.
SKB GSO CB is added to keep track of total header length so that
skb_segment can push entire header. e.g. in case of GRE, skb_segment
need to push inner and outer headers to every segment.
New NETIF_F_GRE_GSO feature is added for devices which support HW
GRE TSO offload. Currently none of devices support it therefore GRE GSO
always fall backs to software GSO.

[ Compute pkt_len before ip_local_out() invocation. -DaveM ]

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
12 files changed:
include/linux/netdev_features.h
include/linux/skbuff.h
net/core/dev.c
net/core/ethtool.c
net/core/skbuff.c
net/ipv4/af_inet.c
net/ipv4/gre.c
net/ipv4/ip_gre.c
net/ipv4/tcp.c
net/ipv4/udp.c
net/ipv6/ip6_offload.c
net/ipv6/udp_offload.c

index 5ac32123035a5159c7c017a3ac29aec8ee9291af..3dd39340430e6b0ff369d8f10642d1e9f2301f26 100644 (file)
@@ -41,7 +41,7 @@ enum {
        NETIF_F_TSO_ECN_BIT,            /* ... TCP ECN support */
        NETIF_F_TSO6_BIT,               /* ... TCPv6 segmentation */
        NETIF_F_FSO_BIT,                /* ... FCoE segmentation */
-       NETIF_F_GSO_RESERVED1,          /* ... free (fill GSO_MASK to 8 bits) */
+       NETIF_F_GSO_GRE_BIT,            /* ... GRE with TSO */
        /**/NETIF_F_GSO_LAST,           /* [can't be last bit, see GSO_MASK] */
        NETIF_F_GSO_RESERVED2           /* ... free (fill GSO_MASK to 8 bits) */
                = NETIF_F_GSO_LAST,
@@ -102,6 +102,7 @@ enum {
 #define NETIF_F_VLAN_CHALLENGED        __NETIF_F(VLAN_CHALLENGED)
 #define NETIF_F_RXFCS          __NETIF_F(RXFCS)
 #define NETIF_F_RXALL          __NETIF_F(RXALL)
+#define NETIF_F_GRE_GSO                __NETIF_F(GSO_GRE)
 
 /* Features valid for ethtool to change */
 /* = all defined minus driver/device-class-related */
index ca6ee7d93edb4b325bf6a4a85545b93c497597ee..821c7f45d2a7357085c757c2309177e443b34990 100644 (file)
@@ -314,6 +314,8 @@ enum {
        SKB_GSO_TCPV6 = 1 << 4,
 
        SKB_GSO_FCOE = 1 << 5,
+
+       SKB_GSO_GRE = 1 << 6,
 };
 
 #if BITS_PER_LONG > 32
@@ -2732,6 +2734,21 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
 }
 #endif
 
+/* Keeps track of mac header offset relative to skb->head.
+ * It is useful for TSO of Tunneling protocol. e.g. GRE.
+ * For non-tunnel skb it points to skb_mac_header() and for
+ * tunnel skb it points to outer mac header. */
+struct skb_gso_cb {
+       int mac_offset;
+};
+#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb)
+
+static inline int skb_tnl_header_len(const struct sk_buff *inner_skb)
+{
+       return (skb_mac_header(inner_skb) - inner_skb->head) -
+               SKB_GSO_CB(inner_skb)->mac_offset;
+}
+
 static inline bool skb_is_gso(const struct sk_buff *skb)
 {
        return skb_shinfo(skb)->gso_size;
index 67deae60214cb0967a665f0ac8266b48788456cb..1cd6297fd34b182b8cf0accd5a7618bc85aef6a3 100644 (file)
@@ -2413,6 +2413,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
                        return ERR_PTR(err);
        }
 
+       SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
        skb_reset_mac_header(skb);
        skb_reset_mac_len(skb);
 
index d9d55209db679ba4e0e16789a9a988e147fd1022..3e9b2c3e30f063cb3e59545f2af31176ea7b0151 100644 (file)
@@ -77,6 +77,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
        [NETIF_F_TSO_ECN_BIT] =          "tx-tcp-ecn-segmentation",
        [NETIF_F_TSO6_BIT] =             "tx-tcp6-segmentation",
        [NETIF_F_FSO_BIT] =              "tx-fcoe-segmentation",
+       [NETIF_F_GSO_GRE_BIT] =          "tx-gre-segmentation",
 
        [NETIF_F_FCOE_CRC_BIT] =         "tx-checksum-fcoe-crc",
        [NETIF_F_SCTP_CSUM_BIT] =        "tx-checksum-sctp",
index 6c1ad09f8796db3f2a211c3cf777f8e24cab4874..2a3ca33c30aabd28569616824dd12c837b68faa4 100644 (file)
@@ -2738,6 +2738,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
        unsigned int mss = skb_shinfo(skb)->gso_size;
        unsigned int doffset = skb->data - skb_mac_header(skb);
        unsigned int offset = doffset;
+       unsigned int tnl_hlen = skb_tnl_header_len(skb);
        unsigned int headroom;
        unsigned int len;
        int sg = !!(features & NETIF_F_SG);
@@ -2814,7 +2815,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
                skb_set_network_header(nskb, skb->mac_len);
                nskb->transport_header = (nskb->network_header +
                                          skb_network_header_len(skb));
-               skb_copy_from_linear_data(skb, nskb->data, doffset);
+
+               skb_copy_from_linear_data_offset(skb, -tnl_hlen,
+                                                nskb->data - tnl_hlen,
+                                                doffset + tnl_hlen);
 
                if (fskb != skb_shinfo(skb)->frag_list)
                        continue;
index e6e5d85063367b17db05130fa10fe613039f8fce..e225a4e5b572a5283b8c146f18d47dfc3d5f0e8e 100644 (file)
@@ -1287,6 +1287,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
                       SKB_GSO_UDP |
                       SKB_GSO_DODGY |
                       SKB_GSO_TCP_ECN |
+                      SKB_GSO_GRE |
                       0)))
                goto out;
 
index 42a491055c7614f9658cf1f0e9bd4d54ba9959ff..7a4c710c4cddef241766bf5816b20ef768e8a45b 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/netdevice.h>
+#include <linux/if_tunnel.h>
 #include <linux/spinlock.h>
 #include <net/protocol.h>
 #include <net/gre.h>
 
 static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
 static DEFINE_SPINLOCK(gre_proto_lock);
+struct gre_base_hdr {
+       __be16 flags;
+       __be16 protocol;
+};
+#define GRE_HEADER_SECTION 4
 
 int gre_add_protocol(const struct gre_protocol *proto, u8 version)
 {
@@ -112,12 +118,117 @@ static void gre_err(struct sk_buff *skb, u32 info)
        rcu_read_unlock();
 }
 
+static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
+                                      netdev_features_t features)
+{
+       struct sk_buff *segs = ERR_PTR(-EINVAL);
+       netdev_features_t enc_features;
+       int ghl = GRE_HEADER_SECTION;
+       struct gre_base_hdr *greh;
+       int mac_len = skb->mac_len;
+       int tnl_hlen;
+       bool csum;
+
+       if (unlikely(skb_shinfo(skb)->gso_type &
+                               ~(SKB_GSO_TCPV4 |
+                                 SKB_GSO_TCPV6 |
+                                 SKB_GSO_UDP |
+                                 SKB_GSO_DODGY |
+                                 SKB_GSO_TCP_ECN |
+                                 SKB_GSO_GRE)))
+               goto out;
+
+       if (unlikely(!pskb_may_pull(skb, sizeof(*greh))))
+               goto out;
+
+       greh = (struct gre_base_hdr *)skb_transport_header(skb);
+
+       if (greh->flags & GRE_KEY)
+               ghl += GRE_HEADER_SECTION;
+       if (greh->flags & GRE_SEQ)
+               ghl += GRE_HEADER_SECTION;
+       if (greh->flags & GRE_CSUM) {
+               ghl += GRE_HEADER_SECTION;
+               csum = true;
+       } else
+               csum = false;
+
+       /* setup inner skb. */
+       if (greh->protocol == htons(ETH_P_TEB)) {
+               struct ethhdr *eth = eth_hdr(skb);
+               skb->protocol = eth->h_proto;
+       } else {
+               skb->protocol = greh->protocol;
+       }
+
+       skb->encapsulation = 0;
+
+       if (unlikely(!pskb_may_pull(skb, ghl)))
+               goto out;
+       __skb_pull(skb, ghl);
+       skb_reset_mac_header(skb);
+       skb_set_network_header(skb, skb_inner_network_offset(skb));
+       skb->mac_len = skb_inner_network_offset(skb);
+
+       /* segment inner packet. */
+       enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
+       segs = skb_mac_gso_segment(skb, enc_features);
+       if (!segs || IS_ERR(segs))
+               goto out;
+
+       skb = segs;
+       tnl_hlen = skb_tnl_header_len(skb);
+       do {
+               __skb_push(skb, ghl);
+               if (csum) {
+                       __be32 *pcsum;
+
+                       if (skb_has_shared_frag(skb)) {
+                               int err;
+
+                               err = __skb_linearize(skb);
+                               if (err) {
+                                       kfree_skb(segs);
+                                       segs = ERR_PTR(err);
+                                       goto out;
+                               }
+                       }
+
+                       greh = (struct gre_base_hdr *)(skb->data);
+                       pcsum = (__be32 *)(greh + 1);
+                       *pcsum = 0;
+                       *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0));
+               }
+               __skb_push(skb, tnl_hlen - ghl);
+
+               skb_reset_mac_header(skb);
+               skb_set_network_header(skb, mac_len);
+               skb->mac_len = mac_len;
+       } while ((skb = skb->next));
+out:
+       return segs;
+}
+
+static int gre_gso_send_check(struct sk_buff *skb)
+{
+       if (!skb->encapsulation)
+               return -EINVAL;
+       return 0;
+}
+
 static const struct net_protocol net_gre_protocol = {
        .handler     = gre_rcv,
        .err_handler = gre_err,
        .netns_ok    = 1,
 };
 
+static const struct net_offload gre_offload = {
+       .callbacks = {
+               .gso_send_check =       gre_gso_send_check,
+               .gso_segment    =       gre_gso_segment,
+       },
+};
+
 static int __init gre_init(void)
 {
        pr_info("GRE over IPv4 demultiplexor driver\n");
@@ -127,11 +238,18 @@ static int __init gre_init(void)
                return -EAGAIN;
        }
 
+       if (inet_add_offload(&gre_offload, IPPROTO_GRE)) {
+               pr_err("can't add protocol offload\n");
+               inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
+               return -EAGAIN;
+       }
+
        return 0;
 }
 
 static void __exit gre_exit(void)
 {
+       inet_del_offload(&gre_offload, IPPROTO_GRE);
        inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
 }
 
index 00a14b9864ea91b3d87955efa06c45d1ffe64d10..a56f1182c176dd87cff194914bd379f9674f6b00 100644 (file)
@@ -735,8 +735,33 @@ drop:
        return 0;
 }
 
+static struct sk_buff *handle_offloads(struct sk_buff *skb)
+{
+       int err;
+
+       if (skb_is_gso(skb)) {
+               err = skb_unclone(skb, GFP_ATOMIC);
+               if (unlikely(err))
+                       goto error;
+               skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
+               return skb;
+       } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+               err = skb_checksum_help(skb);
+               if (unlikely(err))
+                       goto error;
+       }
+       skb->ip_summed = CHECKSUM_NONE;
+
+       return skb;
+
+error:
+       kfree_skb(skb);
+       return ERR_PTR(err);
+}
+
 static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+       struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats);
        struct ip_tunnel *tunnel = netdev_priv(dev);
        const struct iphdr  *old_iph;
        const struct iphdr  *tiph;
@@ -751,10 +776,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
        __be32 dst;
        int    mtu;
        u8     ttl;
+       int    err;
+       int    pkt_len;
 
-       if (skb->ip_summed == CHECKSUM_PARTIAL &&
-           skb_checksum_help(skb))
-               goto tx_error;
+       skb = handle_offloads(skb);
+       if (IS_ERR(skb)) {
+               dev->stats.tx_dropped++;
+               return NETDEV_TX_OK;
+       }
+
+       if (!skb->encapsulation) {
+               skb_reset_inner_headers(skb);
+               skb->encapsulation = 1;
+       }
 
        old_iph = ip_hdr(skb);
 
@@ -855,7 +889,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
        if (skb->protocol == htons(ETH_P_IP)) {
                df |= (old_iph->frag_off&htons(IP_DF));
 
-               if ((old_iph->frag_off&htons(IP_DF)) &&
+               if (!skb_is_gso(skb) &&
+                   (old_iph->frag_off&htons(IP_DF)) &&
                    mtu < ntohs(old_iph->tot_len)) {
                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                        ip_rt_put(rt);
@@ -875,7 +910,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
                        }
                }
 
-               if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
+               if (!skb_is_gso(skb) &&
+                   mtu >= IPV6_MIN_MTU &&
+                   mtu < skb->len - tunnel->hlen + gre_hlen) {
                        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                        ip_rt_put(rt);
                        goto tx_error;
@@ -936,6 +973,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
        iph->daddr              =       fl4.daddr;
        iph->saddr              =       fl4.saddr;
        iph->ttl                =       ttl;
+       iph->id                 =       0;
 
        if (ttl == 0) {
                if (skb->protocol == htons(ETH_P_IP))
@@ -964,9 +1002,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
                        *ptr = tunnel->parms.o_key;
                        ptr--;
                }
-               if (tunnel->parms.o_flags&GRE_CSUM) {
+               /* Skip GRE checksum if skb is getting offloaded. */
+               if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) &&
+                   (tunnel->parms.o_flags&GRE_CSUM)) {
                        int offset = skb_transport_offset(skb);
 
+                       if (skb_has_shared_frag(skb)) {
+                               err = __skb_linearize(skb);
+                               if (err) {
+                                       ip_rt_put(rt);
+                                       goto tx_error;
+                               }
+                       }
+
                        *ptr = 0;
                        *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset,
                                                                 skb->len - offset,
@@ -974,7 +1022,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
                }
        }
 
-       iptunnel_xmit(skb, dev);
+       nf_reset(skb);
+
+       pkt_len = skb->len - skb_transport_offset(skb);
+       err = ip_local_out(skb);
+       if (likely(net_xmit_eval(err) == 0)) {
+               u64_stats_update_begin(&tstats->syncp);
+               tstats->tx_bytes += pkt_len;
+               tstats->tx_packets++;
+               u64_stats_update_end(&tstats->syncp);
+       } else {
+               dev->stats.tx_errors++;
+               dev->stats.tx_aborted_errors++;
+       }
        return NETDEV_TX_OK;
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -1044,6 +1104,11 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
                mtu = 68;
 
        tunnel->hlen = addend;
+       /* TCP offload with GRE SEQ is not supported. */
+       if (!(tunnel->parms.o_flags & GRE_SEQ)) {
+               dev->features           |= NETIF_F_GSO_SOFTWARE;
+               dev->hw_features        |= NETIF_F_GSO_SOFTWARE;
+       }
 
        return mtu;
 }
@@ -1593,6 +1658,9 @@ static void ipgre_tap_setup(struct net_device *dev)
 
        dev->iflink             = 0;
        dev->features           |= NETIF_F_NETNS_LOCAL;
+
+       dev->features           |= GRE_FEATURES;
+       dev->hw_features        |= GRE_FEATURES;
 }
 
 static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
index 1f0bedb8622f04bedd775412b9ab9023f948f48d..7a5ba48c2cc985616aff8d2ca524c5cd4cc39e8d 100644 (file)
@@ -3043,6 +3043,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
                               SKB_GSO_DODGY |
                               SKB_GSO_TCP_ECN |
                               SKB_GSO_TCPV6 |
+                              SKB_GSO_GRE |
                               0) ||
                             !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
                        goto out;
index 6791aac06ea9e4a71289ba8313e7c10b53f5f031..39a5e7a9a77fe2e0043cf518939b0f91b5814edf 100644 (file)
@@ -2305,7 +2305,8 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
                /* Packet is from an untrusted source, reset gso_segs. */
                int type = skb_shinfo(skb)->gso_type;
 
-               if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
+               if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
+                                     SKB_GSO_GRE) ||
                             !(type & (SKB_GSO_UDP))))
                        goto out;
 
index f26f0da7f095763050d891e2543105c3f996320e..8234c1dcdf7286f23c32bc150279cffaeb818ef4 100644 (file)
@@ -99,6 +99,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
                     ~(SKB_GSO_UDP |
                       SKB_GSO_DODGY |
                       SKB_GSO_TCP_ECN |
+                      SKB_GSO_GRE |
                       SKB_GSO_TCPV6 |
                       0)))
                goto out;
index 0c8934a317c2f72ebdd9ada034812bbd8e261597..cf05cf073c517d2e0fae4c967acffffa77befcbc 100644 (file)
@@ -56,7 +56,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
                /* Packet is from an untrusted source, reset gso_segs. */
                int type = skb_shinfo(skb)->gso_type;
 
-               if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
+               if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
+                                     SKB_GSO_GRE) ||
                             !(type & (SKB_GSO_UDP))))
                        goto out;