gre: add collect_md mode to ERSPAN tunnel
author William Tu <u9012063@gmail.com>
Fri, 25 Aug 2017 16:21:28 +0000 (09:21 -0700)
committer David S. Miller <davem@davemloft.net>
Mon, 28 Aug 2017 22:04:52 +0000 (15:04 -0700)
Similar to gre, vxlan, geneve, ipip tunnels, allow ERSPAN tunnels to
operate in 'collect metadata' mode.  bpf_skb_[gs]et_tunnel_key() helpers
can make use of it right away.  OVS can use it as well in the future.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/ip_tunnels.h
net/ipv4/ip_gre.c

index 625c29329372f8fc293d96dea881a2d188a12a18..992652856fe8c7c1032e0f5f92ce7ee5aa0119da 100644 (file)
@@ -154,8 +154,10 @@ struct ip_tunnel {
 #define TUNNEL_GENEVE_OPT      __cpu_to_be16(0x0800)
 #define TUNNEL_VXLAN_OPT       __cpu_to_be16(0x1000)
 #define TUNNEL_NOCACHE         __cpu_to_be16(0x2000)
+#define TUNNEL_ERSPAN_OPT      __cpu_to_be16(0x4000)
 
-#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
+#define TUNNEL_OPTIONS_PRESENT \
+               (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT)
 
 struct tnl_ptk_info {
        __be16 flags;
index 453b7925b940c62d0cd498bc766ee3523e715806..0162fb955b33abf18514cbfd482e72a0ebce6e48 100644 (file)
@@ -113,6 +113,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 
 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
 static int ipgre_tunnel_init(struct net_device *dev);
+static void erspan_build_header(struct sk_buff *skb,
+                               __be32 id, u32 index, bool truncate);
 
 static unsigned int ipgre_net_id __read_mostly;
 static unsigned int gre_tap_net_id __read_mostly;
@@ -287,7 +289,33 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
                                           false, false) < 0)
                        goto drop;
 
-               tunnel->index = ntohl(index);
+               if (tunnel->collect_md) {
+                       struct ip_tunnel_info *info;
+                       struct erspan_metadata *md;
+                       __be64 tun_id;
+                       __be16 flags;
+
+                       tpi->flags |= TUNNEL_KEY;
+                       flags = tpi->flags;
+                       tun_id = key32_to_tunnel_id(tpi->key);
+
+                       tun_dst = ip_tun_rx_dst(skb, flags,
+                                               tun_id, sizeof(*md));
+                       if (!tun_dst)
+                               return PACKET_REJECT;
+
+                       md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
+                       if (!md)
+                               return PACKET_REJECT;
+
+                       md->index = index;
+                       info = &tun_dst->u.tun_info;
+                       info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+                       info->options_len = sizeof(*md);
+               } else {
+                       tunnel->index = ntohl(index);
+               }
+
                skb_reset_mac_header(skb);
                ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
                return PACKET_RCVD;
@@ -523,6 +551,64 @@ err_free_skb:
        dev->stats.tx_dropped++;
 }
 
+/* Transmit @skb over an ERSPAN tunnel that runs in collect_md mode.
+ *
+ * The outer IPv4 destination, tos, ttl, DF bit and the ERSPAN session ID
+ * (taken from the tunnel key) come from the TX tunnel metadata attached to
+ * @skb; the ERSPAN index comes from the struct erspan_metadata stored in
+ * that metadata's options area.
+ *
+ * @proto is not used by this function.
+ *
+ * On the error paths handled here the skb is freed and
+ * dev->stats.tx_dropped is incremented.
+ * NOTE(review): when prepare_fb_xmit() returns NULL this function simply
+ * returns; presumably prepare_fb_xmit() has already disposed of the skb --
+ * confirm against its definition.
+ */
+static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
+                          __be16 proto)
+{
+       struct ip_tunnel *tunnel = netdev_priv(dev);
+       struct ip_tunnel_info *tun_info;
+       const struct ip_tunnel_key *key;
+       struct erspan_metadata *md;
+       struct rtable *rt = NULL;
+       bool truncate = false;
+       struct flowi4 fl;
+       int tunnel_hlen;
+       __be16 df;
+
+       tun_info = skb_tunnel_info(skb);
+       if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+                    ip_tunnel_info_af(tun_info) != AF_INET))
+               goto err_free_skb;
+
+       key = &tun_info->key;
+
+       /* md->index is read below, so the options area must be large
+        * enough to hold a struct erspan_metadata.  Metadata that carries
+        * no (or too short) options is dropped before a route is taken,
+        * instead of building a header from bytes that were never set.
+        */
+       if (unlikely(tun_info->options_len < sizeof(*md)))
+               goto err_free_skb;
+
+       md = ip_tunnel_info_opts(tun_info);
+       if (!md)
+               goto err_free_skb;
+
+       /* ERSPAN has fixed 8 byte GRE header */
+       tunnel_hlen = 8 + sizeof(struct erspanhdr);
+
+       rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
+       if (!rt)
+               return;
+
+       if (gre_handle_offloads(skb, false))
+               goto err_free_rt;
+
+       /* skb->len still counts the inner Ethernet header while dev->mtu
+        * does not, so the limit is mtu plus the link-layer header length.
+        * Comparing against the bare mtu would trim, and mark as
+        * truncated, frames that actually fit.
+        */
+       if (skb->len > dev->mtu + dev->hard_header_len) {
+               pskb_trim(skb, dev->mtu + dev->hard_header_len);
+               truncate = true;
+       }
+
+       erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
+                           ntohl(md->index), truncate);
+
+       gre_build_header(skb, 8, TUNNEL_SEQ,
+                        htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));
+
+       df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
+
+       iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
+                     key->tos, key->ttl, df, false);
+       return;
+
+err_free_rt:
+       ip_rt_put(rt);
+err_free_skb:
+       kfree_skb(skb);
+       dev->stats.tx_dropped++;
+}
+
 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 {
        struct ip_tunnel_info *info = skb_tunnel_info(skb);
@@ -636,6 +722,11 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
        struct ip_tunnel *tunnel = netdev_priv(dev);
        bool truncate = false;
 
+       if (tunnel->collect_md) {
+               erspan_fb_xmit(skb, dev, skb->protocol);
+               return NETDEV_TX_OK;
+       }
+
        if (gre_handle_offloads(skb, false))
                goto free_skb;
 
@@ -998,9 +1089,12 @@ static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
                return ret;
 
        /* ERSPAN should only have GRE sequence and key flag */
-       flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
-       flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
-       if (flags != (GRE_SEQ | GRE_KEY))
+       if (data[IFLA_GRE_OFLAGS])
+               flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+       if (data[IFLA_GRE_IFLAGS])
+               flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+       if (!data[IFLA_GRE_COLLECT_METADATA] &&
+           flags != (GRE_SEQ | GRE_KEY))
                return -EINVAL;
 
        /* ERSPAN Session ID only has 10-bit. Since we reuse