[RAMEN9610-20741]UPSTREAM: xfrm: Add virtual xfrm interfaces
author Steffen Klassert <steffen.klassert@secunet.com>
Tue, 12 Jun 2018 12:07:12 +0000 (14:07 +0200)
committer robot <robot@samsung.com>
Sun, 27 Oct 2019 17:53:41 +0000 (02:53 +0900)
This patch adds support for virtual xfrm interfaces.
Packets that are routed through such an interface
are guaranteed to be IPsec transformed or dropped.
It is a generic virtual interface that ensures IPsec
transformation, no need to know what happens behind
the interface. This means that we can tunnel IPv4 and
IPv6 through the same interface and support all xfrm
modes (tunnel, transport and beet) on it.

Co-developed-by: Lorenzo Colitti <lorenzo@google.com>
Co-developed-by: Benedict Wong <benedictwong@google.com>
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: Benedict Wong <benedictwong@google.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Benedict Wong <benedictwong@google.com>
Tested-by: Antony Antony <antony@phenome.org>
Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
(cherry picked from commit f203b76d78092faf248db3f851840fbecf80b40e)
Bug: 113046120
Change-Id: I05e8fe1e8a8a4b01886504ce694ddda29e4fbec6

include/net/xfrm.h
include/uapi/linux/if_link.h
net/xfrm/Kconfig
net/xfrm/Makefile
net/xfrm/xfrm_input.c
net/xfrm/xfrm_interface.c [new file with mode: 0644]
net/xfrm/xfrm_policy.c

index 687750094fcdce8bbec145d73720e46a3fce7215..2ee065ec429cf2026dd076efa42a845da142af0a 100644 (file)
@@ -23,6 +23,7 @@
 #include <net/ipv6.h>
 #include <net/ip6_fib.h>
 #include <net/flow.h>
+#include <net/gro_cells.h>
 
 #include <linux/interrupt.h>
 
@@ -293,6 +294,13 @@ struct xfrm_replay {
        int     (*overflow)(struct xfrm_state *x, struct sk_buff *skb);
 };
 
+struct xfrm_if_cb {
+       struct xfrm_if  *(*decode_session)(struct sk_buff *skb);        /* map an skb to an xfrm_if; the xfrm_interface implementation returns NULL when none matches */
+};
+
+void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb);
+void xfrm_if_unregister_cb(void);
+
 struct net_device;
 struct xfrm_type;
 struct xfrm_dst;
@@ -1008,6 +1016,22 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
 
 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev);
 
+struct xfrm_if_parms {
+       char name[IFNAMSIZ];    /* name of XFRM device */
+       int link;               /* ifindex of underlying L2 interface */
+       u32 if_id;              /* interface identifier */
+};
+
+struct xfrm_if {
+       struct xfrm_if __rcu *next;     /* next interface in list */
+       struct net_device *dev;         /* virtual device associated with interface */
+       struct net_device *phydev;      /* physical device */
+       struct net *net;                /* netns for packet i/o */
+       struct xfrm_if_parms p;         /* interface parms */
+
+       struct gro_cells gro_cells;     /* set up by gro_cells_init() in xfrmi_dev_init() */
+};
+
 struct xfrm_offload {
        /* Output sequence number for replay protection on offloading. */
        struct {
index 1f00f0cd67907c43bd801139a255128952f35619..b9a7c97f3cc2dea8bccb64dd0c25ce522ea6a0ec 100644 (file)
@@ -451,6 +451,16 @@ enum {
 
 #define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1)
 
+/* XFRM section */
+enum {
+       IFLA_XFRM_UNSPEC,
+       IFLA_XFRM_LINK,         /* u32: ifindex of the underlying device */
+       IFLA_XFRM_IF_ID,        /* u32: xfrm interface identifier */
+       __IFLA_XFRM_MAX
+};
+
+#define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1)
+
 enum macsec_validation_type {
        MACSEC_VALIDATE_DISABLED = 0,
        MACSEC_VALIDATE_CHECK = 1,
index 286ed25c1a698ae9bb2b89b110d0469475a1e2de..53381888a7b3ccc86a5f7ef0faaa3c9e95d784ee 100644 (file)
@@ -25,6 +25,14 @@ config XFRM_USER
 
          If unsure, say Y.
 
+config XFRM_INTERFACE
+       tristate "Transformation virtual interface"
+       depends on XFRM && IPV6
+       ---help---
+         This provides a virtual interface to route IPsec traffic.
+
+         If unsure, say N.
+
 config XFRM_SUB_POLICY
        bool "Transformation sub policy support"
        depends on XFRM
index 0bd2465a8c5a8e095d87642f9d71a19fbd6572e1..fbc4552d17b85646d0ac934f1054604e53bce75a 100644 (file)
@@ -10,3 +10,4 @@ obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
 obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
 obj-$(CONFIG_XFRM_USER) += xfrm_user.o
 obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
+obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
index 949fe7aea4383a2e598f92475c21fc4a1a5f65d6..0a0273d21f66fb062e27eaee27c4509fe1e99b39 100644 (file)
@@ -320,6 +320,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 
        seq = 0;
        if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
+               secpath_reset(skb);
                XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
                goto drop;
        }
@@ -328,12 +329,14 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
                                   XFRM_SPI_SKB_CB(skb)->daddroff);
        do {
                if (skb->sp->len == XFRM_MAX_DEPTH) {
+                       secpath_reset(skb);
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
                        goto drop;
                }
 
                x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
                if (x == NULL) {
+                       secpath_reset(skb);
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
                        xfrm_audit_state_notfound(skb, family, spi, seq);
                        goto drop;
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
new file mode 100644 (file)
index 0000000..31cb1c7
--- /dev/null
@@ -0,0 +1,972 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *     XFRM virtual interface
+ *
+ *     Copyright (C) 2018 secunet Security Networks AG
+ *
+ *     Author:
+ *     Steffen Klassert <steffen.klassert@secunet.com>
+ */
+
+#include <linux/module.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/sockios.h>
+#include <linux/icmp.h>
+#include <linux/if.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_link.h>
+#include <linux/if_arp.h>
+#include <linux/icmpv6.h>
+#include <linux/init.h>
+#include <linux/route.h>
+#include <linux/rtnetlink.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/slab.h>
+#include <linux/hash.h>
+
+#include <linux/uaccess.h>
+#include <linux/atomic.h>
+
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <linux/etherdevice.h>
+
+static int xfrmi_dev_init(struct net_device *dev);
+static void xfrmi_dev_setup(struct net_device *dev);
+static struct rtnl_link_ops xfrmi_link_ops __read_mostly;
+static unsigned int xfrmi_net_id __read_mostly;
+
+struct xfrmi_net {
+       /* lists for storing interfaces in use */
+       struct xfrm_if __rcu *xfrmi[1]; /* only slot [0] exists: head of the list chained through xfrm_if.next */
+};
+
+/*
+ * Walk the xfrm_if list beginning at @start, binding each entry to @xi in
+ * turn.  Both the head and every ->next link are read with rcu_dereference().
+ */
+#define for_each_xfrmi_rcu(start, xi) \
+       for ((xi) = rcu_dereference(start); (xi); \
+            (xi) = rcu_dereference((xi)->next))
+
+/*
+ * Return the first xfrm interface registered in @net that is administratively
+ * up (IFF_UP) and whose if_id equals that of state @x, or NULL if there is
+ * none.  The list is walked with rcu_dereference(), so callers are expected
+ * to be inside an RCU read-side critical section.
+ */
+static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x)
+{
+       struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+       struct xfrm_if *cur;
+
+       for_each_xfrmi_rcu(xfrmn->xfrmi[0], cur) {
+               if (cur->p.if_id != x->if_id)
+                       continue;
+               if (!(cur->dev->flags & IFF_UP))
+                       continue;
+
+               return cur;
+       }
+
+       return NULL;
+}
+
+/*
+ * decode_session callback: find the xfrm interface whose net_device has the
+ * same ifindex as @skb->dev and is IFF_UP.  Returns NULL when @skb has no
+ * device or when no such interface exists in that device's netns.
+ */
+static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb)
+{
+       struct xfrmi_net *xfrmn;
+       struct xfrm_if *cand;
+       int wanted;
+
+       if (!skb->dev)
+               return NULL;
+
+       xfrmn = net_generic(dev_net(skb->dev), xfrmi_net_id);
+       wanted = skb->dev->ifindex;
+
+       for_each_xfrmi_rcu(xfrmn->xfrmi[0], cand) {
+               if (cand->dev->ifindex != wanted)
+                       continue;
+               if (cand->dev->flags & IFF_UP)
+                       return cand;
+       }
+
+       return NULL;
+}
+
+/*
+ * Insert @xi at the head of the per-netns interface list.  The old head is
+ * read with rtnl_dereference(), i.e. the caller is expected to hold RTNL.
+ */
+static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
+{
+       struct xfrm_if __rcu **head = &xfrmn->xfrmi[0];
+       struct xfrm_if *old_first = rtnl_dereference(*head);
+
+       /* Publish ->next before the new head so readers never see a gap. */
+       rcu_assign_pointer(xi->next, old_first);
+       rcu_assign_pointer(*head, xi);
+}
+
+/*
+ * Remove @xi from the per-netns interface list if it is on it; otherwise do
+ * nothing.  Links are read with rtnl_dereference() (RTNL expected to be held).
+ */
+static void xfrmi_unlink(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
+{
+       struct xfrm_if __rcu **link = &xfrmn->xfrmi[0];
+       struct xfrm_if *cur;
+
+       while ((cur = rtnl_dereference(*link)) != NULL) {
+               if (cur == xi) {
+                       /* Bypass @xi: predecessor now points at its successor. */
+                       rcu_assign_pointer(*link, xi->next);
+                       return;
+               }
+               link = &cur->next;
+       }
+}
+
+/*
+ * priv_destructor for xfrm interfaces (installed by xfrmi_dev_setup()).
+ * Releases everything xfrmi_dev_init() allocated for @dev: the GRO cells and
+ * the per-cpu traffic counters.
+ */
+static void xfrmi_dev_free(struct net_device *dev)
+{
+       struct xfrm_if *xi = netdev_priv(dev);
+
+       /* Pairs with gro_cells_init() in xfrmi_dev_init(); without this the
+        * cells are leaked every time an interface is removed.
+        */
+       gro_cells_destroy(&xi->gro_cells);
+       free_percpu(dev->tstats);
+}
+
+/*
+ * Register @dev as an "xfrm" link and, on success, record its final name in
+ * the interface parameters, take a reference on @dev (dropped again in
+ * xfrmi_dev_uninit()) and add it to the per-netns list.
+ *
+ * Returns 0 on success or the negative error from register_netdevice().
+ */
+static int xfrmi_create2(struct net_device *dev)
+{
+       struct xfrm_if *xi = netdev_priv(dev);
+       struct xfrmi_net *xfrmn = net_generic(dev_net(dev), xfrmi_net_id);
+       int ret;
+
+       dev->rtnl_link_ops = &xfrmi_link_ops;
+
+       ret = register_netdevice(dev);
+       if (ret < 0)
+               return ret;
+
+       strcpy(xi->p.name, dev->name);
+       dev_hold(dev);
+       xfrmi_link(xfrmn, xi);
+
+       return 0;
+}
+
+static struct xfrm_if *xfrmi_create(struct net *net, struct xfrm_if_parms *p)
+{      /* Allocate, fill in and register a new xfrm netdev described by @p; returns its xfrm_if, or NULL on any failure. */
+       struct net_device *dev;
+       struct xfrm_if *xi;
+       char name[IFNAMSIZ];
+       int err;
+
+       if (p->name[0])
+               strlcpy(name, p->name, IFNAMSIZ);
+       else
+               goto failed;    /* an empty interface name is rejected */
+
+       dev = alloc_netdev(sizeof(*xi), name, NET_NAME_UNKNOWN, xfrmi_dev_setup);
+       if (!dev)
+               goto failed;
+
+       dev_net_set(dev, net);
+
+       xi = netdev_priv(dev);
+       xi->p = *p;
+       xi->net = net;
+       xi->dev = dev;
+       xi->phydev = dev_get_by_index(net, p->link);    /* reference is dropped with dev_put() below or in xfrmi_dev_uninit() */
+       if (!xi->phydev)
+               goto failed_free;
+
+       err = xfrmi_create2(dev);
+       if (err < 0)
+               goto failed_dev_put;
+
+       return xi;
+
+failed_dev_put:
+       dev_put(xi->phydev);
+failed_free:
+       free_netdev(dev);
+failed:
+       return NULL;    /* the specific error code is not propagated to the caller */
+}
+
+/*
+ * Look for an interface in @net whose if_id equals @p->if_id.
+ *
+ * With @create == 0 this is a pure lookup: the match is returned, or NULL.
+ * With @create != 0 an existing match is treated as a conflict (NULL is
+ * returned); if there is no match a new interface is created through
+ * xfrmi_create() and its result (possibly NULL) is returned.
+ */
+static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p,
+                                  int create)
+{
+       struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+       struct xfrm_if *cur = rtnl_dereference(xfrmn->xfrmi[0]);
+
+       while (cur) {
+               if (cur->p.if_id == p->if_id)
+                       return create ? NULL : cur;
+               cur = rtnl_dereference(cur->next);
+       }
+
+       return create ? xfrmi_create(net, p) : NULL;
+}
+
+/*
+ * ndo_uninit: take the interface off the list of the netns stored in
+ * xi->net, then drop the references held on the underlying device and on
+ * @dev itself (the latter pairs with the dev_hold() in xfrmi_create2()).
+ */
+static void xfrmi_dev_uninit(struct net_device *dev)
+{
+       struct xfrm_if *self = netdev_priv(dev);
+
+       xfrmi_unlink(net_generic(self->net, xfrmi_net_id), self);
+       dev_put(self->phydev);
+       dev_put(dev);
+}
+
+/*
+ * Clear per-packet state on @skb before it crosses the xfrm interface:
+ * timestamp, packet type, input ifindex, ignore_df, dst and netfilter state
+ * are always reset.  When @xnet is true the IPVS state, secpath, owning
+ * socket and mark are cleared as well.
+ */
+static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
+{
+       skb->tstamp = 0;
+       skb->pkt_type = PACKET_HOST;
+       skb->skb_iif = 0;
+       skb->ignore_df = 0;
+       skb_dst_drop(skb);
+       nf_reset(skb);
+       nf_reset_trace(skb);
+
+       if (xnet) {
+               ipvs_reset(skb);
+               secpath_reset(skb);
+               skb_orphan(skb);
+               skb->mark = 0;
+       }
+}
+
+static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
+{      /* cb_handler of the xfrm4/xfrm6 protocol tables in this file: attach a received packet to the xfrm interface matching its state's if_id. */
+       struct pcpu_sw_netstats *tstats;
+       struct xfrm_mode *inner_mode;
+       struct net_device *dev;
+       struct xfrm_state *x;
+       struct xfrm_if *xi;
+       bool xnet;
+
+       if (err && !skb->sp)    /* error and no secpath on the skb: no state to look up */
+               return 0;
+
+       x = xfrm_input_state(skb);
+
+       xi = xfrmi_lookup(xs_net(x), x);
+       if (!xi)
+               return 1;       /* no running interface carries this state's if_id */
+
+       dev = xi->dev;
+       skb->dev = dev;
+
+       if (err) {      /* account the failure against the interface */
+               dev->stats.rx_errors++;
+               dev->stats.rx_dropped++;
+
+               return 0;
+       }
+
+       xnet = !net_eq(xi->net, dev_net(skb->dev));     /* skb->dev was set to the xfrm device just above */
+
+       if (xnet) {
+               inner_mode = x->inner_mode;
+
+               if (x->sel.family == AF_UNSPEC) {       /* selector has no family: derive the inner mode from the packet's protocol */
+                       inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
+                       if (inner_mode == NULL) {
+                               XFRM_INC_STATS(dev_net(skb->dev),
+                                              LINUX_MIB_XFRMINSTATEMODEERROR);
+                               return -EINVAL;
+                       }
+               }
+
+               if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb,
+                                      inner_mode->afinfo->family))
+                       return -EPERM;
+       }
+
+       xfrmi_scrub_packet(skb, xnet);
+
+       tstats = this_cpu_ptr(dev->tstats);
+
+       u64_stats_update_begin(&tstats->syncp);
+       tstats->rx_packets++;
+       tstats->rx_bytes += skb->len;
+       u64_stats_update_end(&tstats->syncp);
+
+       return 0;
+}
+
+static int
+xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
+{      /* Transmit helper: resolve an xfrm bundle for @fl restricted to this interface's if_id and send @skb through it. Negative return means the caller still owns @skb. */
+       struct xfrm_if *xi = netdev_priv(dev);
+       struct net_device_stats *stats = &xi->dev->stats;
+       struct dst_entry *dst = skb_dst(skb);
+       unsigned int length = skb->len; /* sampled before dst_output() for the tx byte counter */
+       struct net_device *tdev;
+       struct xfrm_state *x;
+       int err = -1;
+       int mtu;
+
+       if (!dst)
+               goto tx_err_link_failure;
+
+       fl->flowi_xfrm.if_id = xi->p.if_id;     /* tag the flow with this interface's if_id */
+
+       dst_hold(dst);  /* NOTE(review): presumably balances a reference xfrm_lookup() consumes on its dst argument - confirm */
+       dst = xfrm_lookup(xi->net, dst, fl, NULL, 0);
+       if (IS_ERR(dst)) {
+               err = PTR_ERR(dst);
+               dst = NULL;     /* so the dst_release() at the end is a no-op */
+               goto tx_err_link_failure;
+       }
+
+       x = dst->xfrm;
+       if (!x)         /* lookup produced no transformation */
+               goto tx_err_link_failure;
+
+       if (x->if_id != xi->p.if_id)    /* state is bound to a different if_id */
+               goto tx_err_link_failure;
+
+       tdev = dst->dev;
+
+       if (tdev == dev) {      /* output device is this interface itself */
+               stats->collisions++;
+               net_warn_ratelimited("%s: Local routing loop detected!\n",
+                                    xi->p.name);
+               goto tx_err_dst_release;
+       }
+
+       mtu = dst_mtu(dst);
+       if (!skb->ignore_df && skb->len > mtu) {        /* too big: update PMTU and send an ICMP/ICMPv6 error back */
+               skb_dst_update_pmtu(skb, mtu);
+
+               if (skb->protocol == htons(ETH_P_IPV6)) {
+                       if (mtu < IPV6_MIN_MTU)
+                               mtu = IPV6_MIN_MTU;
+
+                       icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+               } else {
+                       icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+                                 htonl(mtu));
+               }
+
+               dst_release(dst);
+               return -EMSGSIZE;       /* xfrmi_xmit() frees the skb on a negative return */
+       }
+
+       xfrmi_scrub_packet(skb, !net_eq(xi->net, dev_net(dev)));
+       skb_dst_set(skb, dst);
+       skb->dev = tdev;
+
+       err = dst_output(xi->net, skb->sk, skb);
+       if (net_xmit_eval(err) == 0) {
+               struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+
+               u64_stats_update_begin(&tstats->syncp);
+               tstats->tx_bytes += length;
+               tstats->tx_packets++;
+               u64_stats_update_end(&tstats->syncp);
+       } else {
+               stats->tx_errors++;
+               stats->tx_aborted_errors++;
+       }
+
+       return 0;       /* 0 even if dst_output() failed, so xfrmi_xmit() does not kfree_skb() - NOTE(review): presumably the skb is no longer ours here */
+tx_err_link_failure:
+       stats->tx_carrier_errors++;
+       dst_link_failure(skb);
+tx_err_dst_release:
+       dst_release(dst);
+       return err;
+}
+
+static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
+{      /* ndo_start_xmit: build a flow from the IPv4/IPv6 packet and pass it to xfrmi_xmit2(); always returns NETDEV_TX_OK. */
+       struct xfrm_if *xi = netdev_priv(dev);
+       struct net_device_stats *stats = &xi->dev->stats;
+       struct flowi fl;
+       int ret;
+
+       memset(&fl, 0, sizeof(fl));
+
+       switch (skb->protocol) {
+       case htons(ETH_P_IPV6):
+               xfrm_decode_session(skb, &fl, AF_INET6);
+               memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+               break;
+       case htons(ETH_P_IP):
+               xfrm_decode_session(skb, &fl, AF_INET);
+               memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+               break;
+       default:
+               goto tx_err;    /* only IPv4 and IPv6 payloads are supported */
+       }
+
+       fl.flowi_oif = xi->phydev->ifindex;     /* output interface for the flow: the underlying device */
+
+       ret = xfrmi_xmit2(skb, dev, &fl);
+       if (ret < 0)
+               goto tx_err;
+
+       return NETDEV_TX_OK;
+
+tx_err:
+       stats->tx_errors++;
+       stats->tx_dropped++;
+       kfree_skb(skb); /* drop path: the packet is freed here and still reported as NETDEV_TX_OK */
+       return NETDEV_TX_OK;
+}
+
+static int xfrmi4_err(struct sk_buff *skb, u32 info)
+{      /* err_handler of the xfrm4 protocol tables: react to ICMP "fragmentation needed" and redirect messages about ESP/AH/IPcomp packets. */
+       const struct iphdr *iph = (const struct iphdr *)skb->data;      /* skb->data is read as an IPv4 header */
+       struct net *net = dev_net(skb->dev);
+       int protocol = iph->protocol;
+       struct ip_comp_hdr *ipch;
+       struct ip_esp_hdr *esph;
+       struct ip_auth_hdr *ah ;
+       struct xfrm_state *x;
+       struct xfrm_if *xi;
+       __be32 spi;
+
+       switch (protocol) {     /* locate the SPI right behind the IPv4 header (ihl is in 32-bit words) */
+       case IPPROTO_ESP:
+               esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
+               spi = esph->spi;
+               break;
+       case IPPROTO_AH:
+               ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
+               spi = ah->spi;
+               break;
+       case IPPROTO_COMP:
+               ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
+               spi = htonl(ntohs(ipch->cpi));  /* widen the 16-bit CPI to a 32-bit network-order SPI */
+               break;
+       default:
+               return 0;
+       }
+
+       switch (icmp_hdr(skb)->type) {
+       case ICMP_DEST_UNREACH:
+               if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
+                       return 0;       /* other unreachable codes are ignored */
+       case ICMP_REDIRECT:     /* ICMP_FRAG_NEEDED deliberately falls through to here */
+               break;
+       default:
+               return 0;
+       }
+
+       x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+                             spi, protocol, AF_INET);
+       if (!x)
+               return 0;
+
+       xi = xfrmi_lookup(net, x);
+       if (!xi) {
+               xfrm_state_put(x);
+               return -1;      /* state exists but no running xfrm interface has its if_id */
+       }
+
+       if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
+               ipv4_update_pmtu(skb, net, info, 0, 0, protocol, 0);
+       else
+               ipv4_redirect(skb, net, 0, 0, protocol, 0);
+       xfrm_state_put(x);      /* drop the reference obtained by xfrm_state_lookup() */
+
+       return 0;
+}
+
+static int xfrmi6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+                   u8 type, u8 code, int offset, __be32 info)
+{      /* err_handler of the xfrm6 protocol tables: react to ICMPv6 "packet too big" and NDISC redirect messages about ESP/AH/IPcomp packets. */
+       const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;  /* skb->data is read as an IPv6 header */
+       struct net *net = dev_net(skb->dev);
+       int protocol = iph->nexthdr;
+       struct ip_comp_hdr *ipch;
+       struct ip_esp_hdr *esph;
+       struct ip_auth_hdr *ah;
+       struct xfrm_state *x;
+       struct xfrm_if *xi;
+       __be32 spi;
+
+       switch (protocol) {     /* the IPsec header is read at skb->data + @offset */
+       case IPPROTO_ESP:
+               esph = (struct ip_esp_hdr *)(skb->data + offset);
+               spi = esph->spi;
+               break;
+       case IPPROTO_AH:
+               ah = (struct ip_auth_hdr *)(skb->data + offset);
+               spi = ah->spi;
+               break;
+       case IPPROTO_COMP:
+               ipch = (struct ip_comp_hdr *)(skb->data + offset);
+               spi = htonl(ntohs(ipch->cpi));  /* widen the 16-bit CPI to a 32-bit network-order SPI */
+               break;
+       default:
+               return 0;
+       }
+
+       if (type != ICMPV6_PKT_TOOBIG &&
+           type != NDISC_REDIRECT)
+               return 0;       /* every other ICMPv6 type is ignored */
+
+       x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+                             spi, protocol, AF_INET6);
+       if (!x)
+               return 0;
+
+       xi = xfrmi_lookup(net, x);
+       if (!xi) {
+               xfrm_state_put(x);
+               return -1;      /* state exists but no running xfrm interface has its if_id */
+       }
+
+       if (type == NDISC_REDIRECT)
+               ip6_redirect(skb, net, skb->dev->ifindex, 0,
+                            sock_net_uid(net, NULL));
+       else
+               ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
+       xfrm_state_put(x);      /* drop the reference obtained by xfrm_state_lookup() */
+
+       return 0;
+}
+
+/*
+ * Apply new parameters @p to @xi.  Only the if_id may change; a request
+ * whose link differs from the current one is refused with -EINVAL.
+ * Returns 0 on success.
+ */
+static int xfrmi_change(struct xfrm_if *xi, const struct xfrm_if_parms *p)
+{
+       const bool same_link = (p->link == xi->p.link);
+
+       if (!same_link)
+               return -EINVAL;
+
+       xi->p.if_id = p->if_id;
+       return 0;
+}
+
+static int xfrmi_update(struct xfrm_if *xi, struct xfrm_if_parms *p)
+{      /* Take @xi off the list, apply @p via xfrmi_change(), put it back and notify; returns xfrmi_change()'s result. */
+       struct net *net = dev_net(xi->dev);
+       struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+       int err;
+
+       xfrmi_unlink(xfrmn, xi);
+       synchronize_net();      /* wait before touching xi->p - presumably lets RCU list walkers that may still see xi finish */
+       err = xfrmi_change(xi, p);
+       xfrmi_link(xfrmn, xi);  /* re-linked even when xfrmi_change() failed */
+       netdev_state_change(xi->dev);
+       return err;
+}
+
+static void xfrmi_get_stats64(struct net_device *dev,
+                              struct rtnl_link_stats64 *s)
+{      /* ndo_get_stats64: add the per-cpu packet/byte counters into @s and copy the drop counters from dev->stats. */
+       int cpu;
+
+       if (!dev->tstats)       /* per-cpu counters not allocated: leave @s untouched */
+               return;
+
+       for_each_possible_cpu(cpu) {
+               struct pcpu_sw_netstats *stats;
+               struct pcpu_sw_netstats tmp;
+               int start;
+
+               stats = per_cpu_ptr(dev->tstats, cpu);
+               do {    /* re-read until u64_stats_fetch_retry_irq() reports the copy was not disturbed */
+                       start = u64_stats_fetch_begin_irq(&stats->syncp);
+                       tmp.rx_packets = stats->rx_packets;
+                       tmp.rx_bytes   = stats->rx_bytes;
+                       tmp.tx_packets = stats->tx_packets;
+                       tmp.tx_bytes   = stats->tx_bytes;
+               } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+               s->rx_packets += tmp.rx_packets;
+               s->rx_bytes   += tmp.rx_bytes;
+               s->tx_packets += tmp.tx_packets;
+               s->tx_bytes   += tmp.tx_bytes;
+       }
+
+       s->rx_dropped = dev->stats.rx_dropped;  /* error counters in dev->stats are not copied here */
+       s->tx_dropped = dev->stats.tx_dropped;
+}
+
+/* ndo_get_iflink: report the ifindex of the device this interface sits on. */
+static int xfrmi_get_iflink(const struct net_device *dev)
+{
+       const struct xfrm_if *xi = netdev_priv(dev);
+       const struct net_device *lower = xi->phydev;
+
+       return lower->ifindex;
+}
+
+
+static const struct net_device_ops xfrmi_netdev_ops = {        /* installed on every xfrm netdev by xfrmi_dev_setup() */
+       .ndo_init       = xfrmi_dev_init,
+       .ndo_uninit     = xfrmi_dev_uninit,
+       .ndo_start_xmit = xfrmi_xmit,
+       .ndo_get_stats64 = xfrmi_get_stats64,
+       .ndo_get_iflink = xfrmi_get_iflink,
+};
+
+static void xfrmi_dev_setup(struct net_device *dev)
+{      /* Setup callback used by alloc_netdev() in xfrmi_create() and by xfrmi_link_ops: static defaults for an xfrm netdev. */
+       dev->netdev_ops         = &xfrmi_netdev_ops;
+       dev->type               = ARPHRD_NONE;
+       dev->hard_header_len    = ETH_HLEN;
+       dev->min_header_len     = ETH_HLEN;
+       dev->mtu                = ETH_DATA_LEN;
+       dev->min_mtu            = ETH_MIN_MTU;
+       dev->max_mtu            = ETH_DATA_LEN;
+       dev->addr_len           = ETH_ALEN;
+       dev->flags              = IFF_NOARP;
+       dev->needs_free_netdev  = true;
+       dev->priv_destructor    = xfrmi_dev_free;       /* frees what xfrmi_dev_init() allocated */
+       netif_keep_dst(dev);    /* xfrmi_xmit2() needs skb_dst() on transmit */
+}
+
+static int xfrmi_dev_init(struct net_device *dev)
+{      /* ndo_init: allocate per-cpu stats and GRO cells, then inherit headroom and addresses from the underlying device. Returns 0 or a negative errno. */
+       struct xfrm_if *xi = netdev_priv(dev);
+       struct net_device *phydev = xi->phydev; /* must already be set by the creator of the device */
+       int err;
+
+       dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+       if (!dev->tstats)
+               return -ENOMEM;
+
+       err = gro_cells_init(&xi->gro_cells, dev);
+       if (err) {
+               free_percpu(dev->tstats);       /* undo the allocation above */
+               return err;
+       }
+
+       dev->features |= NETIF_F_LLTX;
+
+       dev->needed_headroom = phydev->needed_headroom;
+       dev->needed_tailroom = phydev->needed_tailroom;
+
+       if (is_zero_ether_addr(dev->dev_addr))  /* no address configured: copy the underlying device's */
+               eth_hw_addr_inherit(dev, phydev);
+       if (is_zero_ether_addr(dev->broadcast))
+               memcpy(dev->broadcast, phydev->broadcast, dev->addr_len);
+
+       return 0;
+}
+
+static int xfrmi_validate(struct nlattr *tb[], struct nlattr *data[],
+                        struct netlink_ext_ack *extack)
+{
+       return 0;       /* rtnl_link_ops.validate hook: accepts every request, no extra checks */
+}
+
+/*
+ * Fill @parms from the IFLA_XFRM_* attributes in @data.  @parms is zeroed
+ * first, so the name is left empty and any attribute that is absent (or a
+ * NULL @data) yields 0 for the corresponding field.
+ */
+static void xfrmi_netlink_parms(struct nlattr *data[],
+                              struct xfrm_if_parms *parms)
+{
+       const struct nlattr *link_attr;
+       const struct nlattr *if_id_attr;
+
+       memset(parms, 0, sizeof(*parms));
+       if (!data)
+               return;
+
+       link_attr = data[IFLA_XFRM_LINK];
+       if_id_attr = data[IFLA_XFRM_IF_ID];
+
+       if (link_attr)
+               parms->link = nla_get_u32(link_attr);
+       if (if_id_attr)
+               parms->if_id = nla_get_u32(if_id_attr);
+}
+
+/*
+ * rtnl newlink handler: configure and register @dev, the net_device handed
+ * in by the rtnetlink core for this request (its private area is a
+ * struct xfrm_if, see xfrmi_link_ops.priv_size).
+ *
+ * The previous implementation ignored @dev and had xfrmi_locate(.., 1)
+ * allocate and register a second net_device, so @dev itself was never
+ * registered.  The parameters were also parsed straight into @dev's private
+ * area and every failure was reported as -EEXIST.
+ *
+ * Returns 0 on success, -EINVAL if no interface name was supplied, -EEXIST
+ * if an interface with the requested if_id already exists in the netns,
+ * -ENODEV if IFLA_XFRM_LINK does not name a device in the netns, or the
+ * error returned by xfrmi_create2().
+ */
+static int xfrmi_newlink(struct net *src_net, struct net_device *dev,
+                       struct nlattr *tb[], struct nlattr *data[],
+                       struct netlink_ext_ack *extack)
+{
+       struct net *net = dev_net(dev);
+       struct xfrm_if_parms p;
+       struct xfrm_if *xi;
+       int err;
+
+       if (!tb[IFLA_IFNAME])
+               return -EINVAL;
+
+       xfrmi_netlink_parms(data, &p);
+
+       /* Pure lookup (create == 0): if_id must be unique within the netns. */
+       if (xfrmi_locate(net, &p, 0))
+               return -EEXIST;
+
+       xi = netdev_priv(dev);
+       xi->p = p;
+       xi->net = net;
+       xi->dev = dev;
+       xi->phydev = dev_get_by_index(net, p.link);
+       if (!xi->phydev)
+               return -ENODEV;
+
+       /* Registers @dev and copies dev->name into xi->p.name. */
+       err = xfrmi_create2(dev);
+       if (err < 0)
+               dev_put(xi->phydev);
+
+       return err;
+}
+
+static void xfrmi_dellink(struct net_device *dev, struct list_head *head)
+{
+       unregister_netdevice_queue(dev, head);  /* rtnl_link_ops.dellink hook: queue @dev on @head for unregistration */
+}
+
+/*
+ * rtnl changelink handler: update the if_id of @dev.
+ *
+ * The attributes are parsed into a local copy.  Parsing them directly into
+ * xi->p (as was done before) zeroed the live name/link/if_id while the
+ * interface was still on the RCU list, and made the link check in
+ * xfrmi_change() compare the structure with itself.
+ *
+ * Returns 0 on success, -EEXIST if another interface already uses the
+ * requested if_id, or -EINVAL from xfrmi_change() when the requested link
+ * differs from the current one.
+ */
+static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
+                          struct nlattr *data[],
+                          struct netlink_ext_ack *extack)
+{
+       struct xfrm_if *xi = netdev_priv(dev);
+       struct net *net = dev_net(dev);
+       struct xfrm_if_parms p;
+       struct xfrm_if *owner;
+
+       xfrmi_netlink_parms(data, &p);
+
+       /* Is the requested if_id already taken by a different device? */
+       owner = xfrmi_locate(net, &p, 0);
+       if (owner && owner->dev != dev)
+               return -EEXIST;
+
+       return xfrmi_update(xi, &p);
+}
+
+/* Netlink space needed by xfrmi_fill_info(): two 4-byte attributes. */
+static size_t xfrmi_get_size(const struct net_device *dev)
+{
+       const size_t link_attr = nla_total_size(4);     /* IFLA_XFRM_LINK */
+       const size_t if_id_attr = nla_total_size(4);    /* IFLA_XFRM_IF_ID */
+
+       return link_attr + if_id_attr;
+}
+
+/*
+ * Dump the interface parameters of @dev into @skb as IFLA_XFRM_LINK and
+ * IFLA_XFRM_IF_ID.  Returns 0, or -EMSGSIZE if either attribute cannot be
+ * added.
+ */
+static int xfrmi_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+       const struct xfrm_if *xi = netdev_priv(dev);
+
+       if (nla_put_u32(skb, IFLA_XFRM_LINK, xi->p.link))
+               return -EMSGSIZE;
+       if (nla_put_u32(skb, IFLA_XFRM_IF_ID, xi->p.if_id))
+               return -EMSGSIZE;
+
+       return 0;
+}
+
+/*
+ * rtnl_link_ops.get_link_net hook: the netns of the underlying device.
+ * Only referenced through xfrmi_link_ops in this file, hence static.
+ */
+static struct net *xfrmi_get_link_net(const struct net_device *dev)
+{
+       struct xfrm_if *xi = netdev_priv(dev);
+
+       return dev_net(xi->phydev);
+}
+
+static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = {     /* both attributes are read with nla_get_u32() in xfrmi_netlink_parms() */
+       [IFLA_XFRM_LINK]        = { .type = NLA_U32 },
+       [IFLA_XFRM_IF_ID]       = { .type = NLA_U32 },
+};
+
+static struct rtnl_link_ops xfrmi_link_ops __read_mostly = {
+       .kind           = "xfrm",       /* same string as MODULE_ALIAS_RTNL_LINK() at the end of the file */
+       .maxtype        = IFLA_XFRM_MAX,
+       .policy         = xfrmi_policy,
+       .priv_size      = sizeof(struct xfrm_if),       /* netdev_priv() of each device is a struct xfrm_if */
+       .setup          = xfrmi_dev_setup,
+       .validate       = xfrmi_validate,
+       .newlink        = xfrmi_newlink,
+       .dellink        = xfrmi_dellink,
+       .changelink     = xfrmi_changelink,
+       .get_size       = xfrmi_get_size,
+       .fill_info      = xfrmi_fill_info,
+       .get_link_net   = xfrmi_get_link_net,
+};
+
+/*
+ * Unregister every xfrm interface still on @xfrmn's list.  Called from
+ * xfrmi_exit_net() with RTNL held.
+ *
+ * Previously only the first list entry was unregistered, leaving any
+ * further interfaces of the netns behind.
+ */
+static void __net_exit xfrmi_destroy_interfaces(struct xfrmi_net *xfrmn)
+{
+       struct xfrm_if *xi;
+       LIST_HEAD(list);
+
+       /* Queueing does not unlink @xi (that happens in xfrmi_dev_uninit()),
+        * so following ->next while collecting the devices is safe.
+        */
+       for (xi = rtnl_dereference(xfrmn->xfrmi[0]); xi;
+            xi = rtnl_dereference(xi->next))
+               unregister_netdevice_queue(xi->dev, &list);
+
+       unregister_netdevice_many(&list);
+}
+
+static int __net_init xfrmi_init_net(struct net *net)
+{
+       return 0;       /* pernet .init hook: nothing to set up here */
+}
+
+/* pernet .exit hook: tear down the xfrm interfaces of @net under RTNL. */
+static void __net_exit xfrmi_exit_net(struct net *net)
+{
+       struct xfrmi_net *pernet = net_generic(net, xfrmi_net_id);
+
+       rtnl_lock();
+       xfrmi_destroy_interfaces(pernet);
+       rtnl_unlock();
+}
+
+static struct pernet_operations xfrmi_net_ops = {
+       .init = xfrmi_init_net,
+       .exit = xfrmi_exit_net,
+       .id   = &xfrmi_net_id,
+       .size = sizeof(struct xfrmi_net),       /* per-netns area later fetched with net_generic(net, xfrmi_net_id) */
+};
+
+static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {     /* registered for IPPROTO_ESP in xfrmi6_init() */
+       .handler        =       xfrm6_rcv,
+       .cb_handler     =       xfrmi_rcv_cb,
+       .err_handler    =       xfrmi6_err,
+       .priority       =       10,
+};
+
+static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = {      /* registered for IPPROTO_AH in xfrmi6_init() */
+       .handler        =       xfrm6_rcv,
+       .cb_handler     =       xfrmi_rcv_cb,
+       .err_handler    =       xfrmi6_err,
+       .priority       =       10,
+};
+
+static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = {  /* registered for IPPROTO_COMP in xfrmi6_init() */
+       .handler        =       xfrm6_rcv,
+       .cb_handler     =       xfrmi_rcv_cb,
+       .err_handler    =       xfrmi6_err,
+       .priority       =       10,
+};
+
+static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = {     /* registered for IPPROTO_ESP in xfrmi4_init() */
+       .handler        =       xfrm4_rcv,
+       .input_handler  =       xfrm_input,
+       .cb_handler     =       xfrmi_rcv_cb,
+       .err_handler    =       xfrmi4_err,
+       .priority       =       10,
+};
+
+static struct xfrm4_protocol xfrmi_ah4_protocol __read_mostly = {      /* registered for IPPROTO_AH in xfrmi4_init() */
+       .handler        =       xfrm4_rcv,
+       .input_handler  =       xfrm_input,
+       .cb_handler     =       xfrmi_rcv_cb,
+       .err_handler    =       xfrmi4_err,
+       .priority       =       10,
+};
+
+static struct xfrm4_protocol xfrmi_ipcomp4_protocol __read_mostly = {  /* registered for IPPROTO_COMP in xfrmi4_init() */
+       .handler        =       xfrm4_rcv,
+       .input_handler  =       xfrm_input,
+       .cb_handler     =       xfrmi_rcv_cb,
+       .err_handler    =       xfrmi4_err,
+       .priority       =       10,
+};
+
+static int __init xfrmi4_init(void)
+{      /* Register the IPv4 ESP, AH and IPcomp handlers; on failure undo the ones already registered and return the error. */
+       int err;
+
+       err = xfrm4_protocol_register(&xfrmi_esp4_protocol, IPPROTO_ESP);
+       if (err < 0)
+               goto xfrm_proto_esp_failed;
+       err = xfrm4_protocol_register(&xfrmi_ah4_protocol, IPPROTO_AH);
+       if (err < 0)
+               goto xfrm_proto_ah_failed;
+       err = xfrm4_protocol_register(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
+       if (err < 0)
+               goto xfrm_proto_comp_failed;
+
+       return 0;
+
+xfrm_proto_comp_failed:        /* labels unwind in reverse order of registration */
+       xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
+xfrm_proto_ah_failed:
+       xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
+xfrm_proto_esp_failed:
+       return err;
+}
+
+static void xfrmi4_fini(void)
+{      /* Deregister the three IPv4 handlers, in reverse order of xfrmi4_init(). */
+       xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
+       xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
+       xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
+}
+
+static int __init xfrmi6_init(void)
+{      /* Register the IPv6 ESP, AH and IPcomp handlers; on failure undo the ones already registered and return the error. */
+       int err;
+
+       err = xfrm6_protocol_register(&xfrmi_esp6_protocol, IPPROTO_ESP);
+       if (err < 0)
+               goto xfrm_proto_esp_failed;
+       err = xfrm6_protocol_register(&xfrmi_ah6_protocol, IPPROTO_AH);
+       if (err < 0)
+               goto xfrm_proto_ah_failed;
+       err = xfrm6_protocol_register(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
+       if (err < 0)
+               goto xfrm_proto_comp_failed;
+
+       return 0;
+
+xfrm_proto_comp_failed:        /* labels unwind in reverse order of registration */
+       xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
+xfrm_proto_ah_failed:
+       xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
+xfrm_proto_esp_failed:
+       return err;
+}
+
+static void xfrmi6_fini(void)
+{      /* Deregister the three IPv6 handlers, in reverse order of xfrmi6_init(). */
+       xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
+       xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
+       xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
+}
+
+static const struct xfrm_if_cb xfrm_if_cb = {  /* handed to xfrm_if_register_cb() in xfrmi_init() */
+       .decode_session =       xfrmi_decode_session,
+};
+
+static int __init xfrmi_init(void)
+{      /* Module init: register pernet ops, IPv4/IPv6 protocol handlers, the rtnl link type and finally the xfrm_if callback. */
+       const char *msg;        /* names the step in progress for the error message below */
+       int err;
+
+       pr_info("IPsec XFRM device driver\n");
+
+       msg = "tunnel device";
+       err = register_pernet_device(&xfrmi_net_ops);
+       if (err < 0)
+               goto pernet_dev_failed;
+
+       msg = "xfrm4 protocols";
+       err = xfrmi4_init();
+       if (err < 0)
+               goto xfrmi4_failed;
+
+       msg = "xfrm6 protocols";
+       err = xfrmi6_init();
+       if (err < 0)
+               goto xfrmi6_failed;
+
+
+       msg = "netlink interface";
+       err = rtnl_link_register(&xfrmi_link_ops);
+       if (err < 0)
+               goto rtnl_link_failed;
+
+       xfrm_if_register_cb(&xfrm_if_cb);
+
+       return err;     /* not negative here: the check above already passed */
+
+rtnl_link_failed:      /* each label undoes the steps that succeeded before the failing one */
+       xfrmi6_fini();
+xfrmi6_failed:
+       xfrmi4_fini();
+xfrmi4_failed:
+       unregister_pernet_device(&xfrmi_net_ops);
+pernet_dev_failed:
+       pr_err("xfrmi init: failed to register %s\n", msg);
+       return err;
+}
+
+static void __exit xfrmi_fini(void)
+{      /* Module exit: undo everything xfrmi_init() registered. */
+       xfrm_if_unregister_cb();
+       rtnl_link_unregister(&xfrmi_link_ops);
+       xfrmi4_fini();
+       xfrmi6_fini();
+       unregister_pernet_device(&xfrmi_net_ops);
+}
+
+module_init(xfrmi_init);
+module_exit(xfrmi_fini);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("xfrm");
+MODULE_ALIAS_NETDEV("xfrm0");
+MODULE_AUTHOR("Steffen Klassert");
+MODULE_DESCRIPTION("XFRM virtual interface");
index 29b1e6a83a539f40d7e18c8a2f2cbf0ff06ea001..0c753ad50a85d93e43d144ea5fe9653dfeeea010 100644 (file)
@@ -47,6 +47,9 @@ struct xfrm_flo {
 
 static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
 static struct work_struct *xfrm_pcpu_work __read_mostly;
+static DEFINE_SPINLOCK(xfrm_if_cb_lock); /* taken by xfrm_if_register_cb() */
+static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly; /* NULL while no callback is registered */
+
 static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
                                                __read_mostly;
@@ -119,6 +122,12 @@ static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short fa
        return afinfo;
 }
 
+/* Caller holds rcu_read_lock(). Returns the registered callbacks, or NULL. */
+static const struct xfrm_if_cb *xfrm_if_get_cb(void)
+{
+       return rcu_dereference(xfrm_if_cb);
+}
+
 struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
                                    const xfrm_address_t *saddr,
                                    const xfrm_address_t *daddr,
@@ -2085,6 +2094,11 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
 
        if (IS_ERR(xdst)) {
                err = PTR_ERR(xdst);
+               if (err == -EREMOTE) {
+                       xfrm_pols_put(pols, num_pols);
+                       return NULL;
+               }
+
                if (err != -EAGAIN)
                        goto error;
                goto make_dummy_bundle;
@@ -2178,6 +2192,9 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
                        if (IS_ERR(xdst)) {
                                xfrm_pols_put(pols, num_pols);
                                err = PTR_ERR(xdst);
+                               if (err == -EREMOTE)
+                                       goto nopol;
+
                                goto dropdst;
                        } else if (xdst == NULL) {
                                num_xfrms = 0;
@@ -2373,12 +2390,20 @@ int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
                          unsigned int family, int reverse)
 {
        const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+       const struct xfrm_if_cb *ifcb = xfrm_if_get_cb();
+       struct xfrm_if *xi;
        int err;
 
        if (unlikely(afinfo == NULL))
                return -EAFNOSUPPORT;
 
        afinfo->decode_session(skb, fl, reverse);
+       if (ifcb) {
+               xi = ifcb->decode_session(skb);
+               if (xi)
+                       fl->flowi_xfrm.if_id = xi->p.if_id;
+       }
+
        err = security_xfrm_decode_session(skb, &fl->flowi_secid);
        rcu_read_unlock();
        return err;
@@ -2832,6 +2857,21 @@ void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo)
 }
 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
 
+void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb)
+{
+       spin_lock(&xfrm_if_cb_lock);
+       rcu_assign_pointer(xfrm_if_cb, ifcb); /* stores the pointer itself (no copy); overwrites any earlier one */
+       spin_unlock(&xfrm_if_cb_lock);
+}
+EXPORT_SYMBOL(xfrm_if_register_cb);
+
+void xfrm_if_unregister_cb(void)
+{
+       RCU_INIT_POINTER(xfrm_if_cb, NULL); /* NOTE(review): xfrm_if_cb_lock is not taken here, unlike in register */
+       synchronize_rcu(); /* wait out readers that may still hold the old pointer */
+}
+EXPORT_SYMBOL(xfrm_if_unregister_cb);
+
 #ifdef CONFIG_XFRM_STATISTICS
 static int __net_init xfrm_statistics_init(struct net *net)
 {
@@ -3013,6 +3053,9 @@ void __init xfrm_init(void)
        register_pernet_subsys(&xfrm_net_ops);
        seqcount_init(&xfrm_policy_hash_generation);
        xfrm_input_init();
+
+       RCU_INIT_POINTER(xfrm_if_cb, NULL);
+       synchronize_rcu();
 }
 
 #ifdef CONFIG_AUDITSYSCALL