net/mlx5e: IPSec, Add Innova IPSec offload TX data path
Author: Ilan Tayari <ilant@mellanox.com>
Tue, 18 Apr 2017 13:08:23 +0000 (16:08 +0300)
Committer: Saeed Mahameed <saeedm@mellanox.com>
Tue, 27 Jun 2017 13:36:48 +0000 (16:36 +0300)
In the TX data path, prepend a special metadata ethertype which
instructs the hardware to perform cryptography.

In addition, fill Software-Parser segment in TX descriptor so
that the hardware may parse the ESP protocol, and perform TX
checksum offload on the inner payload.

Support GSO, by providing the inverse of gso_size in the metadata.
This allows the FPGA to update the ESP header (seqno and seqiv) on the
resulting packets, by calculating the packet number within the GSO
back from the TCP sequence number.

Note that for GSO SKBs, the stack does not include an ESP trailer,
unlike the non-GSO case.

Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Yossi Kuperman <yossiku@mellanox.com>
Signed-off-by: Yevgeny Kliteynik <kliteyn@mellanox.com>
Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c

index 535ffd78a34edbe662d86e2213644256d51a2c37..e1b7ddfecd011436c1520edc93eb30e8e15221d4 100644 (file)
@@ -328,6 +328,7 @@ struct mlx5e_sq_dma {
 
 enum {
        MLX5E_SQ_STATE_ENABLED,
+       MLX5E_SQ_STATE_IPSEC,
 };
 
 struct mlx5e_sq_wqe_info {
index bb69660893eeda526dc2da2310eede337b14ac81..bac5103efad3d4e25bc9b0c8d615de387ab87916 100644 (file)
@@ -399,10 +399,26 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
        priv->ipsec = NULL;
 }
 
+static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
+{
+       if (x->props.family == AF_INET) {
+               /* Offload with IPv4 options is not supported yet */
+               if (ip_hdr(skb)->ihl > 5)
+                       return false;
+       } else {
+       /* Offload with IPv6 extension headers is not supported yet */
+               if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr))
+                       return false;
+       }
+
+       return true;
+}
+
 static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
        .xdo_dev_state_add      = mlx5e_xfrm_add_state,
        .xdo_dev_state_delete   = mlx5e_xfrm_del_state,
        .xdo_dev_state_free     = mlx5e_xfrm_free_state,
+       .xdo_dev_offload_ok     = mlx5e_ipsec_offload_ok,
 };
 
 void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
@@ -431,4 +447,15 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
 
        netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
        netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
+
+       if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_LSO) ||
+           !MLX5_CAP_ETH(mdev, swp_lso)) {
+               mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
+               return;
+       }
+
+       mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
+       netdev->features |= NETIF_F_GSO_ESP;
+       netdev->hw_features |= NETIF_F_GSO_ESP;
+       netdev->hw_enc_features |= NETIF_F_GSO_ESP;
 }
index 4d745d3dd4b149af09b0e7b6509673c2d2148db9..ffc90b3c6ac7bddc840b4385c9f4bff460bf0078 100644 (file)
@@ -50,6 +50,11 @@ struct mlx5e_ipsec_sw_stats {
        atomic64_t ipsec_rx_drop_sp_alloc;
        atomic64_t ipsec_rx_drop_sadb_miss;
        atomic64_t ipsec_rx_drop_syndrome;
+       atomic64_t ipsec_tx_drop_bundle;
+       atomic64_t ipsec_tx_drop_no_state;
+       atomic64_t ipsec_tx_drop_not_ip;
+       atomic64_t ipsec_tx_drop_trailer;
+       atomic64_t ipsec_tx_drop_metadata;
 };
 
 struct mlx5e_ipsec {
@@ -60,6 +65,7 @@ struct mlx5e_ipsec {
        struct mlx5e_ipsec_sw_stats sw_stats;
 };
 
+void mlx5e_ipsec_build_inverse_table(void);
 int mlx5e_ipsec_init(struct mlx5e_priv *priv);
 void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv);
 void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv);
@@ -69,6 +75,10 @@ struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *dev,
 
 #else
 
+static inline void mlx5e_ipsec_build_inverse_table(void)
+{
+}
+
 static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv)
 {
        return 0;
index 56ab2e80553e9e311c9f47643b90a28e81412871..4a78aefdf15779e4c50935bf1db858a0b756b9a8 100644 (file)
@@ -33,6 +33,7 @@
 
 #include <crypto/aead.h>
 #include <net/xfrm.h>
+#include <net/esp.h>
 
 #include "en_accel/ipsec_rxtx.h"
 #include "en_accel/ipsec.h"
@@ -48,17 +49,228 @@ struct mlx5e_ipsec_rx_metadata {
        __be32          sa_handle;
 } __packed;
 
+enum {
+       MLX5E_IPSEC_TX_SYNDROME_OFFLOAD = 0x8,
+       MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP = 0x9,
+};
+
+struct mlx5e_ipsec_tx_metadata {
+       __be16 mss_inv;         /* 1/MSS in 16bit fixed point, only for LSO */
+       __be16 seq;             /* LSBs of the first TCP seq, only for LSO */
+       u8     esp_next_proto;  /* Next protocol of ESP */
+} __packed;
+
 struct mlx5e_ipsec_metadata {
        unsigned char syndrome;
        union {
                unsigned char raw[5];
                /* from FPGA to host, on successful decrypt */
                struct mlx5e_ipsec_rx_metadata rx;
+               /* from host to FPGA */
+               struct mlx5e_ipsec_tx_metadata tx;
        } __packed content;
        /* packet type ID field */
        __be16 ethertype;
 } __packed;
 
+#define MAX_LSO_MSS 2048
+
+/* Pre-calculated (Q0.16) fixed-point inverse 1/x function */
+static __be16 mlx5e_ipsec_inverse_table[MAX_LSO_MSS];
+
+static inline __be16 mlx5e_ipsec_mss_inv(struct sk_buff *skb)
+{
+       return mlx5e_ipsec_inverse_table[skb_shinfo(skb)->gso_size];
+}
+
+static struct mlx5e_ipsec_metadata *mlx5e_ipsec_add_metadata(struct sk_buff *skb)
+{
+       struct mlx5e_ipsec_metadata *mdata;
+       struct ethhdr *eth;
+
+       if (unlikely(skb_cow_head(skb, sizeof(*mdata))))
+               return ERR_PTR(-ENOMEM);
+
+       eth = (struct ethhdr *)skb_push(skb, sizeof(*mdata));
+       skb->mac_header -= sizeof(*mdata);
+       mdata = (struct mlx5e_ipsec_metadata *)(eth + 1);
+
+       memmove(skb->data, skb->data + sizeof(*mdata),
+               2 * ETH_ALEN);
+
+       eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE);
+
+       memset(mdata->content.raw, 0, sizeof(mdata->content.raw));
+       return mdata;
+}
+
+static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x)
+{
+       unsigned int alen = crypto_aead_authsize(x->data);
+       struct ipv6hdr *ipv6hdr = ipv6_hdr(skb);
+       struct iphdr *ipv4hdr = ip_hdr(skb);
+       unsigned int trailer_len;
+       u8 plen;
+       int ret;
+
+       ret = skb_copy_bits(skb, skb->len - alen - 2, &plen, 1);
+       if (unlikely(ret))
+               return ret;
+
+       trailer_len = alen + plen + 2;
+
+       pskb_trim(skb, skb->len - trailer_len);
+       if (skb->protocol == htons(ETH_P_IP)) {
+               ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len);
+               ip_send_check(ipv4hdr);
+       } else {
+               ipv6hdr->payload_len = htons(ntohs(ipv6hdr->payload_len) -
+                                            trailer_len);
+       }
+       return 0;
+}
+
+static void mlx5e_ipsec_set_swp(struct sk_buff *skb,
+                               struct mlx5_wqe_eth_seg *eseg, u8 mode,
+                               struct xfrm_offload *xo)
+{
+       u8 proto;
+
+       /* Tunnel Mode:
+        * SWP:      OutL3       InL3  InL4
+        * Pkt: MAC  IP     ESP  IP    L4
+        *
+        * Transport Mode:
+        * SWP:      OutL3       InL4
+        *           InL3
+        * Pkt: MAC  IP     ESP  L4
+        *
+        * Offsets are in 2-byte words, counting from start of frame
+        */
+       eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2;
+       if (skb->protocol == htons(ETH_P_IPV6))
+               eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6;
+
+       if (mode == XFRM_MODE_TUNNEL) {
+               eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+               if (xo->proto == IPPROTO_IPV6) {
+                       eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+                       proto = inner_ipv6_hdr(skb)->nexthdr;
+               } else {
+                       proto = inner_ip_hdr(skb)->protocol;
+               }
+       } else {
+               eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2;
+               if (skb->protocol == htons(ETH_P_IPV6))
+                       eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+               proto = xo->proto;
+       }
+       switch (proto) {
+       case IPPROTO_UDP:
+               eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+               /* Fall through */
+       case IPPROTO_TCP:
+               eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+               break;
+       }
+}
+
+static void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_offload *xo)
+{
+       int iv_offset;
+       __be64 seqno;
+
+       /* Place the SN in the IV field */
+       seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
+       iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr);
+       skb_store_bits(skb, iv_offset, &seqno, 8);
+}
+
+static void mlx5e_ipsec_set_metadata(struct sk_buff *skb,
+                                    struct mlx5e_ipsec_metadata *mdata,
+                                    struct xfrm_offload *xo)
+{
+       struct ip_esp_hdr *esph;
+       struct tcphdr *tcph;
+
+       if (skb_is_gso(skb)) {
+               /* Add LSO metadata indication */
+               esph = ip_esp_hdr(skb);
+               tcph = inner_tcp_hdr(skb);
+               netdev_dbg(skb->dev, "   Offloading GSO packet outer L3 %u; L4 %u; Inner L3 %u; L4 %u\n",
+                          skb->network_header,
+                          skb->transport_header,
+                          skb->inner_network_header,
+                          skb->inner_transport_header);
+               netdev_dbg(skb->dev, "   Offloading GSO packet of len %u; mss %u; TCP sp %u dp %u seq 0x%x ESP seq 0x%x\n",
+                          skb->len, skb_shinfo(skb)->gso_size,
+                          ntohs(tcph->source), ntohs(tcph->dest),
+                          ntohl(tcph->seq), ntohl(esph->seq_no));
+               mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP;
+               mdata->content.tx.mss_inv = mlx5e_ipsec_mss_inv(skb);
+               mdata->content.tx.seq = htons(ntohl(tcph->seq) & 0xFFFF);
+       } else {
+               mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD;
+       }
+       mdata->content.tx.esp_next_proto = xo->proto;
+
+       netdev_dbg(skb->dev, "   TX metadata syndrome %u proto %u mss_inv %04x seq %04x\n",
+                  mdata->syndrome, mdata->content.tx.esp_next_proto,
+                  ntohs(mdata->content.tx.mss_inv),
+                  ntohs(mdata->content.tx.seq));
+}
+
+struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+                                         struct mlx5e_tx_wqe *wqe,
+                                         struct sk_buff *skb)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct xfrm_offload *xo = xfrm_offload(skb);
+       struct mlx5e_ipsec_metadata *mdata;
+       struct xfrm_state *x;
+
+       if (!xo)
+               return skb;
+
+       if (unlikely(skb->sp->len != 1)) {
+               atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle);
+               goto drop;
+       }
+
+       x = xfrm_input_state(skb);
+       if (unlikely(!x)) {
+               atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_no_state);
+               goto drop;
+       }
+
+       if (unlikely(!x->xso.offload_handle ||
+                    (skb->protocol != htons(ETH_P_IP) &&
+                     skb->protocol != htons(ETH_P_IPV6)))) {
+               atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_not_ip);
+               goto drop;
+       }
+
+       if (!skb_is_gso(skb))
+               if (unlikely(mlx5e_ipsec_remove_trailer(skb, x))) {
+                       atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_trailer);
+                       goto drop;
+               }
+       mdata = mlx5e_ipsec_add_metadata(skb);
+       if (unlikely(IS_ERR(mdata))) {
+               atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata);
+               goto drop;
+       }
+       mlx5e_ipsec_set_swp(skb, &wqe->eth, x->props.mode, xo);
+       mlx5e_ipsec_set_iv(skb, xo);
+       mlx5e_ipsec_set_metadata(skb, mdata, xo);
+
+       return skb;
+
+drop:
+       kfree_skb(skb);
+       return NULL;
+}
+
 static inline struct xfrm_state *
 mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
                     struct mlx5e_ipsec_metadata *mdata)
@@ -133,3 +345,34 @@ struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
 
        return skb;
 }
+
+bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
+                              netdev_features_t features)
+{
+       struct xfrm_state *x;
+
+       if (skb->sp && skb->sp->len) {
+               x = skb->sp->xvec[0];
+               if (x && x->xso.offload_handle)
+                       return true;
+       }
+       return false;
+}
+
+void mlx5e_ipsec_build_inverse_table(void)
+{
+       u16 mss_inv;
+       u32 mss;
+
+       /* Calculate 1/x inverse table for use in GSO data path.
+        * Using this table, we provide the IPSec accelerator with the value of
+        * 1/gso_size so that it can infer the position of each segment inside
+        * the GSO, and increment the ESP sequence number, and generate the IV.
+        * The HW needs this value in Q0.16 fixed-point number format
+        */
+       mlx5e_ipsec_inverse_table[1] = htons(0xFFFF);
+       for (mss = 2; mss < MAX_LSO_MSS; mss++) {
+               mss_inv = ((1ULL << 32) / mss) >> 16;
+               mlx5e_ipsec_inverse_table[mss] = htons(mss_inv);
+       }
+}
index 4e2fab8c5d7479bf295c0d436d9a7fec29747057..e37ae2598dbbec33826c759a24f4484921468825 100644 (file)
@@ -34,6 +34,8 @@
 #ifndef __MLX5E_IPSEC_RXTX_H__
 #define __MLX5E_IPSEC_RXTX_H__
 
+#ifdef CONFIG_MLX5_EN_IPSEC
+
 #include <linux/skbuff.h>
 #include "en.h"
 
@@ -41,4 +43,13 @@ struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
                                          struct sk_buff *skb);
 void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 
-#endif /* __MLX5E_IPSEC_RXTX_H__ */
+void mlx5e_ipsec_build_inverse_table(void);
+bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
+                              netdev_features_t features);
+struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+                                         struct mlx5e_tx_wqe *wqe,
+                                         struct sk_buff *skb);
+
+#endif /* CONFIG_MLX5_EN_IPSEC */
+
+#endif /* __MLX5E_IPSEC_RXTX_H__ */
index 170a9378d1f7e15da9c98080a3c95c3eff88d661..a037bd7edb46f23dd7978e5cfe60f7177e08aa25 100644 (file)
@@ -1114,6 +1114,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
        sq->uar_map   = mdev->mlx5e_res.bfreg.map;
        sq->max_inline      = params->tx_max_inline;
        sq->min_inline_mode = params->tx_min_inline_mode;
+       if (MLX5_IPSEC_DEV(c->priv->mdev))
+               set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
 
        param->wq.db_numa_node = cpu_to_node(c->cpu);
        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
@@ -1933,6 +1935,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv,
 
        mlx5e_build_sq_param_common(priv, param);
        MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+       MLX5_SET(sqc, sqc, allow_swp, !!MLX5_IPSEC_DEV(priv->mdev));
 }
 
 static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
@@ -3527,6 +3530,11 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
        features = vlan_features_check(skb, features);
        features = vxlan_features_check(skb, features);
 
+#ifdef CONFIG_MLX5_EN_IPSEC
+       if (mlx5e_ipsec_feature_check(skb, netdev, features))
+               return features;
+#endif
+
        /* Validate if the tunneled packet is being offloaded by HW */
        if (skb->encapsulation &&
            (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK))
@@ -4505,6 +4513,7 @@ static struct mlx5_interface mlx5e_interface = {
 
 void mlx5e_init(void)
 {
+       mlx5e_ipsec_build_inverse_table();
        mlx5e_build_ptys2ethtool_map();
        mlx5_register_interface(&mlx5e_interface);
 }
index ccec3b00e17c70383475126c1a115fa98b4d8a5f..aaa0f4ebba9aee5229cfd7cd22088bfc3ac3027c 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/if_vlan.h>
 #include "en.h"
 #include "ipoib/ipoib.h"
+#include "en_accel/ipsec_rxtx.h"
 
 #define MLX5E_SQ_NOPS_ROOM  MLX5_SEND_WQE_MAX_WQEBBS
 #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
@@ -299,12 +300,9 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
        }
 }
 
-static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb)
+static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+                                struct mlx5e_tx_wqe *wqe, u16 pi)
 {
-       struct mlx5_wq_cyc       *wq   = &sq->wq;
-
-       u16 pi = sq->pc & wq->sz_m1;
-       struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
        struct mlx5e_tx_wqe_info *wi   = &sq->db.wqe_info[pi];
 
        struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
@@ -319,8 +317,6 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb)
        u16 ds_cnt;
        u16 ihs;
 
-       memset(wqe, 0, sizeof(*wqe));
-
        mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
 
        if (skb_is_gso(skb)) {
@@ -375,8 +371,21 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
        struct mlx5e_txqsq *sq = priv->txq2sq[skb_get_queue_mapping(skb)];
+       struct mlx5_wq_cyc *wq = &sq->wq;
+       u16 pi = sq->pc & wq->sz_m1;
+       struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+
+       memset(wqe, 0, sizeof(*wqe));
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+       if (sq->state & BIT(MLX5E_SQ_STATE_IPSEC)) {
+               skb = mlx5e_ipsec_handle_tx_skb(dev, wqe, skb);
+               if (unlikely(!skb))
+                       return NETDEV_TX_OK;
+       }
+#endif
 
-       return mlx5e_sq_xmit(sq, skb);
+       return mlx5e_sq_xmit(sq, skb, wqe, pi);
 }
 
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)