mlx4: xdp: Allow raising MTU up to one page minus eth and vlan hdrs
author: Martin KaFai Lau <kafai@fb.com>
Wed, 7 Dec 2016 23:53:12 +0000 (15:53 -0800)
committer: David S. Miller <davem@davemloft.net>
Thu, 8 Dec 2016 19:25:13 +0000 (14:25 -0500)
When XDP is active in mlx4, the driver uses one page per packet.
At the same time (i.e. when XDP is active), it currently
limits the MTU to FRAG_SZ0 - ETH_HLEN - (2 * VLAN_HLEN),
which is 1514 on x86.  AFAICT, we can at least raise the MTU
limit to PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN), which is what
this patch does.  This will be useful in the next patch, which
allows an XDP program to extend the packet by adding new header(s).

Note: The earlier XDP patches already added a guard that ensures
the page-per-packet scheme only applies when XDP is active
in mlx4.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/en_rx.c

index f441eda63beccd3e3f16578b480a8bde4861ebc5..c97d25b06444bf6546cfc5ac959263742c67dae3 100644 (file)
@@ -51,6 +51,8 @@
 #include "mlx4_en.h"
 #include "en_port.h"
 
+#define MLX4_EN_MAX_XDP_MTU ((int)(PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN)))
+
 int mlx4_en_setup_tc(struct net_device *dev, u8 up)
 {
        struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -2249,6 +2251,19 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
        free_netdev(dev);
 }
 
+static bool mlx4_en_check_xdp_mtu(struct net_device *dev, int mtu)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+
+       if (mtu > MLX4_EN_MAX_XDP_MTU) {
+               en_err(priv, "mtu:%d > max:%d when XDP prog is attached\n",
+                      mtu, MLX4_EN_MAX_XDP_MTU);
+               return false;
+       }
+
+       return true;
+}
+
 static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
 {
        struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -2258,11 +2273,10 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
        en_dbg(DRV, priv, "Change MTU called - current:%d new:%d\n",
                 dev->mtu, new_mtu);
 
-       if (priv->tx_ring_num[TX_XDP] && MLX4_EN_EFF_MTU(new_mtu) > FRAG_SZ0) {
-               en_err(priv, "MTU size:%d requires frags but XDP running\n",
-                      new_mtu);
-               return -EOPNOTSUPP;
-       }
+       if (priv->tx_ring_num[TX_XDP] &&
+           !mlx4_en_check_xdp_mtu(dev, new_mtu))
+               return -ENOTSUPP;
+
        dev->mtu = new_mtu;
 
        if (netif_running(dev)) {
@@ -2715,10 +2729,8 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
                return 0;
        }
 
-       if (priv->num_frags > 1) {
-               en_err(priv, "Cannot set XDP if MTU requires multiple frags\n");
+       if (!mlx4_en_check_xdp_mtu(dev, dev->mtu))
                return -EOPNOTSUPP;
-       }
 
        tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
        if (!tmp)
index 6562f78b07f4370b5c1ea2c5e3a4221d7ebaeba8..23e9d04d1ef41feeec0601174621d3561c546403 100644 (file)
@@ -1164,37 +1164,39 @@ static const int frag_sizes[] = {
 
 void mlx4_en_calc_rx_buf(struct net_device *dev)
 {
-       enum dma_data_direction dma_dir = PCI_DMA_FROMDEVICE;
        struct mlx4_en_priv *priv = netdev_priv(dev);
        int eff_mtu = MLX4_EN_EFF_MTU(dev->mtu);
-       int order = MLX4_EN_ALLOC_PREFER_ORDER;
-       u32 align = SMP_CACHE_BYTES;
-       int buf_size = 0;
        int i = 0;
 
        /* bpf requires buffers to be set up as 1 packet per page.
         * This only works when num_frags == 1.
         */
        if (priv->tx_ring_num[TX_XDP]) {
-               dma_dir = PCI_DMA_BIDIRECTIONAL;
-               /* This will gain efficient xdp frame recycling at the expense
-                * of more costly truesize accounting
+               priv->frag_info[0].order = 0;
+               priv->frag_info[0].frag_size = eff_mtu;
+               priv->frag_info[0].frag_prefix_size = 0;
+               /* This will gain efficient xdp frame recycling at the
+                * expense of more costly truesize accounting
                 */
-               align = PAGE_SIZE;
-               order = 0;
-       }
-
-       while (buf_size < eff_mtu) {
-               priv->frag_info[i].order = order;
-               priv->frag_info[i].frag_size =
-                       (eff_mtu > buf_size + frag_sizes[i]) ?
-                               frag_sizes[i] : eff_mtu - buf_size;
-               priv->frag_info[i].frag_prefix_size = buf_size;
-               priv->frag_info[i].frag_stride =
-                               ALIGN(priv->frag_info[i].frag_size, align);
-               priv->frag_info[i].dma_dir = dma_dir;
-               buf_size += priv->frag_info[i].frag_size;
-               i++;
+               priv->frag_info[0].frag_stride = PAGE_SIZE;
+               priv->frag_info[0].dma_dir = PCI_DMA_BIDIRECTIONAL;
+               i = 1;
+       } else {
+               int buf_size = 0;
+
+               while (buf_size < eff_mtu) {
+                       priv->frag_info[i].order = MLX4_EN_ALLOC_PREFER_ORDER;
+                       priv->frag_info[i].frag_size =
+                               (eff_mtu > buf_size + frag_sizes[i]) ?
+                                       frag_sizes[i] : eff_mtu - buf_size;
+                       priv->frag_info[i].frag_prefix_size = buf_size;
+                       priv->frag_info[i].frag_stride =
+                               ALIGN(priv->frag_info[i].frag_size,
+                                     SMP_CACHE_BYTES);
+                       priv->frag_info[i].dma_dir = PCI_DMA_FROMDEVICE;
+                       buf_size += priv->frag_info[i].frag_size;
+                       i++;
+               }
        }
 
        priv->num_frags = i;