Add support for the BPF_PROG_TYPE_XDP hook in mlx4 driver.
In tc/socket bpf programs, helpers linearize skb fragments as needed
when the program touches the packet data. However, in the pursuit of
speed, XDP programs will not be allowed to use these slower functions,
especially if it involves allocating an skb.
Therefore, disallow MTU settings that would produce a multi-fragment
packet that XDP programs would fail to access. Future enhancements could
be done to increase the allowable MTU.
The xdp program is present as a per-ring data structure, but as of yet
it is not possible to set at that granularity through any ndo.
Signed-off-by: Brenden Blanco <bblanco@plumgrid.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
*
*/
+#include <linux/bpf.h>
#include <linux/etherdevice.h>
#include <linux/tcp.h>
#include <linux/if_vlan.h>
en_err(priv, "Bad MTU size:%d.\n", new_mtu);
return -EPERM;
}
+ if (priv->xdp_ring_num && MLX4_EN_EFF_MTU(new_mtu) > FRAG_SZ0) {
+ en_err(priv, "MTU size:%d requires frags but XDP running\n",
+ new_mtu);
+ return -EOPNOTSUPP;
+ }
dev->mtu = new_mtu;
if (netif_running(dev)) {
return err;
}
+static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct bpf_prog *old_prog;
+ int xdp_ring_num;
+ int i;
+
+ xdp_ring_num = prog ? ALIGN(priv->rx_ring_num, MLX4_EN_NUM_UP) : 0;
+
+ if (priv->num_frags > 1) {
+ en_err(priv, "Cannot set XDP if MTU requires multiple frags\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (prog) {
+ prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+ }
+
+ priv->xdp_ring_num = xdp_ring_num;
+
+ /* This xchg is paired with READ_ONCE in the fast path */
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+ }
+
+ return 0;
+}
+
+static bool mlx4_xdp_attached(struct net_device *dev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ return !!priv->xdp_ring_num;
+}
+
+static int mlx4_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+{
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return mlx4_xdp_set(dev, xdp->prog);
+ case XDP_QUERY_PROG:
+ xdp->prog_attached = mlx4_xdp_attached(dev);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static const struct net_device_ops mlx4_netdev_ops = {
.ndo_open = mlx4_en_open,
.ndo_stop = mlx4_en_close,
.ndo_udp_tunnel_del = mlx4_en_del_vxlan_port,
.ndo_features_check = mlx4_en_features_check,
.ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate,
+ .ndo_xdp = mlx4_xdp,
};
static const struct net_device_ops mlx4_netdev_ops_master = {
.ndo_udp_tunnel_del = mlx4_en_del_vxlan_port,
.ndo_features_check = mlx4_en_features_check,
.ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate,
+ .ndo_xdp = mlx4_xdp,
};
struct mlx4_en_bond {
*/
#include <net/busy_poll.h>
+#include <linux/bpf.h>
#include <linux/mlx4/cq.h>
#include <linux/slab.h>
#include <linux/mlx4/qp.h>
struct mlx4_en_dev *mdev = priv->mdev;
struct mlx4_en_rx_ring *ring = *pring;
+ if (ring->xdp_prog)
+ bpf_prog_put(ring->xdp_prog);
mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
vfree(ring->rx_info);
ring->rx_info = NULL;
struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring];
struct mlx4_en_rx_alloc *frags;
struct mlx4_en_rx_desc *rx_desc;
+ struct bpf_prog *xdp_prog;
struct sk_buff *skb;
int index;
int nr;
if (budget <= 0)
return polled;
+ xdp_prog = READ_ONCE(ring->xdp_prog);
+
/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
* descriptor offset can be deduced from the CQE index instead of
* reading 'cqe->index' */
l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
(cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));
+ /* A bpf program gets first chance to drop the packet. It may
+ * read bytes but not past the end of the frag.
+ */
+ if (xdp_prog) {
+ struct xdp_buff xdp;
+ dma_addr_t dma;
+ u32 act;
+
+ dma = be64_to_cpu(rx_desc->data[0].addr);
+ dma_sync_single_for_cpu(priv->ddev, dma,
+ priv->frag_info[0].frag_size,
+ DMA_FROM_DEVICE);
+
+ xdp.data = page_address(frags[0].page) +
+ frags[0].page_offset;
+ xdp.data_end = xdp.data + length;
+
+ act = bpf_prog_run_xdp(xdp_prog, &xdp);
+ switch (act) {
+ case XDP_PASS:
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ case XDP_ABORTED:
+ case XDP_DROP:
+ goto next;
+ }
+ }
+
if (likely(dev->features & NETIF_F_RXCSUM)) {
if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
MLX4_CQE_STATUS_UDP)) {
void mlx4_en_calc_rx_buf(struct net_device *dev)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
- /* VLAN_HLEN is added twice,to support skb vlan tagged with multiple
- * headers. (For example: ETH_P_8021Q and ETH_P_8021AD).
- */
- int eff_mtu = dev->mtu + ETH_HLEN + (2 * VLAN_HLEN);
+ int eff_mtu = MLX4_EN_EFF_MTU(dev->mtu);
int buf_size = 0;
int i = 0;
#define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN)
#define MLX4_EN_MIN_MTU 46
+/* VLAN_HLEN is added twice,to support skb vlan tagged with multiple
+ * headers. (For example: ETH_P_8021Q and ETH_P_8021AD).
+ */
+#define MLX4_EN_EFF_MTU(mtu) ((mtu) + ETH_HLEN + (2 * VLAN_HLEN))
#define ETH_BCAST 0xffffffffffffULL
#define MLX4_EN_LOOPBACK_RETRIES 5
u8 fcs_del;
void *buf;
void *rx_info;
+ struct bpf_prog *xdp_prog;
unsigned long bytes;
unsigned long packets;
unsigned long csum_ok;
struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS];
u16 num_frags;
u16 log_rx_info;
+ int xdp_ring_num;
struct mlx4_en_tx_ring **tx_ring;
struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS];