/* Attach, swap or detach (prog == NULL) the XDP program on every rx ring.
 *
 * Each ring stores its own pointer to the program and drops one reference
 * with bpf_prog_put() when that pointer is replaced, so rx_ring_num - 1
 * references are taken here on top of the one the caller is assumed to
 * already hold.
 *
 * When the required number of XDP rings does not change, the program
 * pointers are simply exchanged. Otherwise the port is stopped (if it was
 * up), the ring count and programs are updated under state_lock, and the
 * port is started again.
 *
 * Returns 0 on success, -EOPNOTSUPP when the rx buffers use more than one
 * frag, or the error encoded in the pointer returned by bpf_prog_add().
 * A failure to restart the port is logged and handed to the watchdog task;
 * it is not reported to the caller.
 */
static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
	struct bpf_prog *old_prog;
	int xdp_ring_num;
+	int port_up = 0;
+	int err;
	int i;
	xdp_ring_num = prog ? ALIGN(priv->rx_ring_num, MLX4_EN_NUM_UP) : 0;
+	/* No need to reconfigure buffers when simply swapping the
+	 * program for a new one.
+	 */
+	if (priv->xdp_ring_num == xdp_ring_num) {
+		if (prog) {
+			prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
+			if (IS_ERR(prog))
+				return PTR_ERR(prog);
+		}
+		for (i = 0; i < priv->rx_ring_num; i++) {
+			/* This xchg is paired with READ_ONCE in the fastpath */
+			old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog);
+			if (old_prog)
+				bpf_prog_put(old_prog);
+		}
+		return 0;
+	}
+
	if (priv->num_frags > 1) {
		en_err(priv, "Cannot set XDP if MTU requires multiple frags\n");
		return -EOPNOTSUPP;
	}
	/* The loop below installs prog in every rx ring, exactly like the
	 * fast path above, so it needs the same per-ring references. Take
	 * them before the port is stopped so a failure changes nothing.
	 */
	if (prog) {
		prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
		if (IS_ERR(prog))
			return PTR_ERR(prog);
	}
+	mutex_lock(&mdev->state_lock);
+	if (priv->port_up) {
+		port_up = 1;
+		mlx4_en_stop_port(dev, 1);
+	}
+
	priv->xdp_ring_num = xdp_ring_num;
-	/* This xchg is paired with READ_ONCE in the fast path */
	for (i = 0; i < priv->rx_ring_num; i++) {
		old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog);
		if (old_prog)
			bpf_prog_put(old_prog);
	}
+	if (port_up) {
+		err = mlx4_en_start_port(dev);
+		if (err) {
+			en_err(priv, "Failed starting port %d for XDP change\n",
+			       priv->port);
+			/* Let the watchdog task deal with the dead port. */
+			queue_work(mdev->workqueue, &priv->watchdog_task);
+		}
+	}
+
+	mutex_unlock(&mdev->state_lock);
	return 0;
}
struct page *page;
dma_addr_t dma;
- for (order = MLX4_EN_ALLOC_PREFER_ORDER; ;) {
+ for (order = frag_info->order; ;) {
gfp_t gfp = _gfp;
if (order)
return -ENOMEM;
}
dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order,
- PCI_DMA_FROMDEVICE);
+ frag_info->dma_dir);
if (dma_mapping_error(priv->ddev, dma)) {
put_page(page);
return -ENOMEM;
while (i--) {
if (page_alloc[i].page != ring_alloc[i].page) {
dma_unmap_page(priv->ddev, page_alloc[i].dma,
- page_alloc[i].page_size, PCI_DMA_FROMDEVICE);
+ page_alloc[i].page_size,
+ priv->frag_info[i].dma_dir);
page = page_alloc[i].page;
/* Revert changes done by mlx4_alloc_pages */
page_ref_sub(page, page_alloc[i].page_size /
if (next_frag_end > frags[i].page_size)
dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size,
- PCI_DMA_FROMDEVICE);
+ frag_info->dma_dir);
if (frags[i].page)
put_page(frags[i].page);
page_alloc = &ring->page_alloc[i];
dma_unmap_page(priv->ddev, page_alloc->dma,
- page_alloc->page_size, PCI_DMA_FROMDEVICE);
+ page_alloc->page_size,
+ priv->frag_info[i].dma_dir);
page = page_alloc->page;
/* Revert changes done by mlx4_alloc_pages */
page_ref_sub(page, page_alloc->page_size /
i, page_count(page_alloc->page));
dma_unmap_page(priv->ddev, page_alloc->dma,
- page_alloc->page_size, PCI_DMA_FROMDEVICE);
+ page_alloc->page_size, frag_info->dma_dir);
while (page_alloc->page_offset + frag_info->frag_stride <
page_alloc->page_size) {
put_page(page_alloc->page);
struct mlx4_en_rx_alloc *frags = ring->rx_info +
(index << priv->log_rx_info);
+ if (ring->page_cache.index > 0) {
+ frags[0] = ring->page_cache.buf[--ring->page_cache.index];
+ rx_desc->data[0].addr = cpu_to_be64(frags[0].dma);
+ return 0;
+ }
+
return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc, gfp);
}
}
}
+/* Try to park a released rx frame in the ring's page cache.
+ *
+ * In page-per-packet mode a frame that is no longer needed can be stashed
+ * here rather than being unmapped and handed back to the page allocator.
+ * In bpf prog performance scenarios buffers are forwarded or dropped and
+ * never turned into skbs, so every page can be served straight from this
+ * cache as long as its size is a multiple of the napi budget.
+ *
+ * Returns true when the frame was stored, false when the cache already
+ * holds MLX4_EN_CACHE_SIZE entries (the frame is then left untouched).
+ */
+bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring,
+			struct mlx4_en_rx_alloc *frame)
+{
+	struct mlx4_en_page_cache *pc = &ring->page_cache;
+	bool has_room = pc->index < MLX4_EN_CACHE_SIZE;
+
+	if (has_room) {
+		/* Copy the descriptor by value into the next free slot. */
+		pc->buf[pc->index] = *frame;
+		pc->index++;
+	}
+
+	return has_room;
+}
+
void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring **pring,
u32 size, u16 stride)
/* Deactivate an rx ring: release every frame held in the ring's page cache,
 * then free the ring's rx buffers.
 */
void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring *ring)
{
+ int i;
+ /* Each cached frame is DMA-unmapped and its page reference dropped
+  * here, before the rx buffers themselves are freed below.
+  */
+ for (i = 0; i < ring->page_cache.index; i++) {
+ struct mlx4_en_rx_alloc *frame = &ring->page_cache.buf[i];
+
+ dma_unmap_page(priv->ddev, frame->dma, frame->page_size,
+ priv->frag_info[0].dma_dir); /* unmapped with frag 0's DMA direction */
+ put_page(frame->page);
+ }
+ ring->page_cache.index = 0; /* mark the cache empty */
mlx4_en_free_rx_buf(priv, ring);
if (ring->stride <= TXBB_SIZE)
ring->buf -= TXBB_SIZE; /* NOTE(review): presumably undoes an offset applied at activation - confirm */
bpf_warn_invalid_xdp_action(act);
case XDP_ABORTED:
case XDP_DROP:
+ if (mlx4_en_rx_recycle(ring, frags))
+ goto consumed;
goto next;
}
}
for (nr = 0; nr < priv->num_frags; nr++)
mlx4_en_free_frag(priv, frags, nr);
+consumed:
++cq->mcq.cons_index;
index = (cq->mcq.cons_index) & ring->size_mask;
cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;
void mlx4_en_calc_rx_buf(struct net_device *dev)
{
+ enum dma_data_direction dma_dir = PCI_DMA_FROMDEVICE;
struct mlx4_en_priv *priv = netdev_priv(dev);
int eff_mtu = MLX4_EN_EFF_MTU(dev->mtu);
+ int order = MLX4_EN_ALLOC_PREFER_ORDER;
+ u32 align = SMP_CACHE_BYTES;
int buf_size = 0;
int i = 0;
+ /* bpf requires buffers to be set up as 1 packet per page.
+ * This only works when num_frags == 1.
+ */
+ if (priv->xdp_ring_num) {
+ /* This will gain efficient xdp frame recycling at the expense
+ * of more costly truesize accounting
+ */
+ align = PAGE_SIZE;
+ order = 0;
+ }
+
while (buf_size < eff_mtu) {
+ priv->frag_info[i].order = order;
priv->frag_info[i].frag_size =
(eff_mtu > buf_size + frag_sizes[i]) ?
frag_sizes[i] : eff_mtu - buf_size;
priv->frag_info[i].frag_prefix_size = buf_size;
priv->frag_info[i].frag_stride =
- ALIGN(priv->frag_info[i].frag_size,
- SMP_CACHE_BYTES);
+ ALIGN(priv->frag_info[i].frag_size, align);
+ priv->frag_info[i].dma_dir = dma_dir;
buf_size += priv->frag_info[i].frag_size;
i++;
}