s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
+ s->rx_page_reuse += rq_stats->page_reuse;
s->rx_cache_reuse += rq_stats->cache_reuse;
s->rx_cache_full += rq_stats->cache_full;
s->rx_cache_empty += rq_stats->cache_empty;
void *rqc = rqp->rqc;
void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
u32 byte_count;
- u32 frag_sz;
int npages;
int wq_sz;
int err;
goto err_destroy_umr_mkey;
break;
default: /* MLX5_WQ_TYPE_LINKED_LIST */
- rq->dma_info = kzalloc_node(wq_sz * sizeof(*rq->dma_info),
- GFP_KERNEL, cpu_to_node(c->cpu));
- if (!rq->dma_info) {
+ rq->wqe.frag_info =
+ kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info),
+ GFP_KERNEL, cpu_to_node(c->cpu));
+ if (!rq->wqe.frag_info) {
err = -ENOMEM;
goto err_rq_wq_destroy;
}
rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe;
if (!rq->handle_rx_cqe) {
- kfree(rq->dma_info);
+ kfree(rq->wqe.frag_info);
err = -EINVAL;
netdev_err(c->netdev, "RX handler of RQ is not set, err %d\n", err);
goto err_rq_wq_destroy;
rq->buff.wqe_sz = params->lro_en ?
params->lro_wqe_sz :
MLX5E_SW2HW_MTU(c->priv, c->netdev->mtu);
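+ /* per-WQE page-reuse is enabled only when both XDP and LRO are off */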
+ rq->wqe.page_reuse = !params->xdp_prog && !params->lro_en;
byte_count = rq->buff.wqe_sz;
/* calc the required page order */
- frag_sz = MLX5_SKB_FRAG_SZ(rq->rx_headroom + byte_count);
- npages = DIV_ROUND_UP(frag_sz, PAGE_SIZE);
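+ /* keep the fragment size on the RQ; the page-reuse check compares it against the space left in the page */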
+ rq->wqe.frag_sz = MLX5_SKB_FRAG_SZ(rq->rx_headroom + byte_count);
+ npages = DIV_ROUND_UP(rq->wqe.frag_sz, PAGE_SIZE);
rq->buff.page_order = order_base_2(npages);
byte_count |= MLX5_HW_START_PADDING;
mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey);
break;
default: /* MLX5_WQ_TYPE_LINKED_LIST */
- kfree(rq->dma_info);
+ kfree(rq->wqe.frag_info);
}
for (i = rq->page_cache.head; i != rq->page_cache.tail;
mlx5_wq_ll_pop(&rq->wq, wqe_ix_be,
&wqe->next.next_wqe_index);
}
+
+ if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST && rq->wqe.page_reuse) {
+ /* Clean up outstanding pages on handled WQEs that opted for page-reuse
+ * but have not yet been re-posted.
+ */
+ int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
+
+ for (wqe_ix = 0; wqe_ix < wq_sz; wqe_ix++)
+ rq->dealloc_wqe(rq, wqe_ix);
+ }
}
static int mlx5e_open_rq(struct mlx5e_channel *c,
#define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT)
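+/* A page is treated as reserved (not reusable) if it came from the
+ * pfmemalloc reserves or from a remote NUMA node.
+ */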
+static inline bool mlx5e_page_is_reserved(struct page *page)
+{
+ return page_is_pfmemalloc(page) || page_to_nid(page) != numa_node_id();
+}
+
static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
struct mlx5e_dma_info *dma_info)
{
put_page(dma_info->page);
}
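+/* Reuse a fragment's page only if page-reuse is enabled, a page is mapped,
+ * the next fragment still fits in it, and the page is not reserved.
+ */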
+static inline bool mlx5e_page_reuse(struct mlx5e_rq *rq,
+ struct mlx5e_wqe_frag_info *wi)
+{
+ return rq->wqe.page_reuse && wi->di.page &&
+ (wi->offset + rq->wqe.frag_sz <= RQ_PAGE_SIZE(rq)) &&
+ !mlx5e_page_is_reserved(wi->di.page);
+}
+
int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
{
- struct mlx5e_dma_info *di = &rq->dma_info[ix];
+ struct mlx5e_wqe_frag_info *wi = &rq->wqe.frag_info[ix];
- if (unlikely(mlx5e_page_alloc_mapped(rq, di)))
- return -ENOMEM;
+ /* a page already mapped here can be reused; otherwise allocate one */
+ if (!wi->di.page) {
+ if (unlikely(mlx5e_page_alloc_mapped(rq, &wi->di)))
+ return -ENOMEM;
+ wi->offset = 0;
+ }
- wqe->data.addr = cpu_to_be64(di->addr + rq->rx_headroom);
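+ /* point the WQE at the current fragment within the (possibly reused) page */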
+ wqe->data.addr = cpu_to_be64(wi->di.addr + wi->offset +
+ rq->rx_headroom);
return 0;
}
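+/* Release the WQE's page and clear the slot so the next allocation maps a fresh page. */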
+static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq,
+ struct mlx5e_wqe_frag_info *wi)
+{
+ mlx5e_page_release(rq, &wi->di, true);
+ wi->di.page = NULL;
+}
+
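+/* Keep the page mapped for reuse when it still qualifies; otherwise release it. */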
+static inline void mlx5e_free_rx_wqe_reuse(struct mlx5e_rq *rq,
+ struct mlx5e_wqe_frag_info *wi)
+{
+ if (mlx5e_page_reuse(rq, wi)) {
+ rq->stats.page_reuse++;
+ return;
+ }
+
+ mlx5e_free_rx_wqe(rq, wi);
+}
+
void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
{
- struct mlx5e_dma_info *di = &rq->dma_info[ix];
+ struct mlx5e_wqe_frag_info *wi = &rq->wqe.frag_info[ix];
- mlx5e_page_release(rq, di, true);
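+ /* with page-reuse, a slot may hold no page; release only if one is present */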
+ if (wi->di.page)
+ mlx5e_free_rx_wqe(rq, wi);
}
static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq)
if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE ||
MLX5E_SW2HW_MTU(rq->channel->priv, rq->netdev->mtu) < dma_len)) {
rq->stats.xdp_drop++;
- mlx5e_page_release(rq, di, true);
return false;
}
sq->db.doorbell = false;
}
rq->stats.xdp_tx_full++;
- mlx5e_page_release(rq, di, true);
return false;
}
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
+ /* move the page reference to the SQ's responsibility,
+ * and mark it so it is not put back in the page-cache.
+ */
+ rq->wqe.xdp_xmit = true;
sq->db.di[pi] = *di;
sq->pc++;
sq->db.doorbell = true;
+
rq->stats.xdp_tx++;
return true;
}
trace_xdp_exception(rq->netdev, prog, act);
case XDP_DROP:
rq->stats.xdp_drop++;
- mlx5e_page_release(rq, di, true);
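+ /* the page is now released by the RX handler via mlx5e_free_rx_wqe */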
return true;
}
}
static inline
struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
- u16 wqe_counter, u32 cqe_bcnt)
+ struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt)
{
- struct mlx5e_dma_info *di;
+ struct mlx5e_dma_info *di = &wi->di;
struct sk_buff *skb;
void *va, *data;
u16 rx_headroom = rq->rx_headroom;
bool consumed;
u32 frag_size;
- di = &rq->dma_info[wqe_counter];
- va = page_address(di->page);
+ va = page_address(di->page) + wi->offset;
data = va + rx_headroom;
+ frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
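+ /* sync only this packet's fragment, starting at its offset within the page */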
dma_sync_single_range_for_cpu(rq->pdev,
- di->addr,
- rx_headroom,
- rq->buff.wqe_sz,
+ di->addr + wi->offset,
+ 0, frag_size,
DMA_FROM_DEVICE);
prefetch(data);
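+ /* consume this fragment: advance the offset so the rest of the page can be reused */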
+ wi->offset += frag_size;
if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
rq->stats.wqe_err++;
- mlx5e_page_release(rq, di, true);
return NULL;
}
if (consumed)
return NULL; /* page/packet was consumed by XDP */
- frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
skb = build_skb(va, frag_size);
if (unlikely(!skb)) {
rq->stats.buff_alloc_err++;
- mlx5e_page_release(rq, di, true);
return NULL;
}
- /* queue up for recycling ..*/
+ /* queue up for recycling/reuse */
page_ref_inc(di->page);
- mlx5e_page_release(rq, di, true);
skb_reserve(skb, rx_headroom);
skb_put(skb, cqe_bcnt);
void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
+ struct mlx5e_wqe_frag_info *wi;
struct mlx5e_rx_wqe *wqe;
__be16 wqe_counter_be;
struct sk_buff *skb;
wqe_counter_be = cqe->wqe_counter;
wqe_counter = be16_to_cpu(wqe_counter_be);
wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
+ wi = &rq->wqe.frag_info[wqe_counter];
cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
- skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt);
- if (!skb)
+ skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt);
+ if (!skb) {
+ /* no skb was built, most likely because XDP consumed the packet */
+ if (rq->wqe.xdp_xmit) {
+ wi->di.page = NULL;
+ rq->wqe.xdp_xmit = false;
+ /* do not return the page to the cache; it will be returned on XDP_TX completion */
+ goto wq_ll_pop;
+ }
+ /* likely an XDP_DROP; skip the page-reuse checks */
+ mlx5e_free_rx_wqe(rq, wi);
goto wq_ll_pop;
+ }
mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
napi_gro_receive(rq->cq.napi, skb);
+ mlx5e_free_rx_wqe_reuse(rq, wi);
wq_ll_pop:
mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
&wqe->next.next_wqe_index);
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5e_wqe_frag_info *wi;
struct mlx5e_rx_wqe *wqe;
struct sk_buff *skb;
__be16 wqe_counter_be;
wqe_counter_be = cqe->wqe_counter;
wqe_counter = be16_to_cpu(wqe_counter_be);
wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
+ wi = &rq->wqe.frag_info[wqe_counter];
cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
- skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt);
- if (!skb)
+ skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt);
+ if (!skb) {
+ if (rq->wqe.xdp_xmit) {
+ wi->di.page = NULL;
+ rq->wqe.xdp_xmit = false;
+ /* do not return the page to the cache; it will be returned on XDP_TX completion */
+ goto wq_ll_pop;
+ }
+ /* likely an XDP_DROP; skip the page-reuse checks */
+ mlx5e_free_rx_wqe(rq, wi);
goto wq_ll_pop;
+ }
mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
napi_gro_receive(rq->cq.napi, skb);
+ mlx5e_free_rx_wqe_reuse(rq, wi);
wq_ll_pop:
mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
&wqe->next.next_wqe_index);
void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
+ struct mlx5e_wqe_frag_info *wi;
struct mlx5e_rx_wqe *wqe;
__be16 wqe_counter_be;
struct sk_buff *skb;
wqe_counter_be = cqe->wqe_counter;
wqe_counter = be16_to_cpu(wqe_counter_be);
wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
+ wi = &rq->wqe.frag_info[wqe_counter];
cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
- skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt);
+ skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt);
if (!skb)
- goto wq_ll_pop;
+ goto wq_free_wqe;
mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
napi_gro_receive(rq->cq.napi, skb);
-wq_ll_pop:
+wq_free_wqe:
+ mlx5e_free_rx_wqe_reuse(rq, wi);
mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
&wqe->next.next_wqe_index);
}