return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
}
+/* We copy the packet for XDP in the following cases:
+ *
+ * 1) Packet is scattered across multiple rx buffers.
+ * 2) Headroom space is insufficient.
+ *
+ * This is inefficient but it's a temporary condition that
+ * we hit right after XDP is enabled and until queue is refilled
+ * with large buffers with sufficient headroom - so it should affect
+ * at most queue size packets.
+ * Afterwards, the conditions to enable
+ * XDP should preclude the underlying device from sending packets
+ * across multiple buffers (num_buf > 1), and we make sure buffers
+ * have enough headroom.
+ */
+static struct page *xdp_linearize_page(struct receive_queue *rq,
+ u16 *num_buf,
+ struct page *p,
+ int offset,
+ int page_off,
+ unsigned int *len)
+{
+ struct page *page = alloc_page(GFP_ATOMIC);
+
+ if (!page)
+ return NULL;
+
+ memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
+ page_off += *len;
+
+ while (--*num_buf) {
+ unsigned int buflen;
+ void *buf;
+ int off;
+
+ buf = virtqueue_get_buf(rq->vq, &buflen);
+ if (unlikely(!buf))
+ goto err_buf;
+
+ p = virt_to_head_page(buf);
+ off = buf - page_address(p);
+
+ /* guard against a misconfigured or uncooperative backend that
+ * is sending packet larger than the MTU.
+ */
+ if ((page_off + buflen) > PAGE_SIZE) {
+ put_page(p);
+ goto err_buf;
+ }
+
+ memcpy(page_address(page) + page_off,
+ page_address(p) + off, buflen);
+ page_off += buflen;
+ put_page(p);
+ }
+
+ /* Headroom does not contribute to packet length */
+ *len = page_off - VIRTIO_XDP_HEADROOM;
+ return page;
+err_buf:
+ __free_pages(page, 0);
+ return NULL;
+}
+
static struct sk_buff *receive_small(struct net_device *dev,
struct virtnet_info *vi,
struct receive_queue *rq,
{
struct sk_buff *skb;
struct bpf_prog *xdp_prog;
- unsigned int xdp_headroom = virtnet_get_headroom(vi);
+ unsigned int xdp_headroom = (unsigned long)ctx;
unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
unsigned int headroom = vi->hdr_len + header_offset;
unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ struct page *page = virt_to_head_page(buf);
unsigned int delta = 0;
+ struct page *xdp_page;
len -= vi->hdr_len;
rcu_read_lock();
if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
goto err_xdp;
+ if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
+ int offset = buf - page_address(page) + header_offset;
+ unsigned int tlen = len + vi->hdr_len;
+ u16 num_buf = 1;
+
+ xdp_headroom = virtnet_get_headroom(vi);
+ header_offset = VIRTNET_RX_PAD + xdp_headroom;
+ headroom = vi->hdr_len + header_offset;
+ buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ xdp_page = xdp_linearize_page(rq, &num_buf, page,
+ offset, header_offset,
+ &tlen);
+ if (!xdp_page)
+ goto err_xdp;
+
+ buf = page_address(xdp_page);
+ put_page(page);
+ page = xdp_page;
+ }
+
xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
xdp.data = xdp.data_hard_start + xdp_headroom;
xdp.data_end = xdp.data + len;
skb = build_skb(buf, buflen);
if (!skb) {
- put_page(virt_to_head_page(buf));
+ put_page(page);
goto err;
}
skb_reserve(skb, headroom - delta);
err_xdp:
rcu_read_unlock();
dev->stats.rx_dropped++;
- put_page(virt_to_head_page(buf));
+ put_page(page);
xdp_xmit:
return NULL;
}
return NULL;
}
-/* The conditions to enable XDP should preclude the underlying device from
- * sending packets across multiple buffers (num_buf > 1). However per spec
- * it does not appear to be illegal to do so but rather just against convention.
- * So in order to avoid making a system unresponsive the packets are pushed
- * into a page and the XDP program is run. This will be extremely slow and we
- * push a warning to the user to fix this as soon as possible. Fixing this may
- * require resolving the underlying hardware to determine why multiple buffers
- * are being received or simply loading the XDP program in the ingress stack
- * after the skb is built because there is no advantage to running it here
- * anymore.
- */
-static struct page *xdp_linearize_page(struct receive_queue *rq,
- u16 *num_buf,
- struct page *p,
- int offset,
- unsigned int *len)
-{
- struct page *page = alloc_page(GFP_ATOMIC);
- unsigned int page_off = VIRTIO_XDP_HEADROOM;
-
- if (!page)
- return NULL;
-
- memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
- page_off += *len;
-
- while (--*num_buf) {
- unsigned int buflen;
- void *buf;
- int off;
-
- buf = virtqueue_get_buf(rq->vq, &buflen);
- if (unlikely(!buf))
- goto err_buf;
-
- p = virt_to_head_page(buf);
- off = buf - page_address(p);
-
- /* guard against a misconfigured or uncooperative backend that
- * is sending packet larger than the MTU.
- */
- if ((page_off + buflen) > PAGE_SIZE) {
- put_page(p);
- goto err_buf;
- }
-
- memcpy(page_address(page) + page_off,
- page_address(p) + off, buflen);
- page_off += buflen;
- put_page(p);
- }
-
- /* Headroom does not contribute to packet length */
- *len = page_off - VIRTIO_XDP_HEADROOM;
- return page;
-err_buf:
- __free_pages(page, 0);
- return NULL;
-}
-
static struct sk_buff *receive_mergeable(struct net_device *dev,
struct virtnet_info *vi,
struct receive_queue *rq,
struct sk_buff *head_skb, *curr_skb;
struct bpf_prog *xdp_prog;
unsigned int truesize;
+ unsigned int headroom = mergeable_ctx_to_headroom(ctx);
head_skb = NULL;
u32 act;
/* This happens when rx buffer size is underestimated */
- if (unlikely(num_buf > 1)) {
+ if (unlikely(num_buf > 1 ||
+ headroom < virtnet_get_headroom(vi))) {
/* linearize data for XDP */
xdp_page = xdp_linearize_page(rq, &num_buf,
- page, offset, &len);
+ page, offset,
+ VIRTIO_XDP_HEADROOM,
+ &len);
if (!xdp_page)
goto err_xdp;
offset = VIRTIO_XDP_HEADROOM;
err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
if (err < 0)
put_page(virt_to_head_page(buf));
-
return err;
}
}
static int init_vqs(struct virtnet_info *vi);
-static void _remove_vq_common(struct virtnet_info *vi);
static int virtnet_restore_up(struct virtio_device *vdev)
{
return err;
}
-static int virtnet_reset(struct virtnet_info *vi, int curr_qp, int xdp_qp)
-{
- struct virtio_device *dev = vi->vdev;
- int ret;
-
- virtio_config_disable(dev);
- dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
- virtnet_freeze_down(dev);
- _remove_vq_common(vi);
-
- virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
- virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
-
- ret = virtio_finalize_features(dev);
- if (ret)
- goto err;
-
- vi->xdp_queue_pairs = xdp_qp;
- ret = virtnet_restore_up(dev);
- if (ret)
- goto err;
- ret = _virtnet_set_queues(vi, curr_qp);
- if (ret)
- goto err;
-
- virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
- virtio_config_enable(dev);
- return 0;
-err:
- virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
- return ret;
-}
-
static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
return PTR_ERR(prog);
}
- /* Changing the headroom in buffers is a disruptive operation because
- * existing buffers must be flushed and reallocated. This will happen
- * when a xdp program is initially added or xdp is disabled by removing
- * the xdp program resulting in number of XDP queues changing.
- */
- if (vi->xdp_queue_pairs != xdp_qp) {
- err = virtnet_reset(vi, curr_qp + xdp_qp, xdp_qp);
- if (err) {
- dev_warn(&dev->dev, "XDP reset failure.\n");
- goto virtio_reset_err;
- }
- }
+ /* Make sure NAPI is not using any XDP TX queues for RX. */
+ for (i = 0; i < vi->max_queue_pairs; i++)
+ napi_disable(&vi->rq[i].napi);
netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
+ err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
+ if (err)
+ goto err;
+ vi->xdp_queue_pairs = xdp_qp;
for (i = 0; i < vi->max_queue_pairs; i++) {
old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
if (old_prog)
bpf_prog_put(old_prog);
+ virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
}
return 0;
-virtio_reset_err:
- /* On reset error do our best to unwind XDP changes inflight and return
- * error up to user space for resolution. The underlying reset hung on
- * us so not much we can do here.
- */
+err:
+ for (i = 0; i < vi->max_queue_pairs; i++)
+ virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
if (prog)
bpf_prog_sub(prog, vi->max_queue_pairs - 1);
return err;
return err;
}
-static void _remove_vq_common(struct virtnet_info *vi)
-{
- vi->vdev->config->reset(vi->vdev);
- free_unused_bufs(vi);
- _free_receive_bufs(vi);
- free_receive_page_frags(vi);
- virtnet_del_vqs(vi);
-}
-
static void remove_vq_common(struct virtnet_info *vi)
{
vi->vdev->config->reset(vi->vdev);