/* Other side has made a mess, don't try any more. */
bool broken;
+ /* Host supports indirect buffers */
+ bool indirect;
+
/* Number of free buffers */
unsigned int num_free;
/* Head of free buffer list. */
#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
+/* Set up an indirect table of descriptors and add it to the queue.
+ *
+ * Allocates a table of (out + in) descriptors, fills it from sg[] (the
+ * first "out" entries without VRING_DESC_F_WRITE, the following "in"
+ * entries with it), and points one ring descriptor, taken from the head of
+ * the free list and flagged VRING_DESC_F_INDIRECT, at that table.
+ *
+ * Returns the index of the ring descriptor used, or vq->vring.num if the
+ * table could not be allocated; no ring state is modified on that path.
+ *
+ * vq->num_free is decremented without being checked here, and the final
+ * table entry is addressed as desc[i-1].  The call site visible in
+ * vring_add_buf only calls this when (out + in) > 1 and vq->num_free is
+ * non-zero.  NOTE(review): confirm any other caller upholds the same.
+ *
+ * The table is not freed here; the code that puts a descriptor back on the
+ * free list kfree()s it when it sees VRING_DESC_F_INDIRECT set. */
+static int vring_add_indirect(struct vring_virtqueue *vq,
+ struct scatterlist sg[],
+ unsigned int out,
+ unsigned int in)
+{
+ struct vring_desc *desc;
+ unsigned head;
+ int i;
+
+ desc = kmalloc((out + in) * sizeof(struct vring_desc), GFP_ATOMIC); /* one table entry per sg element */
+ if (!desc)
+ return vq->vring.num; /* failure sentinel: the caller compares the result against vq->vring.num */
+
+ /* Transfer entries from the sg list into the indirect page.
+ * The first "out" entries carry only VRING_DESC_F_NEXT (no
+ * VRING_DESC_F_WRITE); "next" indexes into this table, not the ring. */
+ for (i = 0; i < out; i++) {
+ desc[i].flags = VRING_DESC_F_NEXT;
+ desc[i].addr = sg_phys(sg);
+ desc[i].len = sg->length;
+ desc[i].next = i+1;
+ sg++;
+ }
+ for (; i < (out + in); i++) { /* remaining "in" entries: additionally flagged VRING_DESC_F_WRITE */
+ desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
+ desc[i].addr = sg_phys(sg);
+ desc[i].len = sg->length;
+ desc[i].next = i+1;
+ sg++;
+ }
+
+ /* Last one doesn't continue.
+ * i == out + in at this point, so desc[i-1] is the final table entry;
+ * VRING_DESC_F_WRITE, if set on it, is preserved. */
+ desc[i-1].flags &= ~VRING_DESC_F_NEXT;
+ desc[i-1].next = 0;
+
+ /* We're about to use a buffer: exactly one ring descriptor is
+ * consumed, however many entries the table holds. */
+ vq->num_free--;
+
+ /* Use a single buffer which doesn't continue: flags is assigned (not
+ * OR-ed), so VRING_DESC_F_NEXT is not set on the ring descriptor. */
+ head = vq->free_head;
+ vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
+ vq->vring.desc[head].addr = virt_to_phys(desc); /* physical address of the table */
+ vq->vring.desc[head].len = i * sizeof(struct vring_desc); /* table size in bytes */
+
+ /* Update free pointer: the descriptor's "next" field is left
+ * untouched above and is read here as the next free-list entry. */
+ vq->free_head = vq->vring.desc[head].next;
+
+ return head;
+}
+
static int vring_add_buf(struct virtqueue *_vq,
struct scatterlist sg[],
unsigned int out,
struct vring_virtqueue *vq = to_vvq(_vq);
unsigned int i, avail, head, uninitialized_var(prev);
+ START_USE(vq);
+
BUG_ON(data == NULL);
+
+ /* If the host supports indirect descriptor tables, and we have multiple
+ * buffers, then go indirect. FIXME: tune this threshold */
+ if (vq->indirect && (out + in) > 1 && vq->num_free) {
+ head = vring_add_indirect(vq, sg, out, in);
+ if (head != vq->vring.num)
+ goto add_head;
+ }
+
BUG_ON(out + in > vq->vring.num);
BUG_ON(out + in == 0);
- START_USE(vq);
-
if (vq->num_free < out + in) {
pr_debug("Can't add buf len %i - avail = %i\n",
out + in, vq->num_free);
/* Update free pointer */
vq->free_head = i;
+add_head:
/* Set token. */
vq->data[head] = data;
/* Put back on free list: find end */
i = head;
+
+ /* Free the indirect table */
+ if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT)
+ kfree(phys_to_virt(vq->vring.desc[i].addr));
+
while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
i = vq->vring.desc[i].next;
vq->num_free++;
vq->in_use = false;
#endif
+ vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
+
/* No callback? Tell other side not to bother us. */
if (!callback)
vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
switch (i) {
+ case VIRTIO_RING_F_INDIRECT_DESC:
+ break;
default:
/* We don't understand this bit. */
clear_bit(i, vdev->features);
#define VRING_DESC_F_NEXT 1
/* This marks a buffer as write-only (otherwise read-only). */
#define VRING_DESC_F_WRITE 2
+/* This means the buffer contains a list of buffer descriptors.
+ * The descriptor's addr/len then describe that list (a table of
+ * struct vring_desc, len being its size in bytes) rather than data. */
+#define VRING_DESC_F_INDIRECT 4
/* The Host uses this in used->flags to advise the Guest: don't kick me when
* you add a buffer. It's unreliable, so it's simply an optimization. Guest
* optimization. */
#define VRING_AVAIL_F_NO_INTERRUPT 1
+/* We support indirect buffer descriptors.
+ * This is a feature bit number: it is tested with virtio_has_feature()
+ * and is kept (not cleared) when transport features are filtered. */
+#define VIRTIO_RING_F_INDIRECT_DESC 28
+
/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */
struct vring_desc
{