From 42b36cc0ce717deeb10030141a43dede763a3ebe Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 12 Nov 2007 13:39:18 +1100 Subject: [PATCH] virtio: Force use of power-of-two for descriptor ring sizes The virtio descriptor rings of size N-1 were nicely set up to be aligned to an N-byte boundary. But as Anthony Liguori points out, the free-running indices used by virtio require that the sizes be a power of 2, otherwise we get problems on wrap (demonstrated with lguest). So we replace the clever "2^n-1" scheme with a simple "align to page boundary" scheme: this means that all virtio rings take at least two pages, but it's safer than guessing cache alignment. Signed-off-by: Rusty Russell --- Documentation/lguest/lguest.c | 9 +++++---- drivers/lguest/lguest_device.c | 3 ++- drivers/virtio/virtio_ring.c | 8 +++++++- include/linux/virtio_ring.h | 19 +++++++++++-------- 4 files changed, 25 insertions(+), 14 deletions(-) diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 157f6a26b93..42008395534 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -62,8 +62,8 @@ typedef uint8_t u8; #endif /* We can have up to 256 pages for devices. */ #define DEVICE_PAGES 256 -/* This fits nicely in a single 4096-byte page. */ -#define VIRTQUEUE_NUM 127 +/* This will occupy 2 pages: it must be a power of 2. */ +#define VIRTQUEUE_NUM 128 /*L:120 verbose is both a global flag and a macro. The C preprocessor allows * this, and although I wouldn't recommend it, it works quite nicely here. */ @@ -1036,7 +1036,8 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, void *p; /* First we need some pages for this virtqueue. */ - pages = (vring_size(num_descs) + getpagesize() - 1) / getpagesize(); + pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1) + / getpagesize(); p = get_pages(pages); /* Initialize the configuration. */ @@ -1045,7 +1046,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, vq->config.pfn = to_guest_phys(p) / getpagesize(); /* Initialize the vring. */ - vring_init(&vq->vring, num_descs, p); + vring_init(&vq->vring, num_descs, p, getpagesize()); /* Add the configuration information to this device's descriptor. */ add_desc_field(dev, VIRTIO_CONFIG_F_VIRTQUEUE, diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index 8904f72f97c..66f38722253 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -200,7 +200,8 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, /* Figure out how many pages the ring will take, and map that memory */ lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT, - DIV_ROUND_UP(vring_size(lvq->config.num), + DIV_ROUND_UP(vring_size(lvq->config.num, + PAGE_SIZE), PAGE_SIZE)); if (!lvq->pages) { err = -ENOMEM; diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 0e1bf053d8c..1dc04b6684e 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -277,11 +277,17 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, struct vring_virtqueue *vq; unsigned int i; + /* We assume num is a power of 2. */ + if (num & (num - 1)) { + dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); + return NULL; + } + vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL); if (!vq) return NULL; - vring_init(&vq->vring, num, pages); + vring_init(&vq->vring, num, pages, PAGE_SIZE); vq->vq.callback = callback; vq->vq.vdev = vdev; vq->vq.vq_ops = &vring_vq_ops; diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index 5b88d215af5..1a4ed49f647 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -67,7 +67,7 @@ struct vring { }; /* The standard layout for the ring is a continuous chunk of memory which looks - * like this. The used fields will be aligned to a "num+1" boundary. + * like this. We assume num is a power of 2. * * struct vring * { @@ -79,8 +79,8 @@ struct vring { * __u16 avail_idx; * __u16 available[num]; * - * // Padding so a correctly-chosen num value will cache-align used_idx. - * char pad[sizeof(struct vring_desc) - sizeof(avail_flags)]; + * // Padding to the next page boundary. + * char pad[]; * * // A ring of used descriptor heads with free-running index. * __u16 used_flags; @@ -88,18 +88,21 @@ struct vring { * struct vring_used_elem used[num]; * }; */ -static inline void vring_init(struct vring *vr, unsigned int num, void *p) +static inline void vring_init(struct vring *vr, unsigned int num, void *p, + unsigned int pagesize) { vr->num = num; vr->desc = p; vr->avail = p + num*sizeof(struct vring_desc); - vr->used = p + (num+1)*(sizeof(struct vring_desc) + sizeof(__u16)); + vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + pagesize-1) + & ~(pagesize - 1)); } -static inline unsigned vring_size(unsigned int num) +static inline unsigned vring_size(unsigned int num, unsigned int pagesize) { - return (num + 1) * (sizeof(struct vring_desc) + sizeof(__u16)) - + sizeof(__u32) + num * sizeof(struct vring_used_elem); + return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num) + + pagesize - 1) & ~(pagesize - 1)) + + sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num; } #ifdef __KERNEL__ -- 2.20.1