virtio: Add improved queue allocation API
authorAndy Lutomirski <luto@kernel.org>
Wed, 3 Feb 2016 05:46:37 +0000 (21:46 -0800)
committerMichael S. Tsirkin <mst@redhat.com>
Wed, 2 Mar 2016 15:01:57 +0000 (17:01 +0200)
This leaves vring_new_virtqueue alone for compatbility, but it
adds two new improved APIs:

vring_create_virtqueue: Creates a virtqueue backed by automatically
allocated coherent memory.  (Some day it this could be extended to
support non-coherent memory, too, if there ends up being a platform
on which it's worthwhile.)

__vring_new_virtqueue: Creates a virtqueue with a manually-specified
layout.  This should allow mic_virtio to work much more cleanly.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
drivers/virtio/virtio_ring.c
include/linux/virtio.h
include/linux/virtio_ring.h

index 9abc008ff7ea84d0fecffb11e69ebf73cc936c12..e46d08107a50d855bed2b9e14878a720a23f5381 100644 (file)
@@ -95,6 +95,11 @@ struct vring_virtqueue {
        /* How to notify other side. FIXME: commonalize hcalls! */
        bool (*notify)(struct virtqueue *vq);
 
+       /* DMA, allocation, and size information */
+       bool we_own_ring;
+       size_t queue_size_in_bytes;
+       dma_addr_t queue_dma_addr;
+
 #ifdef DEBUG
        /* They're supposed to lock for us. */
        unsigned int in_use;
@@ -878,36 +883,31 @@ irqreturn_t vring_interrupt(int irq, void *_vq)
 }
 EXPORT_SYMBOL_GPL(vring_interrupt);
 
-struct virtqueue *vring_new_virtqueue(unsigned int index,
-                                     unsigned int num,
-                                     unsigned int vring_align,
-                                     struct virtio_device *vdev,
-                                     bool weak_barriers,
-                                     void *pages,
-                                     bool (*notify)(struct virtqueue *),
-                                     void (*callback)(struct virtqueue *),
-                                     const char *name)
+struct virtqueue *__vring_new_virtqueue(unsigned int index,
+                                       struct vring vring,
+                                       struct virtio_device *vdev,
+                                       bool weak_barriers,
+                                       bool (*notify)(struct virtqueue *),
+                                       void (*callback)(struct virtqueue *),
+                                       const char *name)
 {
-       struct vring_virtqueue *vq;
        unsigned int i;
+       struct vring_virtqueue *vq;
 
-       /* We assume num is a power of 2. */
-       if (num & (num - 1)) {
-               dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
-               return NULL;
-       }
-
-       vq = kmalloc(sizeof(*vq) + num * sizeof(struct vring_desc_state),
+       vq = kmalloc(sizeof(*vq) + vring.num * sizeof(struct vring_desc_state),
                     GFP_KERNEL);
        if (!vq)
                return NULL;
 
-       vring_init(&vq->vring, num, pages, vring_align);
+       vq->vring = vring;
        vq->vq.callback = callback;
        vq->vq.vdev = vdev;
        vq->vq.name = name;
-       vq->vq.num_free = num;
+       vq->vq.num_free = vring.num;
        vq->vq.index = index;
+       vq->we_own_ring = false;
+       vq->queue_dma_addr = 0;
+       vq->queue_size_in_bytes = 0;
        vq->notify = notify;
        vq->weak_barriers = weak_barriers;
        vq->broken = false;
@@ -932,18 +932,145 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 
        /* Put everything in free lists. */
        vq->free_head = 0;
-       for (i = 0; i < num-1; i++)
+       for (i = 0; i < vring.num-1; i++)
                vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
-       memset(vq->desc_state, 0, num * sizeof(struct vring_desc_state));
+       memset(vq->desc_state, 0, vring.num * sizeof(struct vring_desc_state));
 
        return &vq->vq;
 }
+EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
+
+static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
+                             dma_addr_t *dma_handle, gfp_t flag)
+{
+       if (vring_use_dma_api(vdev)) {
+               return dma_alloc_coherent(vdev->dev.parent, size,
+                                         dma_handle, flag);
+       } else {
+               void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
+               if (queue) {
+                       phys_addr_t phys_addr = virt_to_phys(queue);
+                       *dma_handle = (dma_addr_t)phys_addr;
+
+                       /*
+                        * Sanity check: make sure we dind't truncate
+                        * the address.  The only arches I can find that
+                        * have 64-bit phys_addr_t but 32-bit dma_addr_t
+                        * are certain non-highmem MIPS and x86
+                        * configurations, but these configurations
+                        * should never allocate physical pages above 32
+                        * bits, so this is fine.  Just in case, throw a
+                        * warning and abort if we end up with an
+                        * unrepresentable address.
+                        */
+                       if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
+                               free_pages_exact(queue, PAGE_ALIGN(size));
+                               return NULL;
+                       }
+               }
+               return queue;
+       }
+}
+
+static void vring_free_queue(struct virtio_device *vdev, size_t size,
+                            void *queue, dma_addr_t dma_handle)
+{
+       if (vring_use_dma_api(vdev)) {
+               dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
+       } else {
+               free_pages_exact(queue, PAGE_ALIGN(size));
+       }
+}
+
+struct virtqueue *vring_create_virtqueue(
+       unsigned int index,
+       unsigned int num,
+       unsigned int vring_align,
+       struct virtio_device *vdev,
+       bool weak_barriers,
+       bool may_reduce_num,
+       bool (*notify)(struct virtqueue *),
+       void (*callback)(struct virtqueue *),
+       const char *name)
+{
+       struct virtqueue *vq;
+       void *queue;
+       dma_addr_t dma_addr;
+       size_t queue_size_in_bytes;
+       struct vring vring;
+
+       /* We assume num is a power of 2. */
+       if (num & (num - 1)) {
+               dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
+               return NULL;
+       }
+
+       /* TODO: allocate each queue chunk individually */
+       for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
+               queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
+                                         &dma_addr,
+                                         GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
+               if (queue)
+                       break;
+       }
+
+       if (!num)
+               return NULL;
+
+       if (!queue) {
+               /* Try to get a single page. You are my only hope! */
+               queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
+                                         &dma_addr, GFP_KERNEL|__GFP_ZERO);
+       }
+       if (!queue)
+               return NULL;
+
+       queue_size_in_bytes = vring_size(num, vring_align);
+       vring_init(&vring, num, queue, vring_align);
+
+       vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers,
+                                  notify, callback, name);
+       if (!vq) {
+               vring_free_queue(vdev, queue_size_in_bytes, queue,
+                                dma_addr);
+               return NULL;
+       }
+
+       to_vvq(vq)->queue_dma_addr = dma_addr;
+       to_vvq(vq)->queue_size_in_bytes = queue_size_in_bytes;
+       to_vvq(vq)->we_own_ring = true;
+
+       return vq;
+}
+EXPORT_SYMBOL_GPL(vring_create_virtqueue);
+
+struct virtqueue *vring_new_virtqueue(unsigned int index,
+                                     unsigned int num,
+                                     unsigned int vring_align,
+                                     struct virtio_device *vdev,
+                                     bool weak_barriers,
+                                     void *pages,
+                                     bool (*notify)(struct virtqueue *vq),
+                                     void (*callback)(struct virtqueue *vq),
+                                     const char *name)
+{
+       struct vring vring;
+       vring_init(&vring, num, pages, vring_align);
+       return __vring_new_virtqueue(index, vring, vdev, weak_barriers,
+                                    notify, callback, name);
+}
 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
 
-void vring_del_virtqueue(struct virtqueue *vq)
+void vring_del_virtqueue(struct virtqueue *_vq)
 {
-       list_del(&vq->list);
-       kfree(to_vvq(vq));
+       struct vring_virtqueue *vq = to_vvq(_vq);
+
+       if (vq->we_own_ring) {
+               vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes,
+                                vq->vring.desc, vq->queue_dma_addr);
+       }
+       list_del(&_vq->list);
+       kfree(vq);
 }
 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
 
@@ -1007,20 +1134,42 @@ void virtio_break_device(struct virtio_device *dev)
 }
 EXPORT_SYMBOL_GPL(virtio_break_device);
 
-void *virtqueue_get_avail(struct virtqueue *_vq)
+dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
 {
        struct vring_virtqueue *vq = to_vvq(_vq);
 
-       return vq->vring.avail;
+       BUG_ON(!vq->we_own_ring);
+
+       return vq->queue_dma_addr;
 }
-EXPORT_SYMBOL_GPL(virtqueue_get_avail);
+EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
 
-void *virtqueue_get_used(struct virtqueue *_vq)
+dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
 {
        struct vring_virtqueue *vq = to_vvq(_vq);
 
-       return vq->vring.used;
+       BUG_ON(!vq->we_own_ring);
+
+       return vq->queue_dma_addr +
+               ((char *)vq->vring.avail - (char *)vq->vring.desc);
+}
+EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
+
+dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
+{
+       struct vring_virtqueue *vq = to_vvq(_vq);
+
+       BUG_ON(!vq->we_own_ring);
+
+       return vq->queue_dma_addr +
+               ((char *)vq->vring.used - (char *)vq->vring.desc);
+}
+EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
+
+const struct vring *virtqueue_get_vring(struct virtqueue *vq)
+{
+       return &to_vvq(vq)->vring;
 }
-EXPORT_SYMBOL_GPL(virtqueue_get_used);
+EXPORT_SYMBOL_GPL(virtqueue_get_vring);
 
 MODULE_LICENSE("GPL");
index 8f4d4bfa6d46084d8bea6aa3181f7bd7184fc58c..d5eb5479a4254356a39e76c805743d35458dd07b 100644 (file)
@@ -75,8 +75,27 @@ unsigned int virtqueue_get_vring_size(struct virtqueue *vq);
 
 bool virtqueue_is_broken(struct virtqueue *vq);
 
-void *virtqueue_get_avail(struct virtqueue *vq);
-void *virtqueue_get_used(struct virtqueue *vq);
+const struct vring *virtqueue_get_vring(struct virtqueue *vq);
+dma_addr_t virtqueue_get_desc_addr(struct virtqueue *vq);
+dma_addr_t virtqueue_get_avail_addr(struct virtqueue *vq);
+dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
+
+/*
+ * Legacy accessors -- in almost all cases, these are the wrong functions
+ * to use.
+ */
+static inline void *virtqueue_get_desc(struct virtqueue *vq)
+{
+       return virtqueue_get_vring(vq)->desc;
+}
+static inline void *virtqueue_get_avail(struct virtqueue *vq)
+{
+       return virtqueue_get_vring(vq)->avail;
+}
+static inline void *virtqueue_get_used(struct virtqueue *vq)
+{
+       return virtqueue_get_vring(vq)->used;
+}
 
 /**
  * virtio_device - representation of a device using virtio
index a156e2b6ccfe33ef4901b7a3059f25add3c0f70c..e8d36938f09a56cfe4729c17b0ee8175b4991bbe 100644 (file)
@@ -59,6 +59,35 @@ static inline void virtio_store_mb(bool weak_barriers,
 struct virtio_device;
 struct virtqueue;
 
+/*
+ * Creates a virtqueue and allocates the descriptor ring.  If
+ * may_reduce_num is set, then this may allocate a smaller ring than
+ * expected.  The caller should query virtqueue_get_ring_size to learn
+ * the actual size of the ring.
+ */
+struct virtqueue *vring_create_virtqueue(unsigned int index,
+                                        unsigned int num,
+                                        unsigned int vring_align,
+                                        struct virtio_device *vdev,
+                                        bool weak_barriers,
+                                        bool may_reduce_num,
+                                        bool (*notify)(struct virtqueue *vq),
+                                        void (*callback)(struct virtqueue *vq),
+                                        const char *name);
+
+/* Creates a virtqueue with a custom layout. */
+struct virtqueue *__vring_new_virtqueue(unsigned int index,
+                                       struct vring vring,
+                                       struct virtio_device *vdev,
+                                       bool weak_barriers,
+                                       bool (*notify)(struct virtqueue *),
+                                       void (*callback)(struct virtqueue *),
+                                       const char *name);
+
+/*
+ * Creates a virtqueue with a standard layout but a caller-allocated
+ * ring.
+ */
 struct virtqueue *vring_new_virtqueue(unsigned int index,
                                      unsigned int num,
                                      unsigned int vring_align,
@@ -68,7 +97,13 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
                                      bool (*notify)(struct virtqueue *vq),
                                      void (*callback)(struct virtqueue *vq),
                                      const char *name);
+
+/*
+ * Destroys a virtqueue.  If created with vring_create_virtqueue, this
+ * also frees the ring.
+ */
 void vring_del_virtqueue(struct virtqueue *vq);
+
 /* Filter out transport-specific feature bits. */
 void vring_transport_features(struct virtio_device *vdev);