drm/radeon/kms: Add support for multi-ring sync in CS ioctl (v2)
author     Christian König <deathsimple@vodafone.de>
           Fri, 6 Jan 2012 03:11:06 +0000 (22:11 -0500)
committer  Dave Airlie <airlied@redhat.com>
           Fri, 6 Jan 2012 09:16:09 +0000 (09:16 +0000)
Use semaphores to sync buffers across rings in the CS
ioctl.  Add a reloc flag to allow userspace to skip
sync for buffers.

agd5f: port to latest CS ioctl changes.

v2: add ring lock/unlock to make sure changes hit the ring.

Signed-off-by: Christian König <deathsimple@vodafone.de>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
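
As a userspace-side illustration (not part of this patch): a minimal sketch of how a libdrm-style client might mark a buffer with the new reloc flag. The helper name, buffer handle and domain choice below are hypothetical.

    #include <stdint.h>
    #include <string.h>
    #include <drm/radeon_drm.h>

    /* Hypothetical helper: fill one CS reloc entry for a buffer that does
     * not need cross-ring synchronization (e.g. a read-only buffer the
     * client knows is idle).  Setting RADEON_RELOC_DONT_SYNC tells the
     * kernel to skip the semaphore sync this patch would otherwise emit. */
    static void fill_reloc_nosync(struct drm_radeon_cs_reloc *reloc,
                                  uint32_t bo_handle)
    {
            memset(reloc, 0, sizeof(*reloc));
            reloc->handle = bo_handle;                 /* GEM handle of the BO */
            reloc->read_domains = RADEON_GEM_DOMAIN_GTT;
            reloc->write_domain = 0;                   /* read-only use */
            reloc->flags = RADEON_RELOC_DONT_SYNC;     /* skip inter-ring sync */
    }

The reloc array is then submitted as the usual RADEON_CHUNK_ID_RELOCS chunk of DRM_IOCTL_RADEON_CS; buffers without the flag get the new default behaviour of synchronizing against the other ring's fence via a semaphore.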
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_cs.c
drivers/gpu/drm/radeon/radeon_fence.c
include/drm/radeon_drm.h

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5e3542384b212b3bd063c6d3a902b744bd864e50..3483ed9b38e99c1c55b22e8e6df03a511233fa00 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -230,6 +230,7 @@ struct radeon_fence {
        bool                            signaled;
        /* RB, DMA, etc. */
        int                             ring;
+       struct radeon_semaphore         *semaphore;
 };
 
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
@@ -789,6 +790,7 @@ struct radeon_cs_parser {
        struct radeon_cs_reloc  *relocs;
        struct radeon_cs_reloc  **relocs_ptr;
        struct list_head        validated;
+       bool                    sync_to_ring[RADEON_NUM_RINGS];
        /* indices of various chunks */
        int                     chunk_ib_idx;
        int                     chunk_relocs_idx;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 4d595403b50c792ec54892d63ee4bd48acb7edb1..17af0e83c328503e395cd0437e384733c38b3079 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -84,6 +84,13 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
                        p->relocs[i].flags = r->flags;
                        radeon_bo_list_add_object(&p->relocs[i].lobj,
                                                  &p->validated);
+
+                       if (p->relocs[i].robj->tbo.sync_obj && !(r->flags & RADEON_RELOC_DONT_SYNC)) {
+                               struct radeon_fence *fence = p->relocs[i].robj->tbo.sync_obj;
+                               if (!radeon_fence_signaled(fence)) {
+                                       p->sync_to_ring[fence->ring] = true;
+                               }
+                       }
                } else
                        p->relocs[i].handle = 0;
        }
@@ -109,6 +116,36 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
        return 0;
 }
 
+static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
+{
+       int i, r;
+
+       for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+               /* no need to sync to our own or unused rings */
+               if (i == p->ring || !p->sync_to_ring[i] || !p->rdev->ring[i].ready)
+                       continue;
+
+               if (!p->ib->fence->semaphore) {
+                       r = radeon_semaphore_create(p->rdev, &p->ib->fence->semaphore);
+                       if (r)
+                               return r;
+               }
+
+               r = radeon_ring_lock(p->rdev, &p->rdev->ring[i], 3);
+               if (r)
+                       return r;
+               radeon_semaphore_emit_signal(p->rdev, i, p->ib->fence->semaphore);
+               radeon_ring_unlock_commit(p->rdev, &p->rdev->ring[i]);
+
+               r = radeon_ring_lock(p->rdev, &p->rdev->ring[p->ring], 3);
+               if (r)
+                       return r;
+               radeon_semaphore_emit_wait(p->rdev, p->ring, p->ib->fence->semaphore);
+               radeon_ring_unlock_commit(p->rdev, &p->rdev->ring[p->ring]);
+       }
+       return 0;
+}
+
 int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 {
        struct drm_radeon_cs *cs = data;
@@ -314,6 +351,10 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
                DRM_ERROR("Invalid command stream !\n");
                return r;
        }
+       r = radeon_cs_sync_rings(parser);
+       if (r) {
+               DRM_ERROR("Failed to synchronize rings !\n");
+       }
        parser->ib->vm_id = 0;
        r = radeon_ib_schedule(rdev, parser->ib);
        if (r) {
@@ -384,6 +425,10 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
        if (r) {
                goto out;
        }
+       r = radeon_cs_sync_rings(parser);
+       if (r) {
+               DRM_ERROR("Failed to synchronize rings !\n");
+       }
        parser->ib->vm_id = vm->id;
        /* ib pool is bind at 0 in virtual address space to gpu_addr is the
         * offset inside the pool bo
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index ae9e3da594a17848ed109f67735c45317557956c..64ea3dd9e6ff2ab58ed36476693a9dde4cff6f5a 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -153,6 +153,8 @@ static void radeon_fence_destroy(struct kref *kref)
        list_del(&fence->list);
        fence->emitted = false;
        write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags);
+       if (fence->semaphore)
+               radeon_semaphore_free(fence->rdev, fence->semaphore);
        kfree(fence);
 }
 
@@ -172,6 +174,7 @@ int radeon_fence_create(struct radeon_device *rdev,
        (*fence)->signaled = false;
        (*fence)->seq = 0;
        (*fence)->ring = ring;
+       (*fence)->semaphore = NULL;
        INIT_LIST_HEAD(&(*fence)->list);
 
        write_lock_irqsave(&rdev->fence_lock, irq_flags);
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index d7079f42624b614bf4cd4077239a513447fb1e6c..2a807a5669bcbd2b6eb69f6a195a45aa19079b8e 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -917,6 +917,9 @@ struct drm_radeon_cs_chunk {
        uint64_t                chunk_data;
 };
 
+/* drm_radeon_cs_reloc.flags */
+#define RADEON_RELOC_DONT_SYNC         0x01
+
 struct drm_radeon_cs_reloc {
        uint32_t                handle;
        uint32_t                read_domains;