amdgpu: use drm sync objects for shared semaphores (v6)
author: Dave Airlie <airlied@redhat.com>
Mon, 13 Mar 2017 22:18:15 +0000 (22:18 +0000)
committer: Alex Deucher <alexander.deucher@amd.com>
Fri, 16 Jun 2017 20:58:32 +0000 (16:58 -0400)
This creates a new command submission chunk for amdgpu
to add in and out sync objects around the submission.

Sync objects are managed via the drm syncobj ioctls.

The command submission interface is enhanced with two new
chunks: one for syncobj pre-submission dependencies and
one for post-submission syncobj signalling. Each chunk
just takes a list of syncobj handles.

This is based on work originally done by David Zhou at AMD,
with input from Christian König on what things should look like.

In theory VkFences could be backed with sync objects and
just get passed into the cs as syncobj handles as well.

NOTE: this interface addition needs a version bump to expose
it to userspace.

TODO: update to dep_sync when rebasing onto amdgpu master.
(with this - r-b from Christian)

v1.1: keep file reference on import.
v2: move to using syncobjs
v2.1: change some APIs to just use p pointer.
v3: make more robust against CS failures, we now add the
wait sems but only remove them once the CS job has been
submitted.
v4: rewrite names of API and base on new syncobj code.
v5: move post deps earlier, rename some apis
v6: lookup post deps earlier, and just replace fences
in post deps stage (Christian)

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
include/uapi/drm/amdgpu_drm.h

index e0adad590ecb29491299e69224abade2207536b1..9f827aca90ee308745072220cedc963a12ff5593 100644 (file)
@@ -1159,6 +1159,9 @@ struct amdgpu_cs_parser {
 
        /* user fence */
        struct amdgpu_bo_list_entry     uf_entry;
+
+       unsigned num_post_dep_syncobjs;
+       struct drm_syncobj **post_dep_syncobjs;
 };
 
 #define AMDGPU_PREAMBLE_IB_PRESENT          (1 << 0) /* bit set means command submit involves a preamble IB */
index 29469e6b58b81e874d47facdf6c26392e8c9166c..aeee6840e82b6bc5d4e1b4b1cbbbc9df30fff936 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/pagemap.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
+#include <drm/drm_syncobj.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
@@ -154,6 +155,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
                        break;
 
                case AMDGPU_CHUNK_ID_DEPENDENCIES:
+               case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
+               case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
                        break;
 
                default:
@@ -682,6 +685,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
                ttm_eu_backoff_reservation(&parser->ticket,
                                           &parser->validated);
        }
+
+       for (i = 0; i < parser->num_post_dep_syncobjs; i++)
+               drm_syncobj_put(parser->post_dep_syncobjs[i]);
+       kfree(parser->post_dep_syncobjs);
+
        dma_fence_put(parser->fence);
 
        if (parser->ctx)
@@ -971,6 +979,64 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
        return 0;
 }
 
+/*
+ * Fetch the fence behind syncobj @handle (looked up through p->filp) and
+ * add it to the sync object of the job being built.
+ *
+ * The fence obtained from drm_syncobj_fence_get() is handed to
+ * dma_fence_put() once amdgpu_sync_fence() has been called, whatever that
+ * call returned.
+ *
+ * Returns 0 on success, otherwise the error reported by
+ * drm_syncobj_fence_get() or amdgpu_sync_fence().
+ */
+static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
+                                                uint32_t handle)
+{
+       struct dma_fence *in_fence;
+       int ret;
+
+       ret = drm_syncobj_fence_get(p->filp, handle, &in_fence);
+       if (ret != 0)
+               return ret;
+
+       ret = amdgpu_sync_fence(p->adev, &p->job->sync, in_fence);
+
+       dma_fence_put(in_fence);
+       return ret;
+}
+
+/*
+ * Handle an AMDGPU_CHUNK_ID_SYNCOBJ_IN chunk.
+ *
+ * The chunk payload is treated as an array of struct
+ * drm_amdgpu_cs_chunk_sem; the fence of every listed syncobj handle is
+ * added to the job's sync object.
+ *
+ * Returns 0 when all handles were processed, or the first error returned
+ * by amdgpu_syncobj_lookup_and_add_to_sync() (processing stops there).
+ */
+static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
+                                           struct amdgpu_cs_chunk *chunk)
+{
+       struct drm_amdgpu_cs_chunk_sem *entry =
+               (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+       /* length_dw counts 32-bit words, hence the "* 4" to get bytes. */
+       unsigned count = chunk->length_dw * 4 / sizeof(*entry);
+       int idx, err;
+
+       for (idx = 0; idx < count; idx++, entry++) {
+               err = amdgpu_syncobj_lookup_and_add_to_sync(p, entry->handle);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+/*
+ * Handle an AMDGPU_CHUNK_ID_SYNCOBJ_OUT chunk.
+ *
+ * The chunk payload is treated as an array of struct
+ * drm_amdgpu_cs_chunk_sem.  Every listed handle is resolved with
+ * drm_syncobj_find() and the resulting syncobj pointer is stored in
+ * p->post_dep_syncobjs; p->num_post_dep_syncobjs counts the entries that
+ * were resolved successfully, so amdgpu_cs_parser_fini() only puts valid
+ * pointers when this function fails half way through.
+ *
+ * Returns 0 on success, -EINVAL if a handle cannot be resolved or if a
+ * SYNCOBJ_OUT chunk was already processed for this submission, and
+ * -ENOMEM if the pointer array cannot be allocated.
+ */
+static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
+                                            struct amdgpu_cs_chunk *chunk)
+{
+       unsigned num_deps;
+       unsigned i;
+       struct drm_amdgpu_cs_chunk_sem *deps;
+
+       /*
+        * A second SYNCOBJ_OUT chunk would overwrite the array and count
+        * set up by the first one, leaking the array and the syncobj
+        * references it holds.  Reject it instead.  This relies on the
+        * parser starting out with post_dep_syncobjs == NULL, which
+        * amdgpu_cs_parser_fini() already assumes when it kfree()s the
+        * pointer unconditionally.
+        */
+       if (p->post_dep_syncobjs)
+               return -EINVAL;
+
+       deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+       /* length_dw counts 32-bit words, hence the "* 4" to get bytes. */
+       num_deps = chunk->length_dw * 4 /
+               sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+       p->post_dep_syncobjs = kmalloc_array(num_deps,
+                                            sizeof(struct drm_syncobj *),
+                                            GFP_KERNEL);
+       p->num_post_dep_syncobjs = 0;
+
+       /* Allocation failure must not fall through to the store below. */
+       if (!p->post_dep_syncobjs)
+               return -ENOMEM;
+
+       for (i = 0; i < num_deps; ++i) {
+               p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
+               if (!p->post_dep_syncobjs[i])
+                       return -EINVAL;
+               p->num_post_dep_syncobjs++;
+       }
+       return 0;
+}
+
 static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
                                  struct amdgpu_cs_parser *p)
 {
@@ -985,12 +1051,30 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
                        r = amdgpu_cs_process_fence_dep(p, chunk);
                        if (r)
                                return r;
+               } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
+                       r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
+                       if (r)
+                               return r;
+               } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
+                       r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
+                       if (r)
+                               return r;
                }
        }
 
        return 0;
 }
 
+/*
+ * Call drm_syncobj_replace_fence() with p->fence on each of the
+ * p->num_post_dep_syncobjs entries stored in p->post_dep_syncobjs.
+ * Does nothing when the count is zero.
+ */
+static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
+{
+       int idx;
+
+       for (idx = 0; idx < p->num_post_dep_syncobjs; idx++)
+               drm_syncobj_replace_fence(p->filp,
+                                         p->post_dep_syncobjs[idx],
+                                         p->fence);
+}
+
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
                            union drm_amdgpu_cs *cs)
 {
@@ -1011,6 +1095,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
        job->owner = p->filp;
        job->fence_ctx = entity->fence_context;
        p->fence = dma_fence_get(&job->base.s_fence->finished);
+
+       amdgpu_cs_post_dependencies(p);
+
        cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
        job->uf_sequence = cs->out.handle;
        amdgpu_job_free_resources(job);
@@ -1018,7 +1105,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 
        trace_amdgpu_cs_ioctl(job);
        amd_sched_entity_push_job(&job->base);
-
        return 0;
 }
 
index 8168f8ec711a612ed86fb5f1e9ba91f8f5761586..4c7c2628ace4e816b78ea892ceee5dd6212f7fbe 100644 (file)
@@ -782,7 +782,7 @@ static struct drm_driver kms_driver = {
        .driver_features =
            DRIVER_USE_AGP |
            DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
-           DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET,
+           DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
        .load = amdgpu_driver_load_kms,
        .open = amdgpu_driver_open_kms,
        .postclose = amdgpu_driver_postclose_kms,
index 34128f698f5e97e577e01e300ce0ae679c5ce8bd..d9aa4a339650fafa122f14148b9f0250ecfe536f 100644 (file)
@@ -440,6 +440,8 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_CHUNK_ID_IB             0x01
 #define AMDGPU_CHUNK_ID_FENCE          0x02
 #define AMDGPU_CHUNK_ID_DEPENDENCIES   0x03
+#define AMDGPU_CHUNK_ID_SYNCOBJ_IN      0x04
+#define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
 
 struct drm_amdgpu_cs_chunk {
        __u32           chunk_id;
@@ -507,6 +509,10 @@ struct drm_amdgpu_cs_chunk_fence {
        __u32 offset;
 };
 
+/*
+ * One element of the payload of an AMDGPU_CHUNK_ID_SYNCOBJ_IN or
+ * AMDGPU_CHUNK_ID_SYNCOBJ_OUT chunk; the payload is an array of these.
+ */
+struct drm_amdgpu_cs_chunk_sem {
+       /* syncobj handle, resolved against the submitting DRM file */
+       __u32 handle;
+};
+
 struct drm_amdgpu_cs_chunk_data {
        union {
                struct drm_amdgpu_cs_chunk_ib           ib_data;