drm/amdgpu: fix dependency issue
author Chunming Zhou <David1.Zhou@amd.com>
Tue, 9 May 2017 05:39:40 +0000 (13:39 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 10 May 2017 17:23:53 +0000 (13:23 -0400)
The problem is that executing the jobs in the right order doesn't give you the right result
because consecutive jobs executed on the same engine are pipelined.
In other words, job B does its buffer read before job A has written its result.

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
drivers/gpu/drm/amd/scheduler/gpu_scheduler.h

index 86923c57908b92033ed34f733aff08338392e665..833c3c16501a0221da5161eaec189a5576292abc 100644 (file)
@@ -1129,6 +1129,7 @@ struct amdgpu_job {
        void                    *owner;
        uint64_t                fence_ctx; /* the fence_context this job uses */
        bool                    vm_needs_flush;
+       bool                    need_pipeline_sync;
        unsigned                vm_id;
        uint64_t                vm_pd_addr;
        uint32_t                gds_base, gds_size;
index 2d11ac8d1aa9811ce61606aa83d1207ee661b222..6e4ae0d983c21db9ffac65340994ed6869407caf 100644 (file)
@@ -160,6 +160,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
                dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
                return r;
        }
+       if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync)
+               amdgpu_ring_emit_pipeline_sync(ring);
 
        if (vm) {
                r = amdgpu_vm_flush(ring, job);
index c3cfeb335d990113ca6246acfbfc8d25b46ed921..7570f2439a1175b6509b76121dbd4c5628502fbb 100644 (file)
@@ -57,6 +57,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
        (*job)->vm = vm;
        (*job)->ibs = (void *)&(*job)[1];
        (*job)->num_ibs = num_ibs;
+       (*job)->need_pipeline_sync = false;
 
        amdgpu_sync_create(&(*job)->sync);
 
@@ -152,6 +153,9 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
                fence = amdgpu_sync_get_fence(&job->sync);
        }
 
+       if (amd_sched_dependency_optimized(fence, sched_job->s_entity))
+               job->need_pipeline_sync = true;
+
        return fence;
 }
 
index c42a9979d056d77146061b099a9edca27cf70165..07ff3b1514f129edc23875c1f42053f7ef1aaa72 100644 (file)
@@ -614,7 +614,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
        if (ring->funcs->init_cond_exec)
                patch_offset = amdgpu_ring_init_cond_exec(ring);
 
-       if (ring->funcs->emit_pipeline_sync)
+       if (ring->funcs->emit_pipeline_sync && !job->need_pipeline_sync)
                amdgpu_ring_emit_pipeline_sync(ring);
 
        if (ring->funcs->emit_vm_flush && vm_flush_needed) {
index 28b92c8d99853923c126d106a5519c9559613e6d..fea96a765cf14698eaa2b9e6f30ea7a707245ea7 100644 (file)
@@ -236,6 +236,23 @@ static void amd_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb
        dma_fence_put(f);
 }
 
+bool amd_sched_dependency_optimized(struct dma_fence* fence, /* dependency fence to classify; may be NULL */
+                                   struct amd_sched_entity *entity) /* entity of the job that depends on fence */
+{ /* returns true when an unsignaled fence shares the entity's fence context or its scheduler */
+       struct amd_gpu_scheduler *sched = entity->sched;
+       struct amd_sched_fence *s_fence;
+
+       if (!fence || dma_fence_is_signaled(fence)) /* no fence, or it has already signaled */
+               return false;
+       if (fence->context == entity->fence_context) /* fence context equals this entity's own fence context */
+               return true;
+       s_fence = to_amd_sched_fence(fence); /* NOTE(review): presumably NULL for non-scheduler fences -- confirm */
+       if (s_fence && s_fence->sched == sched) /* scheduler fence owned by the same scheduler as the entity */
+               return true;
+
+       return false;
+}
+
 static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
 {
        struct amd_gpu_scheduler *sched = entity->sched;
index 0255c7f8a6d8350604ef54bde3561a725edaa930..924d4a5899e140cb61766f2e4fde9223d36c9850 100644 (file)
@@ -158,4 +158,6 @@ int amd_sched_job_init(struct amd_sched_job *job,
                       void *owner);
 void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched);
 void amd_sched_job_recovery(struct amd_gpu_scheduler *sched);
+bool amd_sched_dependency_optimized(struct dma_fence* fence,
+                                   struct amd_sched_entity *entity);
 #endif