drm/amdgpu: fix incorrect mutex usage v3
Author:     Christian König <christian.koenig@amd.com>
AuthorDate: Thu, 5 Nov 2015 18:49:48 +0000 (19:49 +0100)
Commit:     Alex Deucher <alexander.deucher@amd.com>
CommitDate: Mon, 16 Nov 2015 16:05:58 +0000 (11:05 -0500)
Before this patch the scheduler fence was created when we pushed the job
into the queue, so we could only get the fence after pushing it.

The mutex was therefore necessary to prevent the thread pushing the jobs
to the hardware from running ahead of the thread pushing the jobs into
the queue.

Otherwise the thread pushing jobs into the queue could have accessed
already freed memory when it tried to get a reference to the fence.

So what you get in the end is thread A:
mutex_lock(&job->lock);
...
Kick off thread B.
...
mutex_unlock(&job->lock);

And thread B:
mutex_lock(&job->lock);
....
mutex_unlock(&job->lock);
kfree(job);

I'm actually not sure if I'm still up to date on this, but this usage
pattern used to be disallowed with mutexes: thread A's mutex_unlock() can
still touch the mutex after thread B has already acquired it, so once
thread B frees the job, thread A ends up accessing freed memory. See also
https://lwn.net/Articles/575460/.
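
With the fence created up front, the pushing thread can take its own
reference before handing the job over and never needs to touch the job
again, so no lock is required. A simplified sketch of the new
amdgpu_cs_ioctl flow from the patch below (error handling omitted):

fence = amd_sched_fence_create(job->base.s_entity, parser.filp);
job->base.s_fence = fence;
fence_get(&fence->base);        /* local reference for the submitting thread */

amd_sched_entity_push_job(&job->base);  /* the job may be freed from here on */

/* only the local fence reference is used afterwards */
ttm_eu_fence_buffer_objects(&parser.ticket, &parser.validated, &fence->base);
fence_put(&fence->base);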

v2: remove unrelated changes, fix missing owner
v3: rebased, add more commit message

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
drivers/gpu/drm/amd/scheduler/gpu_scheduler.h

index 7b02e34551728cc2b08ba83108c683af7f92857a..0f187027c753ba281510fe23c67bd22c2e8b4a7d 100644
@@ -1225,7 +1225,7 @@ struct amdgpu_job {
        struct amdgpu_device    *adev;
        struct amdgpu_ib        *ibs;
        uint32_t                num_ibs;
-       struct mutex            job_lock;
+       void                    *owner;
        struct amdgpu_user_fence uf;
        int (*free_job)(struct amdgpu_job *job);
 };
index 2ae73d5232dda6319a5581b057073dba3c1e1a15..44cf977ae4f67483d8c1637ef864241fe7291f0b 100644
@@ -845,8 +845,9 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
                goto out;
 
        if (amdgpu_enable_scheduler && parser.num_ibs) {
-               struct amdgpu_job *job;
                struct amdgpu_ring * ring = parser.ibs->ring;
+               struct amd_sched_fence *fence;
+               struct amdgpu_job *job;
 
                job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
                if (!job) {
@@ -859,37 +860,41 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
                job->adev = parser.adev;
                job->ibs = parser.ibs;
                job->num_ibs = parser.num_ibs;
-               job->base.owner = parser.filp;
-               mutex_init(&job->job_lock);
+               job->owner = parser.filp;
+               job->free_job = amdgpu_cs_free_job;
+
                if (job->ibs[job->num_ibs - 1].user) {
                        job->uf = parser.uf;
                        job->ibs[job->num_ibs - 1].user = &job->uf;
                        parser.uf.bo = NULL;
                }
 
-               parser.ibs = NULL;
-               parser.num_ibs = 0;
-
-               job->free_job = amdgpu_cs_free_job;
-               mutex_lock(&job->job_lock);
-               r = amd_sched_entity_push_job(&job->base);
-               if (r) {
-                       mutex_unlock(&job->job_lock);
+               fence = amd_sched_fence_create(job->base.s_entity,
+                                              parser.filp);
+               if (!fence) {
+                       r = -ENOMEM;
                        amdgpu_cs_free_job(job);
                        kfree(job);
                        goto out;
                }
-               cs->out.handle =
-                       amdgpu_ctx_add_fence(parser.ctx, ring,
-                                            &job->base.s_fence->base);
+               job->base.s_fence = fence;
+               fence_get(&fence->base);
+
+               cs->out.handle = amdgpu_ctx_add_fence(parser.ctx, ring,
+                                                     &fence->base);
                job->ibs[job->num_ibs - 1].sequence = cs->out.handle;
 
-               list_sort(NULL, &parser.validated, cmp_size_smaller_first);
-               ttm_eu_fence_buffer_objects(&parser.ticket,
-                               &parser.validated,
-                               &job->base.s_fence->base);
+               parser.ibs = NULL;
+               parser.num_ibs = 0;
+
                trace_amdgpu_cs_ioctl(job);
-               mutex_unlock(&job->job_lock);
+               amd_sched_entity_push_job(&job->base);
+
+               list_sort(NULL, &parser.validated, cmp_size_smaller_first);
+               ttm_eu_fence_buffer_objects(&parser.ticket, &parser.validated,
+                                           &fence->base);
+               fence_put(&fence->base);
+
                amdgpu_cs_parser_fini_late(&parser);
                mutex_unlock(&vm->mutex);
                return 0;
index 8ef9e4415fccb10ec6537968b9996ca7503102ee..438c05254695586bb95a9e3caa2ea356aa4b941f 100644
@@ -45,12 +45,8 @@ static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
                return NULL;
        }
        job = to_amdgpu_job(sched_job);
-       mutex_lock(&job->job_lock);
        trace_amdgpu_sched_run_job(job);
-       r = amdgpu_ib_schedule(job->adev,
-                              job->num_ibs,
-                              job->ibs,
-                              job->base.owner);
+       r = amdgpu_ib_schedule(job->adev, job->num_ibs, job->ibs, job->owner);
        if (r) {
                DRM_ERROR("Error scheduling IBs (%d)\n", r);
                goto err;
@@ -63,7 +59,6 @@ err:
        if (job->free_job)
                job->free_job(job);
 
-       mutex_unlock(&job->job_lock);
        kfree(job);
        return fence ? &fence->base : NULL;
 }
@@ -89,21 +84,19 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
                        return -ENOMEM;
                job->base.sched = &ring->sched;
                job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity;
+               job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner);
+               if (!job->base.s_fence) {
+                       kfree(job);
+                       return -ENOMEM;
+               }
+               *f = fence_get(&job->base.s_fence->base);
+
                job->adev = adev;
                job->ibs = ibs;
                job->num_ibs = num_ibs;
-               job->base.owner = owner;
-               mutex_init(&job->job_lock);
+               job->owner = owner;
                job->free_job = free_job;
-               mutex_lock(&job->job_lock);
-               r = amd_sched_entity_push_job(&job->base);
-               if (r) {
-                       mutex_unlock(&job->job_lock);
-                       kfree(job);
-                       return r;
-               }
-               *f = fence_get(&job->base.s_fence->base);
-               mutex_unlock(&job->job_lock);
+               amd_sched_entity_push_job(&job->base);
        } else {
                r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
                if (r)
index ccb7c1554f5e12c8f85f6d8454723f07a4603b05..ea30d6ad4c137e60e6f9c1ce4265305d60f85cc9 100644
@@ -276,21 +276,13 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
  *
  * Returns 0 for success, negative error code otherwise.
  */
-int amd_sched_entity_push_job(struct amd_sched_job *sched_job)
+void amd_sched_entity_push_job(struct amd_sched_job *sched_job)
 {
        struct amd_sched_entity *entity = sched_job->s_entity;
-       struct amd_sched_fence *fence = amd_sched_fence_create(
-               entity, sched_job->owner);
-
-       if (!fence)
-               return -ENOMEM;
-
-       sched_job->s_fence = fence;
 
        wait_event(entity->sched->job_scheduled,
                   amd_sched_entity_in(sched_job));
        trace_amd_sched_job(sched_job);
-       return 0;
 }
 
 /**
index 4d05ca6fb057db3607734f91b3325254db49b012..939692b14f4b10ca3434f537f8ee2e214b2bec2e 100644
@@ -79,7 +79,6 @@ struct amd_sched_job {
        struct amd_gpu_scheduler        *sched;
        struct amd_sched_entity         *s_entity;
        struct amd_sched_fence          *s_fence;
-       void                            *owner;
 };
 
 extern const struct fence_ops amd_sched_fence_ops;
@@ -131,7 +130,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
                          uint32_t jobs);
 void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
                           struct amd_sched_entity *entity);
-int amd_sched_entity_push_job(struct amd_sched_job *sched_job);
+void amd_sched_entity_push_job(struct amd_sched_job *sched_job);
 
 struct amd_sched_fence *amd_sched_fence_create(
        struct amd_sched_entity *s_entity, void *owner);