drm/amdgpu: fix and cleanup user fence handling v2
authorChristian König <christian.koenig@amd.com>
Fri, 6 May 2016 20:14:00 +0000 (22:14 +0200)
committerAlex Deucher <alexander.deucher@amd.com>
Wed, 11 May 2016 17:30:32 +0000 (13:30 -0400)
We leaked the BO in the error pass, additional to that we only have
one user fence for all IBs in a job.

v2: remove white space changes

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index d4c1eb7816f01c80b3367b575a9796a104c6facd..2a009c398dcb539b91507e8bad3297c35d8a3eba 100644 (file)
@@ -368,13 +368,6 @@ struct amdgpu_fence_driver {
 #define AMDGPU_FENCE_FLAG_64BIT         (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT           (1 << 1)
 
-struct amdgpu_user_fence {
-       /* write-back bo */
-       struct amdgpu_bo        *bo;
-       /* write-back address offset to bo start */
-       uint32_t                offset;
-};
-
 int amdgpu_fence_driver_init(struct amdgpu_device *adev);
 void amdgpu_fence_driver_fini(struct amdgpu_device *adev);
 void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev);
@@ -741,10 +734,7 @@ struct amdgpu_ib {
        uint32_t                        length_dw;
        uint64_t                        gpu_addr;
        uint32_t                        *ptr;
-       struct amdgpu_user_fence        *user;
        uint32_t                        flags;
-       /* resulting sequence number */
-       uint64_t                        sequence;
 };
 
 enum amdgpu_ring_type {
@@ -1219,7 +1209,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring);
 struct amdgpu_cs_chunk {
        uint32_t                chunk_id;
        uint32_t                length_dw;
-       uint32_t                *kdata;
+       void                    *kdata;
 };
 
 struct amdgpu_cs_parser {
@@ -1263,7 +1253,12 @@ struct amdgpu_job {
        uint32_t                gds_base, gds_size;
        uint32_t                gws_base, gws_size;
        uint32_t                oa_base, oa_size;
-       struct amdgpu_user_fence uf;
+
+       /* user fence handling */
+       struct amdgpu_bo        *uf_bo;
+       uint32_t                uf_offset;
+       uint64_t                uf_sequence;
+
 };
 #define to_amdgpu_job(sched_job)               \
                container_of((sched_job), struct amdgpu_job, base)
index 9ab2f0886a148ee415f6480fc66563c45b39dd9c..2bbeeb07c187e0977ed88feb405106aad178c939 100644 (file)
@@ -87,33 +87,30 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
 }
 
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
-                                     struct amdgpu_user_fence *uf,
-                                     struct drm_amdgpu_cs_chunk_fence *fence_data)
+                                     struct drm_amdgpu_cs_chunk_fence *data,
+                                     uint32_t *offset)
 {
        struct drm_gem_object *gobj;
-       uint32_t handle;
 
-       handle = fence_data->handle;
        gobj = drm_gem_object_lookup(p->adev->ddev, p->filp,
-                                    fence_data->handle);
+                                    data->handle);
        if (gobj == NULL)
                return -EINVAL;
 
-       uf->bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
-       uf->offset = fence_data->offset;
-
-       if (amdgpu_ttm_tt_get_usermm(uf->bo->tbo.ttm)) {
-               drm_gem_object_unreference_unlocked(gobj);
-               return -EINVAL;
-       }
-
-       p->uf_entry.robj = amdgpu_bo_ref(uf->bo);
+       p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
        p->uf_entry.priority = 0;
        p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
        p->uf_entry.tv.shared = true;
        p->uf_entry.user_pages = NULL;
+       *offset = data->offset;
 
        drm_gem_object_unreference_unlocked(gobj);
+
+       if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
+               amdgpu_bo_unref(&p->uf_entry.robj);
+               return -EINVAL;
+       }
+
        return 0;
 }
 
@@ -124,8 +121,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
        union drm_amdgpu_cs *cs = data;
        uint64_t *chunk_array_user;
        uint64_t *chunk_array;
-       struct amdgpu_user_fence uf = {};
        unsigned size, num_ibs = 0;
+       uint32_t uf_offset = 0;
        int i;
        int ret;
 
@@ -200,7 +197,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
                                goto free_partial_kdata;
                        }
 
-                       ret = amdgpu_cs_user_fence_chunk(p, &uf, (void *)p->chunks[i].kdata);
+                       ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
+                                                        &uf_offset);
                        if (ret)
                                goto free_partial_kdata;
 
@@ -219,7 +217,10 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
        if (ret)
                goto free_all_kdata;
 
-       p->job->uf = uf;
+       if (p->uf_entry.robj) {
+               p->job->uf_bo = amdgpu_bo_ref(p->uf_entry.robj);
+               p->job->uf_offset = uf_offset;
+       }
 
        kfree(chunk_array);
        return 0;
@@ -377,7 +378,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
        INIT_LIST_HEAD(&duplicates);
        amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
 
-       if (p->job->uf.bo)
+       if (p->uf_entry.robj)
                list_add(&p->uf_entry.tv.head, &p->validated);
 
        if (need_mmap_lock)
@@ -760,17 +761,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
                j++;
        }
 
-       /* wrap the last IB with user fence */
-       if (parser->job->uf.bo) {
-               struct amdgpu_ib *ib = &parser->job->ibs[parser->job->num_ibs - 1];
-
-               /* UVD & VCE fw doesn't support user fences */
-               if (parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
-                   parser->job->ring->type == AMDGPU_RING_TYPE_VCE)
-                       return -EINVAL;
-
-               ib->user = &parser->job->uf;
-       }
+       /* UVD & VCE fw doesn't support user fences */
+       if (parser->job->uf_bo && (
+           parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
+           parser->job->ring->type == AMDGPU_RING_TYPE_VCE))
+               return -EINVAL;
 
        return 0;
 }
@@ -856,7 +851,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
        job->ctx = entity->fence_context;
        p->fence = fence_get(fence);
        cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, fence);
-       job->ibs[job->num_ibs - 1].sequence = cs->out.handle;
+       job->uf_sequence = cs->out.handle;
 
        trace_amdgpu_cs_ioctl(job);
        amd_sched_entity_push_job(&job->base);
index 201aceb01d8a8adfbdbcf495262493ab95ced95b..34e35423b78e81d3d91102e2acacf889fa9792c0 100644 (file)
@@ -203,10 +203,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
        }
 
        /* wrap the last IB with fence */
-       if (ib->user) {
-               uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo);
-               addr += ib->user->offset;
-               amdgpu_ring_emit_fence(ring, addr, ib->sequence,
+       if (job && job->uf_bo) {
+               uint64_t addr = amdgpu_bo_gpu_offset(job->uf_bo);
+
+               addr += job->uf_offset;
+               amdgpu_ring_emit_fence(ring, addr, job->uf_sequence,
                                       AMDGPU_FENCE_FLAG_64BIT);
        }
 
index 8ea68d0cfad66dcac037cb9f76c6b704a2a06a67..f0dafa514fe48f785b5dbf53cc3367e214732414 100644 (file)
@@ -97,7 +97,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
                amdgpu_sa_bo_free(job->adev, &job->ibs[i].sa_bo, f);
        fence_put(job->fence);
 
-       amdgpu_bo_unref(&job->uf.bo);
+       amdgpu_bo_unref(&job->uf_bo);
        amdgpu_sync_free(&job->sync);
 
        if (!job->base.use_sched)