drm/amdgpu: extend lock range for race condition when gpu reset
authorRoger.He <Hongbo.He@amd.com>
Fri, 5 May 2017 05:27:10 +0000 (13:27 +0800)
committerAlex Deucher <alexander.deucher@amd.com>
Wed, 24 May 2017 21:40:31 +0000 (17:40 -0400)
to cover below case:
1. A task gart bind/unbind but not add to adev->gtt_list yet
2. at this time gpu reset, gtt only recover those gtt in adev->gtt_list

Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Roger.He <Hongbo.He@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

index 3bd709600a158dbb231791439f34542565406590..ea8ad69fa65d657b612dfab09d23c0da4e89cf4e 100644 (file)
@@ -554,7 +554,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
 void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
 int amdgpu_gart_init(struct amdgpu_device *adev);
 void amdgpu_gart_fini(struct amdgpu_device *adev);
-void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
+int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
                        int pages);
 int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
                     int pages, struct page **pagelist,
index 902e6015abca377bb10b057dbb0c8479b66be394..a57abc1a25fb5fbe6da96410316cc85342363b44 100644 (file)
@@ -224,8 +224,9 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
  *
  * Unbinds the requested pages from the gart page table and
  * replaces them with the dummy page (all asics).
+ * Returns 0 for success, -EINVAL for failure.
  */
-void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
+int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
                        int pages)
 {
        unsigned t;
@@ -237,7 +238,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
 
        if (!adev->gart.ready) {
                WARN(1, "trying to unbind memory from uninitialized GART !\n");
-               return;
+               return -EINVAL;
        }
 
        t = offset / AMDGPU_GPU_PAGE_SIZE;
@@ -258,6 +259,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
        }
        mb();
        amdgpu_gart_flush_gpu_tlb(adev, 0);
+       return 0;
 }
 
 /**
index 5db0230e45c6db9c2604547346aa715afcb8cdd0..fcf617c237ef1cf56b35511e35f5f2097ab34d92 100644 (file)
@@ -745,6 +745,7 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
                return r;
        }
 
+       spin_lock(&gtt->adev->gtt_list_lock);
        flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
        gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
        r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
@@ -753,12 +754,13 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
        if (r) {
                DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
                          ttm->num_pages, gtt->offset);
-               return r;
+               goto error_gart_bind;
        }
-       spin_lock(&gtt->adev->gtt_list_lock);
+
        list_add_tail(&gtt->list, &gtt->adev->gtt_list);
+error_gart_bind:
        spin_unlock(&gtt->adev->gtt_list_lock);
-       return 0;
+       return r;
 }
 
 int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
@@ -789,6 +791,7 @@ int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
 static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
 {
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
+       int r;
 
        if (gtt->userptr)
                amdgpu_ttm_tt_unpin_userptr(ttm);
@@ -797,14 +800,17 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
                return 0;
 
        /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
-       if (gtt->adev->gart.ready)
-               amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
-
        spin_lock(&gtt->adev->gtt_list_lock);
+       r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
+       if (r) {
+               DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
+                         gtt->ttm.ttm.num_pages, gtt->offset);
+               goto error_unbind;
+       }
        list_del_init(&gtt->list);
+error_unbind:
        spin_unlock(&gtt->adev->gtt_list_lock);
-
-       return 0;
+       return r;
 }
 
 static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)