From 738f64ccc237f80b7673a6cc43051916eb90dc27 Mon Sep 17 00:00:00 2001 From: "Roger.He" Date: Fri, 5 May 2017 13:27:10 +0800 Subject: [PATCH] drm/amdgpu: extend lock range for race condition when gpu reset MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit to cover below case: 1. A task gart bind/unbind but not add to adev->gtt_list yet 2. at this time gpu reset, gtt only recover those gtt in adev->gtt_list Reviewed-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Roger.He Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 6 ++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 22 ++++++++++++++-------- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3bd709600a15..ea8ad69fa65d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -554,7 +554,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev); void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev); int amdgpu_gart_init(struct amdgpu_device *adev); void amdgpu_gart_fini(struct amdgpu_device *adev); -void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, +int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, int pages); int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, int pages, struct page **pagelist, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 902e6015abca..a57abc1a25fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -224,8 +224,9 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev) * * Unbinds the requested pages from the gart page table and * replaces them with the dummy page (all asics). + * Returns 0 for success, -EINVAL for failure. 
*/ -void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, +int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, int pages) { unsigned t; @@ -237,7 +238,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, if (!adev->gart.ready) { WARN(1, "trying to unbind memory from uninitialized GART !\n"); - return; + return -EINVAL; } t = offset / AMDGPU_GPU_PAGE_SIZE; @@ -258,6 +259,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, } mb(); amdgpu_gart_flush_gpu_tlb(adev, 0); + return 0; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 5db0230e45c6..fcf617c237ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -745,6 +745,7 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) return r; } + spin_lock(&gtt->adev->gtt_list_lock); flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages, @@ -753,12 +754,13 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) if (r) { DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", ttm->num_pages, gtt->offset); - return r; + goto error_gart_bind; } - spin_lock(&gtt->adev->gtt_list_lock); + list_add_tail(&gtt->list, &gtt->adev->gtt_list); +error_gart_bind: spin_unlock(&gtt->adev->gtt_list_lock); - return 0; + return r; } int amdgpu_ttm_recover_gart(struct amdgpu_device *adev) @@ -789,6 +791,7 @@ int amdgpu_ttm_recover_gart(struct amdgpu_device *adev) static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; + int r; if (gtt->userptr) amdgpu_ttm_tt_unpin_userptr(ttm); @@ -797,14 +800,17 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) return 0; /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */ - if (gtt->adev->gart.ready) - 
amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages); - spin_lock(&gtt->adev->gtt_list_lock); + r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages); + if (r) { + DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n", + gtt->ttm.ttm.num_pages, gtt->offset); + goto error_unbind; + } list_del_init(&gtt->list); +error_unbind: spin_unlock(&gtt->adev->gtt_list_lock); - - return 0; + return r; } static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm) -- 2.20.1