drm/amdgpu: fix fundamental suspend/resume issue
authorChristian König <christian.koenig@amd.com>
Wed, 10 May 2017 18:06:58 +0000 (20:06 +0200)
committerAlex Deucher <alexander.deucher@amd.com>
Wed, 24 May 2017 21:39:30 +0000 (17:39 -0400)
Reinitializing the VM manager during suspend/resume is a very very bad
idea since all the VMs are still active and kicking.

This can lead to random VM faults after resume when new processes
become the same client ID assigned.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

index 07ff3b1514f129edc23875c1f42053f7ef1aaa72..1bf36c3542c19115ae52ebcfc666720acc8890e1 100644 (file)
@@ -672,6 +672,7 @@ void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
        struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
        struct amdgpu_vm_id *id = &id_mgr->ids[vmid];
 
+       atomic64_set(&id->owner, 0);
        id->gds_base = 0;
        id->gds_size = 0;
        id->gws_base = 0;
@@ -680,6 +681,26 @@ void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
        id->oa_size = 0;
 }
 
+/**
+ * amdgpu_vm_reset_all_id - reset VMID to zero
+ *
+ * @adev: amdgpu device structure
+ *
+ * Reset VMID to force flush on next use
+ */
+void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev)
+{
+       unsigned i, j;
+
+       for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
+               struct amdgpu_vm_id_manager *id_mgr =
+                       &adev->vm_manager.id_mgr[i];
+
+               for (j = 1; j < id_mgr->num_ids; ++j)
+                       amdgpu_vm_reset_id(adev, i, j);
+       }
+}
+
 /**
  * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
  *
@@ -2270,7 +2291,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
        for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
                adev->vm_manager.seqno[i] = 0;
 
-
        atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
        atomic64_set(&adev->vm_manager.client_counter, 0);
        spin_lock_init(&adev->vm_manager.prt_lock);
index d97e28b4bdc41cbb52e70647685b58db4886514a..e1d951ece4333672512f96ae914ee740f0a5922b 100644 (file)
@@ -204,6 +204,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
 void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
                        unsigned vmid);
+void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev);
 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
                                 struct amdgpu_vm *vm);
 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
index a572979f186cdaeba52701fb8183850e16581143..d860939152df234517e7806135015a23aa316df9 100644 (file)
@@ -950,10 +950,6 @@ static int gmc_v6_0_suspend(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       if (adev->vm_manager.enabled) {
-               gmc_v6_0_vm_fini(adev);
-               adev->vm_manager.enabled = false;
-       }
        gmc_v6_0_hw_fini(adev);
 
        return 0;
@@ -968,16 +964,9 @@ static int gmc_v6_0_resume(void *handle)
        if (r)
                return r;
 
-       if (!adev->vm_manager.enabled) {
-               r = gmc_v6_0_vm_init(adev);
-               if (r) {
-                       dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
-                       return r;
-               }
-               adev->vm_manager.enabled = true;
-       }
+       amdgpu_vm_reset_all_ids(adev);
 
-       return r;
+       return 0;
 }
 
 static bool gmc_v6_0_is_idle(void *handle)
index a9083a16a250920c64605bc447ca19c8b3014f3a..2750e5c2381301ceebddb8f614f7e5868de23d2c 100644 (file)
@@ -1117,10 +1117,6 @@ static int gmc_v7_0_suspend(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       if (adev->vm_manager.enabled) {
-               gmc_v7_0_vm_fini(adev);
-               adev->vm_manager.enabled = false;
-       }
        gmc_v7_0_hw_fini(adev);
 
        return 0;
@@ -1135,16 +1131,9 @@ static int gmc_v7_0_resume(void *handle)
        if (r)
                return r;
 
-       if (!adev->vm_manager.enabled) {
-               r = gmc_v7_0_vm_init(adev);
-               if (r) {
-                       dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
-                       return r;
-               }
-               adev->vm_manager.enabled = true;
-       }
+       amdgpu_vm_reset_all_ids(adev);
 
-       return r;
+       return 0;
 }
 
 static bool gmc_v7_0_is_idle(void *handle)
index 4ac99784160a3ed9fbb59c5bf53a8d1b0a6ec1b4..f56b4089ee9f3fe7581cd6c541d434867c613cbb 100644 (file)
@@ -1209,10 +1209,6 @@ static int gmc_v8_0_suspend(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       if (adev->vm_manager.enabled) {
-               gmc_v8_0_vm_fini(adev);
-               adev->vm_manager.enabled = false;
-       }
        gmc_v8_0_hw_fini(adev);
 
        return 0;
@@ -1227,16 +1223,9 @@ static int gmc_v8_0_resume(void *handle)
        if (r)
                return r;
 
-       if (!adev->vm_manager.enabled) {
-               r = gmc_v8_0_vm_init(adev);
-               if (r) {
-                       dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
-                       return r;
-               }
-               adev->vm_manager.enabled = true;
-       }
+       amdgpu_vm_reset_all_ids(adev);
 
-       return r;
+       return 0;
 }
 
 static bool gmc_v8_0_is_idle(void *handle)
index dc1e1c1d6b2430cb9957047a454cde87bf439561..f936332a069d2d1c9329a3d52a17f9e44776f659 100644 (file)
@@ -791,10 +791,6 @@ static int gmc_v9_0_suspend(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       if (adev->vm_manager.enabled) {
-               gmc_v9_0_vm_fini(adev);
-               adev->vm_manager.enabled = false;
-       }
        gmc_v9_0_hw_fini(adev);
 
        return 0;
@@ -809,17 +805,9 @@ static int gmc_v9_0_resume(void *handle)
        if (r)
                return r;
 
-       if (!adev->vm_manager.enabled) {
-               r = gmc_v9_0_vm_init(adev);
-               if (r) {
-                       dev_err(adev->dev,
-                               "vm manager initialization failed (%d).\n", r);
-                       return r;
-               }
-               adev->vm_manager.enabled = true;
-       }
+       amdgpu_vm_reset_all_ids(adev);
 
-       return r;
+       return 0;
 }
 
 static bool gmc_v9_0_is_idle(void *handle)