drm/amdgpu: need some changes on vega10 mailbox
authorMonk Liu <Monk.Liu@amd.com>
Fri, 21 Apr 2017 11:35:11 +0000 (19:35 +0800)
committerAlex Deucher <alexander.deucher@amd.com>
Wed, 24 May 2017 21:40:18 +0000 (17:40 -0400)
If an SR-IOV GPU reset is triggered by a job timeout, it runs in a
global work-queue, which is very slow. It is better not to call msleep
there; otherwise it takes a long time to get the CPU back.

So make the following changes:

1: Change msleep(1) to mdelay(5).
2: Ignore the ack failure from the PF after a timeout, because a VF FLR
   clears the ack; sometimes the VF FLR completes before poll_ack
   begins, so the missing ack can be ignored.

TODO:
Put job_timedout (and the following gpu reset) in a driver thread,
instead of the global work_struct.

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c

index 712f36ef2efd51de58fec7f645a7b851ad6bf76f..e967a7b17afe668e183c89dd5ac78cb855b21a0c 100644 (file)
@@ -124,8 +124,8 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
                        r = -ETIME;
                        break;
                }
-               msleep(1);
-               timeout -= 1;
+               mdelay(5);
+               timeout -= 5;
 
                reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
                                                     mmBIF_BX_PF0_MAILBOX_CONTROL));
@@ -141,12 +141,12 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
        r = xgpu_ai_mailbox_rcv_msg(adev, event);
        while (r) {
                if (timeout <= 0) {
-                       pr_err("Doesn't get ack from pf.\n");
+                       pr_err("Doesn't get msg:%d from pf.\n", event);
                        r = -ETIME;
                        break;
                }
-               msleep(1);
-               timeout -= 1;
+               mdelay(5);
+               timeout -= 5;
 
                r = xgpu_ai_mailbox_rcv_msg(adev, event);
        }
@@ -165,7 +165,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
        /* start to poll ack */
        r = xgpu_ai_poll_ack(adev);
        if (r)
-               return r;
+               pr_err("Doesn't get ack from pf, continue\n");
 
        xgpu_ai_mailbox_set_valid(adev, false);
 
@@ -174,8 +174,10 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
                req == IDH_REQ_GPU_FINI_ACCESS ||
                req == IDH_REQ_GPU_RESET_ACCESS) {
                r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
-               if (r)
+               if (r) {
+                       pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
                        return r;
+               }
        }
 
        return 0;
@@ -211,7 +213,7 @@ static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device *adev,
                                        struct amdgpu_irq_src *source,
                                        struct amdgpu_iv_entry *entry)
 {
-       DRM_DEBUG("get ack intr and do nothing.\n");
+       printk("get ack intr and do nothing.\n");
        return 0;
 }
 
index 7bdc51b02326118938884d6d33ef036456822563..f0d64f13abbc0b2816967a2ab0301192bcd09f09 100644 (file)
@@ -398,8 +398,8 @@ static int xgpu_vi_poll_ack(struct amdgpu_device *adev)
                        r = -ETIME;
                        break;
                }
-               msleep(1);
-               timeout -= 1;
+               mdelay(5);
+               timeout -= 5;
 
                reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
        }
@@ -418,8 +418,8 @@ static int xgpu_vi_poll_msg(struct amdgpu_device *adev, enum idh_event event)
                        r = -ETIME;
                        break;
                }
-               msleep(1);
-               timeout -= 1;
+               mdelay(5);
+               timeout -= 5;
 
                r = xgpu_vi_mailbox_rcv_msg(adev, event);
        }
@@ -447,7 +447,7 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev,
                request == IDH_REQ_GPU_RESET_ACCESS) {
                r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
                if (r)
-                       return r;
+                       pr_err("Doesn't get ack from pf, continue\n");
        }
 
        return 0;