drm/amdgpu: Make SDMA phase quantum configurable
author Felix Kuehling <Felix.Kuehling@amd.com>
Fri, 15 Jul 2016 22:37:05 +0000 (18:37 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 14 Jul 2017 15:06:36 +0000 (11:06 -0400)
Set a configurable SDMA phase quantum when enabling SDMA context
switching. The default value significantly reduces SDMA latency
in page table updates when user-mode SDMA queues have concurrent
activity, compared to the initial HW setting.
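
For reference, a minimal user-space sketch of the encoding the patch
programs into the PHASE*_QUANTUM registers: the quantum is
value * 2^unit thousand GPU clock cycles, and value is halved (rounding
up) until it fits its register field. The field widths and packed
layout below are illustrative assumptions only; the driver derives the
real ones from the SDMA0_PHASE0_QUANTUM MASK/SHIFT macros.

#include <stdio.h>

/* Illustrative field widths; the driver takes these from the
 * SDMA0_PHASE0_QUANTUM MASK/SHIFT register macros. */
#define PQ_VALUE_MAX 0xff
#define PQ_UNIT_MAX  0xf

/* Encode k_cycles (in 1K GPU clock cycles) as value * 2^unit. */
static unsigned int encode_phase_quantum(unsigned int k_cycles)
{
	unsigned int value = k_cycles, unit = 0;

	while (value > PQ_VALUE_MAX) {
		value = (value + 1) >> 1;	/* halve, round up */
		unit++;
	}
	if (unit > PQ_UNIT_MAX) {
		value = PQ_VALUE_MAX;	/* clamp to largest quantum */
		unit = PQ_UNIT_MAX;
	}
	return value << 4 | unit;	/* assumed packed layout */
}

int main(void)
{
	/* default 32 -> value 32, unit 0: 32K clock cycles */
	printf("32  -> 0x%03x\n", encode_phase_quantum(32));
	/* 300 -> value 150, unit 1: 300K clock cycles */
	printf("300 -> 0x%03x\n", encode_phase_quantum(300));
	return 0;
}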

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Andres Rodriguez <andres.rodriguez@amd.com>
Reviewed-by: Shaoyun Liu <shaoyun.liu@amd.com>
Acked-by: Chunming Zhou <david1.zhou@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/cik_sdma.c
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c290b262d7da86044659c90841282aeef4a772a3..ebd077fdce1b932d1028d4e7739e2a0dd1f9928f 100644
@@ -107,6 +107,7 @@ extern unsigned amdgpu_pcie_gen_cap;
 extern unsigned amdgpu_pcie_lane_cap;
 extern unsigned amdgpu_cg_mask;
 extern unsigned amdgpu_pg_mask;
+extern unsigned amdgpu_sdma_phase_quantum;
 extern char *amdgpu_disable_cu;
 extern char *amdgpu_virtual_display;
 extern unsigned amdgpu_pp_feature_mask;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 3c83fe6c5db846e864ee6a404d1d82af171292d8..b3d7beb6806c21ecbc537fe98901bba6f9b50a99 100644
@@ -108,6 +108,7 @@ unsigned amdgpu_pcie_gen_cap = 0;
 unsigned amdgpu_pcie_lane_cap = 0;
 unsigned amdgpu_cg_mask = 0xffffffff;
 unsigned amdgpu_pg_mask = 0xffffffff;
+unsigned amdgpu_sdma_phase_quantum = 32;
 char *amdgpu_disable_cu = NULL;
 char *amdgpu_virtual_display = NULL;
 unsigned amdgpu_pp_feature_mask = 0xffffffff;
@@ -227,6 +228,9 @@ module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444);
 MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 = disable power gating)");
 module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444);
 
+MODULE_PARM_DESC(sdma_phase_quantum, "SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change (default 32))");
+module_param_named(sdma_phase_quantum, amdgpu_sdma_phase_quantum, uint, 0444);
+
 MODULE_PARM_DESC(disable_cu, "Disable CUs (se.sh.cu,...)");
 module_param_named(disable_cu, amdgpu_disable_cu, charp, 0444);
 
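With the parameter wired up, the quantum can be overridden at module
load time, for example with amdgpu.sdma_phase_quantum=64 on the kernel
command line or the equivalent modprobe option; 0 skips programming the
registers and leaves the hardware setting unchanged.
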
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 4a9cea03f54f94ee5416b275ad7b68103bb409f8..f508f4d01e4a9000f633c85e290964098e8c1b86 100644
@@ -351,14 +351,44 @@ static void cik_sdma_rlc_stop(struct amdgpu_device *adev)
  */
 static void cik_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
 {
-       u32 f32_cntl;
+       u32 f32_cntl, phase_quantum = 0;
        int i;
 
+       if (amdgpu_sdma_phase_quantum) {
+               unsigned value = amdgpu_sdma_phase_quantum;
+               unsigned unit = 0;
+
+               while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+                               SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
+                       value = (value + 1) >> 1;
+                       unit++;
+               }
+               if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+                           SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
+                       value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+                                SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
+                       unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+                               SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
+                       WARN_ONCE(1,
+                       "clamping sdma_phase_quantum to %uK clock cycles\n",
+                                 value << unit);
+               }
+               phase_quantum =
+                       value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
+                       unit  << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
+       }
+
        for (i = 0; i < adev->sdma.num_instances; i++) {
                f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
                if (enable) {
                        f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
                                        AUTO_CTXSW_ENABLE, 1);
+                       if (amdgpu_sdma_phase_quantum) {
+                               WREG32(mmSDMA0_PHASE0_QUANTUM + sdma_offsets[i],
+                                      phase_quantum);
+                               WREG32(mmSDMA0_PHASE1_QUANTUM + sdma_offsets[i],
+                                      phase_quantum);
+                       }
                } else {
                        f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
                                        AUTO_CTXSW_ENABLE, 0);
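
Worked example of the loop above: the default of 32 fits directly
(value = 32, unit = 0, i.e. 32K cycles). Assuming an 8-bit VALUE field,
a request of 300 would be rounded to value = 150, unit = 1 (again 300K
cycles); the WARN_ONCE clamp only fires once unit itself would overflow
its field.
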
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 67a29fb3d3b3924cf7437b9a1ff2c8881b75d268..b1de44f2282490189f0c4024d3d75711512547fe 100644
@@ -551,9 +551,33 @@ static void sdma_v3_0_rlc_stop(struct amdgpu_device *adev)
  */
 static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
 {
-       u32 f32_cntl;
+       u32 f32_cntl, phase_quantum = 0;
        int i;
 
+       if (amdgpu_sdma_phase_quantum) {
+               unsigned value = amdgpu_sdma_phase_quantum;
+               unsigned unit = 0;
+
+               while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+                               SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
+                       value = (value + 1) >> 1;
+                       unit++;
+               }
+               if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+                           SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
+                       value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+                                SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
+                       unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+                               SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
+                       WARN_ONCE(1,
+                       "clamping sdma_phase_quantum to %uK clock cycles\n",
+                                 value << unit);
+               }
+               phase_quantum =
+                       value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
+                       unit  << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
+       }
+
        for (i = 0; i < adev->sdma.num_instances; i++) {
                f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
                if (enable) {
@@ -561,6 +585,12 @@ static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
                                        AUTO_CTXSW_ENABLE, 1);
                        f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
                                        ATC_L1_ENABLE, 1);
+                       if (amdgpu_sdma_phase_quantum) {
+                               WREG32(mmSDMA0_PHASE0_QUANTUM + sdma_offsets[i],
+                                      phase_quantum);
+                               WREG32(mmSDMA0_PHASE1_QUANTUM + sdma_offsets[i],
+                                      phase_quantum);
+                       }
                } else {
                        f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
                                        AUTO_CTXSW_ENABLE, 0);
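
The sdma_v3_0 changes mirror the CIK ones; the quantum writes slot in
after the existing AUTO_CTXSW_ENABLE and ATC_L1_ENABLE setup.
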
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 4a65697ccc942ec1b2b7393acbcf360274bb8e13..591f3e7fb5089bbe0e650dad310c6b26f7f75f3a 100644
@@ -493,13 +493,45 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev)
  */
 static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
 {
-       u32 f32_cntl;
+       u32 f32_cntl, phase_quantum = 0;
        int i;
 
+       if (amdgpu_sdma_phase_quantum) {
+               unsigned value = amdgpu_sdma_phase_quantum;
+               unsigned unit = 0;
+
+               while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+                               SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
+                       value = (value + 1) >> 1;
+                       unit++;
+               }
+               if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+                           SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
+                       value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+                                SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
+                       unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+                               SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
+                       WARN_ONCE(1,
+                       "clamping sdma_phase_quantum to %uK clock cycles\n",
+                                 value << unit);
+               }
+               phase_quantum =
+                       value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
+                       unit  << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
+       }
+
        for (i = 0; i < adev->sdma.num_instances; i++) {
                f32_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL));
                f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
                                AUTO_CTXSW_ENABLE, enable ? 1 : 0);
+               if (enable && amdgpu_sdma_phase_quantum) {
+                       WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE0_QUANTUM),
+                              phase_quantum);
+                       WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE1_QUANTUM),
+                              phase_quantum);
+                       WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE2_QUANTUM),
+                              phase_quantum);
+               }
                WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL), f32_cntl);
        }
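
On SDMA v4.0 three phase quantum registers (PHASE0/1/2) are programmed
with the same encoded value, and since AUTO_CTXSW_ENABLE is set from
the enable flag directly rather than in an if/else, the quantum writes
are guarded by enable && amdgpu_sdma_phase_quantum.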