drm/amdgpu: implement burst NOP for SDMA
author Jammy Zhou <Jammy.Zhou@amd.com>
Tue, 1 Sep 2015 05:13:54 +0000 (13:13 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 2 Sep 2015 16:35:39 +0000 (12:35 -0400)
Customize the insert_nop func for SDMA rings, and use burst NOP for
ring/IB submissions in other places as well

Signed-off-by: Jammy Zhou <Jammy.Zhou@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/cik_sdma.c
drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c

index f0661b269a63ef7ed33dc87bf2f3392aa5bc9859..9ea9de457da373f702b401633ca4f7113748cbf6 100644 (file)
@@ -188,6 +188,19 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
        WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc);
 }
 
+static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+       struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring);
+       int i;
+       /* Always writes exactly count dwords; burst mode only alters dword 0. */
+       for (i = 0; i < count; i++)
+               if (sdma && sdma->burst_nop && (i == 0)) /* header dword */
+                       amdgpu_ring_write(ring, ring->nop |
+                                         SDMA_NOP_COUNT(count - 1));
+               else /* i > 0, or burst NOP not available */
+                       amdgpu_ring_write(ring, ring->nop);
+}
+
 /**
  * cik_sdma_ring_emit_ib - Schedule an IB on the DMA engine
  *
@@ -213,8 +226,8 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
        amdgpu_ring_write(ring, next_rptr);
 
        /* IB packet must end on a 8 DW boundary */
-       while ((ring->wptr & 7) != 4)
-               amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
+       cik_sdma_ring_insert_nop(ring, (12 - (ring->wptr & 7)) % 8);
+
        amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
        amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
@@ -817,8 +830,19 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib,
  */
 static void cik_sdma_vm_pad_ib(struct amdgpu_ib *ib)
 {
-       while (ib->length_dw & 0x7)
-               ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
+       struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring);
+       u32 pad_count;
+       int i;
+       /* Dwords needed to make length_dw a multiple of 8 (0 if aligned). */
+       pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+       for (i = 0; i < pad_count; i++)
+               if (sdma && sdma->burst_nop && (i == 0)) /* first pad dword */
+                       ib->ptr[ib->length_dw++] =
+                                       SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0) |
+                                       SDMA_NOP_COUNT(pad_count - 1);
+               else /* i > 0, or burst NOP not available */
+                       ib->ptr[ib->length_dw++] =
+                                       SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
 }
 
 /**
@@ -1305,7 +1329,7 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
        .test_ring = cik_sdma_ring_test_ring,
        .test_ib = cik_sdma_ring_test_ib,
        .is_lockup = cik_sdma_ring_is_lockup,
-       .insert_nop = amdgpu_ring_insert_nop,
+       .insert_nop = cik_sdma_ring_insert_nop,
 };
 
 static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
index 1b913bce2599d790ca76e9a3a8a25d7abfcf552b..14e87234171aeacb9cbbbedafa358e22825b5526 100644 (file)
@@ -220,6 +220,19 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
        WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2);
 }
 
+static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+       struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring);
+       int i;
+       /* Always writes exactly count dwords; burst mode only alters dword 0. */
+       for (i = 0; i < count; i++)
+               if (sdma && sdma->burst_nop && (i == 0)) /* header dword */
+                       amdgpu_ring_write(ring, ring->nop |
+                               SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+               else /* i > 0, or burst NOP not available */
+                       amdgpu_ring_write(ring, ring->nop);
+}
+
 /**
  * sdma_v2_4_ring_emit_ib - Schedule an IB on the DMA engine
  *
@@ -247,8 +260,8 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
        amdgpu_ring_write(ring, next_rptr);
 
        /* IB packet must end on a 8 DW boundary */
-       while ((ring->wptr & 7) != 2)
-               amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_NOP));
+       sdma_v2_4_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);
+
        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
                          SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
        /* base must be 32 byte aligned */
@@ -881,8 +894,19 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib,
  */
 static void sdma_v2_4_vm_pad_ib(struct amdgpu_ib *ib)
 {
-       while (ib->length_dw & 0x7)
-               ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+       struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring);
+       u32 pad_count;
+       int i;
+       /* Dwords needed to make length_dw a multiple of 8 (0 if aligned). */
+       pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+       for (i = 0; i < pad_count; i++)
+               if (sdma && sdma->burst_nop && (i == 0)) /* first pad dword */
+                       ib->ptr[ib->length_dw++] =
+                               SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+                               SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+               else /* i > 0, or burst NOP not available */
+                       ib->ptr[ib->length_dw++] =
+                               SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
 }
 
 /**
@@ -1316,7 +1340,7 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
        .test_ring = sdma_v2_4_ring_test_ring,
        .test_ib = sdma_v2_4_ring_test_ib,
        .is_lockup = sdma_v2_4_ring_is_lockup,
-       .insert_nop = amdgpu_ring_insert_nop,
+       .insert_nop = sdma_v2_4_ring_insert_nop,
 };
 
 static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
index a7550a8f5d84de2f23f2db72f1201f8d7cef2420..9bfe92df15f712b86f45e674a8c995f10812ed0f 100644 (file)
@@ -306,6 +306,19 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
        }
 }
 
+static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+       struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring);
+       int i;
+       /* Always writes exactly count dwords; burst mode only alters dword 0. */
+       for (i = 0; i < count; i++)
+               if (sdma && sdma->burst_nop && (i == 0)) /* header dword */
+                       amdgpu_ring_write(ring, ring->nop |
+                               SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+               else /* i > 0, or burst NOP not available */
+                       amdgpu_ring_write(ring, ring->nop);
+}
+
 /**
  * sdma_v3_0_ring_emit_ib - Schedule an IB on the DMA engine
  *
@@ -332,8 +345,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
        amdgpu_ring_write(ring, next_rptr);
 
        /* IB packet must end on a 8 DW boundary */
-       while ((ring->wptr & 7) != 2)
-               amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_NOP));
+       sdma_v3_0_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);
 
        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
                          SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
@@ -1001,8 +1013,19 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib,
  */
 static void sdma_v3_0_vm_pad_ib(struct amdgpu_ib *ib)
 {
-       while (ib->length_dw & 0x7)
-               ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+       struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring);
+       u32 pad_count;
+       int i;
+       /* Dwords needed to make length_dw a multiple of 8 (0 if aligned). */
+       pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+       for (i = 0; i < pad_count; i++)
+               if (sdma && sdma->burst_nop && (i == 0)) /* first pad dword */
+                       ib->ptr[ib->length_dw++] =
+                               SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+                               SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+               else /* i > 0, or burst NOP not available */
+                       ib->ptr[ib->length_dw++] =
+                               SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
 }
 
 /**
@@ -1440,7 +1463,7 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
        .test_ring = sdma_v3_0_ring_test_ring,
        .test_ib = sdma_v3_0_ring_test_ib,
        .is_lockup = sdma_v3_0_ring_is_lockup,
-       .insert_nop = amdgpu_ring_insert_nop,
+       .insert_nop = sdma_v3_0_ring_insert_nop,
 };
 
 static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)