drm/amdgpu: sync ce and me with SWITCH_BUFFER(2)
author monk.liu <monk.liu@amd.com>
Wed, 23 Sep 2015 05:49:58 +0000 (13:49 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 23 Sep 2015 21:23:45 +0000 (17:23 -0400)
We used to use wait_reg_mem to make CE wait until DE finished the page
update, but from Tonga onwards CE doesn't support the wait_reg_mem packet,
so this logic no longer works.

So here is another approach to do the same thing:
Inserting two SWITCH_BUFFER packets at both the front and the end of
vm_flush guarantees that CE does not go on to process IB_const before
vm_flush is done.

Inserting two SWITCH_BUFFER packets also works on CI, so remove the legacy
method of syncing CE and ME.

v2:
Insert double SWITCH_BUFFER at front of vm flush as well.

Signed-off-by: monk.liu <monk.liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

index 57b427f958da1b5d77f35cb9724321fd3ab6a3f5..6647fb26ef25ce21dba9bffb87cb31e5abe73d80 100644 (file)
@@ -1202,8 +1202,6 @@ struct amdgpu_gfx {
        struct amdgpu_irq_src           priv_inst_irq;
        /* gfx status */
        uint32_t gfx_current_status;
-       /* sync signal for const engine */
-       unsigned ce_sync_offs;
        /* ce ram size*/
        unsigned ce_ram_size;
 };
index 392ec10cc4759c38e9d16a2083f0dceb15ac6d65..e992bf2ff66ce23e8c1d5f81b6b093713c1fac90 100644 (file)
@@ -3610,41 +3610,6 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
        return 0;
 }
 
-static void gfx_v7_0_ce_sync_me(struct amdgpu_ring *ring)
-{
-       struct amdgpu_device *adev = ring->adev;
-       u64 gpu_addr = adev->wb.gpu_addr + adev->gfx.ce_sync_offs * 4;
-
-       /* instruct DE to set a magic number */
-       amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-       amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-                                                        WRITE_DATA_DST_SEL(5)));
-       amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
-       amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
-       amdgpu_ring_write(ring, 1);
-
-       /* let CE wait till condition satisfied */
-       amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
-       amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
-                                                        WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
-                                                        WAIT_REG_MEM_FUNCTION(3) |  /* == */
-                                                        WAIT_REG_MEM_ENGINE(2)));   /* ce */
-       amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
-       amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
-       amdgpu_ring_write(ring, 1);
-       amdgpu_ring_write(ring, 0xffffffff);
-       amdgpu_ring_write(ring, 4); /* poll interval */
-
-       /* instruct CE to reset wb of ce_sync to zero */
-       amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-       amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
-                                                        WRITE_DATA_DST_SEL(5) |
-                                                        WR_CONFIRM));
-       amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
-       amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
-       amdgpu_ring_write(ring, 0);
-}
-
 /*
  * vm
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -3663,6 +3628,13 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                        unsigned vm_id, uint64_t pd_addr)
 {
        int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
+       if (usepfp) {
+               /* synce CE with ME to prevent CE fetch CEIB before context switch done */
+               amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+               amdgpu_ring_write(ring, 0);
+               amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+               amdgpu_ring_write(ring, 0);
+       }
 
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
@@ -3703,7 +3675,10 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                amdgpu_ring_write(ring, 0x0);
 
                /* synce CE with ME to prevent CE fetch CEIB before context switch done */
-               gfx_v7_0_ce_sync_me(ring);
+               amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+               amdgpu_ring_write(ring, 0);
+               amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+               amdgpu_ring_write(ring, 0);
        }
 }
 
@@ -4805,12 +4780,6 @@ static int gfx_v7_0_sw_init(void *handle)
                return r;
        }
 
-       r = amdgpu_wb_get(adev, &adev->gfx.ce_sync_offs);
-       if (r) {
-               DRM_ERROR("(%d) gfx.ce_sync_offs wb alloc failed\n", r);
-               return r;
-       }
-
        for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
                ring = &adev->gfx.gfx_ring[i];
                ring->ring_obj = NULL;
@@ -4889,8 +4858,6 @@ static int gfx_v7_0_sw_fini(void *handle)
        for (i = 0; i < adev->gfx.num_compute_rings; i++)
                amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
-       amdgpu_wb_free(adev, adev->gfx.ce_sync_offs);
-
        gfx_v7_0_cp_compute_fini(adev);
        gfx_v7_0_rlc_fini(adev);
        gfx_v7_0_mec_fini(adev);
index 78e5900d71cdbf6ab6e9282071c59b28303bb16a..cb4f68f53f248ab58cdb00100dc0da5d6f81da9d 100644 (file)
@@ -940,12 +940,6 @@ static int gfx_v8_0_sw_init(void *handle)
                return r;
        }
 
-       r = amdgpu_wb_get(adev, &adev->gfx.ce_sync_offs);
-       if (r) {
-               DRM_ERROR("(%d) gfx.ce_sync_offs wb alloc failed\n", r);
-               return r;
-       }
-
        /* set up the gfx ring */
        for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
                ring = &adev->gfx.gfx_ring[i];
@@ -1033,8 +1027,6 @@ static int gfx_v8_0_sw_fini(void *handle)
        for (i = 0; i < adev->gfx.num_compute_rings; i++)
                amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
-       amdgpu_wb_free(adev, adev->gfx.ce_sync_offs);
-
        gfx_v8_0_mec_fini(adev);
 
        return 0;
@@ -4006,41 +3998,6 @@ static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring,
        return true;
 }
 
-static void gfx_v8_0_ce_sync_me(struct amdgpu_ring *ring)
-{
-       struct amdgpu_device *adev = ring->adev;
-       u64 gpu_addr = adev->wb.gpu_addr + adev->gfx.ce_sync_offs * 4;
-
-       /* instruct DE to set a magic number */
-       amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-       amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-                                                        WRITE_DATA_DST_SEL(5)));
-       amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
-       amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
-       amdgpu_ring_write(ring, 1);
-
-       /* let CE wait till condition satisfied */
-       amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
-       amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
-                                                        WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
-                                                        WAIT_REG_MEM_FUNCTION(3) |  /* == */
-                                                        WAIT_REG_MEM_ENGINE(2)));   /* ce */
-       amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
-       amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
-       amdgpu_ring_write(ring, 1);
-       amdgpu_ring_write(ring, 0xffffffff);
-       amdgpu_ring_write(ring, 4); /* poll interval */
-
-       /* instruct CE to reset wb of ce_sync to zero */
-       amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-       amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
-                                                        WRITE_DATA_DST_SEL(5) |
-                                                        WR_CONFIRM));
-       amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
-       amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
-       amdgpu_ring_write(ring, 0);
-}
-
 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                        unsigned vm_id, uint64_t pd_addr)
 {
@@ -4057,6 +4014,14 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
        amdgpu_ring_write(ring, 0xffffffff);
        amdgpu_ring_write(ring, 4); /* poll interval */
 
+       if (usepfp) {
+               /* synce CE with ME to prevent CE fetch CEIB before context switch done */
+               amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+               amdgpu_ring_write(ring, 0);
+               amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+               amdgpu_ring_write(ring, 0);
+       }
+
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
                                 WRITE_DATA_DST_SEL(0)) |
@@ -4096,9 +4061,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                /* sync PFP to ME, otherwise we might get invalid PFP reads */
                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
                amdgpu_ring_write(ring, 0x0);
-
-               /* synce CE with ME to prevent CE fetch CEIB before context switch done */
-               gfx_v8_0_ce_sync_me(ring);
+               amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+               amdgpu_ring_write(ring, 0);
+               amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+               amdgpu_ring_write(ring, 0);
        }
 }