From: Christian König Date: Fri, 6 May 2016 13:31:19 +0000 (+0200) Subject: drm/amdgpu: move context switch handling into common code v2 X-Git-Tag: MMI-PSA29.97-13-9~7809^2~1^2~32 X-Git-Url: https://git.stricted.de/?a=commitdiff_plain;h=f153d2867bf74f84d47f67c377a8e3a34865e562;p=GitHub%2FMotorolaMobilityLLC%2Fkernel-slsi.git drm/amdgpu: move context switch handling into common code v2 It was a source of bugs to repeat that in each IP version. v2: rename parameter Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 781f0471ef06..db87edc72936 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -283,7 +283,7 @@ struct amdgpu_ring_funcs { int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx); /* command emit functions */ void (*emit_ib)(struct amdgpu_ring *ring, - struct amdgpu_ib *ib); + struct amdgpu_ib *ib, bool ctx_switch); void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr, uint64_t seq, unsigned flags); void (*emit_pipeline_sync)(struct amdgpu_ring *ring); @@ -2221,7 +2221,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r)) #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r)) -#define amdgpu_ring_emit_ib(r, ib) (r)->funcs->emit_ib((r), (ib)) +#define amdgpu_ring_emit_ib(r, ib, c) (r)->funcs->emit_ib((r), (ib), (c)) #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r)) #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr)) #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index d6f85923edcd..88b8fda7340f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -121,18 +121,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib *ib = &ibs[0]; - uint64_t ctx, old_ctx; struct fence *hwf; struct amdgpu_vm *vm = NULL; unsigned i, patch_offset = ~0; - bool skip_preamble; + bool skip_preamble, need_ctx_switch; int r = 0; if (num_ibs == 0) return -EINVAL; - ctx = ibs->ctx; if (job) /* for domain0 job like ring test, ibs->job is not assigned */ vm = job->vm; @@ -156,7 +154,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, patch_offset = amdgpu_ring_init_cond_exec(ring); if (vm) { - /* do context switch */ r = amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr, ib->gds_base, ib->gds_size, ib->gws_base, ib->gws_size, @@ -173,16 +170,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, /* always set cond_exec_polling to CONTINUE */ *ring->cond_exe_cpu_addr = 1; - skip_preamble = ring->current_ctx == ctx; - old_ctx = ring->current_ctx; + skip_preamble = ring->current_ctx == ib->ctx; + need_ctx_switch = ring->current_ctx != ib->ctx; for (i = 0; i < num_ibs; ++i) { + ib = &ibs[i]; /* drop preamble IBs if we don't have a context switch */ if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble) continue; - amdgpu_ring_emit_ib(ring, ib); - ring->current_ctx = ctx; + amdgpu_ring_emit_ib(ring, ib, need_ctx_switch); + need_ctx_switch = false; } if (ring->funcs->emit_hdp_invalidate) @@ -191,7 +189,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, r = amdgpu_fence_emit(ring, &hwf); if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); - ring->current_ctx = old_ctx; if (ib->vm_id) amdgpu_vm_reset_id(adev, ib->vm_id); amdgpu_ring_undo(ring); @@ -212,6 +209,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (patch_offset != ~0 && ring->funcs->patch_cond_exec) amdgpu_ring_patch_cond_exec(ring, patch_offset); + ring->current_ctx = ibs->ctx; amdgpu_ring_commit(ring); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 7b7b0f64530a..ad91664a7649 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -762,7 +762,7 @@ out: * @ib: the IB to execute * */ -void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) +void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, bool ctx_switch) { amdgpu_ring_write(ring, VCE_CMD_IB); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index ef99d2370182..40d0650e3a37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -34,7 +34,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, bool direct, struct fence **fence); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); -void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); +void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, bool ctx_switch); void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags); int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index df824f80ac07..6c2aa2b863b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -210,7 +210,7 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (CIK). */ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, bool ctx_switch) { u32 extra_bits = ib->vm_id & 0xf; u32 next_rptr = ring->wptr + 5; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index d82fa9641f0e..189ef2b23668 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2030,13 +2030,12 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring, * on the gfx ring for execution by the GPU. */ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, bool ctx_switch) { - bool need_ctx_switch = ring->current_ctx != ib->ctx; u32 header, control = 0; u32 next_rptr = ring->wptr + 5; - if (need_ctx_switch) + if (ctx_switch) next_rptr += 2; next_rptr += 4; @@ -2047,7 +2046,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, amdgpu_ring_write(ring, next_rptr); /* insert SWITCH_BUFFER packet before first IB in the ring frame */ - if (need_ctx_switch) { + if (ctx_switch) { amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); amdgpu_ring_write(ring, 0); } @@ -2070,7 +2069,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, bool ctx_switch) { u32 header, control = 0; u32 next_rptr = ring->wptr + 5; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 9a0b6df210c1..0d556c907ab6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -5646,13 +5646,12 @@ static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) } static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, bool ctx_switch) { - bool need_ctx_switch = ring->current_ctx != ib->ctx; u32 header, control = 0; u32 next_rptr = ring->wptr + 5; - if (need_ctx_switch) + if (ctx_switch) next_rptr += 2; next_rptr += 4; @@ -5663,7 +5662,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, amdgpu_ring_write(ring, next_rptr); /* insert SWITCH_BUFFER packet before first IB in the ring frame */ - if (need_ctx_switch) { + if (ctx_switch) { amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); amdgpu_ring_write(ring, 0); } @@ -5686,7 +5685,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, bool ctx_switch) { u32 header, control = 0; u32 next_rptr = ring->wptr + 5; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 6be2c0faa1bc..de94adb2b19e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -242,7 +242,7 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (VI). */ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, bool ctx_switch) { u32 vmid = ib->vm_id & 0xf; u32 next_rptr = ring->wptr + 5; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index b3dab09205af..ca2aee3e88a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -400,7 +400,7 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (VI). */ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, bool ctx_switch) { u32 vmid = ib->vm_id & 0xf; u32 next_rptr = ring->wptr + 5; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 46a397654837..a75ffb5b11b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -489,7 +489,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) * Write ring commands to execute the indirect buffer */ static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0)); amdgpu_ring_write(ring, ib->gpu_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index b96486c09250..ecb81014d836 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -539,7 +539,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) * Write ring commands to execute the indirect buffer */ static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 892bdac4bb21..a43f1a7c58bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -631,7 +631,7 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) * Write ring commands to execute the indirect buffer */ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));