drm/amdgpu: changes in gfx DMA frame scheme (v2)

author Monk Liu <Monk.Liu@amd.com>

Wed, 15 Mar 2017 04:18:57 +0000 (12:18 +0800)

committer Alex Deucher <alexander.deucher@amd.com>

Thu, 30 Mar 2017 03:55:42 +0000 (23:55 -0400)
author Monk Liu <Monk.Liu@amd.com>
Wed, 15 Mar 2017 04:18:57 +0000 (12:18 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 30 Mar 2017 03:55:42 +0000 (23:55 -0400)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index 2957404bd44a5bf5c32e13ff4b902877d06c1e49..c40c1a16e72e2acbcc9dbe24c7bb405be1e39c57 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -912,7 +912,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
  
                         /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
                         if (ce_preempt > 1 || de_preempt > 1)
-                               BUG();
+                               return -EINVAL;
                 }
  
                 r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c

index c4857083d834e899f3f3d701fa43b65555c9283b..6b8bb1b070cca3078e76c48b3f08be4bfd6021ff 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -161,9 +161,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
                 return r;
         }
  
-       if (ring->funcs->init_cond_exec)
-               patch_offset = amdgpu_ring_init_cond_exec(ring);
-
         if (vm) {
                 r = amdgpu_vm_flush(ring, job);
                 if (r) {
@@ -172,7 +169,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
                 }
         }
  
-       if (ring->funcs->emit_hdp_flush
+       if (ring->funcs->init_cond_exec)
+               patch_offset = amdgpu_ring_init_cond_exec(ring);
+
+               if (ring->funcs->emit_hdp_flush
  #ifdef CONFIG_X86_64
             && !(adev->flags & AMD_IS_APU)
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 10e8232d6cacb342d983fddab0fa98faa32abc4e..72bef223a080b95f18a67c357f4ab13dcd5994f0 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -577,42 +577,59 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
                 id->oa_size != job->oa_size);
         int r;
  
-       if (ring->funcs->emit_pipeline_sync && (
-           job->vm_needs_flush || gds_switch_needed ||
-           amdgpu_vm_ring_has_compute_vm_bug(ring)))
-               amdgpu_ring_emit_pipeline_sync(ring);
+       if (job->vm_needs_flush || gds_switch_needed ||
+               amdgpu_vm_is_gpu_reset(adev, id) ||
+               amdgpu_vm_ring_has_compute_vm_bug(ring)) {
+               unsigned patch_offset = 0;
  
-       if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
-           amdgpu_vm_is_gpu_reset(adev, id))) {
-               struct dma_fence *fence;
-               u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
+               if (ring->funcs->init_cond_exec)
+                       patch_offset = amdgpu_ring_init_cond_exec(ring);
  
-               trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
-               amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
+               if (ring->funcs->emit_pipeline_sync &&
+                       (job->vm_needs_flush || gds_switch_needed ||
+                       amdgpu_vm_ring_has_compute_vm_bug(ring)))
+                       amdgpu_ring_emit_pipeline_sync(ring);
  
-               r = amdgpu_fence_emit(ring, &fence);
-               if (r)
-                       return r;
+               if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
+                       amdgpu_vm_is_gpu_reset(adev, id))) {
+                       struct dma_fence *fence;
+                       u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
  
-               mutex_lock(&adev->vm_manager.lock);
-               dma_fence_put(id->last_flush);
-               id->last_flush = fence;
-               mutex_unlock(&adev->vm_manager.lock);
-       }
+                       trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
+                       amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
  
-       if (gds_switch_needed) {
-               id->gds_base = job->gds_base;
-               id->gds_size = job->gds_size;
-               id->gws_base = job->gws_base;
-               id->gws_size = job->gws_size;
-               id->oa_base = job->oa_base;
-               id->oa_size = job->oa_size;
-               amdgpu_ring_emit_gds_switch(ring, job->vm_id,
-                                           job->gds_base, job->gds_size,
-                                           job->gws_base, job->gws_size,
-                                           job->oa_base, job->oa_size);
-       }
+                       r = amdgpu_fence_emit(ring, &fence);
+                       if (r)
+                               return r;
  
+                       mutex_lock(&adev->vm_manager.lock);
+                       dma_fence_put(id->last_flush);
+                       id->last_flush = fence;
+                       mutex_unlock(&adev->vm_manager.lock);
+               }
+
+               if (gds_switch_needed) {
+                       id->gds_base = job->gds_base;
+                       id->gds_size = job->gds_size;
+                       id->gws_base = job->gws_base;
+                       id->gws_size = job->gws_size;
+                       id->oa_base = job->oa_base;
+                       id->oa_size = job->oa_size;
+                       amdgpu_ring_emit_gds_switch(ring, job->vm_id,
+                                                       job->gds_base, job->gds_size,
+                                                       job->gws_base, job->gws_size,
+                                                       job->oa_base, job->oa_size);
+               }
+
+               if (ring->funcs->patch_cond_exec)
+                       amdgpu_ring_patch_cond_exec(ring, patch_offset);
+
+               /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
+               if (ring->funcs->emit_switch_buffer) {
+                       amdgpu_ring_emit_switch_buffer(ring);
+                       amdgpu_ring_emit_switch_buffer(ring);
+               }
+       }
         return 0;
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

index c59bb38c091f438c7e6478fad91f35bd54605300..e0fa0d30e162a0fbe4fa24821b4b0765920ba5a4 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6675,8 +6675,6 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
                 amdgpu_ring_write(ring, 0x0);
-               /* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
-               amdgpu_ring_insert_nop(ring, 128);
         }
  }
  
@@ -7078,15 +7076,24 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
         .get_rptr = gfx_v8_0_ring_get_rptr,
         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
-       .emit_frame_size =
-               20 + /* gfx_v8_0_ring_emit_gds_switch */
-               7 + /* gfx_v8_0_ring_emit_hdp_flush */
-               5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
-               6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
-               7 + /* gfx_v8_0_ring_emit_pipeline_sync */
-               128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
-               2 + /* gfx_v8_ring_emit_sb */
-               3 + 4 + 29, /* gfx_v8_ring_emit_cntxcntl including vgt flush/meta-data */
+       .emit_frame_size = /* maximum 215dw if count 16 IBs in */
+               5 +  /* COND_EXEC */
+               7 +  /* PIPELINE_SYNC */
+               19 + /* VM_FLUSH */
+               8 +  /* FENCE for VM_FLUSH */
+               20 + /* GDS switch */
+               4 + /* double SWITCH_BUFFER;
+                      the first COND_EXEC jumps to the place just
+                      prior to this double SWITCH_BUFFER */
+               5 + /* COND_EXEC */
+               7 +      /*     HDP_flush */
+               4 +      /*     VGT_flush */
+               14 + /* CE_META */
+               31 + /* DE_META */
+               3 + /* CNTX_CTRL */
+               5 + /* HDP_INVL */
+               8 + 8 + /* FENCE x2 */
+               2, /* SWITCH_BUFFER */
         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

index f124f6d3b86916ac77c46bc7a842c19069f7891d..7666add21519d592fea972a80c47b16e9850903e 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3186,8 +3186,6 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
                 amdgpu_ring_write(ring, 0x0);
-               /* Emits 128 dw nop to prevent CE access VM before vm_flush finish */
-               amdgpu_ring_insert_nop(ring, 128);
         }
  }
  
@@ -3682,15 +3680,24 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
-       .emit_frame_size =
-               20 + /* gfx_v9_0_ring_emit_gds_switch */
-               7 + /* gfx_v9_0_ring_emit_hdp_flush */
-               5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
-               8 + 8 + 8 +/* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
-               7 + /* gfx_v9_0_ring_emit_pipeline_sync */
-               128 + 66 + /* gfx_v9_0_ring_emit_vm_flush */
-               2 + /* gfx_v9_ring_emit_sb */
-               3, /* gfx_v9_ring_emit_cntxcntl */
+       .emit_frame_size = /* totally 242 maximum if 16 IBs */
+               5 +  /* COND_EXEC */
+               7 +  /* PIPELINE_SYNC */
+               46 + /* VM_FLUSH */
+               8 +  /* FENCE for VM_FLUSH */
+               20 + /* GDS switch */
+               4 + /* double SWITCH_BUFFER;
+                      the first COND_EXEC jumps to the place just
+                      prior to this double SWITCH_BUFFER */
+               5 + /* COND_EXEC */
+               7 +      /*     HDP_flush */
+               4 +      /*     VGT_flush */
+               14 + /* CE_META */
+               31 + /* DE_META */
+               3 + /* CNTX_CTRL */
+               5 + /* HDP_INVL */
+               8 + 8 + /* FENCE x2 */
+               2, /* SWITCH_BUFFER */
         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
         .emit_fence = gfx_v9_0_ring_emit_fence,
author	Monk Liu <Monk.Liu@amd.com>
	Wed, 15 Mar 2017 04:18:57 +0000 (12:18 +0800)
committer	Alex Deucher <alexander.deucher@amd.com>
	Thu, 30 Mar 2017 03:55:42 +0000 (23:55 -0400)
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c		patch \| blob \| blame \| history
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c		patch \| blob \| blame \| history
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c		patch \| blob \| blame \| history
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c		patch \| blob \| blame \| history
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c		patch \| blob \| blame \| history