drm/i915: Rework GPU reset sequence to match driver load & thaw
author McAulay, Alistair <alistair.mcaulay@intel.com>
Fri, 15 Aug 2014 17:51:35 +0000 (18:51 +0100)
committer Daniel Vetter <daniel.vetter@ffwll.ch>
Wed, 3 Sep 2014 08:54:09 +0000 (10:54 +0200)
This patch is to address Daniel's concerns over different code during reset:

http://lists.freedesktop.org/archives/intel-gfx/2014-June/047758.html

"The reason for aiming as hard as possible to use the exact same code for
driver load, gpu reset and runtime pm/system resume is that we've simply
seen too many bugs due to slight variations and unintended omissions."

Tested using igt drv_hangman.

V2: Cleaner way of preventing check_wedge returning -EAGAIN
V3: Clean the last_context during reset, to ensure do_switch() does the MI_SET_CONTEXT. As per review.
Signed-off-by: McAulay, Alistair <alistair.mcaulay@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
[danvet: Rebase over ctx->ppgtt rework and extend the comment in
check_wedge a bit.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_context.c
drivers/gpu/drm/i915/i915_gem_gtt.c
drivers/gpu/drm/i915/i915_gem_gtt.h

index ff4db249cc7210f16169cc9d6a4c4413e9f70104..683be99117c64192b8a5c643b81e55cbdf62b991 100644 (file)
@@ -844,7 +844,13 @@ int i915_reset(struct drm_device *dev)
                        !dev_priv->ums.mm_suspended) {
                dev_priv->ums.mm_suspended = 0;
 
+               /* Used to prevent gem_check_wedged returning -EAGAIN during gpu reset */
+               dev_priv->gpu_error.reload_in_reset = true;
+
                ret = i915_gem_init_hw(dev);
+
+               dev_priv->gpu_error.reload_in_reset = false;
+
                mutex_unlock(&dev->struct_mutex);
                if (ret) {
                        DRM_ERROR("Failed hw init on reset %d\n", ret);
index bcf8783dbc2e79176dc82045e9222e395ef71490..9c3677e4448e72e0eeee7ead8e3643207a6f2e7f 100644 (file)
@@ -1239,6 +1239,9 @@ struct i915_gpu_error {
 
        /* For missed irq/seqno simulation. */
        unsigned int test_irq_rings;
+
+       /* Used to prevent gem_check_wedged returning -EAGAIN during gpu reset   */
+       bool reload_in_reset;
 };
 
 enum modeset_restore {
index f1bb69377a356db36c105fdf84b615fdbe03dd23..fcd7dde6e44440c7e71528e31cd3f0df799ba9a2 100644 (file)
@@ -1085,7 +1085,13 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
                if (i915_terminally_wedged(error))
                        return -EIO;
 
-               return -EAGAIN;
+               /*
+                * Check if GPU Reset is in progress - we need intel_ring_begin
+                * to work properly to reinit the hw state while the gpu is
+                * still marked as reset-in-progress. Handle this with a flag.
+                */
+               if (!error->reload_in_reset)
+                       return -EAGAIN;
        }
 
        return 0;
index 9683e62ec61aa041e661daba39b167bb09e48d82..0fdb357f8a5c0138dd045cab5daa4d89c041dedb 100644 (file)
@@ -289,34 +289,17 @@ void i915_gem_context_reset(struct drm_device *dev)
        struct drm_i915_private *dev_priv = dev->dev_private;
        int i;
 
-       /* Prevent the hardware from restoring the last context (which hung) on
-        * the next switch */
        for (i = 0; i < I915_NUM_RINGS; i++) {
                struct intel_engine_cs *ring = &dev_priv->ring[i];
-               struct intel_context *dctx = ring->default_context;
                struct intel_context *lctx = ring->last_context;
 
-               /* Do a fake switch to the default context */
-               if (lctx == dctx)
-                       continue;
-
-               if (!lctx)
-                       continue;
+               if (lctx) {
+                       if (lctx->legacy_hw_ctx.rcs_state && i == RCS)
+                               i915_gem_object_ggtt_unpin(lctx->legacy_hw_ctx.rcs_state);
 
-               if (dctx->legacy_hw_ctx.rcs_state && i == RCS) {
-                       WARN_ON(i915_gem_obj_ggtt_pin(dctx->legacy_hw_ctx.rcs_state,
-                                                     get_context_alignment(dev), 0));
-                       /* Fake a finish/inactive */
-                       dctx->legacy_hw_ctx.rcs_state->base.write_domain = 0;
-                       dctx->legacy_hw_ctx.rcs_state->active = 0;
+                       i915_gem_context_unreference(lctx);
+                       ring->last_context = NULL;
                }
-
-               if (lctx->legacy_hw_ctx.rcs_state && i == RCS)
-                       i915_gem_object_ggtt_unpin(lctx->legacy_hw_ctx.rcs_state);
-
-               i915_gem_context_unreference(lctx);
-               i915_gem_context_reference(dctx);
-               ring->last_context = dctx;
        }
 }
 
@@ -412,10 +395,6 @@ int i915_gem_context_enable(struct drm_i915_private *dev_priv)
        struct intel_engine_cs *ring;
        int ret, i;
 
-       /* FIXME: We should make this work, even in reset */
-       if (i915_reset_in_progress(&dev_priv->gpu_error))
-               return 0;
-
        BUG_ON(!dev_priv->ring[RCS].default_context);
 
        for_each_ring(ring, dev_priv, i) {
@@ -558,7 +537,7 @@ static int do_switch(struct intel_engine_cs *ring,
        from = ring->last_context;
 
        if (to->ppgtt) {
-               ret = to->ppgtt->switch_mm(to->ppgtt, ring, false);
+               ret = to->ppgtt->switch_mm(to->ppgtt, ring);
                if (ret)
                        goto unpin_out;
        }
index 4db237065610e9b6800e96fe57560ef1f6067c12..22ad38bb93f6f302e52614d25ffce815e766595d 100644 (file)
@@ -204,19 +204,12 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
 
 /* Broadwell Page Directory Pointer Descriptors */
 static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
-                          uint64_t val, bool synchronous)
+                          uint64_t val)
 {
-       struct drm_i915_private *dev_priv = ring->dev->dev_private;
        int ret;
 
        BUG_ON(entry >= 4);
 
-       if (synchronous) {
-               I915_WRITE(GEN8_RING_PDP_UDW(ring, entry), val >> 32);
-               I915_WRITE(GEN8_RING_PDP_LDW(ring, entry), (u32)val);
-               return 0;
-       }
-
        ret = intel_ring_begin(ring, 6);
        if (ret)
                return ret;
@@ -233,8 +226,7 @@ static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
 }
 
 static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
-                         struct intel_engine_cs *ring,
-                         bool synchronous)
+                         struct intel_engine_cs *ring)
 {
        int i, ret;
 
@@ -243,7 +235,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
 
        for (i = used_pd - 1; i >= 0; i--) {
                dma_addr_t addr = ppgtt->pd_dma_addr[i];
-               ret = gen8_write_pdp(ring, i, addr, synchronous);
+               ret = gen8_write_pdp(ring, i, addr);
                if (ret)
                        return ret;
        }
@@ -708,29 +700,10 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
 }
 
 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
-                        struct intel_engine_cs *ring,
-                        bool synchronous)
+                        struct intel_engine_cs *ring)
 {
-       struct drm_device *dev = ppgtt->base.dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
        int ret;
 
-       /* If we're in reset, we can assume the GPU is sufficiently idle to
-        * manually frob these bits. Ideally we could use the ring functions,
-        * except our error handling makes it quite difficult (can't use
-        * intel_ring_begin, ring->flush, or intel_ring_advance)
-        *
-        * FIXME: We should try not to special case reset
-        */
-       if (synchronous ||
-           i915_reset_in_progress(&dev_priv->gpu_error)) {
-               WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
-               I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
-               I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
-               POSTING_READ(RING_PP_DIR_BASE(ring));
-               return 0;
-       }
-
        /* NB: TLBs must be flushed and invalidated before a switch */
        ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
        if (ret)
@@ -752,29 +725,10 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
 }
 
 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
-                         struct intel_engine_cs *ring,
-                         bool synchronous)
+                         struct intel_engine_cs *ring)
 {
-       struct drm_device *dev = ppgtt->base.dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
        int ret;
 
-       /* If we're in reset, we can assume the GPU is sufficiently idle to
-        * manually frob these bits. Ideally we could use the ring functions,
-        * except our error handling makes it quite difficult (can't use
-        * intel_ring_begin, ring->flush, or intel_ring_advance)
-        *
-        * FIXME: We should try not to special case reset
-        */
-       if (synchronous ||
-           i915_reset_in_progress(&dev_priv->gpu_error)) {
-               WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
-               I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
-               I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
-               POSTING_READ(RING_PP_DIR_BASE(ring));
-               return 0;
-       }
-
        /* NB: TLBs must be flushed and invalidated before a switch */
        ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
        if (ret)
@@ -803,14 +757,11 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
 }
 
 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
-                         struct intel_engine_cs *ring,
-                         bool synchronous)
+                         struct intel_engine_cs *ring)
 {
        struct drm_device *dev = ppgtt->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
 
-       if (!synchronous)
-               return 0;
 
        I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
        I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
@@ -1189,7 +1140,7 @@ int i915_ppgtt_init_hw(struct drm_device *dev)
 
        if (ppgtt) {
                for_each_ring(ring, dev_priv, i) {
-                       ret = ppgtt->switch_mm(ppgtt, ring, true);
+                       ret = ppgtt->switch_mm(ppgtt, ring);
                        if (ret != 0)
                                return ret;
                }
index 6280648d4805bd5216f45dfcc9e4be238967c75a..d5c14af51e995d5c19a6d37b950dc2691a44ffb2 100644 (file)
@@ -264,8 +264,7 @@ struct i915_hw_ppgtt {
 
        int (*enable)(struct i915_hw_ppgtt *ppgtt);
        int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
-                        struct intel_engine_cs *ring,
-                        bool synchronous);
+                        struct intel_engine_cs *ring);
        void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
 };