drm/i915: Detect a failed GPU reset+recovery
author Chris Wilson <chris@chris-wilson.co.uk>
Tue, 17 Jan 2017 15:59:06 +0000 (17:59 +0200)
committer Chris Wilson <chris@chris-wilson.co.uk>
Wed, 18 Jan 2017 10:47:26 +0000 (10:47 +0000)
If we can't recover the GPU after the reset, mark it as wedged to cancel
the outstanding tasks and to prevent new users from trying to use the
broken GPU.

v2: Check the same ring is hung again before declaring the reset broken.
v3: use engine_stalled (Mika)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1484668747-9120-6-git-send-email-mika.kuoppala@intel.com
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c

index fbad2b64b8905d129ca3cf53b4e4ece43873a59e..348dec48250216c0245702deb8c67a8a5847e302 100644 (file)
@@ -1761,7 +1761,12 @@ void i915_reset(struct drm_i915_private *dev_priv)
 
        pr_notice("drm/i915: Resetting chip after gpu hang\n");
        disable_irq(dev_priv->drm.irq);
-       i915_gem_reset_prepare(dev_priv);
+       ret = i915_gem_reset_prepare(dev_priv);
+       if (ret) {
+               DRM_ERROR("GPU recovery failed\n");
+               intel_gpu_reset(dev_priv, ALL_ENGINES);
+               goto error;
+       }
 
        ret = intel_gpu_reset(dev_priv, ALL_ENGINES);
        if (ret) {
index 1cd485c314e6e25084957e51b76cbd738198b8a3..e75e367f253a8830300b4926ad7d8b9c92671c2b 100644 (file)
@@ -3327,7 +3327,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
        return READ_ONCE(error->reset_count);
 }
 
-void i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
+int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
 void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
 void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
index 832ad092f4623750a62f51f46904451e1ce4c67f..3e10e810101488c1545fc8c2148a83aa07a1a4ac 100644 (file)
@@ -2625,16 +2625,28 @@ static bool engine_stalled(struct intel_engine_cs *engine)
        return true;
 }
 
-void i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
+int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
 {
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
+       int err = 0;
 
        /* Ensure irq handler finishes, and not run again. */
-       for_each_engine(engine, dev_priv, id)
+       for_each_engine(engine, dev_priv, id) {
+               struct drm_i915_gem_request *request;
+
                tasklet_kill(&engine->irq_tasklet);
 
+               if (engine_stalled(engine)) {
+                       request = i915_gem_find_active_request(engine);
+                       if (request && request->fence.error == -EIO)
+                               err = -EIO; /* Previous reset failed! */
+               }
+       }
+
        i915_gem_revoke_fences(dev_priv);
+
+       return err;
 }
 
 static void skip_request(struct drm_i915_gem_request *request)