drm/i915: Always sanity check engine state upon idling
author: Chris Wilson <chris@chris-wilson.co.uk>
Sat, 26 Aug 2017 11:09:33 +0000 (12:09 +0100)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 5 Jun 2018 09:41:57 +0000 (11:41 +0200)
commit cad9946c2a4375386062131858881cfd30fc1b8f upstream.

When we do a locked idle, we expect that afterwards all requests have been
completed and the engines have been cleared of tasks. For whatever
reason, this doesn't always happen and we may go into a suspend with
ELSP still full, and this causes an issue upon resume as we get very,
very confused.

If the engines refuse to idle, mark the device as wedged. In the process
we get rid of the possibly-unused, open-coded version of wait_for_engines
reported by Nick Desaulniers and Matthias Kaehlcke.

v2: Suppress the -EIO before suspend, but keep it for seqno wrap.

References: https://bugs.freedesktop.org/show_bug.cgi?id=101891
References: https://bugs.freedesktop.org/show_bug.cgi?id=102456
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Matthias Kaehlcke <mka@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20170826110935.10237-1-chris@chris-wilson.co.uk
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/gpu/drm/i915/i915_gem.c

index 3b2c0538e48d7b18c1210faef38612af6de6d337..90359c7954c8d66532808e82f8116121ac576ca5 100644 (file)
@@ -3378,24 +3378,12 @@ static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags)
        return 0;
 }
 
-static int wait_for_engine(struct intel_engine_cs *engine, int timeout_ms)
-{
-       return wait_for(intel_engine_is_idle(engine), timeout_ms);
-}
-
 static int wait_for_engines(struct drm_i915_private *i915)
 {
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
-
-       for_each_engine(engine, i915, id) {
-               if (GEM_WARN_ON(wait_for_engine(engine, 50))) {
-                       i915_gem_set_wedged(i915);
-                       return -EIO;
-               }
-
-               GEM_BUG_ON(intel_engine_get_seqno(engine) !=
-                          intel_engine_last_submit(engine));
+       if (wait_for(intel_engines_are_idle(i915), 50)) {
+               DRM_ERROR("Failed to idle engines, declaring wedged!\n");
+               i915_gem_set_wedged(i915);
+               return -EIO;
        }
 
        return 0;
@@ -4575,7 +4563,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
        ret = i915_gem_wait_for_idle(dev_priv,
                                     I915_WAIT_INTERRUPTIBLE |
                                     I915_WAIT_LOCKED);
-       if (ret)
+       if (ret && ret != -EIO)
                goto err_unlock;
 
        assert_kernel_context_is_current(dev_priv);
@@ -4619,11 +4607,12 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
         * machine in an unusable condition.
         */
        i915_gem_sanitize(dev_priv);
-       goto out_rpm_put;
+
+       intel_runtime_pm_put(dev_priv);
+       return 0;
 
 err_unlock:
        mutex_unlock(&dev->struct_mutex);
-out_rpm_put:
        intel_runtime_pm_put(dev_priv);
        return ret;
 }