drm/i915: Wait for concurrent global resets to complete
author Chris Wilson <chris@chris-wilson.co.uk>
Tue, 20 Jun 2017 09:57:43 +0000 (10:57 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Tue, 20 Jun 2017 19:59:58 +0000 (20:59 +0100)
If we enter i915_handle_error() a second time and a global reset is
already in progress, we can simply wait for completion of the first
reset. Currently we exit early prior to the actual reset being
performed -- the worst of both worlds!

v2: Plug into the existing reset_queue, and remember that the kselftests
are playing games with I915_RESET_BACKOFF to prevent hangcheck from
screwing up.
v3: Rename to i915_reset_device to fit in better with i915_reset_engine

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Reviewed-by: Michel Thierry <michel.thierry@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170620095751.13127-2-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_irq.c
drivers/gpu/drm/i915/selftests/intel_hangcheck.c

index 4cd9ee1ba332e63247ed1d2e1505e10ff61ca633..8e9f4378b5a7da5c8ebe05b7e15bf33cb7dab226 100644 (file)
@@ -2600,13 +2600,13 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
 }
 
 /**
- * i915_reset_and_wakeup - do process context error handling work
+ * i915_reset_device - do process context error handling work
  * @dev_priv: i915 device private
  *
  * Fire an error uevent so userspace can see that a hang or error
  * was detected.
  */
-static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
+static void i915_reset_device(struct drm_i915_private *dev_priv)
 {
        struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj;
        char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
@@ -2646,13 +2646,6 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
        if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
                kobject_uevent_env(kobj,
                                   KOBJ_CHANGE, reset_done_event);
-
-       /*
-        * Note: The wake_up also serves as a memory barrier so that
-        * waiters see the updated value of the dev_priv->gpu_error.
-        */
-       clear_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags);
-       wake_up_all(&dev_priv->gpu_error.reset_queue);
 }
 
 static inline void
@@ -2744,11 +2737,17 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
        if (!engine_mask)
                goto out;
 
-       if (test_and_set_bit(I915_RESET_BACKOFF,
-                            &dev_priv->gpu_error.flags))
+       if (test_and_set_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags)) {
+               wait_event(dev_priv->gpu_error.reset_queue,
+                          !test_bit(I915_RESET_BACKOFF,
+                                    &dev_priv->gpu_error.flags));
                goto out;
+       }
+
+       i915_reset_device(dev_priv);
 
-       i915_reset_and_wakeup(dev_priv);
+       clear_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags);
+       wake_up_all(&dev_priv->gpu_error.reset_queue);
 
 out:
        intel_runtime_pm_put(dev_priv);
index aa31d6c0cdfb110a84f4d35fec1b3826e4fa4d89..cc00a361f0fa470fec84ac89561fcae5a4e92f4e 100644 (file)
@@ -316,6 +316,8 @@ static int igt_global_reset(void *arg)
 
        GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
        clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
+       wake_up_all(&i915->gpu_error.reset_queue);
+
        if (i915_terminally_wedged(&i915->gpu_error))
                err = -EIO;
 
@@ -404,6 +406,7 @@ fini:
 unlock:
        mutex_unlock(&i915->drm.struct_mutex);
        clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
+       wake_up_all(&i915->gpu_error.reset_queue);
 
        if (i915_terminally_wedged(&i915->gpu_error))
                return -EIO;
@@ -519,6 +522,7 @@ fini:
 unlock:
        mutex_unlock(&i915->drm.struct_mutex);
        clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
+       wake_up_all(&i915->gpu_error.reset_queue);
 
        if (i915_terminally_wedged(&i915->gpu_error))
                return -EIO;