drm/i915: Stop the machine as we install the wedged submit_request handler
author Chris Wilson <chris@chris-wilson.co.uk>
Tue, 22 Nov 2016 14:41:21 +0000 (14:41 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Tue, 22 Nov 2016 17:42:19 +0000 (17:42 +0000)
In order to prevent a race between the old callback submitting an
incomplete request and i915_gem_set_wedged() installing its nop handler,
we must ensure that the swap occurs when the machine is idle
(stop_machine).

v2: Move the i915_gem_context_lost() call out of the BKL (stop_machine) callback.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161122144121.7379-4-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_gem.c

index 91f1ac30c5faab99c557ff17cd4bee6c59af3080..3d4e07e9734f59e6898faa596c910e6589702d47 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/reservation.h>
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
+#include <linux/stop_machine.h>
 #include <linux/swap.h>
 #include <linux/pci.h>
 #include <linux/dma-buf.h>
@@ -2787,6 +2788,12 @@ static void nop_submit_request(struct drm_i915_gem_request *request)
 
 static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
 {
+       /* We need to be sure that no thread is running the old callback as
+        * we install the nop handler (otherwise we would submit a request
+        * to hardware that will never complete). In order to prevent this
+        * race, we wait until the machine is idle before making the swap
+        * (using stop_machine()).
+        */
        engine->submit_request = nop_submit_request;
 
        /* Mark all pending requests as complete so that any concurrent
@@ -2817,20 +2824,29 @@ static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
        }
 }
 
-void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
+static int __i915_gem_set_wedged_BKL(void *data)
 {
+       struct drm_i915_private *i915 = data;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
 
+       for_each_engine(engine, i915, id)
+               i915_gem_cleanup_engine(engine);
+
+       return 0;
+}
+
+void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
+{
        lockdep_assert_held(&dev_priv->drm.struct_mutex);
        set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
 
-       i915_gem_context_lost(dev_priv);
-       for_each_engine(engine, dev_priv, id)
-               i915_gem_cleanup_engine(engine);
-       mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
+       stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL);
 
+       i915_gem_context_lost(dev_priv);
        i915_gem_retire_requests(dev_priv);
+
+       mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
 }
 
 static void