drm/i915: Don't wait for a spinlock inside error capture
author: Chris Wilson <chris@chris-wilson.co.uk>
Tue, 6 Sep 2016 07:38:44 +0000 (08:38 +0100)
committer: Chris Wilson <chris@chris-wilson.co.uk>
Tue, 6 Sep 2016 08:25:03 +0000 (09:25 +0100)
If we can't grab the breadcrumb's spinlock, possibly due to a driver
deadlock inside the waiters, skip recording the waiters rather than
blocking on the lock. Like hangcheck, error capture must work no matter
how the driver/GPU dies.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/20160906073844.22561-1-chris@chris-wilson.co.uk
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
drivers/gpu/drm/i915/i915_gpu_error.c

index aed55e4f100f1516b876ec37e09aad5b2a07af03..d45a959b7c53b353f0f405d8cd438f17bf737357 100644 (file)
@@ -489,7 +489,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
                        }
                }
 
-               if (ee->num_waiters) {
+               if (IS_ERR(ee->waiters)) {
+                       err_printf(m, "%s --- ? waiters [unable to acquire spinlock]\n",
+                                  dev_priv->engine[i].name);
+               } else if (ee->num_waiters) {
                        err_printf(m, "%s --- %d waiters\n",
                                   dev_priv->engine[i].name,
                                   ee->num_waiters);
@@ -648,7 +651,8 @@ static void i915_error_state_free(struct kref *error_ref)
                i915_error_object_free(ee->wa_ctx);
 
                kfree(ee->requests);
-               kfree(ee->waiters);
+               if (!IS_ERR_OR_NULL(ee->waiters))
+                       kfree(ee->waiters);
        }
 
        i915_error_object_free(error->semaphore);
@@ -933,7 +937,14 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine,
        ee->num_waiters = 0;
        ee->waiters = NULL;
 
-       spin_lock(&b->lock);
+       if (RB_EMPTY_ROOT(&b->waiters))
+               return;
+
+       if (!spin_trylock(&b->lock)) {
+               ee->waiters = ERR_PTR(-EDEADLK);
+               return;
+       }
+
        count = 0;
        for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb))
                count++;
@@ -947,9 +958,13 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine,
        if (!waiter)
                return;
 
-       ee->waiters = waiter;
+       if (!spin_trylock(&b->lock)) {
+               kfree(waiter);
+               ee->waiters = ERR_PTR(-EDEADLK);
+               return;
+       }
 
-       spin_lock(&b->lock);
+       ee->waiters = waiter;
        for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
                struct intel_wait *w = container_of(rb, typeof(*w), node);