drm/i915: Allow userspace to request no-error-capture upon GPU hangs
author Chris Wilson <chris@chris-wilson.co.uk>
Mon, 4 Jul 2016 07:08:39 +0000 (08:08 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Mon, 4 Jul 2016 07:18:24 +0000 (08:18 +0100)
igt likes to inject GPU hangs into its command streams. However, as we
expect these hangs, we don't actually want them recorded in the dmesg
output or stored in the i915_error_state (usually). To accommodate this,
allow userspace to set a flag on the context so that any hang emanating
from that context will not be recorded. We still do the error capture
(otherwise how do we find the guilty context and know its intent?) as
part of the reason for random GPU hang injection is to exercise the race
conditions between the error capture and normal execution.

v2: Split out the request->ringbuf error capture changes.
v3: Move the flag defines next to the intel_context->flags definition

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Dave Gordon <david.s.gordon@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1467616119-4093-9-git-send-email-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem_context.c
drivers/gpu/drm/i915/i915_gpu_error.c
include/uapi/drm/i915_drm.h

index 488891853cb53b9b680c266a5e2e0969c53bce93..251a08d8808db748146278bb3b7e22525b6643a1 100644 (file)
@@ -475,6 +475,7 @@ struct drm_i915_error_state {
        struct timeval time;
 
        char error_msg[128];
+       bool simulated;
        int iommu;
        u32 reset_count;
        u32 suspend_count;
@@ -875,9 +876,10 @@ struct i915_gem_context {
 
        /* Unique identifier for this context, used by the hw for tracking */
        unsigned long flags;
+#define CONTEXT_NO_ZEROMAP             BIT(0)
+#define CONTEXT_NO_ERROR_CAPTURE       BIT(1)
        unsigned hw_id;
        u32 user_handle;
-#define CONTEXT_NO_ZEROMAP             (1<<0)
 
        u32 ggtt_alignment;
 
index 3a6594b70900a2d86054795bafb7d4ce795b7437..8e952b1a31b3170768c2579357cf80ce6b4d080b 100644 (file)
@@ -1026,6 +1026,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
                else
                        args->value = to_i915(dev)->ggtt.base.total;
                break;
+       case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+               args->value = !!(ctx->flags & CONTEXT_NO_ERROR_CAPTURE);
+               break;
        default:
                ret = -EINVAL;
                break;
@@ -1071,6 +1074,16 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
                        ctx->flags |= args->value ? CONTEXT_NO_ZEROMAP : 0;
                }
                break;
+       case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+               if (args->size) {
+                       ret = -EINVAL;
+               } else {
+                       if (args->value)
+                               ctx->flags |= CONTEXT_NO_ERROR_CAPTURE;
+                       else
+                               ctx->flags &= ~CONTEXT_NO_ERROR_CAPTURE;
+               }
+               break;
        default:
                ret = -EINVAL;
                break;
index 1be63590a7fefc4bf517e1a15d187fefe8ec3c42..c6e05cccbedf8ad8954c09d659af4c3a18e65160 100644 (file)
@@ -1093,9 +1093,8 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
                        struct i915_address_space *vm;
                        struct intel_ringbuffer *rb;
 
-                       vm = request->ctx && request->ctx->ppgtt ?
-                               &request->ctx->ppgtt->base :
-                               &ggtt->base;
+                       vm = request->ctx->ppgtt ?
+                               &request->ctx->ppgtt->base : &ggtt->base;
 
                        /* We need to copy these to an anonymous buffer
                         * as the simplest method to avoid being overwritten
@@ -1123,6 +1122,9 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
                                rcu_read_unlock();
                        }
 
+                       error->simulated |=
+                               request->ctx->flags & CONTEXT_NO_ERROR_CAPTURE;
+
                        rb = request->ringbuf;
                        error->ring[i].cpu_ring_head = rb->head;
                        error->ring[i].cpu_ring_tail = rb->tail;
@@ -1422,12 +1424,14 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
        i915_error_capture_msg(dev_priv, error, engine_mask, error_msg);
        DRM_INFO("%s\n", error->error_msg);
 
-       spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
-       if (dev_priv->gpu_error.first_error == NULL) {
-               dev_priv->gpu_error.first_error = error;
-               error = NULL;
+       if (!error->simulated) {
+               spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
+               if (!dev_priv->gpu_error.first_error) {
+                       dev_priv->gpu_error.first_error = error;
+                       error = NULL;
+               }
+               spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
        }
-       spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
 
        if (error) {
                i915_error_state_free(&error->ref);
index a642bbc7777dcf6aaf8e48c67f3e27577c3836f6..d7e81a3886fdc41e63ec00cd7b1f0c50c7f24ebb 100644 (file)
@@ -1173,6 +1173,7 @@ struct drm_i915_gem_context_param {
 #define I915_CONTEXT_PARAM_BAN_PERIOD  0x1
 #define I915_CONTEXT_PARAM_NO_ZEROMAP  0x2
 #define I915_CONTEXT_PARAM_GTT_SIZE    0x3
+#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE    0x4
        __u64 value;
 };