drm/i915: include hangcheck action and score in error_state
author Mika Kuoppala <mika.kuoppala@linux.intel.com>
Fri, 6 Sep 2013 13:03:28 +0000 (16:03 +0300)
committer Daniel Vetter <daniel.vetter@ffwll.ch>
Fri, 6 Sep 2013 15:56:17 +0000 (17:56 +0200)
Score and action reveal what all the rings were doing
and why a hang was declared. Add an idle state so that
we can distinguish between a waiting and an idle ring.

v2: - add idle as a hangcheck action
    - condensed hangcheck status to single line (Chris)
    - mark active explicitly when we are making progress (Chris)

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gpu_error.c
drivers/gpu/drm/i915/i915_irq.c
drivers/gpu/drm/i915/intel_ringbuffer.h

index c5f0abaa9a22f9a501baab5d59829a772f7c60fb..1fb01b5b819d6c036e87934e76dc86449ced282e 100644 (file)
@@ -328,6 +328,8 @@ struct drm_i915_error_state {
        u32 *active_bo_count, *pinned_bo_count;
        struct intel_overlay_error_state *overlay;
        struct intel_display_error_state *display;
+       int hangcheck_score[I915_NUM_RINGS];
+       enum intel_ring_hangcheck_action hangcheck_action[I915_NUM_RINGS];
 };
 
 struct intel_crtc_config;
index aba9d7498996c29845e6691ac4eff83c8fe85223..c38d575dc5a6173e0c4842c0df8ea576732894fc 100644 (file)
@@ -213,6 +213,24 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
        }
 }
 
+static const char *hangcheck_action_to_str(enum intel_ring_hangcheck_action a)
+{
+       switch (a) {
+       case HANGCHECK_IDLE:
+               return "idle";
+       case HANGCHECK_WAIT:
+               return "wait";
+       case HANGCHECK_ACTIVE:
+               return "active";
+       case HANGCHECK_KICK:
+               return "kick";
+       case HANGCHECK_HUNG:
+               return "hung";
+       }
+
+       return "unknown";
+}
+
 static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
                                  struct drm_device *dev,
                                  struct drm_i915_error_state *error,
@@ -253,6 +271,9 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
        err_printf(m, "  waiting: %s\n", yesno(error->waiting[ring]));
        err_printf(m, "  ring->head: 0x%08x\n", error->cpu_ring_head[ring]);
        err_printf(m, "  ring->tail: 0x%08x\n", error->cpu_ring_tail[ring]);
+       err_printf(m, "  hangcheck: %s [%d]\n",
+                  hangcheck_action_to_str(error->hangcheck_action[ring]),
+                  error->hangcheck_score[ring]);
 }
 
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
@@ -718,6 +739,9 @@ static void i915_record_ring_state(struct drm_device *dev,
 
        error->cpu_ring_head[ring->id] = ring->head;
        error->cpu_ring_tail[ring->id] = ring->tail;
+
+       error->hangcheck_score[ring->id] = ring->hangcheck.score;
+       error->hangcheck_action[ring->id] = ring->hangcheck.action;
 }
 
 
index 9e48cf27db5e7f7f660b056d4142dcbf72b9637f..5350ef57ec3506907fe312ff55cebba06addb294 100644 (file)
@@ -1975,6 +1975,8 @@ static void i915_hangcheck_elapsed(unsigned long data)
 
                if (ring->hangcheck.seqno == seqno) {
                        if (ring_idle(ring, seqno)) {
+                               ring->hangcheck.action = HANGCHECK_IDLE;
+
                                if (waitqueue_active(&ring->irq_queue)) {
                                        /* Issue a wake-up to catch stuck h/w. */
                                        DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
@@ -2003,6 +2005,7 @@ static void i915_hangcheck_elapsed(unsigned long data)
                                                                    acthd);
 
                                switch (ring->hangcheck.action) {
+                               case HANGCHECK_IDLE:
                                case HANGCHECK_WAIT:
                                        break;
                                case HANGCHECK_ACTIVE:
@@ -2018,6 +2021,8 @@ static void i915_hangcheck_elapsed(unsigned long data)
                                }
                        }
                } else {
+                       ring->hangcheck.action = HANGCHECK_ACTIVE;
+
                        /* Gradually reduce the count so that we catch DoS
                         * attempts across multiple batches.
                         */
index ad2dd65c63f8ff970ba2541b735470444bd96959..b5aac57020850148f69bccf38d5b89c07582b792 100644 (file)
@@ -34,6 +34,7 @@ struct  intel_hw_status_page {
 #define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val)
 
 enum intel_ring_hangcheck_action {
+       HANGCHECK_IDLE = 0,
        HANGCHECK_WAIT,
        HANGCHECK_ACTIVE,
        HANGCHECK_KICK,