From 8a90523639f49dc4b4fa7ae47bb9c8ed73ea8577 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Sat, 11 Jul 2009 16:48:03 -0400 Subject: [PATCH] drm/i915: refactor error detection & collection This patch refactors the existing error detection and collection code, placing most of it in i915_handle_error(). Additionally, we introduce a work queue for scheduling post-crash tasks such as generating a uevent. Using the uevent facility, userspace should be able to capture a post-mortem dump for diagnostics. Signed-off-by: Jesse Barnes Signed-off-by: Ben Gamari Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_debugfs.c | 2 + drivers/gpu/drm/i915/i915_irq.c | 232 ++++++++++++++++-------- 3 files changed, 161 insertions(+), 74 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d0875287588..b05b44dd3bf 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -229,6 +229,7 @@ typedef struct drm_i915_private { spinlock_t error_lock; struct drm_i915_error_state *first_error; + struct work_struct error_work; /* Register state */ u8 saveLBB; diff --git a/drivers/gpu/drm/i915/i915_gem_debugfs.c b/drivers/gpu/drm/i915/i915_gem_debugfs.c index 9a44bfcb813..cb3b97405fb 100644 --- a/drivers/gpu/drm/i915/i915_gem_debugfs.c +++ b/drivers/gpu/drm/i915/i915_gem_debugfs.c @@ -343,6 +343,8 @@ static int i915_error_state(struct seq_file *m, void *unused) error = dev_priv->first_error; + seq_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec, + error->time.tv_usec); seq_printf(m, "EIR: 0x%08x\n", error->eir); seq_printf(m, " PGTBL_ER: 0x%08x\n", error->pgtbl_er); seq_printf(m, " INSTPM: 0x%08x\n", error->instpm); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7ba23a69a0c..f340b3fd54e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -290,6 +290,35 @@ irqreturn_t igdng_irq_handler(struct drm_device *dev) return ret; } +/** + * i915_error_work_func - do process context error handling work + * @work: work struct + * + * Fire an error uevent so userspace can see that a hang or error + * was detected. + */ +static void i915_error_work_func(struct work_struct *work) +{ + drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t, + error_work); + struct drm_device *dev = dev_priv->dev; + char *event_string = "ERROR=1"; + char *envp[] = { event_string, NULL }; + + DRM_DEBUG("generating error event\n"); + + kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, envp); +} + +/** + * i915_capture_error_state - capture an error record for later analysis + * @dev: drm device + * + * Should be called when an error is detected (either a hang or an error + * interrupt) to capture error state from the time of the error. Fills + * out a structure which becomes available in debugfs for user level tools + * to pick up. + */ static void i915_capture_error_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -325,12 +354,137 @@ static void i915_capture_error_state(struct drm_device *dev) error->acthd = I915_READ(ACTHD_I965); } + do_gettimeofday(&error->time); + dev_priv->first_error = error; out: spin_unlock_irqrestore(&dev_priv->error_lock, flags); } +/** + * i915_handle_error - handle an error interrupt + * @dev: drm device + * + * Do some basic checking of regsiter state at error interrupt time and + * dump it to the syslog. Also call i915_capture_error_state() to make + * sure we get a record and make it available in debugfs. Fire a uevent + * so userspace knows something bad happened (should trigger collection + * of a ring dump etc.). + */ +static void i915_handle_error(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 eir = I915_READ(EIR); + u32 pipea_stats = I915_READ(PIPEASTAT); + u32 pipeb_stats = I915_READ(PIPEBSTAT); + + i915_capture_error_state(dev); + + printk(KERN_ERR "render error detected, EIR: 0x%08x\n", + eir); + + if (IS_G4X(dev)) { + if (eir & (GM45_ERROR_MEM_PRIV | GM45_ERROR_CP_PRIV)) { + u32 ipeir = I915_READ(IPEIR_I965); + + printk(KERN_ERR " IPEIR: 0x%08x\n", + I915_READ(IPEIR_I965)); + printk(KERN_ERR " IPEHR: 0x%08x\n", + I915_READ(IPEHR_I965)); + printk(KERN_ERR " INSTDONE: 0x%08x\n", + I915_READ(INSTDONE_I965)); + printk(KERN_ERR " INSTPS: 0x%08x\n", + I915_READ(INSTPS)); + printk(KERN_ERR " INSTDONE1: 0x%08x\n", + I915_READ(INSTDONE1)); + printk(KERN_ERR " ACTHD: 0x%08x\n", + I915_READ(ACTHD_I965)); + I915_WRITE(IPEIR_I965, ipeir); + (void)I915_READ(IPEIR_I965); + } + if (eir & GM45_ERROR_PAGE_TABLE) { + u32 pgtbl_err = I915_READ(PGTBL_ER); + printk(KERN_ERR "page table error\n"); + printk(KERN_ERR " PGTBL_ER: 0x%08x\n", + pgtbl_err); + I915_WRITE(PGTBL_ER, pgtbl_err); + (void)I915_READ(PGTBL_ER); + } + } + + if (IS_I9XX(dev)) { + if (eir & I915_ERROR_PAGE_TABLE) { + u32 pgtbl_err = I915_READ(PGTBL_ER); + printk(KERN_ERR "page table error\n"); + printk(KERN_ERR " PGTBL_ER: 0x%08x\n", + pgtbl_err); + I915_WRITE(PGTBL_ER, pgtbl_err); + (void)I915_READ(PGTBL_ER); + } + } + + if (eir & I915_ERROR_MEMORY_REFRESH) { + printk(KERN_ERR "memory refresh error\n"); + printk(KERN_ERR "PIPEASTAT: 0x%08x\n", + pipea_stats); + printk(KERN_ERR "PIPEBSTAT: 0x%08x\n", + pipeb_stats); + /* pipestat has already been acked */ + } + if (eir & I915_ERROR_INSTRUCTION) { + printk(KERN_ERR "instruction error\n"); + printk(KERN_ERR " INSTPM: 0x%08x\n", + I915_READ(INSTPM)); + if (!IS_I965G(dev)) { + u32 ipeir = I915_READ(IPEIR); + + printk(KERN_ERR " IPEIR: 0x%08x\n", + I915_READ(IPEIR)); + printk(KERN_ERR " IPEHR: 0x%08x\n", + I915_READ(IPEHR)); + printk(KERN_ERR " INSTDONE: 0x%08x\n", + I915_READ(INSTDONE)); + printk(KERN_ERR " ACTHD: 0x%08x\n", + I915_READ(ACTHD)); + I915_WRITE(IPEIR, ipeir); + (void)I915_READ(IPEIR); + } else { + u32 ipeir = I915_READ(IPEIR_I965); + + printk(KERN_ERR " IPEIR: 0x%08x\n", + I915_READ(IPEIR_I965)); + printk(KERN_ERR " IPEHR: 0x%08x\n", + I915_READ(IPEHR_I965)); + printk(KERN_ERR " INSTDONE: 0x%08x\n", + I915_READ(INSTDONE_I965)); + printk(KERN_ERR " INSTPS: 0x%08x\n", + I915_READ(INSTPS)); + printk(KERN_ERR " INSTDONE1: 0x%08x\n", + I915_READ(INSTDONE1)); + printk(KERN_ERR " ACTHD: 0x%08x\n", + I915_READ(ACTHD_I965)); + I915_WRITE(IPEIR_I965, ipeir); + (void)I915_READ(IPEIR_I965); + } + } + + I915_WRITE(EIR, eir); + (void)I915_READ(EIR); + eir = I915_READ(EIR); + if (eir) { + /* + * some errors might have become stuck, + * mask them. + */ + DRM_ERROR("EIR stuck: 0x%08x, masking\n", eir); + I915_WRITE(EMR, I915_READ(EMR) | eir); + I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT); + } + + schedule_work(&dev_priv->error_work); +} + irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) { struct drm_device *dev = (struct drm_device *) arg; @@ -372,6 +526,9 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) pipea_stats = I915_READ(PIPEASTAT); pipeb_stats = I915_READ(PIPEBSTAT); + if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) + i915_handle_error(dev); + /* * Clear the PIPE(A|B)STAT regs before the IIR */ @@ -409,80 +566,6 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) I915_READ(PORT_HOTPLUG_STAT); } - if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) { - u32 eir = I915_READ(EIR); - - i915_capture_error_state(dev); - - printk(KERN_ERR "render error detected, EIR: 0x%08x\n", - eir); - if (eir & I915_ERROR_PAGE_TABLE) { - u32 pgtbl_err = I915_READ(PGTBL_ER); - printk(KERN_ERR "page table error\n"); - printk(KERN_ERR " PGTBL_ER: 0x%08x\n", - pgtbl_err); - I915_WRITE(PGTBL_ER, pgtbl_err); - (void)I915_READ(PGTBL_ER); - } - if (eir & I915_ERROR_MEMORY_REFRESH) { - printk(KERN_ERR "memory refresh error\n"); - printk(KERN_ERR "PIPEASTAT: 0x%08x\n", - pipea_stats); - printk(KERN_ERR "PIPEBSTAT: 0x%08x\n", - pipeb_stats); - /* pipestat has already been acked */ - } - if (eir & I915_ERROR_INSTRUCTION) { - printk(KERN_ERR "instruction error\n"); - printk(KERN_ERR " INSTPM: 0x%08x\n", - I915_READ(INSTPM)); - if (!IS_I965G(dev)) { - u32 ipeir = I915_READ(IPEIR); - - printk(KERN_ERR " IPEIR: 0x%08x\n", - I915_READ(IPEIR)); - printk(KERN_ERR " IPEHR: 0x%08x\n", - I915_READ(IPEHR)); - printk(KERN_ERR " INSTDONE: 0x%08x\n", - I915_READ(INSTDONE)); - printk(KERN_ERR " ACTHD: 0x%08x\n", - I915_READ(ACTHD)); - I915_WRITE(IPEIR, ipeir); - (void)I915_READ(IPEIR); - } else { - u32 ipeir = I915_READ(IPEIR_I965); - - printk(KERN_ERR " IPEIR: 0x%08x\n", - I915_READ(IPEIR_I965)); - printk(KERN_ERR " IPEHR: 0x%08x\n", - I915_READ(IPEHR_I965)); - printk(KERN_ERR " INSTDONE: 0x%08x\n", - I915_READ(INSTDONE_I965)); - printk(KERN_ERR " INSTPS: 0x%08x\n", - I915_READ(INSTPS)); - printk(KERN_ERR " INSTDONE1: 0x%08x\n", - I915_READ(INSTDONE1)); - printk(KERN_ERR " ACTHD: 0x%08x\n", - I915_READ(ACTHD_I965)); - I915_WRITE(IPEIR_I965, ipeir); - (void)I915_READ(IPEIR_I965); - } - } - - I915_WRITE(EIR, eir); - (void)I915_READ(EIR); - eir = I915_READ(EIR); - if (eir) { - /* - * some errors might have become stuck, - * mask them. - */ - DRM_ERROR("EIR stuck: 0x%08x, masking\n", eir); - I915_WRITE(EMR, I915_READ(EMR) | eir); - I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT); - } - } - I915_WRITE(IIR, iir); new_iir = I915_READ(IIR); /* Flush posted writes */ @@ -830,6 +913,7 @@ void i915_driver_irq_preinstall(struct drm_device * dev) atomic_set(&dev_priv->irq_received, 0); INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func); + INIT_WORK(&dev_priv->error_work, i915_error_work_func); if (IS_IGDNG(dev)) { igdng_irq_preinstall(dev); -- 2.20.1