drm/vc4: Fix spurious GPU resets due to BO reuse.
author Eric Anholt <eric@anholt.net>
Mon, 8 Feb 2016 19:19:14 +0000 (11:19 -0800)
committer Eric Anholt <eric@anholt.net>
Tue, 16 Feb 2016 20:21:00 +0000 (12:21 -0800)
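We were tracking the last-seen control list current addresses (the
"where are the head pointers pointing" state, CT0CA/CT1CA) globally
rather than per job, so if another job reused the same BOs and
execution was at the same point as the last time we checked, we'd
stop and trigger a reset even though the GPU had made progress.
Track the last-seen addresses in struct vc4_exec_info instead, so
the hangcheck compares against the state of the job that is
currently executing.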

Signed-off-by: Eric Anholt <eric@anholt.net>
drivers/gpu/drm/vc4/vc4_drv.h
drivers/gpu/drm/vc4/vc4_gem.c

index 080865ec2bae67c7ff7b04a97fdb5956096cc156..b6ccf8181643115988ff0b6ddb60b6dfa5d0ba59 100644 (file)
@@ -92,7 +92,6 @@ struct vc4_dev {
        struct work_struct overflow_mem_work;
 
        struct {
-               uint32_t last_ct0ca, last_ct1ca;
                struct timer_list timer;
                struct work_struct reset_work;
        } hangcheck;
@@ -192,6 +191,11 @@ struct vc4_exec_info {
        /* Sequence number for this bin/render job. */
        uint64_t seqno;
 
+       /* Last current addresses the hardware was processing when the
+        * hangcheck timer checked on us.
+        */
+       uint32_t last_ct0ca, last_ct1ca;
+
        /* Kernel-space copy of the ioctl arguments */
        struct drm_vc4_submit_cl *args;
 
index a9d020e7e891c89d14d139baa6d5a71aff2d2949..1a819dd826f85b18ce068ce978b783f41e368465 100644 (file)
@@ -257,10 +257,17 @@ vc4_hangcheck_elapsed(unsigned long data)
        struct drm_device *dev = (struct drm_device *)data;
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        uint32_t ct0ca, ct1ca;
+       unsigned long irqflags;
+       struct vc4_exec_info *exec;
+
+       spin_lock_irqsave(&vc4->job_lock, irqflags);
+       exec = vc4_first_job(vc4);
 
        /* If idle, we can stop watching for hangs. */
-       if (list_empty(&vc4->job_list))
+       if (!exec) {
+               spin_unlock_irqrestore(&vc4->job_lock, irqflags);
                return;
+       }
 
        ct0ca = V3D_READ(V3D_CTNCA(0));
        ct1ca = V3D_READ(V3D_CTNCA(1));
@@ -268,14 +275,16 @@ vc4_hangcheck_elapsed(unsigned long data)
        /* If we've made any progress in execution, rearm the timer
         * and wait.
         */
-       if (ct0ca != vc4->hangcheck.last_ct0ca ||
-           ct1ca != vc4->hangcheck.last_ct1ca) {
-               vc4->hangcheck.last_ct0ca = ct0ca;
-               vc4->hangcheck.last_ct1ca = ct1ca;
+       if (ct0ca != exec->last_ct0ca || ct1ca != exec->last_ct1ca) {
+               exec->last_ct0ca = ct0ca;
+               exec->last_ct1ca = ct1ca;
+               spin_unlock_irqrestore(&vc4->job_lock, irqflags);
                vc4_queue_hangcheck(dev);
                return;
        }
 
+       spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
        /* We've gone too long with no progress, reset.  This has to
         * be done from a work struct, since resetting can sleep and
         * this timer hook isn't allowed to.