drm/i915: Separate out reset flags from the reset counter

author Chris Wilson <chris@chris-wilson.co.uk>

Fri, 9 Sep 2016 13:11:47 +0000 (14:11 +0100)

committer Chris Wilson <chris@chris-wilson.co.uk>

Fri, 9 Sep 2016 13:23:02 +0000 (14:23 +0100)
author Chris Wilson <chris@chris-wilson.co.uk>
Fri, 9 Sep 2016 13:11:47 +0000 (14:11 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Fri, 9 Sep 2016 13:23:02 +0000 (14:23 +0100)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c

index 1147b99a3120ba6a26dd0812baf42f8d8fef8a4d..24bf923a230ff5da4c2ee79d698904c87a868d60 100644 (file)
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1287,6 +1287,15 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
         enum intel_engine_id id;
         int j;
  
+       if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
+               seq_printf(m, "Wedged\n");
+       if (test_bit(I915_RESET_IN_PROGRESS, &dev_priv->gpu_error.flags))
+               seq_printf(m, "Reset in progress\n");
+       if (waitqueue_active(&dev_priv->gpu_error.wait_queue))
+               seq_printf(m, "Waiter holding struct mutex\n");
+       if (waitqueue_active(&dev_priv->gpu_error.reset_queue))
+               seq_printf(m, "struct_mutex blocked for reset\n");
+
         if (!i915.enable_hangcheck) {
                 seq_printf(m, "Hangcheck disabled\n");
                 return 0;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c

index 02c34d6996eacde3adbdee191659b8b5393e5493..47a676d859dba8f203ac3f6d5185a1c3882c4090 100644 (file)
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1579,7 +1579,7 @@ static int i915_drm_resume(struct drm_device *dev)
         mutex_lock(&dev->struct_mutex);
         if (i915_gem_init_hw(dev)) {
                 DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
-               atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+               set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
         }
         mutex_unlock(&dev->struct_mutex);
  
@@ -1741,20 +1741,13 @@ int i915_reset(struct drm_i915_private *dev_priv)
  {
         struct drm_device *dev = &dev_priv->drm;
         struct i915_gpu_error *error = &dev_priv->gpu_error;
-       unsigned reset_counter;
         int ret;
  
         mutex_lock(&dev->struct_mutex);
  
         /* Clear any previous failed attempts at recovery. Time to try again. */
-       atomic_andnot(I915_WEDGED, &error->reset_counter);
-
-       /* Clear the reset-in-progress flag and increment the reset epoch. */
-       reset_counter = atomic_inc_return(&error->reset_counter);
-       if (WARN_ON(__i915_reset_in_progress(reset_counter))) {
-               ret = -EIO;
-               goto error;
-       }
+       __clear_bit(I915_WEDGED, &error->flags);
+       error->reset_count++;
  
         pr_notice("drm/i915: Resetting chip after gpu hang\n");
  
@@ -1791,6 +1784,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
                 goto error;
         }
  
+       clear_bit(I915_RESET_IN_PROGRESS, &error->flags);
         mutex_unlock(&dev->struct_mutex);
  
         /*
@@ -1805,7 +1799,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
         return 0;
  
  error:
-       atomic_or(I915_WEDGED, &error->reset_counter);
+       set_bit(I915_WEDGED, &error->flags);
         mutex_unlock(&dev->struct_mutex);
         return ret;
  }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h

index f39bede7664c24cbab2d2ae50c1ad0177c29b7ac..dced7e72b625d71dd723a5a0163e56340eacc750 100644 (file)
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1405,9 +1405,10 @@ struct i915_gpu_error {
          * State variable controlling the reset flow and count
          *
          * This is a counter which gets incremented when reset is triggered,
-        * and again when reset has been handled. So odd values (lowest bit set)
-        * means that reset is in progress and even values that
-        * (reset_counter >> 1):th reset was successfully completed.
+        *
+        * Before the reset commences, the I915_RESET_IN_PROGRESS bit is set
+        * meaning that any waiters holding onto the struct_mutex should
+        * relinquish the lock immediately in order for the reset to start.
          *
          * If reset is not completed succesfully, the I915_WEDGE bit is
          * set meaning that hardware is terminally sour and there is no
@@ -1422,10 +1423,11 @@ struct i915_gpu_error {
          * naturally enforces the correct ordering between the bail-out of the
          * waiter and the gpu reset work code.
          */
-       atomic_t reset_counter;
+       unsigned long reset_count;
  
-#define I915_RESET_IN_PROGRESS_FLAG    1
-#define I915_WEDGED                    (1 << 31)
+       unsigned long flags;
+#define I915_RESET_IN_PROGRESS 0
+#define I915_WEDGED            (BITS_PER_LONG - 1)
  
         /**
          * Waitqueue to signal when a hang is detected. Used to for waiters
@@ -3241,44 +3243,24 @@ i915_gem_find_active_request(struct intel_engine_cs *engine);
  
  void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
  
-static inline u32 i915_reset_counter(struct i915_gpu_error *error)
-{
-       return atomic_read(&error->reset_counter);
-}
-
-static inline bool __i915_reset_in_progress(u32 reset)
-{
-       return unlikely(reset & I915_RESET_IN_PROGRESS_FLAG);
-}
-
-static inline bool __i915_reset_in_progress_or_wedged(u32 reset)
-{
-       return unlikely(reset & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED));
-}
-
-static inline bool __i915_terminally_wedged(u32 reset)
-{
-       return unlikely(reset & I915_WEDGED);
-}
-
  static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
  {
-       return __i915_reset_in_progress(i915_reset_counter(error));
+       return unlikely(test_bit(I915_RESET_IN_PROGRESS, &error->flags));
  }
  
-static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error)
+static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
  {
-       return __i915_reset_in_progress_or_wedged(i915_reset_counter(error));
+       return unlikely(test_bit(I915_WEDGED, &error->flags));
  }
  
-static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
+static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error)
  {
-       return __i915_terminally_wedged(i915_reset_counter(error));
+       return i915_reset_in_progress(error) | i915_terminally_wedged(error);
  }
  
  static inline u32 i915_reset_count(struct i915_gpu_error *error)
  {
-       return ((i915_reset_counter(error) & ~I915_WEDGED) + 1) / 2;
+       return READ_ONCE(error->reset_count);
  }
  
  void i915_gem_reset(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c

index b00fb8548d508e6ab42eaf8ffd0ee18f4cfd81ff..87a4f3543f0bb1e028254ef87570990b0108836c 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4525,7 +4525,7 @@ int i915_gem_init(struct drm_device *dev)
                  * for all other failure, such as an allocation failure, bail.
                  */
                 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
-               atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+               set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
                 ret = 0;
         }
  
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c

index ec613fd5e01cfc456b67d2c75b8000818de8baef..24eb4b1b7540e0f52f9ee08389deb686c1de4ec3 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -233,16 +233,18 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
         } while (tmp != req);
  }
  
-static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible)
+static int i915_gem_check_wedge(struct drm_i915_private *dev_priv)
  {
-       if (__i915_terminally_wedged(reset_counter))
+       struct i915_gpu_error *error = &dev_priv->gpu_error;
+
+       if (i915_terminally_wedged(error))
                 return -EIO;
  
-       if (__i915_reset_in_progress(reset_counter)) {
+       if (i915_reset_in_progress(error)) {
                 /* Non-interruptible callers can't handle -EAGAIN, hence return
                  * -EIO unconditionally for these.
                  */
-               if (!interruptible)
+               if (!dev_priv->mm.interruptible)
                         return -EIO;
  
                 return -EAGAIN;
@@ -331,7 +333,6 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
                        struct i915_gem_context *ctx)
  {
         struct drm_i915_private *dev_priv = engine->i915;
-       unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
         struct drm_i915_gem_request *req;
         u32 seqno;
         int ret;
@@ -340,7 +341,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
          * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
          * and restart.
          */
-       ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
+       ret = i915_gem_check_wedge(dev_priv);
         if (ret)
                 return ERR_PTR(ret);
  
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c

index 82358d4e0cc2aa8e6b336fda7cb312985b7851a4..ed172d7beecb757ea0b9b4462fa540123c7529f0 100644 (file)
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2501,53 +2501,41 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
  
         kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
  
+       DRM_DEBUG_DRIVER("resetting chip\n");
+       kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
+
         /*
-        * Note that there's only one work item which does gpu resets, so we
-        * need not worry about concurrent gpu resets potentially incrementing
-        * error->reset_counter twice. We only need to take care of another
-        * racing irq/hangcheck declaring the gpu dead for a second time. A
-        * quick check for that is good enough: schedule_work ensures the
-        * correct ordering between hang detection and this work item, and since
-        * the reset in-progress bit is only ever set by code outside of this
-        * work we don't need to worry about any other races.
+        * In most cases it's guaranteed that we get here with an RPM
+        * reference held, for example because there is a pending GPU
+        * request that won't finish until the reset is done. This
+        * isn't the case at least when we get here by doing a
+        * simulated reset via debugs, so get an RPM reference.
          */
-       if (i915_reset_in_progress(&dev_priv->gpu_error)) {
-               DRM_DEBUG_DRIVER("resetting chip\n");
-               kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
-
-               /*
-                * In most cases it's guaranteed that we get here with an RPM
-                * reference held, for example because there is a pending GPU
-                * request that won't finish until the reset is done. This
-                * isn't the case at least when we get here by doing a
-                * simulated reset via debugs, so get an RPM reference.
-                */
-               intel_runtime_pm_get(dev_priv);
+       intel_runtime_pm_get(dev_priv);
  
-               intel_prepare_reset(dev_priv);
+       intel_prepare_reset(dev_priv);
  
-               /*
-                * All state reset _must_ be completed before we update the
-                * reset counter, for otherwise waiters might miss the reset
-                * pending state and not properly drop locks, resulting in
-                * deadlocks with the reset work.
-                */
-               ret = i915_reset(dev_priv);
+       /*
+        * All state reset _must_ be completed before we update the
+        * reset counter, for otherwise waiters might miss the reset
+        * pending state and not properly drop locks, resulting in
+        * deadlocks with the reset work.
+        */
+       ret = i915_reset(dev_priv);
  
-               intel_finish_reset(dev_priv);
+       intel_finish_reset(dev_priv);
  
-               intel_runtime_pm_put(dev_priv);
+       intel_runtime_pm_put(dev_priv);
  
-               if (ret == 0)
-                       kobject_uevent_env(kobj,
-                                          KOBJ_CHANGE, reset_done_event);
+       if (ret == 0)
+               kobject_uevent_env(kobj,
+                                  KOBJ_CHANGE, reset_done_event);
  
-               /*
-                * Note: The wake_up also serves as a memory barrier so that
-                * waiters see the update value of the reset counter atomic_t.
-                */
-               wake_up_all(&dev_priv->gpu_error.reset_queue);
-       }
+       /*
+        * Note: The wake_up also serves as a memory barrier so that
+        * waiters see the updated value of the dev_priv->gpu_error.
+        */
+       wake_up_all(&dev_priv->gpu_error.reset_queue);
  }
  
  static void i915_report_and_clear_eir(struct drm_i915_private *dev_priv)
@@ -2666,25 +2654,26 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
         i915_capture_error_state(dev_priv, engine_mask, error_msg);
         i915_report_and_clear_eir(dev_priv);
  
-       if (engine_mask) {
-               atomic_or(I915_RESET_IN_PROGRESS_FLAG,
-                               &dev_priv->gpu_error.reset_counter);
+       if (!engine_mask)
+               return;
  
-               /*
-                * Wakeup waiting processes so that the reset function
-                * i915_reset_and_wakeup doesn't deadlock trying to grab
-                * various locks. By bumping the reset counter first, the woken
-                * processes will see a reset in progress and back off,
-                * releasing their locks and then wait for the reset completion.
-                * We must do this for _all_ gpu waiters that might hold locks
-                * that the reset work needs to acquire.
-                *
-                * Note: The wake_up serves as the required memory barrier to
-                * ensure that the waiters see the updated value of the reset
-                * counter atomic_t.
-                */
-               i915_error_wake_up(dev_priv);
-       }
+       if (test_and_set_bit(I915_RESET_IN_PROGRESS,
+                            &dev_priv->gpu_error.flags))
+               return;
+
+       /*
+        * Wakeup waiting processes so that the reset function
+        * i915_reset_and_wakeup doesn't deadlock trying to grab
+        * various locks. By bumping the reset counter first, the woken
+        * processes will see a reset in progress and back off,
+        * releasing their locks and then wait for the reset completion.
+        * We must do this for _all_ gpu waiters that might hold locks
+        * that the reset work needs to acquire.
+        *
+        * Note: The wake_up also provides a memory barrier to ensure that the
+        * waiters see the updated value of the reset flags.
+        */
+       i915_error_wake_up(dev_priv);
  
         i915_reset_and_wakeup(dev_priv);
  }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c

index 76d696b9b22f395773712e5d0a20553369ecd446..7e725054ba58922d7b172bda602feb97e8b79320 100644 (file)
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3646,15 +3646,26 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
         mutex_unlock(&dev->mode_config.mutex);
  }
  
+static bool abort_flip_on_reset(struct intel_crtc *crtc)
+{
+       struct i915_gpu_error *error = &to_i915(crtc->base.dev)->gpu_error;
+
+       if (i915_reset_in_progress(error))
+               return true;
+
+       if (crtc->reset_count != i915_reset_count(error))
+               return true;
+
+       return false;
+}
+
  static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
  {
         struct drm_device *dev = crtc->dev;
         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       unsigned reset_counter;
         bool pending;
  
-       reset_counter = i915_reset_counter(&to_i915(dev)->gpu_error);
-       if (intel_crtc->reset_counter != reset_counter)
+       if (abort_flip_on_reset(intel_crtc))
                 return false;
  
         spin_lock_irq(&dev->event_lock);
@@ -11533,10 +11544,8 @@ static bool __pageflip_finished_cs(struct intel_crtc *crtc,
  {
         struct drm_device *dev = crtc->base.dev;
         struct drm_i915_private *dev_priv = to_i915(dev);
-       unsigned reset_counter;
  
-       reset_counter = i915_reset_counter(&dev_priv->gpu_error);
-       if (crtc->reset_counter != reset_counter)
+       if (abort_flip_on_reset(crtc))
                 return true;
  
         /*
@@ -12202,8 +12211,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
         if (ret)
                 goto cleanup;
  
-       intel_crtc->reset_counter = i915_reset_counter(&dev_priv->gpu_error);
-       if (__i915_reset_in_progress_or_wedged(intel_crtc->reset_counter)) {
+       intel_crtc->reset_count = i915_reset_count(&dev_priv->gpu_error);
+       if (i915_reset_in_progress_or_wedged(&dev_priv->gpu_error)) {
                 ret = -EIO;
                 goto cleanup;
         }
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h

index c55b7ab2e9ed13652591637d10a1adf5e938ccac..8c089e339c29f536a95a08dc976d66259439aceb 100644 (file)
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -706,8 +706,8 @@ struct intel_crtc {
  
         struct intel_crtc_state *config;
  
-       /* reset counter value when the last flip was submitted */
-       unsigned int reset_counter;
+       /* global reset count when the last flip was submitted */
+       unsigned int reset_count;
  
         /* Access to these should be protected by dev_priv->irq_lock. */
         bool cpu_fifo_underrun_disabled;
author	Chris Wilson <chris@chris-wilson.co.uk>
	Fri, 9 Sep 2016 13:11:47 +0000 (14:11 +0100)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Fri, 9 Sep 2016 13:23:02 +0000 (14:23 +0100)
drivers/gpu/drm/i915/i915_debugfs.c		patch \| blob \| blame \| history
drivers/gpu/drm/i915/i915_drv.c		patch \| blob \| blame \| history
drivers/gpu/drm/i915/i915_drv.h		patch \| blob \| blame \| history
drivers/gpu/drm/i915/i915_gem.c		patch \| blob \| blame \| history
drivers/gpu/drm/i915/i915_gem_request.c		patch \| blob \| blame \| history
drivers/gpu/drm/i915/i915_irq.c		patch \| blob \| blame \| history
drivers/gpu/drm/i915/intel_display.c		patch \| blob \| blame \| history
drivers/gpu/drm/i915/intel_drv.h		patch \| blob \| blame \| history