drm/i915: Extend vlv/chv residency resolution
author Mika Kuoppala <mika.kuoppala@linux.intel.com>
Wed, 15 Mar 2017 16:07:13 +0000 (18:07 +0200)
committer Mika Kuoppala <mika.kuoppala@intel.com>
Thu, 16 Mar 2017 10:28:28 +0000 (12:28 +0200)
Vlv and chv residency counters are 40 bits in width.
With a control bit, we can choose between the upper or lower
32-bit window into this counter.

Let's toggle this bit on and off and read both parts.
As a result we can push the wrap from 13 seconds to 54
minutes.

v2: commit msg, loop readability, goto elimination (Chris)
v3: bug ref, divide outside runtime pm lock (Chris)

References: https://bugs.freedesktop.org/show_bug.cgi?id=94852
Reported-by: Len Brown <len.brown@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
drivers/gpu/drm/i915/intel_pm.c

index da742a9dd9e177818b3cafd27967e4b0255726eb..dcf1b7274e145881a0f201d1e5a1b934978962be 100644 (file)
@@ -8350,12 +8350,51 @@ void intel_pm_setup(struct drm_i915_private *dev_priv)
        atomic_set(&dev_priv->pm.wakeref_count, 0);
 }
 
+static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
+                            const i915_reg_t reg)
+{
+       u32 lower, upper, tmp, saved_ctl;
+
+       /* Read the full 40-bit residency counter behind reg. The registers
+        * need no forcewake; we borrow uncore.lock to serialise range toggles.
+        */
+       spin_lock_irq(&dev_priv->uncore.lock);
+       saved_ctl = I915_READ_FW(VLV_COUNTER_CONTROL);
+
+       if (!(saved_ctl & VLV_COUNT_RANGE_HIGH))
+               I915_WRITE_FW(VLV_COUNTER_CONTROL,
+                             _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
+
+       /* vlv and chv residency counters are 40 bits wide; a control bit
+        * selects the upper or lower 32-bit window. Re-read until the upper
+        * window is unchanged across the lower read, then restore the bit.
+        */
+       upper = I915_READ_FW(reg);
+       do {
+               tmp = upper;
+
+               I915_WRITE_FW(VLV_COUNTER_CONTROL,
+                             _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
+               lower = I915_READ_FW(reg);
+
+               I915_WRITE_FW(VLV_COUNTER_CONTROL,
+                             _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
+               upper = I915_READ_FW(reg);
+       } while (upper != tmp);
+
+       if (!(saved_ctl & VLV_COUNT_RANGE_HIGH))
+               I915_WRITE_FW(VLV_COUNTER_CONTROL,
+                             _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
+
+       spin_unlock_irq(&dev_priv->uncore.lock);
+
+       return lower | (u64)upper << 8; /* upper window is bits 39:8 */
+}
+
 u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
                           const i915_reg_t reg)
 {
-       u64 raw_time; /* 32b value may overflow during fixed point math */
-       u64 units = 128000ULL, div = 100000ULL;
-       u64 ret;
+       u64 time_hw, units, div;
 
        if (!intel_enable_rc6())
                return 0;
@@ -8367,16 +8406,19 @@ u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
                units = 1000;
                div = dev_priv->czclk_freq;
 
-               if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
-                       units <<= 8;
+               time_hw = vlv_residency_raw(dev_priv, reg);
        } else if (IS_GEN9_LP(dev_priv)) {
                units = 1000;
                div = 1200;             /* 833.33ns */
-       }
 
-       raw_time = I915_READ(reg) * units;
-       ret = DIV_ROUND_UP_ULL(raw_time, div);
+               time_hw = I915_READ(reg);
+       } else {
+               units = 128000; /* 1.28us */
+               div = 100000;
+
+               time_hw = I915_READ(reg);
+       }
 
        intel_runtime_pm_put(dev_priv);
-       return ret;
+       return DIV_ROUND_UP_ULL(time_hw * units, div);
 }