wake_up_all(&ring->irq_queue);
}
-static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
- struct intel_rps_ei *rps_ei)
+static void vlv_c0_read(struct drm_i915_private *dev_priv,
+ struct intel_rps_ei *ei)
{
- u32 cz_ts, cz_freq_khz;
- u32 render_count, media_count;
- u32 elapsed_render, elapsed_media, elapsed_time;
- u32 residency = 0;
-
- cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
- cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
-
- render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
- media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
-
- if (rps_ei->cz_clock == 0) {
- rps_ei->cz_clock = cz_ts;
- rps_ei->render_c0 = render_count;
- rps_ei->media_c0 = media_count;
-
- return dev_priv->rps.cur_freq;
- }
-
- elapsed_time = cz_ts - rps_ei->cz_clock;
- rps_ei->cz_clock = cz_ts;
+ ei->cz_clock = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
+ ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
+ ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
+}
- elapsed_render = render_count - rps_ei->render_c0;
- rps_ei->render_c0 = render_count;
+static bool vlv_c0_above(struct drm_i915_private *dev_priv,
+ const struct intel_rps_ei *old,
+ const struct intel_rps_ei *now,
+ int threshold)
+{
+ u64 time, c0;
- elapsed_media = media_count - rps_ei->media_c0;
- rps_ei->media_c0 = media_count;
+ if (old->cz_clock == 0)
+ return false;
- /* Convert all the counters into common unit of milli sec */
- elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
- elapsed_render /= cz_freq_khz;
- elapsed_media /= cz_freq_khz;
+ time = now->cz_clock - old->cz_clock;
+ time *= threshold * dev_priv->mem_freq;
- /*
- * Calculate overall C0 residency percentage
- * only if elapsed time is non zero
+ /* Workload can be split between render + media, e.g. SwapBuffers
+ * being blitted in X after being rendered in mesa. To account for
+ * this we need to combine both engines into our activity counter.
*/
- if (elapsed_time) {
- residency =
- ((max(elapsed_render, elapsed_media) * 100)
- / elapsed_time);
- }
+ c0 = now->render_c0 - old->render_c0;
+ c0 += now->media_c0 - old->media_c0;
+ c0 *= 100 * VLV_CZ_CLOCK_TO_MILLI_SEC * 4 / 1000;
- return residency;
+ return c0 >= time;
}
-/**
- * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
- * busy-ness calculated from C0 counters of render & media power wells
- * @dev_priv: DRM device private
- *
- */
-static int vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
+void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
{
- u32 residency_C0_up = 0, residency_C0_down = 0;
- int new_delay, adj;
-
- dev_priv->rps.ei_interrupt_count++;
-
- WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+ vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
+ dev_priv->rps.up_ei = dev_priv->rps.down_ei;
+ dev_priv->rps.ei_interrupt_count = 0;
+}
+static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
+{
+ struct intel_rps_ei now;
+ u32 events = 0;
- if (dev_priv->rps.up_ei.cz_clock == 0) {
- vlv_c0_residency(dev_priv, &dev_priv->rps.up_ei);
- vlv_c0_residency(dev_priv, &dev_priv->rps.down_ei);
- return dev_priv->rps.cur_freq;
- }
+ if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
+ return 0;
+ vlv_c0_read(dev_priv, &now);
+ if (now.cz_clock == 0)
+ return 0;
/*
* To down throttle, C0 residency should be less than down threshold
* for continous EI intervals. So calculate down EI counters
* once in VLV_INT_COUNT_FOR_DOWN_EI
*/
- if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
-
+ if (++dev_priv->rps.ei_interrupt_count >= VLV_INT_COUNT_FOR_DOWN_EI) {
+ pm_iir |= GEN6_PM_RP_DOWN_EI_EXPIRED;
dev_priv->rps.ei_interrupt_count = 0;
-
- residency_C0_down = vlv_c0_residency(dev_priv,
- &dev_priv->rps.down_ei);
- } else {
- residency_C0_up = vlv_c0_residency(dev_priv,
- &dev_priv->rps.up_ei);
}
- new_delay = dev_priv->rps.cur_freq;
-
- adj = dev_priv->rps.last_adj;
- /* C0 residency is greater than UP threshold. Increase Frequency */
- if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
- if (adj > 0)
- adj *= 2;
- else
- adj = 1;
-
- if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)
- new_delay = dev_priv->rps.cur_freq + adj;
-
- /*
- * For better performance, jump directly
- * to RPe if we're below it.
- */
- if (new_delay < dev_priv->rps.efficient_freq)
- new_delay = dev_priv->rps.efficient_freq;
+ if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
+ if (!vlv_c0_above(dev_priv,
+ &dev_priv->rps.down_ei, &now,
+ VLV_RP_DOWN_EI_THRESHOLD))
+ events |= GEN6_PM_RP_DOWN_THRESHOLD;
+ dev_priv->rps.down_ei = now;
+ }
- } else if (!dev_priv->rps.ei_interrupt_count &&
- (residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
- if (adj < 0)
- adj *= 2;
- else
- adj = -1;
- /*
- * This means, C0 residency is less than down threshold over
- * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
- */
- if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit)
- new_delay = dev_priv->rps.cur_freq + adj;
+ if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
+ if (vlv_c0_above(dev_priv,
+ &dev_priv->rps.up_ei, &now,
+ VLV_RP_UP_EI_THRESHOLD))
+ events |= GEN6_PM_RP_UP_THRESHOLD;
+ dev_priv->rps.up_ei = now;
}
- return new_delay;
+ return events;
}
static void gen6_pm_rps_work(struct work_struct *work)
mutex_lock(&dev_priv->rps.hw_lock);
+ pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
+
adj = dev_priv->rps.last_adj;
if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
if (adj > 0)
else
new_delay = dev_priv->rps.min_freq_softlimit;
adj = 0;
- } else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
- new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
if (adj < 0)
adj *= 2;
I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
}
+void gen6_rps_busy(struct drm_i915_private *dev_priv)
+{
+ mutex_lock(&dev_priv->rps.hw_lock);
+ if (dev_priv->rps.enabled) {
+ if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
+ gen6_rps_reset_ei(dev_priv);
+ I915_WRITE(GEN6_PMINTRMSK,
+ gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
+ }
+ mutex_unlock(&dev_priv->rps.hw_lock);
+}
+
void gen6_rps_idle(struct drm_i915_private *dev_priv)
{
struct drm_device *dev = dev_priv->dev;
else
gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
dev_priv->rps.last_adj = 0;
+ I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
}
mutex_unlock(&dev_priv->rps.hw_lock);
}
void gen6_rps_boost(struct drm_i915_private *dev_priv)
{
+ u32 val;
+
mutex_lock(&dev_priv->rps.hw_lock);
- if (dev_priv->rps.enabled) {
- intel_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit);
+ val = dev_priv->rps.max_freq_softlimit;
+ if (dev_priv->rps.enabled &&
+ dev_priv->mm.busy &&
+ dev_priv->rps.cur_freq < val) {
+ intel_set_rps(dev_priv->dev, val);
dev_priv->rps.last_adj = 0;
}
mutex_unlock(&dev_priv->rps.hw_lock);