drm/i915/skl: Tune IZ hashing when subslices are unbalanced
author Damien Lespiau <damien.lespiau@intel.com>
Sat, 14 Feb 2015 18:30:29 +0000 (18:30 +0000)
committer Daniel Vetter <daniel.vetter@ffwll.ch>
Mon, 23 Feb 2015 23:07:05 +0000 (00:07 +0100)
When one EU is disabled in a particular subslice, we can tune how the
work is spread between subslices to improve EU utilization.

v2: - Use a bitfield to record which subslice(s) has(have) 7 EUs. That
      will also make the machinery work if several subslices have 7 EUs.
      (Jeff Mcgee)
    - Only apply the different hashing algorithm if the slice is
      effectively unbalanced by checking there's a single subslice with
      7 EUs. (Jeff Mcgee)

v3: Fix typo in comment (Jeff Mcgee)

Issue: VIZ-3845
Cc: Jeff Mcgee <jeff.mcgee@intel.com>
Reviewed-by: Jeff Mcgee <jeff.mcgee@intel.com>
Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
drivers/gpu/drm/i915/i915_dma.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_ringbuffer.c

index 9a365b40b50eeccf189a2a2875375cec73398ef4..f9992ca11d10eca87dce16b95140b476cb6c75a4 100644 (file)
@@ -650,13 +650,24 @@ static void intel_device_info_runtime_init(struct drm_device *dev)
                                continue;
 
                        for (ss = 0; ss < ss_max; ss++) {
+                               u32 n_disabled;
+
                                if (ss_disable & (0x1 << ss))
                                        /* skip disabled subslice */
                                        continue;
 
-                               info->eu_total += eu_max -
-                                                 hweight8(eu_disable[s] >>
-                                                          (ss * eu_max));
+                               n_disabled = hweight8(eu_disable[s] >>
+                                                     (ss * eu_max));
+
+                               /*
+                                * Record which subslice(s) has(have) 7 EUs. we
+                                * can tune the hash used to spread work among
+                                * subslices if they are unbalanced.
+                                */
+                               if (eu_max - n_disabled == 7)
+                                       info->subslice_7eu[s] |= 1 << ss;
+
+                               info->eu_total += eu_max - n_disabled;
                        }
                }
 
index 61d41abde2e94730ccac0890540f9034b78e84d5..4280d0b292da26dbbf76215fc1390f876deab52e 100644 (file)
@@ -700,6 +700,8 @@ struct intel_device_info {
        u8 subslice_per_slice;
        u8 eu_total;
        u8 eu_per_subslice;
+       /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
+       u8 subslice_7eu[3];
        u8 has_slice_pg:1;
        u8 has_subslice_pg:1;
        u8 has_eu_pg:1;
index 5fab90c84c5da7459e2fb3b1634c979e49ae7b36..c2124119692d5f858df7b87f8f5c5932d3bbd451 100644 (file)
@@ -1351,6 +1351,8 @@ enum skl_disp_power_wells {
 #define   GEN6_WIZ_HASHING_16x4                                GEN6_WIZ_HASHING(1, 0)
 #define   GEN6_WIZ_HASHING_MASK                                GEN6_WIZ_HASHING(1, 1)
 #define   GEN6_TD_FOUR_ROW_DISPATCH_DISABLE            (1 << 5)
+/*
+ * Per-slice IZ hashing field: 2 bits per slice, with slice N occupying bits
+ * [2N+1:2N]. The macro arguments are parenthesized so that expression
+ * arguments (e.g. "s + 1") expand with the intended precedence.
+ */
+#define   GEN9_IZ_HASHING_MASK(slice)                  (0x3 << ((slice) * 2))
+#define   GEN9_IZ_HASHING(slice, val)                  ((val) << ((slice) * 2))
 
 #define GFX_MODE       0x02520
 #define GFX_MODE_GEN7  0x0229c
index 4570fe172b7951eb18ef727c4773aef1325bf997..665985d5fcf4eddad0aba4c832b961afe240f019 100644 (file)
@@ -1002,6 +1002,49 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring)
        return 0;
 }
 
+/*
+ * Tune the per-slice IZ hashing fields of GEN7_GT_MODE for slices that are
+ * unbalanced, i.e. slices for which exactly one subslice is recorded in
+ * info.subslice_7eu[] as having 7 EUs.
+ *
+ * Slices that do not meet that condition keep a field value of 0. If all
+ * three field values end up 0, no workaround is programmed at all.
+ *
+ * Returns 0.
+ * NOTE(review): WA_SET_FIELD_MASKED is defined outside this hunk; it may rely
+ * on the local names used here and may return early with an error code -
+ * confirm against its definition before refactoring.
+ */
+static int skl_tune_iz_hashing(struct intel_engine_cs *ring)
+{
+       struct drm_device *dev = ring->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       /* Hashing field value for each of the 3 slices; 0 means "leave as is" */
+       u8 vals[3] = { 0, 0, 0 };
+       unsigned int i;
+
+       for (i = 0; i < 3; i++) {
+               u8 ss;
+
+               /*
+                * Only consider slices where one, and only one, subslice has 7
+                * EUs
+                */
+               if (hweight8(dev_priv->info.subslice_7eu[i]) != 1)
+                       continue;
+
+               /*
+                * subslice_7eu[i] != 0 (because of the check above) and
+                * ss_max == 4 (maximum number of subslices possible per slice)
+                *
+                * ->    0 <= ss <= 3;
+                */
+               ss = ffs(dev_priv->info.subslice_7eu[i]) - 1;
+               /*
+                * The field value is 3 minus the index of the 7-EU subslice.
+                * Note that a 7-EU subslice at index 3 therefore yields 0,
+                * the same value as a slice that is not tuned.
+                */
+               vals[i] = 3 - ss;
+       }
+
+       /* Nothing to tune: skip the register write entirely */
+       if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
+               return 0;
+
+       /* Tune IZ hashing. See intel_device_info_runtime_init() */
+       WA_SET_FIELD_MASKED(GEN7_GT_MODE,
+                           GEN9_IZ_HASHING_MASK(2) |
+                           GEN9_IZ_HASHING_MASK(1) |
+                           GEN9_IZ_HASHING_MASK(0),
+                           GEN9_IZ_HASHING(2, vals[2]) |
+                           GEN9_IZ_HASHING(1, vals[1]) |
+                           GEN9_IZ_HASHING(0, vals[0]));
+
+       return 0;
+}
+
+
 static int skl_init_workarounds(struct intel_engine_cs *ring)
 {
        struct drm_device *dev = ring->dev;
@@ -1014,7 +1057,7 @@ static int skl_init_workarounds(struct intel_engine_cs *ring)
                WA_SET_BIT_MASKED(HIZ_CHICKEN,
                                  BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
 
-       return 0;
+       return skl_tune_iz_hashing(ring);
 }
 
 int init_workarounds_ring(struct intel_engine_cs *ring)