drm/i915: swizzling support for snb/ivb
author Daniel Vetter <daniel.vetter@ffwll.ch>
Thu, 2 Feb 2012 08:58:12 +0000 (09:58 +0100)
committer Daniel Vetter <daniel.vetter@ffwll.ch>
Wed, 8 Feb 2012 22:16:24 +0000 (23:16 +0100)
We have to do this manually. Somebody had a Great Idea.

I've measured speed-ups just a few percent above the noise level
(below 5% for the best case), but no slowdowns. Chris Wilson measured
quite a bit more (10-20% above the usual snb variance) on a more
recent and better tuned version of sna, but also recorded a few
slow-downs on benchmarks known for uglier amounts of snb-induced
variance.

v2: Incorporate Ben Widawsky's preliminary review comments and
elaborate a bit about the performance impact in the changelog.

v3: Add a comment as to why we don't need to check the 3rd memory
channel.

v4: Fixup whitespace.

Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
drivers/gpu/drm/i915/i915_dma.c
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_tiling.c
drivers/gpu/drm/i915/i915_reg.h

index 3f27173fb5191580a86c38f52ac9db1adc5991d4..dfef9569f2a11bbbd0cdc63958fa6197257bbed5 100644 (file)
@@ -1208,7 +1208,7 @@ static int i915_load_gem_init(struct drm_device *dev)
        i915_gem_do_init(dev, 0, mappable_size, gtt_size - PAGE_SIZE);
 
        mutex_lock(&dev->struct_mutex);
-       ret = i915_gem_init_ringbuffer(dev);
+       ret = i915_gem_init_hw(dev);
        mutex_unlock(&dev->struct_mutex);
        if (ret)
                return ret;
index 1658cfd85aa7d5fba882869303ba4dbca24e3c12..12ddf47b8187b31483d6d7d32bc61829084f1d4d 100644 (file)
@@ -495,7 +495,7 @@ static int i915_drm_thaw(struct drm_device *dev)
                mutex_lock(&dev->struct_mutex);
                dev_priv->mm.suspended = 0;
 
-               error = i915_gem_init_ringbuffer(dev);
+               error = i915_gem_init_hw(dev);
                mutex_unlock(&dev->struct_mutex);
 
                if (HAS_PCH_SPLIT(dev))
@@ -686,6 +686,8 @@ int i915_reset(struct drm_device *dev, u8 flags)
                        !dev_priv->mm.suspended) {
                dev_priv->mm.suspended = 0;
 
+               i915_gem_init_swizzling(dev);
+
                dev_priv->ring[RCS].init(&dev_priv->ring[RCS]);
                if (HAS_BSD(dev))
                    dev_priv->ring[VCS].init(&dev_priv->ring[VCS]);
index 865de800756d94508a322c8b3b190e03fc0a322c..08454192c4c7a1e5ab0a6d67590170975e2da606 100644 (file)
@@ -1187,7 +1187,8 @@ int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj,
                                            uint32_t read_domains,
                                            uint32_t write_domain);
 int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
-int __must_check i915_gem_init_ringbuffer(struct drm_device *dev);
+int __must_check i915_gem_init_hw(struct drm_device *dev);
+void i915_gem_init_swizzling(struct drm_device *dev);
 void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
 void i915_gem_do_init(struct drm_device *dev,
                      unsigned long start,
index 51a2b0c2a30d550d2adceffb130747943d6aa5a7..86fffd26a89456d4810497f24f7cae973689e407 100644 (file)
@@ -3681,12 +3681,31 @@ i915_gem_idle(struct drm_device *dev)
        return 0;
 }
 
+void i915_gem_init_swizzling(struct drm_device *dev)
+{
+       drm_i915_private_t *dev_priv = dev->dev_private;
+
+       if (INTEL_INFO(dev)->gen < 6 ||
+           dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
+               return;
+
+       I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
+                                DISP_TILE_SURFACE_SWIZZLING);
+
+       I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
+       if (IS_GEN6(dev))
+               I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB));
+       else
+               I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_IVB));
+}
 int
-i915_gem_init_ringbuffer(struct drm_device *dev)
+i915_gem_init_hw(struct drm_device *dev)
 {
        drm_i915_private_t *dev_priv = dev->dev_private;
        int ret;
 
+       i915_gem_init_swizzling(dev);
+
        ret = intel_init_render_ring_buffer(dev);
        if (ret)
                return ret;
@@ -3742,7 +3761,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
        mutex_lock(&dev->struct_mutex);
        dev_priv->mm.suspended = 0;
 
-       ret = i915_gem_init_ringbuffer(dev);
+       ret = i915_gem_init_hw(dev);
        if (ret != 0) {
                mutex_unlock(&dev->struct_mutex);
                return ret;
index 861223bf39444b53ea8e24dadd6f11e616cb7aff..1a93066659871eedfcc1e50668cfd82b1c7b4b63 100644 (file)
@@ -93,8 +93,23 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
        uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
 
        if (INTEL_INFO(dev)->gen >= 6) {
-               swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-               swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+               uint32_t dimm_c0, dimm_c1;
+               dimm_c0 = I915_READ(MAD_DIMM_C0);
+               dimm_c1 = I915_READ(MAD_DIMM_C1);
+               dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
+               dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
+               /* Enable swizzling when the channels are populated with
+                * identically sized dimms. We don't need to check the 3rd
+                * channel because no cpu with gpu attached ships in that
+                * configuration. Also, swizzling only makes sense for 2
+                * channels anyway. */
+               if (dimm_c0 == dimm_c1) {
+                       swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+                       swizzle_y = I915_BIT_6_SWIZZLE_9;
+               } else {
+                       swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+                       swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+               }
        } else if (IS_GEN5(dev)) {
                /* On Ironlake whatever DRAM config, GPU always do
                 * same swizzling setup.
index f9607387c00c675cb935648e11413c34792a4178..89816fe3f9d8da2c6021b12cd8477b2a38a50c5c 100644 (file)
 #define FENCE_REG_SANDYBRIDGE_0                0x100000
 #define   SANDYBRIDGE_FENCE_PITCH_SHIFT        32
 
+/* control register for cpu gtt access */
+#define TILECTL                                0x101000
+#define   TILECTL_SWZCTL                       (1 << 0)
+#define   TILECTL_TLB_PREFETCH_DIS     (1 << 2)
+#define   TILECTL_BACKSNOOP_DIS                (1 << 3)
+
 /*
  * Instruction and interrupt control regs
  */
 #define RING_MAX_IDLE(base)    ((base)+0x54)
 #define RING_HWS_PGA(base)     ((base)+0x80)
 #define RING_HWS_PGA_GEN6(base)        ((base)+0x2080)
+#define ARB_MODE               0x04030
+#define   ARB_MODE_SWIZZLE_SNB (1<<4)
+#define   ARB_MODE_SWIZZLE_IVB (1<<5)
+#define   ARB_MODE_ENABLE(x)   GFX_MODE_ENABLE(x)
+#define   ARB_MODE_DISABLE(x)  GFX_MODE_DISABLE(x)
 #define RENDER_HWS_PGA_GEN7    (0x04080)
 #define RING_FAULT_REG(ring)   (0x4094 + 0x100*(ring)->id)
 #define DONE_REG               0x40b0
 #define C0DRB3                 0x10206
 #define C1DRB3                 0x10606
 
+/** snb MCH registers for reading the DRAM channel configuration */
+#define MAD_DIMM_C0                    (MCHBAR_MIRROR_BASE_SNB + 0x5004)
+#define MAD_DIMM_C1                    (MCHBAR_MIRROR_BASE_SNB + 0x5008)
+#define MAD_DIMM_C2                    (MCHBAR_MIRROR_BASE_SNB + 0x500C)
+#define   MAD_DIMM_ECC_MASK            (0x3 << 24)
+#define   MAD_DIMM_ECC_OFF             (0x0 << 24)
+#define   MAD_DIMM_ECC_IO_ON_LOGIC_OFF (0x1 << 24)
+#define   MAD_DIMM_ECC_IO_OFF_LOGIC_ON (0x2 << 24)
+#define   MAD_DIMM_ECC_ON              (0x3 << 24)
+#define   MAD_DIMM_ENH_INTERLEAVE      (0x1 << 22)
+#define   MAD_DIMM_RANK_INTERLEAVE     (0x1 << 21)
+#define   MAD_DIMM_B_WIDTH_X16         (0x1 << 20) /* X8 chips if unset */
+#define   MAD_DIMM_A_WIDTH_X16         (0x1 << 19) /* X8 chips if unset */
+#define   MAD_DIMM_B_DUAL_RANK         (0x1 << 18)
+#define   MAD_DIMM_A_DUAL_RANK         (0x1 << 17)
+#define   MAD_DIMM_A_SELECT            (0x1 << 16)
+/* DIMM sizes are in multiples of 256mb. */
+#define   MAD_DIMM_B_SIZE_SHIFT                8
+#define   MAD_DIMM_B_SIZE_MASK         (0xff << MAD_DIMM_B_SIZE_SHIFT)
+#define   MAD_DIMM_A_SIZE_SHIFT                0
+#define   MAD_DIMM_A_SIZE_MASK         (0xff << MAD_DIMM_A_SIZE_SHIFT)
+
+
 /* Clocking configuration register */
 #define CLKCFG                 0x10c00
 #define CLKCFG_FSB_400                                 (5 << 0)        /* hrawclk 100 */