drm/i915: Move whole object to CPU domain for coherent shmem access
author Chris Wilson <chris@chris-wilson.co.uk>
Fri, 10 Mar 2017 00:09:42 +0000 (00:09 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Mon, 13 Mar 2017 11:16:09 +0000 (11:16 +0000)
If the object is coherent, we can simply update the cache domain on the
whole object rather than calculate the before/after clflushes. The
advantage is that we then get correct tracking of elided flushes when
changing coherency later.

Testcase: igt/gem_pwrite_snooped
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170310000942.11661-1-chris@chris-wilson.co.uk
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c

index 6a1c4319f86fb0408e24adec2466d2c261febe0a..d11c96405a5346a8d936bd853197bd701a898b95 100644 (file)
@@ -3353,9 +3353,9 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
                                    unsigned int *needs_clflush);
 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
                                     unsigned int *needs_clflush);
-#define CLFLUSH_BEFORE 0x1
-#define CLFLUSH_AFTER 0x2
-#define CLFLUSH_FLAGS (CLFLUSH_BEFORE | CLFLUSH_AFTER)
+#define CLFLUSH_BEFORE BIT(0)
+#define CLFLUSH_AFTER  BIT(1)
+#define CLFLUSH_FLAGS  (CLFLUSH_BEFORE | CLFLUSH_AFTER)
 
 static inline void
 i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
index aca1eaddafb417141828dcbb030641db6df9ed8b..202bb850f260af26220935e90caac91e6c035d15 100644 (file)
@@ -788,6 +788,15 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
        if (ret)
                return ret;
 
+       if (i915_gem_object_is_coherent(obj) ||
+           !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+               ret = i915_gem_object_set_to_cpu_domain(obj, false);
+               if (ret)
+                       goto err_unpin;
+               else
+                       goto out;
+       }
+
        i915_gem_object_flush_gtt_write_domain(obj);
 
        /* If we're not in the cpu read domain, set ourself into the gtt
@@ -796,16 +805,9 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
         * anyway again before the next pread happens.
         */
        if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
-               *needs_clflush = !i915_gem_object_is_coherent(obj);
-
-       if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
-               ret = i915_gem_object_set_to_cpu_domain(obj, false);
-               if (ret)
-                       goto err_unpin;
-
-               *needs_clflush = 0;
-       }
+               *needs_clflush = CLFLUSH_BEFORE;
 
+out:
        /* return with the pages pinned */
        return 0;
 
@@ -838,6 +840,15 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
        if (ret)
                return ret;
 
+       if (i915_gem_object_is_coherent(obj) ||
+           !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+               ret = i915_gem_object_set_to_cpu_domain(obj, true);
+               if (ret)
+                       goto err_unpin;
+               else
+                       goto out;
+       }
+
        i915_gem_object_flush_gtt_write_domain(obj);
 
        /* If we're not in the cpu write domain, set ourself into the
@@ -846,25 +857,15 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
         * right away and we therefore have to clflush anyway.
         */
        if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
-               *needs_clflush |= cpu_write_needs_clflush(obj) << 1;
+               *needs_clflush |= CLFLUSH_AFTER;
 
        /* Same trick applies to invalidate partially written cachelines read
         * before writing.
         */
        if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
-               *needs_clflush |= !i915_gem_object_is_coherent(obj);
-
-       if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
-               ret = i915_gem_object_set_to_cpu_domain(obj, true);
-               if (ret)
-                       goto err_unpin;
-
-               *needs_clflush = 0;
-       }
-
-       if ((*needs_clflush & CLFLUSH_AFTER) == 0)
-               obj->cache_dirty = true;
+               *needs_clflush |= CLFLUSH_BEFORE;
 
+out:
        intel_fb_obj_invalidate(obj, ORIGIN_CPU);
        obj->mm.dirty = true;
        /* return with the pages pinned */