drm/i915: Perform object clflushing asynchronously
author Chris Wilson <chris@chris-wilson.co.uk>
Wed, 22 Feb 2017 11:40:48 +0000 (11:40 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Wed, 22 Feb 2017 12:12:15 +0000 (12:12 +0000)
Flushing the cachelines for an object is slow; it can take as much as 100ms
for a large framebuffer. We currently do this under the struct_mutex BKL
on execution or on pageflip. But now with the ability to add fences to
obj->resv for both flips and execbuf (and we naturally wait on the fence
before CPU access), we can move the clflush operation to a workqueue and
signal a fence for completion, thereby doing the work asynchronously and
not blocking the driver or its clients.

v2: Introduce i915_gem_clflush.h and use a new name, split out some
extras into separate patches.

Suggested-by: Akash Goel <akash.goel@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170222114049.28456-5-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/Makefile
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_clflush.c [new file with mode: 0644]
drivers/gpu/drm/i915/i915_gem_clflush.h [new file with mode: 0644]
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/intel_display.c

index d0c9577c7533f9edea2d99f43910e85428ff8239..53e30fcb2751c3a7bfd79b1df4b812ccd0382b06 100644 (file)
@@ -29,6 +29,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
 # GEM code
 i915-y += i915_cmd_parser.o \
          i915_gem_batch_pool.o \
+         i915_gem_clflush.o \
          i915_gem_context.o \
          i915_gem_dmabuf.o \
          i915_gem_evict.o \
index 590c524a1531e97c1b0a8f41970631f41694c7ab..70fc2fa0eaa876ebacbc46cb190321231dbeccf8 100644 (file)
@@ -3383,7 +3383,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
 void i915_gem_reset(struct drm_i915_private *dev_priv);
 void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
-void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
+
 void i915_gem_init_mmio(struct drm_i915_private *i915);
 int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
 int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv);
index 00213c28279642fc6e19360af9b8289234378421..fad0f5adb970ee57bbff405584079f3c7bb14187 100644 (file)
@@ -29,6 +29,7 @@
 #include <drm/drm_vma_manager.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
+#include "i915_gem_clflush.h"
 #include "i915_vgpu.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
@@ -3133,46 +3134,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
        return 0;
 }
 
-void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
-                            bool force)
-{
-       /* If we don't have a page list set up, then we're not pinned
-        * to GPU, and we can ignore the cache flush because it'll happen
-        * again at bind time.
-        */
-       if (!obj->mm.pages) {
-               GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
-               return;
-       }
-
-       /*
-        * Stolen memory is always coherent with the GPU as it is explicitly
-        * marked as wc by the system, or the system is cache-coherent.
-        * Similarly, we only access struct pages through the CPU cache, so
-        * anything not backed by physical memory we consider to be always
-        * coherent and not need clflushing.
-        */
-       if (!i915_gem_object_has_struct_page(obj))
-               return;
-
-       /* If the GPU is snooping the contents of the CPU cache,
-        * we do not need to manually clear the CPU cache lines.  However,
-        * the caches are only snooped when the render cache is
-        * flushed/invalidated.  As we always have to emit invalidations
-        * and flushes when moving into and out of the RENDER domain, correct
-        * snooping behaviour occurs naturally as the result of our domain
-        * tracking.
-        */
-       if (!force && i915_gem_object_is_coherent(obj)) {
-               obj->cache_dirty = true;
-               return;
-       }
-
-       trace_i915_gem_object_clflush(obj);
-       drm_clflush_sg(obj->mm.pages);
-       obj->cache_dirty = false;
-}
-
 /** Flushes the GTT write domain for the object if it's dirty. */
 static void
 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
@@ -3213,9 +3174,7 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
        if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
                return;
 
-       i915_gem_clflush_object(obj, obj->pin_display);
-       intel_fb_obj_flush(obj, false, ORIGIN_CPU);
-
+       i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
        obj->base.write_domain = 0;
 }
 
@@ -3224,9 +3183,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
        if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty)
                return;
 
-       i915_gem_clflush_object(obj, true);
-       intel_fb_obj_flush(obj, false, ORIGIN_CPU);
-
+       i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
        obj->base.write_domain = 0;
 }
 
@@ -3657,8 +3614,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 
        /* Flush the CPU cache if it's still invalid. */
        if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
-               i915_gem_clflush_object(obj, false);
-
+               i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
                obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
        }
 
@@ -4526,6 +4482,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 
        mutex_lock(&dev_priv->drm.struct_mutex);
 
+       i915_gem_clflush_init(dev_priv);
+
        if (!i915.enable_execlists) {
                dev_priv->gt.resume = intel_legacy_submission_resume;
                dev_priv->gt.cleanup_engine = intel_engine_cleanup;
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c
new file mode 100644 (file)
index 0000000..c5fee02
--- /dev/null
@@ -0,0 +1,189 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "i915_drv.h"
+#include "intel_frontbuffer.h"
+#include "i915_gem_clflush.h"
+
+static DEFINE_SPINLOCK(clflush_lock);
+static u64 clflush_context;
+
+struct clflush {
+       struct dma_fence dma; /* Must be first for dma_fence_free() */
+       struct i915_sw_fence wait;
+       struct work_struct work;
+       struct drm_i915_gem_object *obj;
+};
+
+static const char *i915_clflush_get_driver_name(struct dma_fence *fence)
+{
+       return DRIVER_NAME;
+}
+
+static const char *i915_clflush_get_timeline_name(struct dma_fence *fence)
+{
+       return "clflush";
+}
+
+static bool i915_clflush_enable_signaling(struct dma_fence *fence)
+{
+       return true;
+}
+
+static void i915_clflush_release(struct dma_fence *fence)
+{
+       struct clflush *clflush = container_of(fence, typeof(*clflush), dma);
+
+       i915_sw_fence_fini(&clflush->wait);
+
+       BUILD_BUG_ON(offsetof(typeof(*clflush), dma));
+       dma_fence_free(&clflush->dma);
+}
+
+static const struct dma_fence_ops i915_clflush_ops = {
+       .get_driver_name = i915_clflush_get_driver_name,
+       .get_timeline_name = i915_clflush_get_timeline_name,
+       .enable_signaling = i915_clflush_enable_signaling,
+       .wait = dma_fence_default_wait,
+       .release = i915_clflush_release,
+};
+
+static void __i915_do_clflush(struct drm_i915_gem_object *obj)
+{
+       drm_clflush_sg(obj->mm.pages);
+       obj->cache_dirty = false;
+
+       intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+}
+
+static void i915_clflush_work(struct work_struct *work)
+{
+       struct clflush *clflush = container_of(work, typeof(*clflush), work);
+       struct drm_i915_gem_object *obj = clflush->obj;
+
+       if (!obj->cache_dirty)
+               goto out;
+
+       if (i915_gem_object_pin_pages(obj)) {
+               DRM_ERROR("Failed to acquire obj->pages for clflushing\n");
+               goto out;
+       }
+
+       __i915_do_clflush(obj);
+
+       i915_gem_object_unpin_pages(obj);
+
+out:
+       i915_gem_object_put(obj);
+
+       dma_fence_signal(&clflush->dma);
+       dma_fence_put(&clflush->dma);
+}
+
+static int __i915_sw_fence_call
+i915_clflush_notify(struct i915_sw_fence *fence,
+                   enum i915_sw_fence_notify state)
+{
+       struct clflush *clflush = container_of(fence, typeof(*clflush), wait);
+
+       switch (state) {
+       case FENCE_COMPLETE:
+               schedule_work(&clflush->work);
+               break;
+
+       case FENCE_FREE:
+               dma_fence_put(&clflush->dma);
+               break;
+       }
+
+       return NOTIFY_DONE;
+}
+
+void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+                            unsigned int flags)
+{
+       struct clflush *clflush;
+
+       /*
+        * Stolen memory is always coherent with the GPU as it is explicitly
+        * marked as wc by the system, or the system is cache-coherent.
+        * Similarly, we only access struct pages through the CPU cache, so
+        * anything not backed by physical memory we consider to be always
+        * coherent and not need clflushing.
+        */
+       if (!i915_gem_object_has_struct_page(obj))
+               return;
+
+       obj->cache_dirty = true;
+
+       /* If the GPU is snooping the contents of the CPU cache,
+        * we do not need to manually clear the CPU cache lines.  However,
+        * the caches are only snooped when the render cache is
+        * flushed/invalidated.  As we always have to emit invalidations
+        * and flushes when moving into and out of the RENDER domain, correct
+        * snooping behaviour occurs naturally as the result of our domain
+        * tracking.
+        */
+       if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj))
+               return;
+
+       trace_i915_gem_object_clflush(obj);
+
+       clflush = NULL;
+       if (!(flags & I915_CLFLUSH_SYNC))
+               clflush = kmalloc(sizeof(*clflush), GFP_KERNEL);
+       if (clflush) {
+               dma_fence_init(&clflush->dma,
+                              &i915_clflush_ops,
+                              &clflush_lock,
+                              clflush_context,
+                              0);
+               i915_sw_fence_init(&clflush->wait, i915_clflush_notify);
+
+               clflush->obj = i915_gem_object_get(obj);
+               INIT_WORK(&clflush->work, i915_clflush_work);
+
+               dma_fence_get(&clflush->dma);
+
+               i915_sw_fence_await_reservation(&clflush->wait,
+                                               obj->resv, NULL,
+                                               false, I915_FENCE_TIMEOUT,
+                                               GFP_KERNEL);
+
+               reservation_object_lock(obj->resv, NULL);
+               reservation_object_add_excl_fence(obj->resv, &clflush->dma);
+               reservation_object_unlock(obj->resv);
+
+               i915_sw_fence_commit(&clflush->wait);
+       } else if (obj->mm.pages) {
+               __i915_do_clflush(obj);
+       } else {
+               GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
+       }
+}
+
+void i915_gem_clflush_init(struct drm_i915_private *i915)
+{
+       clflush_context = dma_fence_context_alloc(1);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.h b/drivers/gpu/drm/i915/i915_gem_clflush.h
new file mode 100644 (file)
index 0000000..b62d61a
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __I915_GEM_CLFLUSH_H__
+#define __I915_GEM_CLFLUSH_H__
+
+struct drm_i915_private;
+struct drm_i915_gem_object;
+
+void i915_gem_clflush_init(struct drm_i915_private *i915);
+void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+                            unsigned int flags);
+#define I915_CLFLUSH_FORCE BIT(0)
+#define I915_CLFLUSH_SYNC BIT(1)
+
+#endif /* __I915_GEM_CLFLUSH_H__ */
index 6fb29832bc63706eb9cd06b873b405d4eff9ba26..35d2cb979452ac8e159048a91f10418a15fd6427 100644 (file)
@@ -35,6 +35,7 @@
 #include <drm/i915_drm.h>
 
 #include "i915_drv.h"
+#include "i915_gem_clflush.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
@@ -1114,13 +1115,15 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
                if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
                        continue;
 
+               if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) {
+                       i915_gem_clflush_object(obj, 0);
+                       obj->base.write_domain = 0;
+               }
+
                ret = i915_gem_request_await_object
                        (req, obj, obj->base.pending_write_domain);
                if (ret)
                        return ret;
-
-               if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
-                       i915_gem_clflush_object(obj, false);
        }
 
        /* Unconditionally flush any chipset caches (for streaming writes). */
index 4d67cbb3185c90c865c40ce7e922a9e2ae562cfd..2806d6b0570d656e4fcfdc51d9d30a7cc9c7a4f9 100644 (file)
@@ -37,6 +37,7 @@
 #include "intel_frontbuffer.h"
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
+#include "i915_gem_clflush.h"
 #include "intel_dsi.h"
 #include "i915_trace.h"
 #include <drm/drm_atomic.h>
@@ -13188,6 +13189,29 @@ intel_prepare_plane_fb(struct drm_plane *plane,
        struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb);
        int ret;
 
+       if (obj) {
+               if (plane->type == DRM_PLANE_TYPE_CURSOR &&
+                   INTEL_INFO(dev_priv)->cursor_needs_physical) {
+                       const int align = IS_I830(dev_priv) ? 16 * 1024 : 256;
+
+                       ret = i915_gem_object_attach_phys(obj, align);
+                       if (ret) {
+                               DRM_DEBUG_KMS("failed to attach phys object\n");
+                               return ret;
+                       }
+               } else {
+                       struct i915_vma *vma;
+
+                       vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
+                       if (IS_ERR(vma)) {
+                               DRM_DEBUG_KMS("failed to pin object\n");
+                               return PTR_ERR(vma);
+                       }
+
+                       to_intel_plane_state(new_state)->vma = vma;
+               }
+       }
+
        if (!obj && !old_obj)
                return 0;
 
@@ -13240,26 +13264,6 @@ intel_prepare_plane_fb(struct drm_plane *plane,
                i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY);
        }
 
-       if (plane->type == DRM_PLANE_TYPE_CURSOR &&
-           INTEL_INFO(dev_priv)->cursor_needs_physical) {
-               int align = IS_I830(dev_priv) ? 16 * 1024 : 256;
-               ret = i915_gem_object_attach_phys(obj, align);
-               if (ret) {
-                       DRM_DEBUG_KMS("failed to attach phys object\n");
-                       return ret;
-               }
-       } else {
-               struct i915_vma *vma;
-
-               vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
-               if (IS_ERR(vma)) {
-                       DRM_DEBUG_KMS("failed to pin object\n");
-                       return PTR_ERR(vma);
-               }
-
-               to_intel_plane_state(new_state)->vma = vma;
-       }
-
        return 0;
 }