drm/i915: Force CPU relocations if not GTT mapped
author		Chris Wilson <chris@chris-wilson.co.uk>
		Mon, 11 Aug 2014 10:00:12 +0000 (12:00 +0200)
committer	Daniel Vetter <daniel.vetter@ffwll.ch>
		Mon, 11 Aug 2014 10:01:29 +0000 (12:01 +0200)
Move the decision on whether we need to have a mappable object during
execbuffer to the fore and then reuse that decision by propagating the
flag through to reservation. As a corollary, before doing the actual
relocation through the GTT, we can make sure that we do have a GTT
mapping through which to operate.
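
For illustration (a simplified sketch, not the literal upstream code;
the helper names mirror the driver's use_cpu_reloc(),
relocate_entry_cpu() and relocate_entry_gtt(), but the signatures are
condensed here), the effect on the relocation path is roughly:

	static int
	relocate_entry(struct drm_i915_gem_object *obj,
		       struct drm_i915_gem_relocation_entry *reloc)
	{
		/* CPU-cached objects, and objects without a mappable
		 * GGTT binding, are relocated through a kmap of the
		 * backing page instead of through the aperture.
		 */
		if (use_cpu_reloc(obj) || !obj->map_and_fenceable)
			return relocate_entry_cpu(obj, reloc);

		/* Here a mappable GGTT binding is known to exist. */
		return relocate_entry_gtt(obj, reloc);
	}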

Note that the key to make this work is to ditch the
obj->map_and_fenceable unbind optimization - with full ppgtt it
doesn't make a lot of sense any more anyway.
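
In other words (an illustrative sketch, assuming the
i915_gem_obj_ggtt_bound() helper of this era), the flag now carries a
real invariant instead of a rebind hint:

	/* map_and_fenceable may only be set while the object actually
	 * has a mappable GGTT binding; fresh and unbound objects must
	 * start with it cleared.
	 */
	WARN_ON(obj->map_and_fenceable && !i915_gem_obj_ggtt_bound(obj));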

v2: Revamp and resend to ease future patches.
v3: Refresh patch rationale

References: https://bugs.freedesktop.org/show_bug.cgi?id=81094
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
[danvet: Explain why obj->map_and_fenceable is key and split out the
secure batch fix.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_execbuffer.c

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1be7e541a7c780747596043f5db679d1450fdae4..1ca2231e39295b6ff7b262cd1c8133f0216589f8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2928,9 +2928,8 @@ int i915_vma_unbind(struct i915_vma *vma)
        vma->unbind_vma(vma);
 
        list_del_init(&vma->mm_list);
-       /* Avoid an unnecessary call to unbind on rebind. */
        if (i915_is_ggtt(vma->vm))
-               obj->map_and_fenceable = true;
+               obj->map_and_fenceable = false;
 
        drm_mm_remove_node(&vma->node);
        i915_gem_vma_destroy(vma);
@@ -3282,6 +3281,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
                        return 0;
                }
        } else if (enable) {
+               if (WARN_ON(!obj->map_and_fenceable))
+                       return -EINVAL;
+
                reg = i915_find_fence_reg(dev);
                if (IS_ERR(reg))
                        return PTR_ERR(reg);
@@ -4331,8 +4333,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 
        obj->fence_reg = I915_FENCE_REG_NONE;
        obj->madv = I915_MADV_WILLNEED;
-       /* Avoid an unnecessary call to unbind on the first bind. */
-       obj->map_and_fenceable = true;
 
        i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 60998fc4e5b22147687db554a4c80f27d08e1bea..6320a385841b8231124c5b589ae13ecd3cee331e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -35,6 +35,7 @@
 
 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
+#define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
 #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
 
 #define BATCH_OFFSET_BIAS (256*1024)
@@ -534,14 +535,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
        return ret;
 }
 
-static int
-need_reloc_mappable(struct i915_vma *vma)
-{
-       struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-       return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
-               i915_is_ggtt(vma->vm);
-}
-
 static int
 i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
                                struct intel_engine_cs *ring,
@@ -550,19 +543,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
        struct drm_i915_gem_object *obj = vma->obj;
        struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
        bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
-       bool need_fence;
        uint64_t flags;
        int ret;
 
        flags = 0;
-
-       need_fence =
-               has_fenced_gpu_access &&
-               entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-               obj->tiling_mode != I915_TILING_NONE;
-       if (need_fence || need_reloc_mappable(vma))
+       if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
                flags |= PIN_MAPPABLE;
-
        if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
                flags |= PIN_GLOBAL;
        if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
@@ -601,26 +587,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 }
 
 static bool
-eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
+need_reloc_mappable(struct i915_vma *vma)
 {
        struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-       struct drm_i915_gem_object *obj = vma->obj;
-       bool need_fence, need_mappable;
 
-       need_fence =
-               has_fenced_gpu_access &&
-               entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-               obj->tiling_mode != I915_TILING_NONE;
-       need_mappable = need_fence || need_reloc_mappable(vma);
+       if (entry->relocation_count == 0)
+               return false;
+
+       if (!i915_is_ggtt(vma->vm))
+               return false;
+
+       /* See also use_cpu_reloc() */
+       if (HAS_LLC(vma->obj->base.dev))
+               return false;
+
+       if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+               return false;
 
-       WARN_ON((need_mappable || need_fence) &&
+       return true;
+}
+
+static bool
+eb_vma_misplaced(struct i915_vma *vma)
+{
+       struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+       struct drm_i915_gem_object *obj = vma->obj;
+
+       WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
               !i915_is_ggtt(vma->vm));
 
        if (entry->alignment &&
            vma->node.start & (entry->alignment - 1))
                return true;
 
-       if (need_mappable && !obj->map_and_fenceable)
+       if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
                return true;
 
        if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
@@ -664,9 +664,10 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
                        obj->tiling_mode != I915_TILING_NONE;
                need_mappable = need_fence || need_reloc_mappable(vma);
 
-               if (need_mappable)
+               if (need_mappable) {
+                       entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
                        list_move(&vma->exec_list, &ordered_vmas);
-               else
+               } else
                        list_move_tail(&vma->exec_list, &ordered_vmas);
 
                obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
@@ -696,7 +697,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
                        if (!drm_mm_node_allocated(&vma->node))
                                continue;
 
-                       if (eb_vma_misplaced(vma, has_fenced_gpu_access))
+                       if (eb_vma_misplaced(vma))
                                ret = i915_vma_unbind(vma);
                        else
                                ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);