drm/i915: Stash a pointer to the obj's resv in the vma
author Chris Wilson <chris@chris-wilson.co.uk>
Fri, 16 Jun 2017 14:05:25 +0000 (15:05 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Fri, 16 Jun 2017 15:54:05 +0000 (16:54 +0100)
During execbuf, a mandatory step is to add this request (its fence)
to each object's reservation_object. Inside execbuf we track the vma,
so adding the fence to the reservation_object means first chasing
vma->obj, incurring another cache miss. We can reduce the number of
cache misses by stashing a pointer to the reservation_object in the
vma itself.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170616140525.6394-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/i915_vma.c
drivers/gpu/drm/i915/i915_vma.h

index 2f7a2d2510fc333054bab9e80d649a287cd3a0ef..eb46dfa374a7f3a09529b8fa66a096080caca884 100644 (file)
@@ -1192,17 +1192,17 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
        if (err)
                goto err_request;
 
-       GEM_BUG_ON(!reservation_object_test_signaled_rcu(obj->resv, true));
+       GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
        i915_vma_move_to_active(batch, rq, 0);
-       reservation_object_lock(obj->resv, NULL);
-       reservation_object_add_excl_fence(obj->resv, &rq->fence);
-       reservation_object_unlock(obj->resv);
+       reservation_object_lock(batch->resv, NULL);
+       reservation_object_add_excl_fence(batch->resv, &rq->fence);
+       reservation_object_unlock(batch->resv);
        i915_vma_unpin(batch);
 
        i915_vma_move_to_active(vma, rq, true);
-       reservation_object_lock(vma->obj->resv, NULL);
-       reservation_object_add_excl_fence(vma->obj->resv, &rq->fence);
-       reservation_object_unlock(vma->obj->resv);
+       reservation_object_lock(vma->resv, NULL);
+       reservation_object_add_excl_fence(vma->resv, &rq->fence);
+       reservation_object_unlock(vma->resv);
 
        rq->batch = batch;
 
@@ -1252,7 +1252,6 @@ relocate_entry(struct i915_vma *vma,
               struct i915_execbuffer *eb,
               const struct i915_vma *target)
 {
-       struct drm_i915_gem_object *obj = vma->obj;
        u64 offset = reloc->offset;
        u64 target_offset = relocation_target(reloc, target);
        bool wide = eb->reloc_cache.use_64bit_reloc;
@@ -1260,7 +1259,7 @@ relocate_entry(struct i915_vma *vma,
 
        if (!eb->reloc_cache.vaddr &&
            (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
-            !reservation_object_test_signaled_rcu(obj->resv, true))) {
+            !reservation_object_test_signaled_rcu(vma->resv, true))) {
                const unsigned int gen = eb->reloc_cache.gen;
                unsigned int len;
                u32 *batch;
@@ -1320,7 +1319,7 @@ relocate_entry(struct i915_vma *vma,
        }
 
 repeat:
-       vaddr = reloc_vaddr(obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
+       vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
        if (IS_ERR(vaddr))
                return PTR_ERR(vaddr);
 
@@ -1793,11 +1792,11 @@ slow:
        return eb_relocate_slow(eb);
 }
 
-static void eb_export_fence(struct drm_i915_gem_object *obj,
+static void eb_export_fence(struct i915_vma *vma,
                            struct drm_i915_gem_request *req,
                            unsigned int flags)
 {
-       struct reservation_object *resv = obj->resv;
+       struct reservation_object *resv = vma->resv;
 
        /*
         * Ignore errors from failing to allocate the new fence, we can't
@@ -1856,7 +1855,7 @@ skip_flushes:
                const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
                struct i915_vma *vma = exec_to_vma(entry);
 
-               eb_export_fence(vma->obj, eb->request, entry->flags);
+               eb_export_fence(vma, eb->request, entry->flags);
                if (unlikely(entry->flags & __EXEC_OBJECT_HAS_REF))
                        i915_vma_put(vma);
        }
index f5c57dff288e18e241379b3308c14d81cd123ebe..532c709febbd7f9befc217b01ff09702a312cdf2 100644 (file)
@@ -90,6 +90,7 @@ vma_create(struct drm_i915_gem_object *obj,
        init_request_active(&vma->last_fence, NULL);
        vma->vm = vm;
        vma->obj = obj;
+       vma->resv = obj->resv;
        vma->size = obj->base.size;
        vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
 
index 04d7a5da70fd8621b78cd81f908fd5c5b24fa4bd..4a673fc1a4320f957595dffe5ff8c7c53fa20adf 100644 (file)
@@ -50,6 +50,7 @@ struct i915_vma {
        struct drm_i915_gem_object *obj;
        struct i915_address_space *vm;
        struct drm_i915_fence_reg *fence;
+       struct reservation_object *resv; /** Alias of obj->resv */
        struct sg_table *pages;
        void __iomem *iomap;
        u64 size;
@@ -111,8 +112,8 @@ struct i915_vma {
        /**
         * Used for performing relocations during execbuffer insertion.
         */
-       struct hlist_node exec_node;
        struct drm_i915_gem_exec_object2 *exec_entry;
+       struct hlist_node exec_node;
        u32 exec_handle;
 
        struct i915_gem_context *ctx;