From 2549d6c26ce1c85a76990b972a2c7e8f440455cd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 14 Oct 2010 12:10:41 +0100 Subject: [PATCH] drm/i915: Avoid vmallocing a buffer for the relocations ... perform an access validation check up front instead and copy them in on-demand, during i915_gem_object_pin_and_relocate(). As around 20% of the CPU overhead may be spent inside vmalloc for the relocation entries when submitting an execbuffer [for x11perf -aa10text], the savings are considerable and result in around a 10% throughput increase [for glyphs]. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 224 +++++++++++--------------------- 1 file changed, 75 insertions(+), 149 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 72ab3032300..67998e8a2d7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3291,12 +3291,12 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, static int i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, struct drm_file *file_priv, - struct drm_i915_gem_exec_object2 *entry, - struct drm_i915_gem_relocation_entry *relocs) + struct drm_i915_gem_exec_object2 *entry) { struct drm_device *dev = obj->dev; drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); + struct drm_i915_gem_relocation_entry __user *user_relocs; int i, ret; void __iomem *reloc_page; bool need_fence; @@ -3337,15 +3337,24 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, /* Apply the relocations, using the GTT aperture to avoid cache * flushing requirements. */ + user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr; for (i = 0; i < entry->relocation_count; i++) { - struct drm_i915_gem_relocation_entry *reloc= &relocs[i]; + struct drm_i915_gem_relocation_entry reloc; struct drm_gem_object *target_obj; struct drm_i915_gem_object *target_obj_priv; uint32_t reloc_val, reloc_offset; uint32_t __iomem *reloc_entry; + ret = __copy_from_user_inatomic(&reloc, + user_relocs+i, + sizeof(reloc)); + if (ret) { + i915_gem_object_unpin(obj); + return -EFAULT; + } + target_obj = drm_gem_object_lookup(obj->dev, file_priv, - reloc->target_handle); + reloc.target_handle); if (target_obj == NULL) { i915_gem_object_unpin(obj); return -ENOENT; @@ -3358,13 +3367,13 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, "presumed %08x delta %08x\n", __func__, obj, - (int) reloc->offset, - (int) reloc->target_handle, - (int) reloc->read_domains, - (int) reloc->write_domain, + (int) reloc.offset, + (int) reloc.target_handle, + (int) reloc.read_domains, + (int) reloc.write_domain, (int) target_obj_priv->gtt_offset, - (int) reloc->presumed_offset, - reloc->delta); + (int) reloc.presumed_offset, + reloc.delta); #endif /* The target buffer should have appeared before us in the @@ -3372,89 +3381,89 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, */ if (target_obj_priv->gtt_space == NULL) { DRM_ERROR("No GTT space found for object %d\n", - reloc->target_handle); + reloc.target_handle); drm_gem_object_unreference(target_obj); i915_gem_object_unpin(obj); return -EINVAL; } /* Validate that the target is in a valid r/w GPU domain */ - if (reloc->write_domain & (reloc->write_domain - 1)) { + if (reloc.write_domain & (reloc.write_domain - 1)) { DRM_ERROR("reloc with multiple write domains: " "obj %p target %d offset %d " "read %08x write %08x", - obj, reloc->target_handle, - (int) reloc->offset, - reloc->read_domains, - reloc->write_domain); + obj, reloc.target_handle, + (int) reloc.offset, + reloc.read_domains, + reloc.write_domain); drm_gem_object_unreference(target_obj); i915_gem_object_unpin(obj); return -EINVAL; } - if (reloc->write_domain & I915_GEM_DOMAIN_CPU || - reloc->read_domains & I915_GEM_DOMAIN_CPU) { + if (reloc.write_domain & I915_GEM_DOMAIN_CPU || + reloc.read_domains & I915_GEM_DOMAIN_CPU) { DRM_ERROR("reloc with read/write CPU domains: " "obj %p target %d offset %d " "read %08x write %08x", - obj, reloc->target_handle, - (int) reloc->offset, - reloc->read_domains, - reloc->write_domain); + obj, reloc.target_handle, + (int) reloc.offset, + reloc.read_domains, + reloc.write_domain); drm_gem_object_unreference(target_obj); i915_gem_object_unpin(obj); return -EINVAL; } - if (reloc->write_domain && target_obj->pending_write_domain && - reloc->write_domain != target_obj->pending_write_domain) { + if (reloc.write_domain && target_obj->pending_write_domain && + reloc.write_domain != target_obj->pending_write_domain) { DRM_ERROR("Write domain conflict: " "obj %p target %d offset %d " "new %08x old %08x\n", - obj, reloc->target_handle, - (int) reloc->offset, - reloc->write_domain, + obj, reloc.target_handle, + (int) reloc.offset, + reloc.write_domain, target_obj->pending_write_domain); drm_gem_object_unreference(target_obj); i915_gem_object_unpin(obj); return -EINVAL; } - target_obj->pending_read_domains |= reloc->read_domains; - target_obj->pending_write_domain |= reloc->write_domain; + target_obj->pending_read_domains |= reloc.read_domains; + target_obj->pending_write_domain |= reloc.write_domain; /* If the relocation already has the right value in it, no * more work needs to be done. */ - if (target_obj_priv->gtt_offset == reloc->presumed_offset) { + if (target_obj_priv->gtt_offset == reloc.presumed_offset) { drm_gem_object_unreference(target_obj); continue; } /* Check that the relocation address is valid... */ - if (reloc->offset > obj->size - 4) { + if (reloc.offset > obj->size - 4) { DRM_ERROR("Relocation beyond object bounds: " "obj %p target %d offset %d size %d.\n", - obj, reloc->target_handle, - (int) reloc->offset, (int) obj->size); + obj, reloc.target_handle, + (int) reloc.offset, (int) obj->size); drm_gem_object_unreference(target_obj); i915_gem_object_unpin(obj); return -EINVAL; } - if (reloc->offset & 3) { + if (reloc.offset & 3) { DRM_ERROR("Relocation not 4-byte aligned: " "obj %p target %d offset %d.\n", - obj, reloc->target_handle, - (int) reloc->offset); + obj, reloc.target_handle, + (int) reloc.offset); drm_gem_object_unreference(target_obj); i915_gem_object_unpin(obj); return -EINVAL; } /* and points to somewhere within the target object. */ - if (reloc->delta >= target_obj->size) { + if (reloc.delta >= target_obj->size) { DRM_ERROR("Relocation beyond target object bounds: " "obj %p target %d delta %d size %d.\n", - obj, reloc->target_handle, - (int) reloc->delta, (int) target_obj->size); + obj, reloc.target_handle, + (int) reloc.delta, (int) target_obj->size); drm_gem_object_unreference(target_obj); i915_gem_object_unpin(obj); return -EINVAL; @@ -3470,23 +3479,18 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, /* Map the page containing the relocation we're going to * perform. */ - reloc_offset = obj_priv->gtt_offset + reloc->offset; + reloc_offset = obj_priv->gtt_offset + reloc.offset; reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, (reloc_offset & ~(PAGE_SIZE - 1)), KM_USER0); reloc_entry = (uint32_t __iomem *)(reloc_page + (reloc_offset & (PAGE_SIZE - 1))); - reloc_val = target_obj_priv->gtt_offset + reloc->delta; + reloc_val = target_obj_priv->gtt_offset + reloc.delta; writel(reloc_val, reloc_entry); io_mapping_unmap_atomic(reloc_page, KM_USER0); - /* The updated presumed offset for this entry will be - * copied back out to the user. - */ - reloc->presumed_offset = target_obj_priv->gtt_offset; - drm_gem_object_unreference(target_obj); } @@ -3551,98 +3555,40 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) } static int -i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object2 *exec_list, - uint32_t buffer_count, - struct drm_i915_gem_relocation_entry **relocs) +i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec, + uint64_t exec_offset) { - uint32_t reloc_count = 0, reloc_index = 0, i; - int ret; - - *relocs = NULL; - for (i = 0; i < buffer_count; i++) { - if (reloc_count + exec_list[i].relocation_count < reloc_count) - return -EINVAL; - reloc_count += exec_list[i].relocation_count; - } - - *relocs = drm_calloc_large(reloc_count, sizeof(**relocs)); - if (*relocs == NULL) { - DRM_ERROR("failed to alloc relocs, count %d\n", reloc_count); - return -ENOMEM; - } - - for (i = 0; i < buffer_count; i++) { - struct drm_i915_gem_relocation_entry __user *user_relocs; + uint32_t exec_start, exec_len; - user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; + exec_start = (uint32_t) exec_offset + exec->batch_start_offset; + exec_len = (uint32_t) exec->batch_len; - ret = copy_from_user(&(*relocs)[reloc_index], - user_relocs, - exec_list[i].relocation_count * - sizeof(**relocs)); - if (ret != 0) { - drm_free_large(*relocs); - *relocs = NULL; - return -EFAULT; - } + if ((exec_start | exec_len) & 0x7) + return -EINVAL; - reloc_index += exec_list[i].relocation_count; - } + if (!exec_start) + return -EINVAL; return 0; } static int -i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object2 *exec_list, - uint32_t buffer_count, - struct drm_i915_gem_relocation_entry *relocs) +validate_exec_list(struct drm_i915_gem_exec_object2 *exec, + int count) { - uint32_t reloc_count = 0, i; - int ret = 0; - - if (relocs == NULL) - return 0; - - for (i = 0; i < buffer_count; i++) { - struct drm_i915_gem_relocation_entry __user *user_relocs; - int unwritten; - - user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; + int i; - unwritten = copy_to_user(user_relocs, - &relocs[reloc_count], - exec_list[i].relocation_count * - sizeof(*relocs)); + for (i = 0; i < count; i++) { + char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr; + size_t length = exec[i].relocation_count * sizeof(struct drm_i915_gem_relocation_entry); - if (unwritten) { - ret = -EFAULT; - goto err; - } + if (!access_ok(VERIFY_READ, ptr, length)) + return -EFAULT; - reloc_count += exec_list[i].relocation_count; + if (fault_in_pages_readable(ptr, length)) + return -EFAULT; } -err: - drm_free_large(relocs); - - return ret; -} - -static int -i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer2 *exec, - uint64_t exec_offset) -{ - uint32_t exec_start, exec_len; - - exec_start = (uint32_t) exec_offset + exec->batch_start_offset; - exec_len = (uint32_t) exec->batch_len; - - if ((exec_start | exec_len) & 0x7) - return -EINVAL; - - if (!exec_start) - return -EINVAL; - return 0; } @@ -3657,11 +3603,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_gem_object *batch_obj; struct drm_i915_gem_object *obj_priv; struct drm_clip_rect *cliprects = NULL; - struct drm_i915_gem_relocation_entry *relocs = NULL; struct drm_i915_gem_request *request = NULL; - int ret, ret2, i, pinned = 0; + int ret, i, pinned = 0; uint64_t exec_offset; - uint32_t reloc_index; int pin_tries, flips; struct intel_ring_buffer *ring = NULL; @@ -3670,6 +3614,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (ret) return ret; + ret = validate_exec_list(exec_list, args->buffer_count); + if (ret) + return ret; + #if WATCH_EXEC DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", (int) args->buffers_ptr, args->buffer_count, args->batch_len); @@ -3722,11 +3670,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto pre_mutex_err; } - ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count, - &relocs); - if (ret != 0) - goto pre_mutex_err; - ret = i915_mutex_lock_interruptible(dev); if (ret) goto pre_mutex_err; @@ -3765,19 +3708,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, /* Pin and relocate */ for (pin_tries = 0; ; pin_tries++) { ret = 0; - reloc_index = 0; for (i = 0; i < args->buffer_count; i++) { object_list[i]->pending_read_domains = 0; object_list[i]->pending_write_domain = 0; ret = i915_gem_object_pin_and_relocate(object_list[i], file_priv, - &exec_list[i], - &relocs[reloc_index]); + &exec_list[i]); if (ret) break; pinned = i + 1; - reloc_index += exec_list[i].relocation_count; } /* success */ if (ret == 0) @@ -3967,20 +3907,6 @@ err: mutex_unlock(&dev->struct_mutex); pre_mutex_err: - /* Copy the updated relocations out regardless of current error - * state. Failure to update the relocs would mean that the next - * time userland calls execbuf, it would do so with presumed offset - * state that didn't match the actual object state. - */ - ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count, - relocs); - if (ret2 != 0) { - DRM_ERROR("Failed to copy relocations back out: %d\n", ret2); - - if (ret == 0) - ret = ret2; - } - drm_free_large(object_list); kfree(cliprects); kfree(request); -- 2.20.1