From: Joonas Lahtinen Date: Fri, 11 Nov 2016 10:43:54 +0000 (+0200) Subject: drm/i915: Split out i915_vma.c X-Git-Url: https://git.stricted.de/?a=commitdiff_plain;h=b42fe9ca0a1e2b270c06b3f118f1f9db952d623b;p=GitHub%2Fmoto-9609%2Fandroid_kernel_motorola_exynos9610.git drm/i915: Split out i915_vma.c As a side product, had to split two other files; - i915_gem_fence_reg.h - i915_gem_object.h (only parts that needed immediate untanglement) I tried to move code in as big chunks as possible, to make review easier. i915_vma_compare was moved to a header temporarily. v2: - Use i915_gem_fence_reg.{c,h} v3: - Rebased v4: - Fix building when DEBUG_GEM is enabled by reordering a bit. Cc: Tvrtko Ursulin Cc: Chris Wilson Acked-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Signed-off-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1478861034-30643-1-git-send-email-joonas.lahtinen@linux.intel.com --- diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 0857e5035f4d..3dea46af9fe6 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -33,7 +33,7 @@ i915-y += i915_cmd_parser.o \ i915_gem_dmabuf.o \ i915_gem_evict.o \ i915_gem_execbuffer.o \ - i915_gem_fence.o \ + i915_gem_fence_reg.o \ i915_gem_gtt.o \ i915_gem_internal.o \ i915_gem.o \ @@ -45,6 +45,7 @@ i915-y += i915_cmd_parser.o \ i915_gem_timeline.o \ i915_gem_userptr.o \ i915_trace_points.o \ + i915_vma.o \ intel_breadcrumbs.o \ intel_engine_cs.o \ intel_hangcheck.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 91bec6065fe7..d558124c972a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -60,11 +60,15 @@ #include "intel_ringbuffer.h" #include "i915_gem.h" +#include "i915_gem_fence_reg.h" +#include "i915_gem_object.h" #include "i915_gem_gtt.h" #include "i915_gem_render_state.h" #include "i915_gem_request.h" #include "i915_gem_timeline.h" +#include "i915_vma.h" + #include "intel_gvt.h" /* General customization: @@ -459,23 +463,6 @@ struct intel_opregion { struct intel_overlay; struct intel_overlay_error_state; -struct drm_i915_fence_reg { - struct list_head link; - struct drm_i915_private *i915; - struct i915_vma *vma; - int pin_count; - int id; - /** - * Whether the tiling parameters for the currently - * associated fence register have changed. Note that - * for the purposes of tracking tiling changes we also - * treat the unfenced register, the register slot that - * the object occupies whilst it executes a fenced - * command (such as BLT on gen2/3), as a "fence". - */ - bool dirty; -}; - struct sdvo_device_mapping { u8 initialized; u8 dvo_port; @@ -2179,31 +2166,6 @@ enum hdmi_force_audio { #define I915_GTT_OFFSET_NONE ((u32)-1) -struct drm_i915_gem_object_ops { - unsigned int flags; -#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1 -#define I915_GEM_OBJECT_IS_SHRINKABLE 0x2 - - /* Interface between the GEM object and its backing storage. - * get_pages() is called once prior to the use of the associated set - * of pages before to binding them into the GTT, and put_pages() is - * called after we no longer need them. As we expect there to be - * associated cost with migrating pages between the backing storage - * and making them available for the GPU (e.g. clflush), we may hold - * onto the pages after they are no longer referenced by the GPU - * in case they may be used again shortly (for example migrating the - * pages to a different memory domain within the GTT). put_pages() - * will therefore most likely be called when the object itself is - * being released or under memory pressure (where we attempt to - * reap pages for the shrinker). - */ - struct sg_table *(*get_pages)(struct drm_i915_gem_object *); - void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *); - - int (*dmabuf_export)(struct drm_i915_gem_object *); - void (*release)(struct drm_i915_gem_object *); -}; - /* * Frontbuffer tracking bits. Set in obj->frontbuffer_bits while a gem bo is * considered to be the frontbuffer for the given plane interface-wise. This @@ -2225,292 +2187,6 @@ struct drm_i915_gem_object_ops { #define INTEL_FRONTBUFFER_ALL_MASK(pipe) \ (0xff << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe))) -struct drm_i915_gem_object { - struct drm_gem_object base; - - const struct drm_i915_gem_object_ops *ops; - - /** List of VMAs backed by this object */ - struct list_head vma_list; - struct rb_root vma_tree; - - /** Stolen memory for this object, instead of being backed by shmem. */ - struct drm_mm_node *stolen; - struct list_head global_link; - union { - struct rcu_head rcu; - struct llist_node freed; - }; - - /** - * Whether the object is currently in the GGTT mmap. - */ - struct list_head userfault_link; - - /** Used in execbuf to temporarily hold a ref */ - struct list_head obj_exec_link; - - struct list_head batch_pool_link; - - unsigned long flags; - - /** - * Have we taken a reference for the object for incomplete GPU - * activity? - */ -#define I915_BO_ACTIVE_REF 0 - - /* - * Is the object to be mapped as read-only to the GPU - * Only honoured if hardware has relevant pte bit - */ - unsigned long gt_ro:1; - unsigned int cache_level:3; - unsigned int cache_dirty:1; - - atomic_t frontbuffer_bits; - unsigned int frontbuffer_ggtt_origin; /* write once */ - - /** Current tiling stride for the object, if it's tiled. */ - unsigned int tiling_and_stride; -#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */ -#define TILING_MASK (FENCE_MINIMUM_STRIDE-1) -#define STRIDE_MASK (~TILING_MASK) - - /** Count of VMA actually bound by this object */ - unsigned int bind_count; - unsigned int active_count; - unsigned int pin_display; - - struct { - struct mutex lock; /* protects the pages and their use */ - atomic_t pages_pin_count; - - struct sg_table *pages; - void *mapping; - - struct i915_gem_object_page_iter { - struct scatterlist *sg_pos; - unsigned int sg_idx; /* in pages, but 32bit eek! */ - - struct radix_tree_root radix; - struct mutex lock; /* protects this cache */ - } get_page; - - /** - * Advice: are the backing pages purgeable? - */ - unsigned int madv:2; - - /** - * This is set if the object has been written to since the - * pages were last acquired. - */ - bool dirty:1; - - /** - * This is set if the object has been pinned due to unknown - * swizzling. - */ - bool quirked:1; - } mm; - - /** Breadcrumb of last rendering to the buffer. - * There can only be one writer, but we allow for multiple readers. - * If there is a writer that necessarily implies that all other - * read requests are complete - but we may only be lazily clearing - * the read requests. A read request is naturally the most recent - * request on a ring, so we may have two different write and read - * requests on one ring where the write request is older than the - * read request. This allows for the CPU to read from an active - * buffer by only waiting for the write to complete. - */ - struct reservation_object *resv; - - /** References from framebuffers, locks out tiling changes. */ - unsigned long framebuffer_references; - - /** Record of address bit 17 of each page at last unbind. */ - unsigned long *bit_17; - - struct i915_gem_userptr { - uintptr_t ptr; - unsigned read_only :1; - - struct i915_mm_struct *mm; - struct i915_mmu_object *mmu_object; - struct work_struct *work; - } userptr; - - /** for phys allocated objects */ - struct drm_dma_handle *phys_handle; - - struct reservation_object __builtin_resv; -}; - -static inline struct drm_i915_gem_object * -to_intel_bo(struct drm_gem_object *gem) -{ - /* Assert that to_intel_bo(NULL) == NULL */ - BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base)); - - return container_of(gem, struct drm_i915_gem_object, base); -} - -/** - * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle - * @filp: DRM file private date - * @handle: userspace handle - * - * Returns: - * - * A pointer to the object named by the handle if such exists on @filp, NULL - * otherwise. This object is only valid whilst under the RCU read lock, and - * note carefully the object may be in the process of being destroyed. - */ -static inline struct drm_i915_gem_object * -i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle) -{ -#ifdef CONFIG_LOCKDEP - WARN_ON(debug_locks && !lock_is_held(&rcu_lock_map)); -#endif - return idr_find(&file->object_idr, handle); -} - -static inline struct drm_i915_gem_object * -i915_gem_object_lookup(struct drm_file *file, u32 handle) -{ - struct drm_i915_gem_object *obj; - - rcu_read_lock(); - obj = i915_gem_object_lookup_rcu(file, handle); - if (obj && !kref_get_unless_zero(&obj->base.refcount)) - obj = NULL; - rcu_read_unlock(); - - return obj; -} - -__deprecated -extern struct drm_gem_object * -drm_gem_object_lookup(struct drm_file *file, u32 handle); - -__attribute__((nonnull)) -static inline struct drm_i915_gem_object * -i915_gem_object_get(struct drm_i915_gem_object *obj) -{ - drm_gem_object_reference(&obj->base); - return obj; -} - -__deprecated -extern void drm_gem_object_reference(struct drm_gem_object *); - -__attribute__((nonnull)) -static inline void -i915_gem_object_put(struct drm_i915_gem_object *obj) -{ - __drm_gem_object_unreference(&obj->base); -} - -__deprecated -extern void drm_gem_object_unreference(struct drm_gem_object *); - -__deprecated -extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); - -static inline bool -i915_gem_object_is_dead(const struct drm_i915_gem_object *obj) -{ - return atomic_read(&obj->base.refcount.refcount) == 0; -} - -static inline bool -i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) -{ - return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE; -} - -static inline bool -i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) -{ - return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE; -} - -static inline bool -i915_gem_object_is_active(const struct drm_i915_gem_object *obj) -{ - return obj->active_count; -} - -static inline bool -i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj) -{ - return test_bit(I915_BO_ACTIVE_REF, &obj->flags); -} - -static inline void -i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj) -{ - lockdep_assert_held(&obj->base.dev->struct_mutex); - __set_bit(I915_BO_ACTIVE_REF, &obj->flags); -} - -static inline void -i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj) -{ - lockdep_assert_held(&obj->base.dev->struct_mutex); - __clear_bit(I915_BO_ACTIVE_REF, &obj->flags); -} - -void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj); - -static inline unsigned int -i915_gem_object_get_tiling(struct drm_i915_gem_object *obj) -{ - return obj->tiling_and_stride & TILING_MASK; -} - -static inline bool -i915_gem_object_is_tiled(struct drm_i915_gem_object *obj) -{ - return i915_gem_object_get_tiling(obj) != I915_TILING_NONE; -} - -static inline unsigned int -i915_gem_object_get_stride(struct drm_i915_gem_object *obj) -{ - return obj->tiling_and_stride & STRIDE_MASK; -} - -static inline struct intel_engine_cs * -i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) -{ - struct intel_engine_cs *engine = NULL; - struct dma_fence *fence; - - rcu_read_lock(); - fence = reservation_object_get_excl_rcu(obj->resv); - rcu_read_unlock(); - - if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) - engine = to_request(fence)->engine; - dma_fence_put(fence); - - return engine; -} - -static inline struct i915_vma *i915_vma_get(struct i915_vma *vma) -{ - i915_gem_object_get(vma->obj); - return vma; -} - -static inline void i915_vma_put(struct i915_vma *vma) -{ - i915_gem_object_put(vma->obj); -} - /* * Optimised SGL iterator for GEM objects */ @@ -3220,13 +2896,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, u64 alignment, u64 flags); -int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, - u32 flags); -void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); -int __must_check i915_vma_unbind(struct i915_vma *vma); -void i915_vma_close(struct i915_vma *vma); -void i915_vma_destroy(struct i915_vma *vma); - int i915_gem_object_unbind(struct drm_i915_gem_object *obj); void i915_gem_release_mmap(struct drm_i915_gem_object *obj); @@ -3476,54 +3145,10 @@ i915_gem_object_ggtt_offset(struct drm_i915_gem_object *o, return i915_ggtt_offset(i915_gem_object_to_ggtt(o, view)); } -/* i915_gem_fence.c */ +/* i915_gem_fence_reg.c */ int __must_check i915_vma_get_fence(struct i915_vma *vma); int __must_check i915_vma_put_fence(struct i915_vma *vma); -/** - * i915_vma_pin_fence - pin fencing state - * @vma: vma to pin fencing for - * - * This pins the fencing state (whether tiled or untiled) to make sure the - * vma (and its object) is ready to be used as a scanout target. Fencing - * status must be synchronize first by calling i915_vma_get_fence(): - * - * The resulting fence pin reference must be released again with - * i915_vma_unpin_fence(). - * - * Returns: - * - * True if the vma has a fence, false otherwise. - */ -static inline bool -i915_vma_pin_fence(struct i915_vma *vma) -{ - lockdep_assert_held(&vma->vm->dev->struct_mutex); - if (vma->fence) { - vma->fence->pin_count++; - return true; - } else - return false; -} - -/** - * i915_vma_unpin_fence - unpin fencing state - * @vma: vma to unpin fencing for - * - * This releases the fence pin reference acquired through - * i915_vma_pin_fence. It will handle both objects with and without an - * attached fence correctly, callers do not need to distinguish this. - */ -static inline void -i915_vma_unpin_fence(struct i915_vma *vma) -{ - lockdep_assert_held(&vma->vm->dev->struct_mutex); - if (vma->fence) { - GEM_BUG_ON(vma->fence->pin_count <= 0); - vma->fence->pin_count--; - } -} - void i915_gem_restore_fences(struct drm_device *dev); void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1c20edba7f2a..d51fb5d96dc2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2919,117 +2919,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return ret; } -static void __i915_vma_iounmap(struct i915_vma *vma) -{ - GEM_BUG_ON(i915_vma_is_pinned(vma)); - - if (vma->iomap == NULL) - return; - - io_mapping_unmap(vma->iomap); - vma->iomap = NULL; -} - -int i915_vma_unbind(struct i915_vma *vma) -{ - struct drm_i915_gem_object *obj = vma->obj; - unsigned long active; - int ret; - - lockdep_assert_held(&obj->base.dev->struct_mutex); - - /* First wait upon any activity as retiring the request may - * have side-effects such as unpinning or even unbinding this vma. - */ - active = i915_vma_get_active(vma); - if (active) { - int idx; - - /* When a closed VMA is retired, it is unbound - eek. - * In order to prevent it from being recursively closed, - * take a pin on the vma so that the second unbind is - * aborted. - * - * Even more scary is that the retire callback may free - * the object (last active vma). To prevent the explosion - * we defer the actual object free to a worker that can - * only proceed once it acquires the struct_mutex (which - * we currently hold, therefore it cannot free this object - * before we are finished). - */ - __i915_vma_pin(vma); - - for_each_active(active, idx) { - ret = i915_gem_active_retire(&vma->last_read[idx], - &vma->vm->dev->struct_mutex); - if (ret) - break; - } - - __i915_vma_unpin(vma); - if (ret) - return ret; - - GEM_BUG_ON(i915_vma_is_active(vma)); - } - - if (i915_vma_is_pinned(vma)) - return -EBUSY; - - if (!drm_mm_node_allocated(&vma->node)) - goto destroy; - - GEM_BUG_ON(obj->bind_count == 0); - GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - - if (i915_vma_is_map_and_fenceable(vma)) { - /* release the fence reg _after_ flushing */ - ret = i915_vma_put_fence(vma); - if (ret) - return ret; - - /* Force a pagefault for domain tracking on next user access */ - i915_gem_release_mmap(obj); - - __i915_vma_iounmap(vma); - vma->flags &= ~I915_VMA_CAN_FENCE; - } - - if (likely(!vma->vm->closed)) { - trace_i915_vma_unbind(vma); - vma->vm->unbind_vma(vma); - } - vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); - - drm_mm_remove_node(&vma->node); - list_move_tail(&vma->vm_link, &vma->vm->unbound_list); - - if (vma->pages != obj->mm.pages) { - GEM_BUG_ON(!vma->pages); - sg_free_table(vma->pages); - kfree(vma->pages); - } - vma->pages = NULL; - - /* Since the unbound list is global, only move to that list if - * no more VMAs exist. */ - if (--obj->bind_count == 0) - list_move_tail(&obj->global_link, - &to_i915(obj->base.dev)->mm.unbound_list); - - /* And finally now the object is completely decoupled from this vma, - * we can drop its hold on the backing storage and allow it to be - * reaped by the shrinker. - */ - i915_gem_object_unpin_pages(obj); - -destroy: - if (unlikely(i915_vma_is_closed(vma))) - i915_vma_destroy(vma); - - return 0; -} - static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) { int ret, i; @@ -3057,172 +2946,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) return 0; } -static bool i915_gem_valid_gtt_space(struct i915_vma *vma, - unsigned long cache_level) -{ - struct drm_mm_node *gtt_space = &vma->node; - struct drm_mm_node *other; - - /* - * On some machines we have to be careful when putting differing types - * of snoopable memory together to avoid the prefetcher crossing memory - * domains and dying. During vm initialisation, we decide whether or not - * these constraints apply and set the drm_mm.color_adjust - * appropriately. - */ - if (vma->vm->mm.color_adjust == NULL) - return true; - - if (!drm_mm_node_allocated(gtt_space)) - return true; - - if (list_empty(>t_space->node_list)) - return true; - - other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); - if (other->allocated && !other->hole_follows && other->color != cache_level) - return false; - - other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); - if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) - return false; - - return true; -} - -/** - * i915_vma_insert - finds a slot for the vma in its address space - * @vma: the vma - * @size: requested size in bytes (can be larger than the VMA) - * @alignment: required alignment - * @flags: mask of PIN_* flags to use - * - * First we try to allocate some free space that meets the requirements for - * the VMA. Failiing that, if the flags permit, it will evict an old VMA, - * preferrably the oldest idle entry to make room for the new VMA. - * - * Returns: - * 0 on success, negative error code otherwise. - */ -static int -i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) -{ - struct drm_i915_private *dev_priv = to_i915(vma->vm->dev); - struct drm_i915_gem_object *obj = vma->obj; - u64 start, end; - int ret; - - GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); - GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); - - size = max(size, vma->size); - if (flags & PIN_MAPPABLE) - size = i915_gem_get_ggtt_size(dev_priv, size, - i915_gem_object_get_tiling(obj)); - - alignment = max(max(alignment, vma->display_alignment), - i915_gem_get_ggtt_alignment(dev_priv, size, - i915_gem_object_get_tiling(obj), - flags & PIN_MAPPABLE)); - - start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; - - end = vma->vm->total; - if (flags & PIN_MAPPABLE) - end = min_t(u64, end, dev_priv->ggtt.mappable_end); - if (flags & PIN_ZONE_4G) - end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); - - /* If binding the object/GGTT view requires more space than the entire - * aperture has, reject it early before evicting everything in a vain - * attempt to find space. - */ - if (size > end) { - DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n", - size, obj->base.size, - flags & PIN_MAPPABLE ? "mappable" : "total", - end); - return -E2BIG; - } - - ret = i915_gem_object_pin_pages(obj); - if (ret) - return ret; - - if (flags & PIN_OFFSET_FIXED) { - u64 offset = flags & PIN_OFFSET_MASK; - if (offset & (alignment - 1) || offset > end - size) { - ret = -EINVAL; - goto err_unpin; - } - - vma->node.start = offset; - vma->node.size = size; - vma->node.color = obj->cache_level; - ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); - if (ret) { - ret = i915_gem_evict_for_vma(vma); - if (ret == 0) - ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); - if (ret) - goto err_unpin; - } - } else { - u32 search_flag, alloc_flag; - - if (flags & PIN_HIGH) { - search_flag = DRM_MM_SEARCH_BELOW; - alloc_flag = DRM_MM_CREATE_TOP; - } else { - search_flag = DRM_MM_SEARCH_DEFAULT; - alloc_flag = DRM_MM_CREATE_DEFAULT; - } - - /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks, - * so we know that we always have a minimum alignment of 4096. - * The drm_mm range manager is optimised to return results - * with zero alignment, so where possible use the optimal - * path. - */ - if (alignment <= 4096) - alignment = 0; - -search_free: - ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm, - &vma->node, - size, alignment, - obj->cache_level, - start, end, - search_flag, - alloc_flag); - if (ret) { - ret = i915_gem_evict_something(vma->vm, size, alignment, - obj->cache_level, - start, end, - flags); - if (ret == 0) - goto search_free; - - goto err_unpin; - } - - GEM_BUG_ON(vma->node.start < start); - GEM_BUG_ON(vma->node.start + vma->node.size > end); - } - GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); - - list_move_tail(&obj->global_link, &dev_priv->mm.bound_list); - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); - obj->bind_count++; - GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); - - return 0; - -err_unpin: - i915_gem_object_unpin_pages(obj); - return ret; -} - void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force) { @@ -3818,100 +3541,6 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) return ret < 0 ? ret : 0; } -static bool -i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) -{ - if (!drm_mm_node_allocated(&vma->node)) - return false; - - if (vma->node.size < size) - return true; - - if (alignment && vma->node.start & (alignment - 1)) - return true; - - if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma)) - return true; - - if (flags & PIN_OFFSET_BIAS && - vma->node.start < (flags & PIN_OFFSET_MASK)) - return true; - - if (flags & PIN_OFFSET_FIXED && - vma->node.start != (flags & PIN_OFFSET_MASK)) - return true; - - return false; -} - -void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) -{ - struct drm_i915_gem_object *obj = vma->obj; - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - bool mappable, fenceable; - u32 fence_size, fence_alignment; - - fence_size = i915_gem_get_ggtt_size(dev_priv, - vma->size, - i915_gem_object_get_tiling(obj)); - fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, - vma->size, - i915_gem_object_get_tiling(obj), - true); - - fenceable = (vma->node.size == fence_size && - (vma->node.start & (fence_alignment - 1)) == 0); - - mappable = (vma->node.start + fence_size <= - dev_priv->ggtt.mappable_end); - - /* - * Explicitly disable for rotated VMA since the display does not - * need the fence and the VMA is not accessible to other users. - */ - if (mappable && fenceable && - vma->ggtt_view.type != I915_GGTT_VIEW_ROTATED) - vma->flags |= I915_VMA_CAN_FENCE; - else - vma->flags &= ~I915_VMA_CAN_FENCE; -} - -int __i915_vma_do_pin(struct i915_vma *vma, - u64 size, u64 alignment, u64 flags) -{ - unsigned int bound = vma->flags; - int ret; - - lockdep_assert_held(&vma->vm->dev->struct_mutex); - GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); - GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma)); - - if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { - ret = -EBUSY; - goto err; - } - - if ((bound & I915_VMA_BIND_MASK) == 0) { - ret = i915_vma_insert(vma, size, alignment, flags); - if (ret) - goto err; - } - - ret = i915_vma_bind(vma, vma->obj->cache_level, flags); - if (ret) - goto err; - - if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) - __i915_vma_set_map_and_fenceable(vma); - - GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); - return 0; - -err: - __i915_vma_unpin(vma); - return ret; -} - struct i915_vma * i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c deleted file mode 100644 index cd59dbc6588c..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ /dev/null @@ -1,716 +0,0 @@ -/* - * Copyright © 2008-2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include "i915_drv.h" - -/** - * DOC: fence register handling - * - * Important to avoid confusions: "fences" in the i915 driver are not execution - * fences used to track command completion but hardware detiler objects which - * wrap a given range of the global GTT. Each platform has only a fairly limited - * set of these objects. - * - * Fences are used to detile GTT memory mappings. They're also connected to the - * hardware frontbuffer render tracking and hence interact with frontbuffer - * compression. Furthermore on older platforms fences are required for tiled - * objects used by the display engine. They can also be used by the render - * engine - they're required for blitter commands and are optional for render - * commands. But on gen4+ both display (with the exception of fbc) and rendering - * have their own tiling state bits and don't need fences. - * - * Also note that fences only support X and Y tiling and hence can't be used for - * the fancier new tiling formats like W, Ys and Yf. - * - * Finally note that because fences are such a restricted resource they're - * dynamically associated with objects. Furthermore fence state is committed to - * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must - * explicitly call i915_gem_object_get_fence() to synchronize fencing status - * for cpu access. Also note that some code wants an unfenced view, for those - * cases the fence can be removed forcefully with i915_gem_object_put_fence(). - * - * Internally these functions will synchronize with userspace access by removing - * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed. - */ - -#define pipelined 0 - -static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, - struct i915_vma *vma) -{ - i915_reg_t fence_reg_lo, fence_reg_hi; - int fence_pitch_shift; - u64 val; - - if (INTEL_INFO(fence->i915)->gen >= 6) { - fence_reg_lo = FENCE_REG_GEN6_LO(fence->id); - fence_reg_hi = FENCE_REG_GEN6_HI(fence->id); - fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT; - - } else { - fence_reg_lo = FENCE_REG_965_LO(fence->id); - fence_reg_hi = FENCE_REG_965_HI(fence->id); - fence_pitch_shift = I965_FENCE_PITCH_SHIFT; - } - - val = 0; - if (vma) { - unsigned int tiling = i915_gem_object_get_tiling(vma->obj); - bool is_y_tiled = tiling == I915_TILING_Y; - unsigned int stride = i915_gem_object_get_stride(vma->obj); - u32 row_size = stride * (is_y_tiled ? 32 : 8); - u32 size = rounddown((u32)vma->node.size, row_size); - - val = ((vma->node.start + size - 4096) & 0xfffff000) << 32; - val |= vma->node.start & 0xfffff000; - val |= (u64)((stride / 128) - 1) << fence_pitch_shift; - if (is_y_tiled) - val |= BIT(I965_FENCE_TILING_Y_SHIFT); - val |= I965_FENCE_REG_VALID; - } - - if (!pipelined) { - struct drm_i915_private *dev_priv = fence->i915; - - /* To w/a incoherency with non-atomic 64-bit register updates, - * we split the 64-bit update into two 32-bit writes. In order - * for a partial fence not to be evaluated between writes, we - * precede the update with write to turn off the fence register, - * and only enable the fence as the last step. - * - * For extra levels of paranoia, we make sure each step lands - * before applying the next step. - */ - I915_WRITE(fence_reg_lo, 0); - POSTING_READ(fence_reg_lo); - - I915_WRITE(fence_reg_hi, upper_32_bits(val)); - I915_WRITE(fence_reg_lo, lower_32_bits(val)); - POSTING_READ(fence_reg_lo); - } -} - -static void i915_write_fence_reg(struct drm_i915_fence_reg *fence, - struct i915_vma *vma) -{ - u32 val; - - val = 0; - if (vma) { - unsigned int tiling = i915_gem_object_get_tiling(vma->obj); - bool is_y_tiled = tiling == I915_TILING_Y; - unsigned int stride = i915_gem_object_get_stride(vma->obj); - int pitch_val; - int tile_width; - - WARN((vma->node.start & ~I915_FENCE_START_MASK) || - !is_power_of_2(vma->node.size) || - (vma->node.start & (vma->node.size - 1)), - "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08llx) aligned\n", - vma->node.start, - i915_vma_is_map_and_fenceable(vma), - vma->node.size); - - if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915)) - tile_width = 128; - else - tile_width = 512; - - /* Note: pitch better be a power of two tile widths */ - pitch_val = stride / tile_width; - pitch_val = ffs(pitch_val) - 1; - - val = vma->node.start; - if (is_y_tiled) - val |= BIT(I830_FENCE_TILING_Y_SHIFT); - val |= I915_FENCE_SIZE_BITS(vma->node.size); - val |= pitch_val << I830_FENCE_PITCH_SHIFT; - val |= I830_FENCE_REG_VALID; - } - - if (!pipelined) { - struct drm_i915_private *dev_priv = fence->i915; - i915_reg_t reg = FENCE_REG(fence->id); - - I915_WRITE(reg, val); - POSTING_READ(reg); - } -} - -static void i830_write_fence_reg(struct drm_i915_fence_reg *fence, - struct i915_vma *vma) -{ - u32 val; - - val = 0; - if (vma) { - unsigned int tiling = i915_gem_object_get_tiling(vma->obj); - bool is_y_tiled = tiling == I915_TILING_Y; - unsigned int stride = i915_gem_object_get_stride(vma->obj); - u32 pitch_val; - - WARN((vma->node.start & ~I830_FENCE_START_MASK) || - !is_power_of_2(vma->node.size) || - (vma->node.start & (vma->node.size - 1)), - "object 0x%08llx not 512K or pot-size 0x%08llx aligned\n", - vma->node.start, vma->node.size); - - pitch_val = stride / 128; - pitch_val = ffs(pitch_val) - 1; - - val = vma->node.start; - if (is_y_tiled) - val |= BIT(I830_FENCE_TILING_Y_SHIFT); - val |= I830_FENCE_SIZE_BITS(vma->node.size); - val |= pitch_val << I830_FENCE_PITCH_SHIFT; - val |= I830_FENCE_REG_VALID; - } - - if (!pipelined) { - struct drm_i915_private *dev_priv = fence->i915; - i915_reg_t reg = FENCE_REG(fence->id); - - I915_WRITE(reg, val); - POSTING_READ(reg); - } -} - -static void fence_write(struct drm_i915_fence_reg *fence, - struct i915_vma *vma) -{ - /* Previous access through the fence register is marshalled by - * the mb() inside the fault handlers (i915_gem_release_mmaps) - * and explicitly managed for internal users. - */ - - if (IS_GEN2(fence->i915)) - i830_write_fence_reg(fence, vma); - else if (IS_GEN3(fence->i915)) - i915_write_fence_reg(fence, vma); - else - i965_write_fence_reg(fence, vma); - - /* Access through the fenced region afterwards is - * ordered by the posting reads whilst writing the registers. - */ - - fence->dirty = false; -} - -static int fence_update(struct drm_i915_fence_reg *fence, - struct i915_vma *vma) -{ - int ret; - - if (vma) { - if (!i915_vma_is_map_and_fenceable(vma)) - return -EINVAL; - - if (WARN(!i915_gem_object_get_stride(vma->obj) || - !i915_gem_object_get_tiling(vma->obj), - "bogus fence setup with stride: 0x%x, tiling mode: %i\n", - i915_gem_object_get_stride(vma->obj), - i915_gem_object_get_tiling(vma->obj))) - return -EINVAL; - - ret = i915_gem_active_retire(&vma->last_fence, - &vma->obj->base.dev->struct_mutex); - if (ret) - return ret; - } - - if (fence->vma) { - ret = i915_gem_active_retire(&fence->vma->last_fence, - &fence->vma->obj->base.dev->struct_mutex); - if (ret) - return ret; - } - - if (fence->vma && fence->vma != vma) { - /* Ensure that all userspace CPU access is completed before - * stealing the fence. - */ - i915_gem_release_mmap(fence->vma->obj); - - fence->vma->fence = NULL; - fence->vma = NULL; - - list_move(&fence->link, &fence->i915->mm.fence_list); - } - - fence_write(fence, vma); - - if (vma) { - if (fence->vma != vma) { - vma->fence = fence; - fence->vma = vma; - } - - list_move_tail(&fence->link, &fence->i915->mm.fence_list); - } - - return 0; -} - -/** - * i915_vma_put_fence - force-remove fence for a VMA - * @vma: vma to map linearly (not through a fence reg) - * - * This function force-removes any fence from the given object, which is useful - * if the kernel wants to do untiled GTT access. - * - * Returns: - * - * 0 on success, negative error code on failure. - */ -int -i915_vma_put_fence(struct i915_vma *vma) -{ - struct drm_i915_fence_reg *fence = vma->fence; - - assert_rpm_wakelock_held(to_i915(vma->vm->dev)); - - if (!fence) - return 0; - - if (fence->pin_count) - return -EBUSY; - - return fence_update(fence, NULL); -} - -static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv) -{ - struct drm_i915_fence_reg *fence; - - list_for_each_entry(fence, &dev_priv->mm.fence_list, link) { - if (fence->pin_count) - continue; - - return fence; - } - - /* Wait for completion of pending flips which consume fences */ - if (intel_has_pending_fb_unpin(&dev_priv->drm)) - return ERR_PTR(-EAGAIN); - - return ERR_PTR(-EDEADLK); -} - -/** - * i915_vma_get_fence - set up fencing for a vma - * @vma: vma to map through a fence reg - * - * When mapping objects through the GTT, userspace wants to be able to write - * to them without having to worry about swizzling if the object is tiled. - * This function walks the fence regs looking for a free one for @obj, - * stealing one if it can't find any. - * - * It then sets up the reg based on the object's properties: address, pitch - * and tiling format. - * - * For an untiled surface, this removes any existing fence. - * - * Returns: - * - * 0 on success, negative error code on failure. - */ -int -i915_vma_get_fence(struct i915_vma *vma) -{ - struct drm_i915_fence_reg *fence; - struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL; - - /* Note that we revoke fences on runtime suspend. Therefore the user - * must keep the device awake whilst using the fence. - */ - assert_rpm_wakelock_held(to_i915(vma->vm->dev)); - - /* Just update our place in the LRU if our fence is getting reused. */ - if (vma->fence) { - fence = vma->fence; - if (!fence->dirty) { - list_move_tail(&fence->link, - &fence->i915->mm.fence_list); - return 0; - } - } else if (set) { - fence = fence_find(to_i915(vma->vm->dev)); - if (IS_ERR(fence)) - return PTR_ERR(fence); - } else - return 0; - - return fence_update(fence, set); -} - -/** - * i915_gem_restore_fences - restore fence state - * @dev: DRM device - * - * Restore the hw fence state to match the software tracking again, to be called - * after a gpu reset and on resume. Note that on runtime suspend we only cancel - * the fences, to be reacquired by the user later. - */ -void i915_gem_restore_fences(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - int i; - - for (i = 0; i < dev_priv->num_fence_regs; i++) { - struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; - struct i915_vma *vma = reg->vma; - - /* - * Commit delayed tiling changes if we have an object still - * attached to the fence, otherwise just clear the fence. - */ - if (vma && !i915_gem_object_is_tiled(vma->obj)) { - GEM_BUG_ON(!reg->dirty); - GEM_BUG_ON(!list_empty(&vma->obj->userfault_link)); - - list_move(®->link, &dev_priv->mm.fence_list); - vma->fence = NULL; - vma = NULL; - } - - fence_write(reg, vma); - reg->vma = vma; - } -} - -/** - * DOC: tiling swizzling details - * - * The idea behind tiling is to increase cache hit rates by rearranging - * pixel data so that a group of pixel accesses are in the same cacheline. - * Performance improvement from doing this on the back/depth buffer are on - * the order of 30%. - * - * Intel architectures make this somewhat more complicated, though, by - * adjustments made to addressing of data when the memory is in interleaved - * mode (matched pairs of DIMMS) to improve memory bandwidth. - * For interleaved memory, the CPU sends every sequential 64 bytes - * to an alternate memory channel so it can get the bandwidth from both. - * - * The GPU also rearranges its accesses for increased bandwidth to interleaved - * memory, and it matches what the CPU does for non-tiled. However, when tiled - * it does it a little differently, since one walks addresses not just in the - * X direction but also Y. So, along with alternating channels when bit - * 6 of the address flips, it also alternates when other bits flip -- Bits 9 - * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines) - * are common to both the 915 and 965-class hardware. - * - * The CPU also sometimes XORs in higher bits as well, to improve - * bandwidth doing strided access like we do so frequently in graphics. This - * is called "Channel XOR Randomization" in the MCH documentation. The result - * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address - * decode. - * - * All of this bit 6 XORing has an effect on our memory management, - * as we need to make sure that the 3d driver can correctly address object - * contents. - * - * If we don't have interleaved memory, all tiling is safe and no swizzling is - * required. - * - * When bit 17 is XORed in, we simply refuse to tile at all. Bit - * 17 is not just a page offset, so as we page an object out and back in, - * individual pages in it will have different bit 17 addresses, resulting in - * each 64 bytes being swapped with its neighbor! - * - * Otherwise, if interleaved, we have to tell the 3d driver what the address - * swizzling it needs to do is, since it's writing with the CPU to the pages - * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the - * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling - * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order - * to match what the GPU expects. - */ - -/** - * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern - * @dev: DRM device - * - * Detects bit 6 swizzling of address lookup between IGD access and CPU - * access through main memory. - */ -void -i915_gem_detect_bit_6_swizzle(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; - uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; - - if (INTEL_GEN(dev_priv) >= 8 || IS_VALLEYVIEW(dev_priv)) { - /* - * On BDW+, swizzling is not used. We leave the CPU memory - * controller in charge of optimizing memory accesses without - * the extra address manipulation GPU side. - * - * VLV and CHV don't have GPU swizzling. - */ - swizzle_x = I915_BIT_6_SWIZZLE_NONE; - swizzle_y = I915_BIT_6_SWIZZLE_NONE; - } else if (INTEL_INFO(dev)->gen >= 6) { - if (dev_priv->preserve_bios_swizzle) { - if (I915_READ(DISP_ARB_CTL) & - DISP_TILE_SURFACE_SWIZZLING) { - swizzle_x = I915_BIT_6_SWIZZLE_9_10; - swizzle_y = I915_BIT_6_SWIZZLE_9; - } else { - swizzle_x = I915_BIT_6_SWIZZLE_NONE; - swizzle_y = I915_BIT_6_SWIZZLE_NONE; - } - } else { - uint32_t dimm_c0, dimm_c1; - dimm_c0 = I915_READ(MAD_DIMM_C0); - dimm_c1 = I915_READ(MAD_DIMM_C1); - dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; - dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; - /* Enable swizzling when the channels are populated - * with identically sized dimms. We don't need to check - * the 3rd channel because no cpu with gpu attached - * ships in that configuration. Also, swizzling only - * makes sense for 2 channels anyway. */ - if (dimm_c0 == dimm_c1) { - swizzle_x = I915_BIT_6_SWIZZLE_9_10; - swizzle_y = I915_BIT_6_SWIZZLE_9; - } else { - swizzle_x = I915_BIT_6_SWIZZLE_NONE; - swizzle_y = I915_BIT_6_SWIZZLE_NONE; - } - } - } else if (IS_GEN5(dev_priv)) { - /* On Ironlake whatever DRAM config, GPU always do - * same swizzling setup. - */ - swizzle_x = I915_BIT_6_SWIZZLE_9_10; - swizzle_y = I915_BIT_6_SWIZZLE_9; - } else if (IS_GEN2(dev_priv)) { - /* As far as we know, the 865 doesn't have these bit 6 - * swizzling issues. - */ - swizzle_x = I915_BIT_6_SWIZZLE_NONE; - swizzle_y = I915_BIT_6_SWIZZLE_NONE; - } else if (IS_MOBILE(dev_priv) || (IS_GEN3(dev_priv) && - !IS_G33(dev_priv))) { - uint32_t dcc; - - /* On 9xx chipsets, channel interleave by the CPU is - * determined by DCC. For single-channel, neither the CPU - * nor the GPU do swizzling. For dual channel interleaved, - * the GPU's interleave is bit 9 and 10 for X tiled, and bit - * 9 for Y tiled. The CPU's interleave is independent, and - * can be based on either bit 11 (haven't seen this yet) or - * bit 17 (common). - */ - dcc = I915_READ(DCC); - switch (dcc & DCC_ADDRESSING_MODE_MASK) { - case DCC_ADDRESSING_MODE_SINGLE_CHANNEL: - case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC: - swizzle_x = I915_BIT_6_SWIZZLE_NONE; - swizzle_y = I915_BIT_6_SWIZZLE_NONE; - break; - case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED: - if (dcc & DCC_CHANNEL_XOR_DISABLE) { - /* This is the base swizzling by the GPU for - * tiled buffers. - */ - swizzle_x = I915_BIT_6_SWIZZLE_9_10; - swizzle_y = I915_BIT_6_SWIZZLE_9; - } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) { - /* Bit 11 swizzling by the CPU in addition. */ - swizzle_x = I915_BIT_6_SWIZZLE_9_10_11; - swizzle_y = I915_BIT_6_SWIZZLE_9_11; - } else { - /* Bit 17 swizzling by the CPU in addition. */ - swizzle_x = I915_BIT_6_SWIZZLE_9_10_17; - swizzle_y = I915_BIT_6_SWIZZLE_9_17; - } - break; - } - - /* check for L-shaped memory aka modified enhanced addressing */ - if (IS_GEN4(dev_priv) && - !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) { - swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; - swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; - } - - if (dcc == 0xffffffff) { - DRM_ERROR("Couldn't read from MCHBAR. " - "Disabling tiling.\n"); - swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; - swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; - } - } else { - /* The 965, G33, and newer, have a very flexible memory - * configuration. It will enable dual-channel mode - * (interleaving) on as much memory as it can, and the GPU - * will additionally sometimes enable different bit 6 - * swizzling for tiled objects from the CPU. - * - * Here's what I found on the G965: - * slot fill memory size swizzling - * 0A 0B 1A 1B 1-ch 2-ch - * 512 0 0 0 512 0 O - * 512 0 512 0 16 1008 X - * 512 0 0 512 16 1008 X - * 0 512 0 512 16 1008 X - * 1024 1024 1024 0 2048 1024 O - * - * We could probably detect this based on either the DRB - * matching, which was the case for the swizzling required in - * the table above, or from the 1-ch value being less than - * the minimum size of a rank. - * - * Reports indicate that the swizzling actually - * varies depending upon page placement inside the - * channels, i.e. we see swizzled pages where the - * banks of memory are paired and unswizzled on the - * uneven portion, so leave that as unknown. - */ - if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) { - swizzle_x = I915_BIT_6_SWIZZLE_9_10; - swizzle_y = I915_BIT_6_SWIZZLE_9; - } - } - - if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN || - swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) { - /* Userspace likes to explode if it sees unknown swizzling, - * so lie. We will finish the lie when reporting through - * the get-tiling-ioctl by reporting the physical swizzle - * mode as unknown instead. - * - * As we don't strictly know what the swizzling is, it may be - * bit17 dependent, and so we need to also prevent the pages - * from being moved. - */ - dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES; - swizzle_x = I915_BIT_6_SWIZZLE_NONE; - swizzle_y = I915_BIT_6_SWIZZLE_NONE; - } - - dev_priv->mm.bit_6_swizzle_x = swizzle_x; - dev_priv->mm.bit_6_swizzle_y = swizzle_y; -} - -/* - * Swap every 64 bytes of this page around, to account for it having a new - * bit 17 of its physical address and therefore being interpreted differently - * by the GPU. - */ -static void -i915_gem_swizzle_page(struct page *page) -{ - char temp[64]; - char *vaddr; - int i; - - vaddr = kmap(page); - - for (i = 0; i < PAGE_SIZE; i += 128) { - memcpy(temp, &vaddr[i], 64); - memcpy(&vaddr[i], &vaddr[i + 64], 64); - memcpy(&vaddr[i + 64], temp, 64); - } - - kunmap(page); -} - -/** - * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling - * @obj: i915 GEM buffer object - * @pages: the scattergather list of physical pages - * - * This function fixes up the swizzling in case any page frame number for this - * object has changed in bit 17 since that state has been saved with - * i915_gem_object_save_bit_17_swizzle(). - * - * This is called when pinning backing storage again, since the kernel is free - * to move unpinned backing storage around (either by directly moving pages or - * by swapping them out and back in again). - */ -void -i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - struct sgt_iter sgt_iter; - struct page *page; - int i; - - if (obj->bit_17 == NULL) - return; - - i = 0; - for_each_sgt_page(page, sgt_iter, pages) { - char new_bit_17 = page_to_phys(page) >> 17; - if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) { - i915_gem_swizzle_page(page); - set_page_dirty(page); - } - i++; - } -} - -/** - * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling - * @obj: i915 GEM buffer object - * @pages: the scattergather list of physical pages - * - * This function saves the bit 17 of each page frame number so that swizzling - * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must - * be called before the backing storage can be unpinned. - */ -void -i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - const unsigned int page_count = obj->base.size >> PAGE_SHIFT; - struct sgt_iter sgt_iter; - struct page *page; - int i; - - if (obj->bit_17 == NULL) { - obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count), - sizeof(long), GFP_KERNEL); - if (obj->bit_17 == NULL) { - DRM_ERROR("Failed to allocate memory for bit 17 " - "record\n"); - return; - } - } - - i = 0; - - for_each_sgt_page(page, sgt_iter, pages) { - if (page_to_phys(page) & (1 << 17)) - __set_bit(i, obj->bit_17); - else - __clear_bit(i, obj->bit_17); - i++; - } -} diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c new file mode 100644 index 000000000000..cd59dbc6588c --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -0,0 +1,716 @@ +/* + * Copyright © 2008-2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include "i915_drv.h" + +/** + * DOC: fence register handling + * + * Important to avoid confusions: "fences" in the i915 driver are not execution + * fences used to track command completion but hardware detiler objects which + * wrap a given range of the global GTT. Each platform has only a fairly limited + * set of these objects. + * + * Fences are used to detile GTT memory mappings. They're also connected to the + * hardware frontbuffer render tracking and hence interact with frontbuffer + * compression. Furthermore on older platforms fences are required for tiled + * objects used by the display engine. They can also be used by the render + * engine - they're required for blitter commands and are optional for render + * commands. But on gen4+ both display (with the exception of fbc) and rendering + * have their own tiling state bits and don't need fences. + * + * Also note that fences only support X and Y tiling and hence can't be used for + * the fancier new tiling formats like W, Ys and Yf. + * + * Finally note that because fences are such a restricted resource they're + * dynamically associated with objects. Furthermore fence state is committed to + * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must + * explicitly call i915_gem_object_get_fence() to synchronize fencing status + * for cpu access. Also note that some code wants an unfenced view, for those + * cases the fence can be removed forcefully with i915_gem_object_put_fence(). + * + * Internally these functions will synchronize with userspace access by removing + * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed. + */ + +#define pipelined 0 + +static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, + struct i915_vma *vma) +{ + i915_reg_t fence_reg_lo, fence_reg_hi; + int fence_pitch_shift; + u64 val; + + if (INTEL_INFO(fence->i915)->gen >= 6) { + fence_reg_lo = FENCE_REG_GEN6_LO(fence->id); + fence_reg_hi = FENCE_REG_GEN6_HI(fence->id); + fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT; + + } else { + fence_reg_lo = FENCE_REG_965_LO(fence->id); + fence_reg_hi = FENCE_REG_965_HI(fence->id); + fence_pitch_shift = I965_FENCE_PITCH_SHIFT; + } + + val = 0; + if (vma) { + unsigned int tiling = i915_gem_object_get_tiling(vma->obj); + bool is_y_tiled = tiling == I915_TILING_Y; + unsigned int stride = i915_gem_object_get_stride(vma->obj); + u32 row_size = stride * (is_y_tiled ? 32 : 8); + u32 size = rounddown((u32)vma->node.size, row_size); + + val = ((vma->node.start + size - 4096) & 0xfffff000) << 32; + val |= vma->node.start & 0xfffff000; + val |= (u64)((stride / 128) - 1) << fence_pitch_shift; + if (is_y_tiled) + val |= BIT(I965_FENCE_TILING_Y_SHIFT); + val |= I965_FENCE_REG_VALID; + } + + if (!pipelined) { + struct drm_i915_private *dev_priv = fence->i915; + + /* To w/a incoherency with non-atomic 64-bit register updates, + * we split the 64-bit update into two 32-bit writes. In order + * for a partial fence not to be evaluated between writes, we + * precede the update with write to turn off the fence register, + * and only enable the fence as the last step. + * + * For extra levels of paranoia, we make sure each step lands + * before applying the next step. + */ + I915_WRITE(fence_reg_lo, 0); + POSTING_READ(fence_reg_lo); + + I915_WRITE(fence_reg_hi, upper_32_bits(val)); + I915_WRITE(fence_reg_lo, lower_32_bits(val)); + POSTING_READ(fence_reg_lo); + } +} + +static void i915_write_fence_reg(struct drm_i915_fence_reg *fence, + struct i915_vma *vma) +{ + u32 val; + + val = 0; + if (vma) { + unsigned int tiling = i915_gem_object_get_tiling(vma->obj); + bool is_y_tiled = tiling == I915_TILING_Y; + unsigned int stride = i915_gem_object_get_stride(vma->obj); + int pitch_val; + int tile_width; + + WARN((vma->node.start & ~I915_FENCE_START_MASK) || + !is_power_of_2(vma->node.size) || + (vma->node.start & (vma->node.size - 1)), + "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08llx) aligned\n", + vma->node.start, + i915_vma_is_map_and_fenceable(vma), + vma->node.size); + + if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915)) + tile_width = 128; + else + tile_width = 512; + + /* Note: pitch better be a power of two tile widths */ + pitch_val = stride / tile_width; + pitch_val = ffs(pitch_val) - 1; + + val = vma->node.start; + if (is_y_tiled) + val |= BIT(I830_FENCE_TILING_Y_SHIFT); + val |= I915_FENCE_SIZE_BITS(vma->node.size); + val |= pitch_val << I830_FENCE_PITCH_SHIFT; + val |= I830_FENCE_REG_VALID; + } + + if (!pipelined) { + struct drm_i915_private *dev_priv = fence->i915; + i915_reg_t reg = FENCE_REG(fence->id); + + I915_WRITE(reg, val); + POSTING_READ(reg); + } +} + +static void i830_write_fence_reg(struct drm_i915_fence_reg *fence, + struct i915_vma *vma) +{ + u32 val; + + val = 0; + if (vma) { + unsigned int tiling = i915_gem_object_get_tiling(vma->obj); + bool is_y_tiled = tiling == I915_TILING_Y; + unsigned int stride = i915_gem_object_get_stride(vma->obj); + u32 pitch_val; + + WARN((vma->node.start & ~I830_FENCE_START_MASK) || + !is_power_of_2(vma->node.size) || + (vma->node.start & (vma->node.size - 1)), + "object 0x%08llx not 512K or pot-size 0x%08llx aligned\n", + vma->node.start, vma->node.size); + + pitch_val = stride / 128; + pitch_val = ffs(pitch_val) - 1; + + val = vma->node.start; + if (is_y_tiled) + val |= BIT(I830_FENCE_TILING_Y_SHIFT); + val |= I830_FENCE_SIZE_BITS(vma->node.size); + val |= pitch_val << I830_FENCE_PITCH_SHIFT; + val |= I830_FENCE_REG_VALID; + } + + if (!pipelined) { + struct drm_i915_private *dev_priv = fence->i915; + i915_reg_t reg = FENCE_REG(fence->id); + + I915_WRITE(reg, val); + POSTING_READ(reg); + } +} + +static void fence_write(struct drm_i915_fence_reg *fence, + struct i915_vma *vma) +{ + /* Previous access through the fence register is marshalled by + * the mb() inside the fault handlers (i915_gem_release_mmaps) + * and explicitly managed for internal users. + */ + + if (IS_GEN2(fence->i915)) + i830_write_fence_reg(fence, vma); + else if (IS_GEN3(fence->i915)) + i915_write_fence_reg(fence, vma); + else + i965_write_fence_reg(fence, vma); + + /* Access through the fenced region afterwards is + * ordered by the posting reads whilst writing the registers. + */ + + fence->dirty = false; +} + +static int fence_update(struct drm_i915_fence_reg *fence, + struct i915_vma *vma) +{ + int ret; + + if (vma) { + if (!i915_vma_is_map_and_fenceable(vma)) + return -EINVAL; + + if (WARN(!i915_gem_object_get_stride(vma->obj) || + !i915_gem_object_get_tiling(vma->obj), + "bogus fence setup with stride: 0x%x, tiling mode: %i\n", + i915_gem_object_get_stride(vma->obj), + i915_gem_object_get_tiling(vma->obj))) + return -EINVAL; + + ret = i915_gem_active_retire(&vma->last_fence, + &vma->obj->base.dev->struct_mutex); + if (ret) + return ret; + } + + if (fence->vma) { + ret = i915_gem_active_retire(&fence->vma->last_fence, + &fence->vma->obj->base.dev->struct_mutex); + if (ret) + return ret; + } + + if (fence->vma && fence->vma != vma) { + /* Ensure that all userspace CPU access is completed before + * stealing the fence. + */ + i915_gem_release_mmap(fence->vma->obj); + + fence->vma->fence = NULL; + fence->vma = NULL; + + list_move(&fence->link, &fence->i915->mm.fence_list); + } + + fence_write(fence, vma); + + if (vma) { + if (fence->vma != vma) { + vma->fence = fence; + fence->vma = vma; + } + + list_move_tail(&fence->link, &fence->i915->mm.fence_list); + } + + return 0; +} + +/** + * i915_vma_put_fence - force-remove fence for a VMA + * @vma: vma to map linearly (not through a fence reg) + * + * This function force-removes any fence from the given object, which is useful + * if the kernel wants to do untiled GTT access. + * + * Returns: + * + * 0 on success, negative error code on failure. + */ +int +i915_vma_put_fence(struct i915_vma *vma) +{ + struct drm_i915_fence_reg *fence = vma->fence; + + assert_rpm_wakelock_held(to_i915(vma->vm->dev)); + + if (!fence) + return 0; + + if (fence->pin_count) + return -EBUSY; + + return fence_update(fence, NULL); +} + +static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv) +{ + struct drm_i915_fence_reg *fence; + + list_for_each_entry(fence, &dev_priv->mm.fence_list, link) { + if (fence->pin_count) + continue; + + return fence; + } + + /* Wait for completion of pending flips which consume fences */ + if (intel_has_pending_fb_unpin(&dev_priv->drm)) + return ERR_PTR(-EAGAIN); + + return ERR_PTR(-EDEADLK); +} + +/** + * i915_vma_get_fence - set up fencing for a vma + * @vma: vma to map through a fence reg + * + * When mapping objects through the GTT, userspace wants to be able to write + * to them without having to worry about swizzling if the object is tiled. + * This function walks the fence regs looking for a free one for @obj, + * stealing one if it can't find any. + * + * It then sets up the reg based on the object's properties: address, pitch + * and tiling format. + * + * For an untiled surface, this removes any existing fence. + * + * Returns: + * + * 0 on success, negative error code on failure. + */ +int +i915_vma_get_fence(struct i915_vma *vma) +{ + struct drm_i915_fence_reg *fence; + struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL; + + /* Note that we revoke fences on runtime suspend. Therefore the user + * must keep the device awake whilst using the fence. + */ + assert_rpm_wakelock_held(to_i915(vma->vm->dev)); + + /* Just update our place in the LRU if our fence is getting reused. */ + if (vma->fence) { + fence = vma->fence; + if (!fence->dirty) { + list_move_tail(&fence->link, + &fence->i915->mm.fence_list); + return 0; + } + } else if (set) { + fence = fence_find(to_i915(vma->vm->dev)); + if (IS_ERR(fence)) + return PTR_ERR(fence); + } else + return 0; + + return fence_update(fence, set); +} + +/** + * i915_gem_restore_fences - restore fence state + * @dev: DRM device + * + * Restore the hw fence state to match the software tracking again, to be called + * after a gpu reset and on resume. Note that on runtime suspend we only cancel + * the fences, to be reacquired by the user later. + */ +void i915_gem_restore_fences(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + int i; + + for (i = 0; i < dev_priv->num_fence_regs; i++) { + struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; + struct i915_vma *vma = reg->vma; + + /* + * Commit delayed tiling changes if we have an object still + * attached to the fence, otherwise just clear the fence. + */ + if (vma && !i915_gem_object_is_tiled(vma->obj)) { + GEM_BUG_ON(!reg->dirty); + GEM_BUG_ON(!list_empty(&vma->obj->userfault_link)); + + list_move(®->link, &dev_priv->mm.fence_list); + vma->fence = NULL; + vma = NULL; + } + + fence_write(reg, vma); + reg->vma = vma; + } +} + +/** + * DOC: tiling swizzling details + * + * The idea behind tiling is to increase cache hit rates by rearranging + * pixel data so that a group of pixel accesses are in the same cacheline. + * Performance improvement from doing this on the back/depth buffer are on + * the order of 30%. + * + * Intel architectures make this somewhat more complicated, though, by + * adjustments made to addressing of data when the memory is in interleaved + * mode (matched pairs of DIMMS) to improve memory bandwidth. + * For interleaved memory, the CPU sends every sequential 64 bytes + * to an alternate memory channel so it can get the bandwidth from both. + * + * The GPU also rearranges its accesses for increased bandwidth to interleaved + * memory, and it matches what the CPU does for non-tiled. However, when tiled + * it does it a little differently, since one walks addresses not just in the + * X direction but also Y. So, along with alternating channels when bit + * 6 of the address flips, it also alternates when other bits flip -- Bits 9 + * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines) + * are common to both the 915 and 965-class hardware. + * + * The CPU also sometimes XORs in higher bits as well, to improve + * bandwidth doing strided access like we do so frequently in graphics. This + * is called "Channel XOR Randomization" in the MCH documentation. The result + * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address + * decode. + * + * All of this bit 6 XORing has an effect on our memory management, + * as we need to make sure that the 3d driver can correctly address object + * contents. + * + * If we don't have interleaved memory, all tiling is safe and no swizzling is + * required. + * + * When bit 17 is XORed in, we simply refuse to tile at all. Bit + * 17 is not just a page offset, so as we page an object out and back in, + * individual pages in it will have different bit 17 addresses, resulting in + * each 64 bytes being swapped with its neighbor! + * + * Otherwise, if interleaved, we have to tell the 3d driver what the address + * swizzling it needs to do is, since it's writing with the CPU to the pages + * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the + * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling + * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order + * to match what the GPU expects. + */ + +/** + * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern + * @dev: DRM device + * + * Detects bit 6 swizzling of address lookup between IGD access and CPU + * access through main memory. + */ +void +i915_gem_detect_bit_6_swizzle(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + + if (INTEL_GEN(dev_priv) >= 8 || IS_VALLEYVIEW(dev_priv)) { + /* + * On BDW+, swizzling is not used. We leave the CPU memory + * controller in charge of optimizing memory accesses without + * the extra address manipulation GPU side. + * + * VLV and CHV don't have GPU swizzling. + */ + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + } else if (INTEL_INFO(dev)->gen >= 6) { + if (dev_priv->preserve_bios_swizzle) { + if (I915_READ(DISP_ARB_CTL) & + DISP_TILE_SURFACE_SWIZZLING) { + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } else { + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + } + } else { + uint32_t dimm_c0, dimm_c1; + dimm_c0 = I915_READ(MAD_DIMM_C0); + dimm_c1 = I915_READ(MAD_DIMM_C1); + dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; + dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; + /* Enable swizzling when the channels are populated + * with identically sized dimms. We don't need to check + * the 3rd channel because no cpu with gpu attached + * ships in that configuration. Also, swizzling only + * makes sense for 2 channels anyway. */ + if (dimm_c0 == dimm_c1) { + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } else { + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + } + } + } else if (IS_GEN5(dev_priv)) { + /* On Ironlake whatever DRAM config, GPU always do + * same swizzling setup. + */ + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } else if (IS_GEN2(dev_priv)) { + /* As far as we know, the 865 doesn't have these bit 6 + * swizzling issues. + */ + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + } else if (IS_MOBILE(dev_priv) || (IS_GEN3(dev_priv) && + !IS_G33(dev_priv))) { + uint32_t dcc; + + /* On 9xx chipsets, channel interleave by the CPU is + * determined by DCC. For single-channel, neither the CPU + * nor the GPU do swizzling. For dual channel interleaved, + * the GPU's interleave is bit 9 and 10 for X tiled, and bit + * 9 for Y tiled. The CPU's interleave is independent, and + * can be based on either bit 11 (haven't seen this yet) or + * bit 17 (common). + */ + dcc = I915_READ(DCC); + switch (dcc & DCC_ADDRESSING_MODE_MASK) { + case DCC_ADDRESSING_MODE_SINGLE_CHANNEL: + case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC: + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + break; + case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED: + if (dcc & DCC_CHANNEL_XOR_DISABLE) { + /* This is the base swizzling by the GPU for + * tiled buffers. + */ + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) { + /* Bit 11 swizzling by the CPU in addition. */ + swizzle_x = I915_BIT_6_SWIZZLE_9_10_11; + swizzle_y = I915_BIT_6_SWIZZLE_9_11; + } else { + /* Bit 17 swizzling by the CPU in addition. */ + swizzle_x = I915_BIT_6_SWIZZLE_9_10_17; + swizzle_y = I915_BIT_6_SWIZZLE_9_17; + } + break; + } + + /* check for L-shaped memory aka modified enhanced addressing */ + if (IS_GEN4(dev_priv) && + !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) { + swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + } + + if (dcc == 0xffffffff) { + DRM_ERROR("Couldn't read from MCHBAR. " + "Disabling tiling.\n"); + swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + } + } else { + /* The 965, G33, and newer, have a very flexible memory + * configuration. It will enable dual-channel mode + * (interleaving) on as much memory as it can, and the GPU + * will additionally sometimes enable different bit 6 + * swizzling for tiled objects from the CPU. + * + * Here's what I found on the G965: + * slot fill memory size swizzling + * 0A 0B 1A 1B 1-ch 2-ch + * 512 0 0 0 512 0 O + * 512 0 512 0 16 1008 X + * 512 0 0 512 16 1008 X + * 0 512 0 512 16 1008 X + * 1024 1024 1024 0 2048 1024 O + * + * We could probably detect this based on either the DRB + * matching, which was the case for the swizzling required in + * the table above, or from the 1-ch value being less than + * the minimum size of a rank. + * + * Reports indicate that the swizzling actually + * varies depending upon page placement inside the + * channels, i.e. we see swizzled pages where the + * banks of memory are paired and unswizzled on the + * uneven portion, so leave that as unknown. + */ + if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) { + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } + } + + if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN || + swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) { + /* Userspace likes to explode if it sees unknown swizzling, + * so lie. We will finish the lie when reporting through + * the get-tiling-ioctl by reporting the physical swizzle + * mode as unknown instead. + * + * As we don't strictly know what the swizzling is, it may be + * bit17 dependent, and so we need to also prevent the pages + * from being moved. + */ + dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES; + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + } + + dev_priv->mm.bit_6_swizzle_x = swizzle_x; + dev_priv->mm.bit_6_swizzle_y = swizzle_y; +} + +/* + * Swap every 64 bytes of this page around, to account for it having a new + * bit 17 of its physical address and therefore being interpreted differently + * by the GPU. + */ +static void +i915_gem_swizzle_page(struct page *page) +{ + char temp[64]; + char *vaddr; + int i; + + vaddr = kmap(page); + + for (i = 0; i < PAGE_SIZE; i += 128) { + memcpy(temp, &vaddr[i], 64); + memcpy(&vaddr[i], &vaddr[i + 64], 64); + memcpy(&vaddr[i + 64], temp, 64); + } + + kunmap(page); +} + +/** + * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling + * @obj: i915 GEM buffer object + * @pages: the scattergather list of physical pages + * + * This function fixes up the swizzling in case any page frame number for this + * object has changed in bit 17 since that state has been saved with + * i915_gem_object_save_bit_17_swizzle(). + * + * This is called when pinning backing storage again, since the kernel is free + * to move unpinned backing storage around (either by directly moving pages or + * by swapping them out and back in again). + */ +void +i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + struct sgt_iter sgt_iter; + struct page *page; + int i; + + if (obj->bit_17 == NULL) + return; + + i = 0; + for_each_sgt_page(page, sgt_iter, pages) { + char new_bit_17 = page_to_phys(page) >> 17; + if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) { + i915_gem_swizzle_page(page); + set_page_dirty(page); + } + i++; + } +} + +/** + * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling + * @obj: i915 GEM buffer object + * @pages: the scattergather list of physical pages + * + * This function saves the bit 17 of each page frame number so that swizzling + * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must + * be called before the backing storage can be unpinned. + */ +void +i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + const unsigned int page_count = obj->base.size >> PAGE_SHIFT; + struct sgt_iter sgt_iter; + struct page *page; + int i; + + if (obj->bit_17 == NULL) { + obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count), + sizeof(long), GFP_KERNEL); + if (obj->bit_17 == NULL) { + DRM_ERROR("Failed to allocate memory for bit 17 " + "record\n"); + return; + } + } + + i = 0; + + for_each_sgt_page(page, sgt_iter, pages) { + if (page_to_phys(page) & (1 << 17)) + __set_bit(i, obj->bit_17); + else + __clear_bit(i, obj->bit_17); + i++; + } +} diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h new file mode 100644 index 000000000000..22c4a2d01adf --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h @@ -0,0 +1,51 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_FENCE_REG_H__ +#define __I915_FENCE_REG_H__ + +#include + +struct drm_i915_private; +struct i915_vma; + +struct drm_i915_fence_reg { + struct list_head link; + struct drm_i915_private *i915; + struct i915_vma *vma; + int pin_count; + int id; + /** + * Whether the tiling parameters for the currently + * associated fence register have changed. Note that + * for the purposes of tracking tiling changes we also + * treat the unfenced register, the register slot that + * the object occupies whilst it executes a fenced + * command (such as BLT on gen2/3), as a "fence". + */ + bool dirty; +}; + +#endif + diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a5fafa3d4fc8..f60e5a710f09 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -96,13 +96,6 @@ * */ -static inline struct i915_ggtt * -i915_vm_to_ggtt(struct i915_address_space *vm) -{ - GEM_BUG_ON(!i915_is_ggtt(vm)); - return container_of(vm, struct i915_ggtt, base); -} - static int i915_get_ggtt_vma_pages(struct i915_vma *vma); @@ -3348,176 +3341,6 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) i915_ggtt_flush(dev_priv); } -static void -i915_vma_retire(struct i915_gem_active *active, - struct drm_i915_gem_request *rq) -{ - const unsigned int idx = rq->engine->id; - struct i915_vma *vma = - container_of(active, struct i915_vma, last_read[idx]); - struct drm_i915_gem_object *obj = vma->obj; - - GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx)); - - i915_vma_clear_active(vma, idx); - if (i915_vma_is_active(vma)) - return; - - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); - if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) - WARN_ON(i915_vma_unbind(vma)); - - GEM_BUG_ON(!i915_gem_object_is_active(obj)); - if (--obj->active_count) - return; - - /* Bump our place on the bound list to keep it roughly in LRU order - * so that we don't steal from recently used but inactive objects - * (unless we are forced to ofc!) - */ - if (obj->bind_count) - list_move_tail(&obj->global_link, &rq->i915->mm.bound_list); - - obj->mm.dirty = true; /* be paranoid */ - - if (i915_gem_object_has_active_reference(obj)) { - i915_gem_object_clear_active_reference(obj); - i915_gem_object_put(obj); - } -} - -static void -i915_ggtt_retire__write(struct i915_gem_active *active, - struct drm_i915_gem_request *request) -{ - struct i915_vma *vma = - container_of(active, struct i915_vma, last_write); - - intel_fb_obj_flush(vma->obj, true, ORIGIN_CS); -} - -void i915_vma_destroy(struct i915_vma *vma) -{ - GEM_BUG_ON(vma->node.allocated); - GEM_BUG_ON(i915_vma_is_active(vma)); - GEM_BUG_ON(!i915_vma_is_closed(vma)); - GEM_BUG_ON(vma->fence); - - list_del(&vma->vm_link); - if (!i915_vma_is_ggtt(vma)) - i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); - - kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); -} - -void i915_vma_close(struct i915_vma *vma) -{ - GEM_BUG_ON(i915_vma_is_closed(vma)); - vma->flags |= I915_VMA_CLOSED; - - list_del(&vma->obj_link); - rb_erase(&vma->obj_node, &vma->obj->vma_tree); - - if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) - WARN_ON(i915_vma_unbind(vma)); -} - -static inline long vma_compare(struct i915_vma *vma, - struct i915_address_space *vm, - const struct i915_ggtt_view *view) -{ - GEM_BUG_ON(view && !i915_is_ggtt(vm)); - - if (vma->vm != vm) - return vma->vm - vm; - - if (!view) - return vma->ggtt_view.type; - - if (vma->ggtt_view.type != view->type) - return vma->ggtt_view.type - view->type; - - return memcmp(&vma->ggtt_view.params, - &view->params, - sizeof(view->params)); -} - -static struct i915_vma * -__i915_vma_create(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view) -{ - struct i915_vma *vma; - struct rb_node *rb, **p; - int i; - - GEM_BUG_ON(vm->closed); - - vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL); - if (vma == NULL) - return ERR_PTR(-ENOMEM); - - INIT_LIST_HEAD(&vma->exec_list); - for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) - init_request_active(&vma->last_read[i], i915_vma_retire); - init_request_active(&vma->last_write, - i915_is_ggtt(vm) ? i915_ggtt_retire__write : NULL); - init_request_active(&vma->last_fence, NULL); - list_add(&vma->vm_link, &vm->unbound_list); - vma->vm = vm; - vma->obj = obj; - vma->size = obj->base.size; - - if (view) { - vma->ggtt_view = *view; - if (view->type == I915_GGTT_VIEW_PARTIAL) { - vma->size = view->params.partial.size; - vma->size <<= PAGE_SHIFT; - } else if (view->type == I915_GGTT_VIEW_ROTATED) { - vma->size = - intel_rotation_info_size(&view->params.rotated); - vma->size <<= PAGE_SHIFT; - } - } - - if (i915_is_ggtt(vm)) { - vma->flags |= I915_VMA_GGTT; - list_add(&vma->obj_link, &obj->vma_list); - } else { - i915_ppgtt_get(i915_vm_to_ppgtt(vm)); - list_add_tail(&vma->obj_link, &obj->vma_list); - } - - rb = NULL; - p = &obj->vma_tree.rb_node; - while (*p) { - struct i915_vma *pos; - - rb = *p; - pos = rb_entry(rb, struct i915_vma, obj_node); - if (vma_compare(pos, vm, view) < 0) - p = &rb->rb_right; - else - p = &rb->rb_left; - } - rb_link_node(&vma->obj_node, rb, p); - rb_insert_color(&vma->obj_node, &obj->vma_tree); - - return vma; -} - -struct i915_vma * -i915_vma_create(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view) -{ - lockdep_assert_held(&obj->base.dev->struct_mutex); - GEM_BUG_ON(view && !i915_is_ggtt(vm)); - GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view)); - - return __i915_vma_create(obj, vm, view); -} - struct i915_vma * i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, struct i915_address_space *vm, @@ -3530,7 +3353,7 @@ i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node); long cmp; - cmp = vma_compare(vma, vm, view); + cmp = i915_vma_compare(vma, vm, view); if (cmp == 0) return vma; @@ -3555,7 +3378,7 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, vma = i915_gem_obj_to_vma(obj, vm, view); if (!vma) { - vma = __i915_vma_create(obj, vm, view); + vma = i915_vma_create(obj, vm, view); GEM_BUG_ON(vma != i915_gem_obj_to_vma(obj, vm, view)); } @@ -3747,99 +3570,3 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) return ret; } -/** - * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. - * @vma: VMA to map - * @cache_level: mapping cache level - * @flags: flags like global or local mapping - * - * DMA addresses are taken from the scatter-gather table of this object (or of - * this VMA in case of non-default GGTT views) and PTE entries set up. - * Note that DMA addresses are also the only part of the SG table we care about. - */ -int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, - u32 flags) -{ - u32 bind_flags; - u32 vma_flags; - int ret; - - if (WARN_ON(flags == 0)) - return -EINVAL; - - bind_flags = 0; - if (flags & PIN_GLOBAL) - bind_flags |= I915_VMA_GLOBAL_BIND; - if (flags & PIN_USER) - bind_flags |= I915_VMA_LOCAL_BIND; - - vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); - if (flags & PIN_UPDATE) - bind_flags |= vma_flags; - else - bind_flags &= ~vma_flags; - if (bind_flags == 0) - return 0; - - if (vma_flags == 0 && vma->vm->allocate_va_range) { - trace_i915_va_alloc(vma); - ret = vma->vm->allocate_va_range(vma->vm, - vma->node.start, - vma->node.size); - if (ret) - return ret; - } - - ret = vma->vm->bind_vma(vma, cache_level, bind_flags); - if (ret) - return ret; - - vma->flags |= bind_flags; - return 0; -} - -void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) -{ - void __iomem *ptr; - - /* Access through the GTT requires the device to be awake. */ - assert_rpm_wakelock_held(to_i915(vma->vm->dev)); - - lockdep_assert_held(&vma->vm->dev->struct_mutex); - if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) - return IO_ERR_PTR(-ENODEV); - - GEM_BUG_ON(!i915_vma_is_ggtt(vma)); - GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); - - ptr = vma->iomap; - if (ptr == NULL) { - ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable, - vma->node.start, - vma->node.size); - if (ptr == NULL) - return IO_ERR_PTR(-ENOMEM); - - vma->iomap = ptr; - } - - __i915_vma_pin(vma); - return ptr; -} - -void i915_vma_unpin_and_release(struct i915_vma **p_vma) -{ - struct i915_vma *vma; - struct drm_i915_gem_object *obj; - - vma = fetch_and_zero(p_vma); - if (!vma) - return; - - obj = vma->obj; - - i915_vma_unpin(vma); - i915_vma_close(vma); - - __i915_gem_object_release_unless_active(obj); -} diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index c23ef9db1f53..57b5849c659e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -35,7 +35,9 @@ #define __I915_GEM_GTT_H__ #include +#include +#include "i915_gem_timeline.h" #include "i915_gem_request.h" #define I915_FENCE_REG_NONE -1 @@ -138,6 +140,8 @@ typedef uint64_t gen8_ppgtt_pml4e_t; #define GEN8_PPAT_ELLC_OVERRIDE (0<<2) #define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8)) +struct sg_table; + enum i915_ggtt_view_type { I915_GGTT_VIEW_NORMAL = 0, I915_GGTT_VIEW_ROTATED, @@ -168,135 +172,7 @@ extern const struct i915_ggtt_view i915_ggtt_view_rotated; enum i915_cache_level; -/** - * A VMA represents a GEM BO that is bound into an address space. Therefore, a - * VMA's presence cannot be guaranteed before binding, or after unbinding the - * object into/from the address space. - * - * To make things as simple as possible (ie. no refcounting), a VMA's lifetime - * will always be <= an objects lifetime. So object refcounting should cover us. - */ -struct i915_vma { - struct drm_mm_node node; - struct drm_i915_gem_object *obj; - struct i915_address_space *vm; - struct drm_i915_fence_reg *fence; - struct sg_table *pages; - void __iomem *iomap; - u64 size; - u64 display_alignment; - - unsigned int flags; - /** - * How many users have pinned this object in GTT space. The following - * users can each hold at most one reference: pwrite/pread, execbuffer - * (objects are not allowed multiple times for the same batchbuffer), - * and the framebuffer code. When switching/pageflipping, the - * framebuffer code has at most two buffers pinned per crtc. - * - * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 - * bits with absolutely no headroom. So use 4 bits. - */ -#define I915_VMA_PIN_MASK 0xf -#define I915_VMA_PIN_OVERFLOW BIT(5) - - /** Flags and address space this VMA is bound to */ -#define I915_VMA_GLOBAL_BIND BIT(6) -#define I915_VMA_LOCAL_BIND BIT(7) -#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) - -#define I915_VMA_GGTT BIT(8) -#define I915_VMA_CAN_FENCE BIT(9) -#define I915_VMA_CLOSED BIT(10) - - unsigned int active; - struct i915_gem_active last_read[I915_NUM_ENGINES]; - struct i915_gem_active last_write; - struct i915_gem_active last_fence; - - /** - * Support different GGTT views into the same object. - * This means there can be multiple VMA mappings per object and per VM. - * i915_ggtt_view_type is used to distinguish between those entries. - * The default one of zero (I915_GGTT_VIEW_NORMAL) is default and also - * assumed in GEM functions which take no ggtt view parameter. - */ - struct i915_ggtt_view ggtt_view; - - /** This object's place on the active/inactive lists */ - struct list_head vm_link; - - struct list_head obj_link; /* Link in the object's VMA list */ - struct rb_node obj_node; - - /** This vma's place in the batchbuffer or on the eviction list */ - struct list_head exec_list; - - /** - * Used for performing relocations during execbuffer insertion. - */ - struct hlist_node exec_node; - unsigned long exec_handle; - struct drm_i915_gem_exec_object2 *exec_entry; -}; - -struct i915_vma * -i915_vma_create(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view); -void i915_vma_unpin_and_release(struct i915_vma **p_vma); - -static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) -{ - return vma->flags & I915_VMA_GGTT; -} - -static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma) -{ - return vma->flags & I915_VMA_CAN_FENCE; -} - -static inline bool i915_vma_is_closed(const struct i915_vma *vma) -{ - return vma->flags & I915_VMA_CLOSED; -} - -static inline unsigned int i915_vma_get_active(const struct i915_vma *vma) -{ - return vma->active; -} - -static inline bool i915_vma_is_active(const struct i915_vma *vma) -{ - return i915_vma_get_active(vma); -} - -static inline void i915_vma_set_active(struct i915_vma *vma, - unsigned int engine) -{ - vma->active |= BIT(engine); -} - -static inline void i915_vma_clear_active(struct i915_vma *vma, - unsigned int engine) -{ - vma->active &= ~BIT(engine); -} - -static inline bool i915_vma_has_active_engine(const struct i915_vma *vma, - unsigned int engine) -{ - return vma->active & BIT(engine); -} - -static inline u32 i915_ggtt_offset(const struct i915_vma *vma) -{ - GEM_BUG_ON(!i915_vma_is_ggtt(vma)); - GEM_BUG_ON(!vma->node.allocated); - GEM_BUG_ON(upper_32_bits(vma->node.start)); - GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1)); - return lower_32_bits(vma->node.start); -} +struct i915_vma; struct i915_page_dma { struct page *page; @@ -606,6 +482,13 @@ i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n) px_dma(ppgtt->base.scratch_pd); } +static inline struct i915_ggtt * +i915_vm_to_ggtt(struct i915_address_space *vm) +{ + GEM_BUG_ON(!i915_is_ggtt(vm)); + return container_of(vm, struct i915_ggtt, base); +} + int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv); int i915_ggtt_init_hw(struct drm_i915_private *dev_priv); int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv); @@ -653,88 +536,4 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, #define PIN_OFFSET_FIXED BIT(11) #define PIN_OFFSET_MASK (~4095) -int __i915_vma_do_pin(struct i915_vma *vma, - u64 size, u64 alignment, u64 flags); -static inline int __must_check -i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) -{ - BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW); - BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); - BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); - - /* Pin early to prevent the shrinker/eviction logic from destroying - * our vma as we insert and bind. - */ - if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0)) - return 0; - - return __i915_vma_do_pin(vma, size, alignment, flags); -} - -static inline int i915_vma_pin_count(const struct i915_vma *vma) -{ - return vma->flags & I915_VMA_PIN_MASK; -} - -static inline bool i915_vma_is_pinned(const struct i915_vma *vma) -{ - return i915_vma_pin_count(vma); -} - -static inline void __i915_vma_pin(struct i915_vma *vma) -{ - vma->flags++; - GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW); -} - -static inline void __i915_vma_unpin(struct i915_vma *vma) -{ - GEM_BUG_ON(!i915_vma_is_pinned(vma)); - vma->flags--; -} - -static inline void i915_vma_unpin(struct i915_vma *vma) -{ - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - __i915_vma_unpin(vma); -} - -/** - * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture - * @vma: VMA to iomap - * - * The passed in VMA has to be pinned in the global GTT mappable region. - * An extra pinning of the VMA is acquired for the return iomapping, - * the caller must call i915_vma_unpin_iomap to relinquish the pinning - * after the iomapping is no longer required. - * - * Callers must hold the struct_mutex. - * - * Returns a valid iomapped pointer or ERR_PTR. - */ -void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); -#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x)) - -/** - * i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap - * @vma: VMA to unpin - * - * Unpins the previously iomapped VMA from i915_vma_pin_iomap(). - * - * Callers must hold the struct_mutex. This function is only valid to be - * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap(). - */ -static inline void i915_vma_unpin_iomap(struct i915_vma *vma) -{ - lockdep_assert_held(&vma->vm->dev->struct_mutex); - GEM_BUG_ON(vma->iomap == NULL); - i915_vma_unpin(vma); -} - -static inline struct page *i915_vma_first_page(struct i915_vma *vma) -{ - GEM_BUG_ON(!vma->pages); - return sg_page(vma->pages->sgl); -} - #endif diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h new file mode 100644 index 000000000000..014f80392f18 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -0,0 +1,337 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_GEM_OBJECT_H__ +#define __I915_GEM_OBJECT_H__ + +#include + +#include +#include +#include + +#include + +struct drm_i915_gem_object_ops { + unsigned int flags; +#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1 +#define I915_GEM_OBJECT_IS_SHRINKABLE 0x2 + + /* Interface between the GEM object and its backing storage. + * get_pages() is called once prior to the use of the associated set + * of pages before to binding them into the GTT, and put_pages() is + * called after we no longer need them. As we expect there to be + * associated cost with migrating pages between the backing storage + * and making them available for the GPU (e.g. clflush), we may hold + * onto the pages after they are no longer referenced by the GPU + * in case they may be used again shortly (for example migrating the + * pages to a different memory domain within the GTT). put_pages() + * will therefore most likely be called when the object itself is + * being released or under memory pressure (where we attempt to + * reap pages for the shrinker). + */ + struct sg_table *(*get_pages)(struct drm_i915_gem_object *); + void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *); + + int (*dmabuf_export)(struct drm_i915_gem_object *); + void (*release)(struct drm_i915_gem_object *); +}; + +struct drm_i915_gem_object { + struct drm_gem_object base; + + const struct drm_i915_gem_object_ops *ops; + + /** List of VMAs backed by this object */ + struct list_head vma_list; + struct rb_root vma_tree; + + /** Stolen memory for this object, instead of being backed by shmem. */ + struct drm_mm_node *stolen; + struct list_head global_link; + union { + struct rcu_head rcu; + struct llist_node freed; + }; + + /** + * Whether the object is currently in the GGTT mmap. + */ + struct list_head userfault_link; + + /** Used in execbuf to temporarily hold a ref */ + struct list_head obj_exec_link; + + struct list_head batch_pool_link; + + unsigned long flags; + + /** + * Have we taken a reference for the object for incomplete GPU + * activity? + */ +#define I915_BO_ACTIVE_REF 0 + + /* + * Is the object to be mapped as read-only to the GPU + * Only honoured if hardware has relevant pte bit + */ + unsigned long gt_ro:1; + unsigned int cache_level:3; + unsigned int cache_dirty:1; + + atomic_t frontbuffer_bits; + unsigned int frontbuffer_ggtt_origin; /* write once */ + + /** Current tiling stride for the object, if it's tiled. */ + unsigned int tiling_and_stride; +#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */ +#define TILING_MASK (FENCE_MINIMUM_STRIDE-1) +#define STRIDE_MASK (~TILING_MASK) + + /** Count of VMA actually bound by this object */ + unsigned int bind_count; + unsigned int active_count; + unsigned int pin_display; + + struct { + struct mutex lock; /* protects the pages and their use */ + atomic_t pages_pin_count; + + struct sg_table *pages; + void *mapping; + + struct i915_gem_object_page_iter { + struct scatterlist *sg_pos; + unsigned int sg_idx; /* in pages, but 32bit eek! */ + + struct radix_tree_root radix; + struct mutex lock; /* protects this cache */ + } get_page; + + /** + * Advice: are the backing pages purgeable? + */ + unsigned int madv:2; + + /** + * This is set if the object has been written to since the + * pages were last acquired. + */ + bool dirty:1; + + /** + * This is set if the object has been pinned due to unknown + * swizzling. + */ + bool quirked:1; + } mm; + + /** Breadcrumb of last rendering to the buffer. + * There can only be one writer, but we allow for multiple readers. + * If there is a writer that necessarily implies that all other + * read requests are complete - but we may only be lazily clearing + * the read requests. A read request is naturally the most recent + * request on a ring, so we may have two different write and read + * requests on one ring where the write request is older than the + * read request. This allows for the CPU to read from an active + * buffer by only waiting for the write to complete. + */ + struct reservation_object *resv; + + /** References from framebuffers, locks out tiling changes. */ + unsigned long framebuffer_references; + + /** Record of address bit 17 of each page at last unbind. */ + unsigned long *bit_17; + + struct i915_gem_userptr { + uintptr_t ptr; + unsigned read_only :1; + + struct i915_mm_struct *mm; + struct i915_mmu_object *mmu_object; + struct work_struct *work; + } userptr; + + /** for phys allocated objects */ + struct drm_dma_handle *phys_handle; + + struct reservation_object __builtin_resv; +}; + +static inline struct drm_i915_gem_object * +to_intel_bo(struct drm_gem_object *gem) +{ + /* Assert that to_intel_bo(NULL) == NULL */ + BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base)); + + return container_of(gem, struct drm_i915_gem_object, base); +} + +/** + * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle + * @filp: DRM file private date + * @handle: userspace handle + * + * Returns: + * + * A pointer to the object named by the handle if such exists on @filp, NULL + * otherwise. This object is only valid whilst under the RCU read lock, and + * note carefully the object may be in the process of being destroyed. + */ +static inline struct drm_i915_gem_object * +i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle) +{ +#ifdef CONFIG_LOCKDEP + WARN_ON(debug_locks && !lock_is_held(&rcu_lock_map)); +#endif + return idr_find(&file->object_idr, handle); +} + +static inline struct drm_i915_gem_object * +i915_gem_object_lookup(struct drm_file *file, u32 handle) +{ + struct drm_i915_gem_object *obj; + + rcu_read_lock(); + obj = i915_gem_object_lookup_rcu(file, handle); + if (obj && !kref_get_unless_zero(&obj->base.refcount)) + obj = NULL; + rcu_read_unlock(); + + return obj; +} + +__deprecated +extern struct drm_gem_object * +drm_gem_object_lookup(struct drm_file *file, u32 handle); + +__attribute__((nonnull)) +static inline struct drm_i915_gem_object * +i915_gem_object_get(struct drm_i915_gem_object *obj) +{ + drm_gem_object_reference(&obj->base); + return obj; +} + +__deprecated +extern void drm_gem_object_reference(struct drm_gem_object *); + +__attribute__((nonnull)) +static inline void +i915_gem_object_put(struct drm_i915_gem_object *obj) +{ + __drm_gem_object_unreference(&obj->base); +} + +__deprecated +extern void drm_gem_object_unreference(struct drm_gem_object *); + +__deprecated +extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); + +static inline bool +i915_gem_object_is_dead(const struct drm_i915_gem_object *obj) +{ + return atomic_read(&obj->base.refcount.refcount) == 0; +} + +static inline bool +i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) +{ + return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE; +} + +static inline bool +i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) +{ + return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE; +} + +static inline bool +i915_gem_object_is_active(const struct drm_i915_gem_object *obj) +{ + return obj->active_count; +} + +static inline bool +i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj) +{ + return test_bit(I915_BO_ACTIVE_REF, &obj->flags); +} + +static inline void +i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj) +{ + lockdep_assert_held(&obj->base.dev->struct_mutex); + __set_bit(I915_BO_ACTIVE_REF, &obj->flags); +} + +static inline void +i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj) +{ + lockdep_assert_held(&obj->base.dev->struct_mutex); + __clear_bit(I915_BO_ACTIVE_REF, &obj->flags); +} + +void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj); + +static inline unsigned int +i915_gem_object_get_tiling(struct drm_i915_gem_object *obj) +{ + return obj->tiling_and_stride & TILING_MASK; +} + +static inline bool +i915_gem_object_is_tiled(struct drm_i915_gem_object *obj) +{ + return i915_gem_object_get_tiling(obj) != I915_TILING_NONE; +} + +static inline unsigned int +i915_gem_object_get_stride(struct drm_i915_gem_object *obj) +{ + return obj->tiling_and_stride & STRIDE_MASK; +} + +static inline struct intel_engine_cs * +i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) +{ + struct intel_engine_cs *engine = NULL; + struct dma_fence *fence; + + rcu_read_lock(); + fence = reservation_object_get_excl_rcu(obj->resv); + rcu_read_unlock(); + + if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) + engine = to_request(fence)->engine; + dma_fence_put(fence); + + return engine; +} + +#endif + diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 0f69fadf928f..a56559e3b034 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -30,6 +30,9 @@ #include "i915_gem.h" #include "i915_sw_fence.h" +struct drm_file; +struct drm_i915_gem_object; + struct intel_wait { struct rb_node node; struct task_struct *tsk; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c new file mode 100644 index 000000000000..738ff3a5cd6e --- /dev/null +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -0,0 +1,650 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "i915_vma.h" + +#include "i915_drv.h" +#include "intel_ringbuffer.h" +#include "intel_frontbuffer.h" + +#include + +static void +i915_vma_retire(struct i915_gem_active *active, + struct drm_i915_gem_request *rq) +{ + const unsigned int idx = rq->engine->id; + struct i915_vma *vma = + container_of(active, struct i915_vma, last_read[idx]); + struct drm_i915_gem_object *obj = vma->obj; + + GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx)); + + i915_vma_clear_active(vma, idx); + if (i915_vma_is_active(vma)) + return; + + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) + WARN_ON(i915_vma_unbind(vma)); + + GEM_BUG_ON(!i915_gem_object_is_active(obj)); + if (--obj->active_count) + return; + + /* Bump our place on the bound list to keep it roughly in LRU order + * so that we don't steal from recently used but inactive objects + * (unless we are forced to ofc!) + */ + if (obj->bind_count) + list_move_tail(&obj->global_link, &rq->i915->mm.bound_list); + + obj->mm.dirty = true; /* be paranoid */ + + if (i915_gem_object_has_active_reference(obj)) { + i915_gem_object_clear_active_reference(obj); + i915_gem_object_put(obj); + } +} + +static void +i915_ggtt_retire__write(struct i915_gem_active *active, + struct drm_i915_gem_request *request) +{ + struct i915_vma *vma = + container_of(active, struct i915_vma, last_write); + + intel_fb_obj_flush(vma->obj, true, ORIGIN_CS); +} + +static struct i915_vma * +__i915_vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) +{ + struct i915_vma *vma; + struct rb_node *rb, **p; + int i; + + GEM_BUG_ON(vm->closed); + + vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL); + if (vma == NULL) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&vma->exec_list); + for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) + init_request_active(&vma->last_read[i], i915_vma_retire); + init_request_active(&vma->last_write, + i915_is_ggtt(vm) ? i915_ggtt_retire__write : NULL); + init_request_active(&vma->last_fence, NULL); + list_add(&vma->vm_link, &vm->unbound_list); + vma->vm = vm; + vma->obj = obj; + vma->size = obj->base.size; + + if (view) { + vma->ggtt_view = *view; + if (view->type == I915_GGTT_VIEW_PARTIAL) { + vma->size = view->params.partial.size; + vma->size <<= PAGE_SHIFT; + } else if (view->type == I915_GGTT_VIEW_ROTATED) { + vma->size = + intel_rotation_info_size(&view->params.rotated); + vma->size <<= PAGE_SHIFT; + } + } + + if (i915_is_ggtt(vm)) { + vma->flags |= I915_VMA_GGTT; + list_add(&vma->obj_link, &obj->vma_list); + } else { + i915_ppgtt_get(i915_vm_to_ppgtt(vm)); + list_add_tail(&vma->obj_link, &obj->vma_list); + } + + rb = NULL; + p = &obj->vma_tree.rb_node; + while (*p) { + struct i915_vma *pos; + + rb = *p; + pos = rb_entry(rb, struct i915_vma, obj_node); + if (i915_vma_compare(pos, vm, view) < 0) + p = &rb->rb_right; + else + p = &rb->rb_left; + } + rb_link_node(&vma->obj_node, rb, p); + rb_insert_color(&vma->obj_node, &obj->vma_tree); + + return vma; +} + +struct i915_vma * +i915_vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) +{ + lockdep_assert_held(&obj->base.dev->struct_mutex); + GEM_BUG_ON(view && !i915_is_ggtt(vm)); + GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view)); + + return __i915_vma_create(obj, vm, view); +} + +/** + * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. + * @vma: VMA to map + * @cache_level: mapping cache level + * @flags: flags like global or local mapping + * + * DMA addresses are taken from the scatter-gather table of this object (or of + * this VMA in case of non-default GGTT views) and PTE entries set up. + * Note that DMA addresses are also the only part of the SG table we care about. + */ +int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, + u32 flags) +{ + u32 bind_flags; + u32 vma_flags; + int ret; + + if (WARN_ON(flags == 0)) + return -EINVAL; + + bind_flags = 0; + if (flags & PIN_GLOBAL) + bind_flags |= I915_VMA_GLOBAL_BIND; + if (flags & PIN_USER) + bind_flags |= I915_VMA_LOCAL_BIND; + + vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); + if (flags & PIN_UPDATE) + bind_flags |= vma_flags; + else + bind_flags &= ~vma_flags; + if (bind_flags == 0) + return 0; + + if (vma_flags == 0 && vma->vm->allocate_va_range) { + trace_i915_va_alloc(vma); + ret = vma->vm->allocate_va_range(vma->vm, + vma->node.start, + vma->node.size); + if (ret) + return ret; + } + + ret = vma->vm->bind_vma(vma, cache_level, bind_flags); + if (ret) + return ret; + + vma->flags |= bind_flags; + return 0; +} + +void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) +{ + void __iomem *ptr; + + /* Access through the GTT requires the device to be awake. */ + assert_rpm_wakelock_held(to_i915(vma->vm->dev)); + + lockdep_assert_held(&vma->vm->dev->struct_mutex); + if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) + return IO_ERR_PTR(-ENODEV); + + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); + + ptr = vma->iomap; + if (ptr == NULL) { + ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable, + vma->node.start, + vma->node.size); + if (ptr == NULL) + return IO_ERR_PTR(-ENOMEM); + + vma->iomap = ptr; + } + + __i915_vma_pin(vma); + return ptr; +} + +void i915_vma_unpin_and_release(struct i915_vma **p_vma) +{ + struct i915_vma *vma; + struct drm_i915_gem_object *obj; + + vma = fetch_and_zero(p_vma); + if (!vma) + return; + + obj = vma->obj; + + i915_vma_unpin(vma); + i915_vma_close(vma); + + __i915_gem_object_release_unless_active(obj); +} + +bool +i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +{ + if (!drm_mm_node_allocated(&vma->node)) + return false; + + if (vma->node.size < size) + return true; + + if (alignment && vma->node.start & (alignment - 1)) + return true; + + if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma)) + return true; + + if (flags & PIN_OFFSET_BIAS && + vma->node.start < (flags & PIN_OFFSET_MASK)) + return true; + + if (flags & PIN_OFFSET_FIXED && + vma->node.start != (flags & PIN_OFFSET_MASK)) + return true; + + return false; +} + +void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj = vma->obj; + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + bool mappable, fenceable; + u32 fence_size, fence_alignment; + + fence_size = i915_gem_get_ggtt_size(dev_priv, + vma->size, + i915_gem_object_get_tiling(obj)); + fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, + vma->size, + i915_gem_object_get_tiling(obj), + true); + + fenceable = (vma->node.size == fence_size && + (vma->node.start & (fence_alignment - 1)) == 0); + + mappable = (vma->node.start + fence_size <= + dev_priv->ggtt.mappable_end); + + /* + * Explicitly disable for rotated VMA since the display does not + * need the fence and the VMA is not accessible to other users. + */ + if (mappable && fenceable && + vma->ggtt_view.type != I915_GGTT_VIEW_ROTATED) + vma->flags |= I915_VMA_CAN_FENCE; + else + vma->flags &= ~I915_VMA_CAN_FENCE; +} + +bool i915_gem_valid_gtt_space(struct i915_vma *vma, + unsigned long cache_level) +{ + struct drm_mm_node *gtt_space = &vma->node; + struct drm_mm_node *other; + + /* + * On some machines we have to be careful when putting differing types + * of snoopable memory together to avoid the prefetcher crossing memory + * domains and dying. During vm initialisation, we decide whether or not + * these constraints apply and set the drm_mm.color_adjust + * appropriately. + */ + if (vma->vm->mm.color_adjust == NULL) + return true; + + if (!drm_mm_node_allocated(gtt_space)) + return true; + + if (list_empty(>t_space->node_list)) + return true; + + other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); + if (other->allocated && !other->hole_follows && other->color != cache_level) + return false; + + other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); + if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) + return false; + + return true; +} + +/** + * i915_vma_insert - finds a slot for the vma in its address space + * @vma: the vma + * @size: requested size in bytes (can be larger than the VMA) + * @alignment: required alignment + * @flags: mask of PIN_* flags to use + * + * First we try to allocate some free space that meets the requirements for + * the VMA. Failiing that, if the flags permit, it will evict an old VMA, + * preferrably the oldest idle entry to make room for the new VMA. + * + * Returns: + * 0 on success, negative error code otherwise. + */ +static int +i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +{ + struct drm_i915_private *dev_priv = to_i915(vma->vm->dev); + struct drm_i915_gem_object *obj = vma->obj; + u64 start, end; + int ret; + + GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); + GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); + + size = max(size, vma->size); + if (flags & PIN_MAPPABLE) + size = i915_gem_get_ggtt_size(dev_priv, size, + i915_gem_object_get_tiling(obj)); + + alignment = max(max(alignment, vma->display_alignment), + i915_gem_get_ggtt_alignment(dev_priv, size, + i915_gem_object_get_tiling(obj), + flags & PIN_MAPPABLE)); + + start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; + + end = vma->vm->total; + if (flags & PIN_MAPPABLE) + end = min_t(u64, end, dev_priv->ggtt.mappable_end); + if (flags & PIN_ZONE_4G) + end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); + + /* If binding the object/GGTT view requires more space than the entire + * aperture has, reject it early before evicting everything in a vain + * attempt to find space. + */ + if (size > end) { + DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n", + size, obj->base.size, + flags & PIN_MAPPABLE ? "mappable" : "total", + end); + return -E2BIG; + } + + ret = i915_gem_object_pin_pages(obj); + if (ret) + return ret; + + if (flags & PIN_OFFSET_FIXED) { + u64 offset = flags & PIN_OFFSET_MASK; + if (offset & (alignment - 1) || offset > end - size) { + ret = -EINVAL; + goto err_unpin; + } + + vma->node.start = offset; + vma->node.size = size; + vma->node.color = obj->cache_level; + ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); + if (ret) { + ret = i915_gem_evict_for_vma(vma); + if (ret == 0) + ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); + if (ret) + goto err_unpin; + } + } else { + u32 search_flag, alloc_flag; + + if (flags & PIN_HIGH) { + search_flag = DRM_MM_SEARCH_BELOW; + alloc_flag = DRM_MM_CREATE_TOP; + } else { + search_flag = DRM_MM_SEARCH_DEFAULT; + alloc_flag = DRM_MM_CREATE_DEFAULT; + } + + /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks, + * so we know that we always have a minimum alignment of 4096. + * The drm_mm range manager is optimised to return results + * with zero alignment, so where possible use the optimal + * path. + */ + if (alignment <= 4096) + alignment = 0; + +search_free: + ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm, + &vma->node, + size, alignment, + obj->cache_level, + start, end, + search_flag, + alloc_flag); + if (ret) { + ret = i915_gem_evict_something(vma->vm, size, alignment, + obj->cache_level, + start, end, + flags); + if (ret == 0) + goto search_free; + + goto err_unpin; + } + + GEM_BUG_ON(vma->node.start < start); + GEM_BUG_ON(vma->node.start + vma->node.size > end); + } + GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); + + list_move_tail(&obj->global_link, &dev_priv->mm.bound_list); + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + obj->bind_count++; + GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); + + return 0; + +err_unpin: + i915_gem_object_unpin_pages(obj); + return ret; +} + +int __i915_vma_do_pin(struct i915_vma *vma, + u64 size, u64 alignment, u64 flags) +{ + unsigned int bound = vma->flags; + int ret; + + lockdep_assert_held(&vma->vm->dev->struct_mutex); + GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); + GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma)); + + if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { + ret = -EBUSY; + goto err; + } + + if ((bound & I915_VMA_BIND_MASK) == 0) { + ret = i915_vma_insert(vma, size, alignment, flags); + if (ret) + goto err; + } + + ret = i915_vma_bind(vma, vma->obj->cache_level, flags); + if (ret) + goto err; + + if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) + __i915_vma_set_map_and_fenceable(vma); + + GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); + return 0; + +err: + __i915_vma_unpin(vma); + return ret; +} + +void i915_vma_destroy(struct i915_vma *vma) +{ + GEM_BUG_ON(vma->node.allocated); + GEM_BUG_ON(i915_vma_is_active(vma)); + GEM_BUG_ON(!i915_vma_is_closed(vma)); + GEM_BUG_ON(vma->fence); + + list_del(&vma->vm_link); + if (!i915_vma_is_ggtt(vma)) + i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); + + kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); +} + +void i915_vma_close(struct i915_vma *vma) +{ + GEM_BUG_ON(i915_vma_is_closed(vma)); + vma->flags |= I915_VMA_CLOSED; + + list_del(&vma->obj_link); + rb_erase(&vma->obj_node, &vma->obj->vma_tree); + + if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) + WARN_ON(i915_vma_unbind(vma)); +} + +static void __i915_vma_iounmap(struct i915_vma *vma) +{ + GEM_BUG_ON(i915_vma_is_pinned(vma)); + + if (vma->iomap == NULL) + return; + + io_mapping_unmap(vma->iomap); + vma->iomap = NULL; +} + +int i915_vma_unbind(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj = vma->obj; + unsigned long active; + int ret; + + lockdep_assert_held(&obj->base.dev->struct_mutex); + + /* First wait upon any activity as retiring the request may + * have side-effects such as unpinning or even unbinding this vma. + */ + active = i915_vma_get_active(vma); + if (active) { + int idx; + + /* When a closed VMA is retired, it is unbound - eek. + * In order to prevent it from being recursively closed, + * take a pin on the vma so that the second unbind is + * aborted. + * + * Even more scary is that the retire callback may free + * the object (last active vma). To prevent the explosion + * we defer the actual object free to a worker that can + * only proceed once it acquires the struct_mutex (which + * we currently hold, therefore it cannot free this object + * before we are finished). + */ + __i915_vma_pin(vma); + + for_each_active(active, idx) { + ret = i915_gem_active_retire(&vma->last_read[idx], + &vma->vm->dev->struct_mutex); + if (ret) + break; + } + + __i915_vma_unpin(vma); + if (ret) + return ret; + + GEM_BUG_ON(i915_vma_is_active(vma)); + } + + if (i915_vma_is_pinned(vma)) + return -EBUSY; + + if (!drm_mm_node_allocated(&vma->node)) + goto destroy; + + GEM_BUG_ON(obj->bind_count == 0); + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + + if (i915_vma_is_map_and_fenceable(vma)) { + /* release the fence reg _after_ flushing */ + ret = i915_vma_put_fence(vma); + if (ret) + return ret; + + /* Force a pagefault for domain tracking on next user access */ + i915_gem_release_mmap(obj); + + __i915_vma_iounmap(vma); + vma->flags &= ~I915_VMA_CAN_FENCE; + } + + if (likely(!vma->vm->closed)) { + trace_i915_vma_unbind(vma); + vma->vm->unbind_vma(vma); + } + vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); + + drm_mm_remove_node(&vma->node); + list_move_tail(&vma->vm_link, &vma->vm->unbound_list); + + if (vma->pages != obj->mm.pages) { + GEM_BUG_ON(!vma->pages); + sg_free_table(vma->pages); + kfree(vma->pages); + } + vma->pages = NULL; + + /* Since the unbound list is global, only move to that list if + * no more VMAs exist. */ + if (--obj->bind_count == 0) + list_move_tail(&obj->global_link, + &to_i915(obj->base.dev)->mm.unbound_list); + + /* And finally now the object is completely decoupled from this vma, + * we can drop its hold on the backing storage and allow it to be + * reaped by the shrinker. + */ + i915_gem_object_unpin_pages(obj); + +destroy: + if (unlikely(i915_vma_is_closed(vma))) + i915_vma_destroy(vma); + + return 0; +} + diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h new file mode 100644 index 000000000000..329b3fe79d53 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -0,0 +1,342 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_VMA_H__ +#define __I915_VMA_H__ + +#include + +#include + +#include "i915_gem_gtt.h" +#include "i915_gem_fence_reg.h" +#include "i915_gem_object.h" +#include "i915_gem_request.h" + + +enum i915_cache_level; + +/** + * A VMA represents a GEM BO that is bound into an address space. Therefore, a + * VMA's presence cannot be guaranteed before binding, or after unbinding the + * object into/from the address space. + * + * To make things as simple as possible (ie. no refcounting), a VMA's lifetime + * will always be <= an objects lifetime. So object refcounting should cover us. + */ +struct i915_vma { + struct drm_mm_node node; + struct drm_i915_gem_object *obj; + struct i915_address_space *vm; + struct drm_i915_fence_reg *fence; + struct sg_table *pages; + void __iomem *iomap; + u64 size; + u64 display_alignment; + + unsigned int flags; + /** + * How many users have pinned this object in GTT space. The following + * users can each hold at most one reference: pwrite/pread, execbuffer + * (objects are not allowed multiple times for the same batchbuffer), + * and the framebuffer code. When switching/pageflipping, the + * framebuffer code has at most two buffers pinned per crtc. + * + * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 + * bits with absolutely no headroom. So use 4 bits. + */ +#define I915_VMA_PIN_MASK 0xf +#define I915_VMA_PIN_OVERFLOW BIT(5) + + /** Flags and address space this VMA is bound to */ +#define I915_VMA_GLOBAL_BIND BIT(6) +#define I915_VMA_LOCAL_BIND BIT(7) +#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) + +#define I915_VMA_GGTT BIT(8) +#define I915_VMA_CAN_FENCE BIT(9) +#define I915_VMA_CLOSED BIT(10) + + unsigned int active; + struct i915_gem_active last_read[I915_NUM_ENGINES]; + struct i915_gem_active last_write; + struct i915_gem_active last_fence; + + /** + * Support different GGTT views into the same object. + * This means there can be multiple VMA mappings per object and per VM. + * i915_ggtt_view_type is used to distinguish between those entries. + * The default one of zero (I915_GGTT_VIEW_NORMAL) is default and also + * assumed in GEM functions which take no ggtt view parameter. + */ + struct i915_ggtt_view ggtt_view; + + /** This object's place on the active/inactive lists */ + struct list_head vm_link; + + struct list_head obj_link; /* Link in the object's VMA list */ + struct rb_node obj_node; + + /** This vma's place in the batchbuffer or on the eviction list */ + struct list_head exec_list; + + /** + * Used for performing relocations during execbuffer insertion. + */ + struct hlist_node exec_node; + unsigned long exec_handle; + struct drm_i915_gem_exec_object2 *exec_entry; +}; + +struct i915_vma * +i915_vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view); + +void i915_vma_unpin_and_release(struct i915_vma **p_vma); + +static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_GGTT; +} + +static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_CAN_FENCE; +} + +static inline bool i915_vma_is_closed(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_CLOSED; +} + +static inline unsigned int i915_vma_get_active(const struct i915_vma *vma) +{ + return vma->active; +} + +static inline bool i915_vma_is_active(const struct i915_vma *vma) +{ + return i915_vma_get_active(vma); +} + +static inline void i915_vma_set_active(struct i915_vma *vma, + unsigned int engine) +{ + vma->active |= BIT(engine); +} + +static inline void i915_vma_clear_active(struct i915_vma *vma, + unsigned int engine) +{ + vma->active &= ~BIT(engine); +} + +static inline bool i915_vma_has_active_engine(const struct i915_vma *vma, + unsigned int engine) +{ + return vma->active & BIT(engine); +} + +static inline u32 i915_ggtt_offset(const struct i915_vma *vma) +{ + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + GEM_BUG_ON(!vma->node.allocated); + GEM_BUG_ON(upper_32_bits(vma->node.start)); + GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1)); + return lower_32_bits(vma->node.start); +} + +static inline struct i915_vma *i915_vma_get(struct i915_vma *vma) +{ + i915_gem_object_get(vma->obj); + return vma; +} + +static inline void i915_vma_put(struct i915_vma *vma) +{ + i915_gem_object_put(vma->obj); +} + +static inline long +i915_vma_compare(struct i915_vma *vma, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) +{ + GEM_BUG_ON(view && !i915_vma_is_ggtt(vma)); + + if (vma->vm != vm) + return vma->vm - vm; + + if (!view) + return vma->ggtt_view.type; + + if (vma->ggtt_view.type != view->type) + return vma->ggtt_view.type - view->type; + + return memcmp(&vma->ggtt_view.params, + &view->params, + sizeof(view->params)); +} + +int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, + u32 flags); +bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level); +bool +i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); +void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); +int __must_check i915_vma_unbind(struct i915_vma *vma); +void i915_vma_close(struct i915_vma *vma); +void i915_vma_destroy(struct i915_vma *vma); + +int __i915_vma_do_pin(struct i915_vma *vma, + u64 size, u64 alignment, u64 flags); +static inline int __must_check +i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +{ + BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW); + BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); + BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); + + /* Pin early to prevent the shrinker/eviction logic from destroying + * our vma as we insert and bind. + */ + if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0)) + return 0; + + return __i915_vma_do_pin(vma, size, alignment, flags); +} + +static inline int i915_vma_pin_count(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_PIN_MASK; +} + +static inline bool i915_vma_is_pinned(const struct i915_vma *vma) +{ + return i915_vma_pin_count(vma); +} + +static inline void __i915_vma_pin(struct i915_vma *vma) +{ + vma->flags++; + GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW); +} + +static inline void __i915_vma_unpin(struct i915_vma *vma) +{ + GEM_BUG_ON(!i915_vma_is_pinned(vma)); + vma->flags--; +} + +static inline void i915_vma_unpin(struct i915_vma *vma) +{ + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + __i915_vma_unpin(vma); +} + +/** + * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture + * @vma: VMA to iomap + * + * The passed in VMA has to be pinned in the global GTT mappable region. + * An extra pinning of the VMA is acquired for the return iomapping, + * the caller must call i915_vma_unpin_iomap to relinquish the pinning + * after the iomapping is no longer required. + * + * Callers must hold the struct_mutex. + * + * Returns a valid iomapped pointer or ERR_PTR. + */ +void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); +#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x)) + +/** + * i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap + * @vma: VMA to unpin + * + * Unpins the previously iomapped VMA from i915_vma_pin_iomap(). + * + * Callers must hold the struct_mutex. This function is only valid to be + * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap(). + */ +static inline void i915_vma_unpin_iomap(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->vm->dev->struct_mutex); + GEM_BUG_ON(vma->iomap == NULL); + i915_vma_unpin(vma); +} + +static inline struct page *i915_vma_first_page(struct i915_vma *vma) +{ + GEM_BUG_ON(!vma->pages); + return sg_page(vma->pages->sgl); +} + +/** + * i915_vma_pin_fence - pin fencing state + * @vma: vma to pin fencing for + * + * This pins the fencing state (whether tiled or untiled) to make sure the + * vma (and its object) is ready to be used as a scanout target. Fencing + * status must be synchronize first by calling i915_vma_get_fence(): + * + * The resulting fence pin reference must be released again with + * i915_vma_unpin_fence(). + * + * Returns: + * + * True if the vma has a fence, false otherwise. + */ +static inline bool +i915_vma_pin_fence(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->vm->dev->struct_mutex); + if (vma->fence) { + vma->fence->pin_count++; + return true; + } else + return false; +} + +/** + * i915_vma_unpin_fence - unpin fencing state + * @vma: vma to unpin fencing for + * + * This releases the fence pin reference acquired through + * i915_vma_pin_fence. It will handle both objects with and without an + * attached fence correctly, callers do not need to distinguish this. + */ +static inline void +i915_vma_unpin_fence(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->vm->dev->struct_mutex); + if (vma->fence) { + GEM_BUG_ON(vma->fence->pin_count <= 0); + vma->fence->pin_count--; + } +} + +#endif +