drm/i915: Introduce & use new lightweight SGL iterators
authorDave Gordon <david.s.gordon@intel.com>
Fri, 20 May 2016 10:54:06 +0000 (11:54 +0100)
committerChris Wilson <chris@chris-wilson.co.uk>
Fri, 20 May 2016 12:43:00 +0000 (13:43 +0100)
The existing for_each_sg_page() iterator is somewhat heavyweight, and is
limiting i915 driver performance in a few benchmarks. So here we
introduce somewhat lighter weight iterators, primarily for use with GEM
objects or other case where we need only deal with whole aligned pages.

Unlike the old iterator, the new iterators use an internal state
structure which is not intended to be accessed by the caller; instead
each takes as a parameter an output variable which is set before each
iteration. This makes them particularly simple to use :)

One of the new iterators provides the caller with the DMA address of
each page in turn; the other provides the 'struct page' pointer required
by many memory management operations.

Various uses of for_each_sg_page() are then converted to the new macros.

v2: Force inlining of the sg_iter constructor and make the union
anonymous.

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/1463741647-15666-4-git-send-email-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_fence.c
drivers/gpu/drm/i915/i915_gem_gtt.c
drivers/gpu/drm/i915/i915_gem_userptr.c

index 6894d8e0a4d28f893c07503ed820f818b0816d21..63ff5fa2b2bdeb7e217a938ad620a8b96adcc304 100644 (file)
@@ -2251,9 +2251,56 @@ struct drm_i915_gem_object {
 };
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
-void i915_gem_track_fb(struct drm_i915_gem_object *old,
-                      struct drm_i915_gem_object *new,
-                      unsigned frontbuffer_bits);
+/*
+ * Optimised SGL iterator for GEM objects
+ */
+static __always_inline struct sgt_iter {
+       struct scatterlist *sgp;
+       union {
+               unsigned long pfn;
+               dma_addr_t dma;
+       };
+       unsigned int curr;
+       unsigned int max;
+} __sgt_iter(struct scatterlist *sgl, bool dma) {
+       struct sgt_iter s = { .sgp = sgl };
+
+       if (s.sgp) {
+               s.max = s.curr = s.sgp->offset;
+               s.max += s.sgp->length;
+               if (dma)
+                       s.dma = sg_dma_address(s.sgp);
+               else
+                       s.pfn = page_to_pfn(sg_page(s.sgp));
+       }
+
+       return s;
+}
+
+/**
+ * for_each_sgt_dma - iterate over the DMA addresses of the given sg_table
+ * @__dmap:    DMA address (output)
+ * @__iter:    'struct sgt_iter' (iterator state, internal)
+ * @__sgt:     sg_table to iterate over (input)
+ */
+#define for_each_sgt_dma(__dmap, __iter, __sgt)                                \
+       for ((__iter) = __sgt_iter((__sgt)->sgl, true);                 \
+            ((__dmap) = (__iter).dma + (__iter).curr);                 \
+            (((__iter).curr += PAGE_SIZE) < (__iter).max) ||           \
+            ((__iter) = __sgt_iter(sg_next((__iter).sgp), true), 0))
+
+/**
+ * for_each_sgt_page - iterate over the pages of the given sg_table
+ * @__pp:      page pointer (output)
+ * @__iter:    'struct sgt_iter' (iterator state, internal)
+ * @__sgt:     sg_table to iterate over (input)
+ */
+#define for_each_sgt_page(__pp, __iter, __sgt)                         \
+       for ((__iter) = __sgt_iter((__sgt)->sgl, false);                \
+            ((__pp) = (__iter).pfn == 0 ? NULL :                       \
+             pfn_to_page((__iter).pfn + ((__iter).curr >> PAGE_SHIFT))); \
+            (((__iter).curr += PAGE_SIZE) < (__iter).max) ||           \
+            ((__iter) = __sgt_iter(sg_next((__iter).sgp), false), 0))
 
 /**
  * Request queue structure.
@@ -3108,6 +3155,11 @@ int i915_gem_dumb_create(struct drm_file *file_priv,
                         struct drm_mode_create_dumb *args);
 int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
                      uint32_t handle, uint64_t *offset);
+
+void i915_gem_track_fb(struct drm_i915_gem_object *old,
+                      struct drm_i915_gem_object *new,
+                      unsigned frontbuffer_bits);
+
 /**
  * Returns true if seq1 is later than seq2.
  */
index fd3be2b385cb751a3057ea895f1fe3403d92ac5b..3251decf10f5c77b724fba47ac1bae941f19c62e 100644 (file)
@@ -2165,7 +2165,8 @@ i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
 static void
 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 {
-       struct sg_page_iter sg_iter;
+       struct sgt_iter sgt_iter;
+       struct page *page;
        int ret;
 
        BUG_ON(obj->madv == __I915_MADV_PURGED);
@@ -2187,9 +2188,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
        if (obj->madv == I915_MADV_DONTNEED)
                obj->dirty = 0;
 
-       for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
-               struct page *page = sg_page_iter_page(&sg_iter);
-
+       for_each_sgt_page(page, sgt_iter, obj->pages) {
                if (obj->dirty)
                        set_page_dirty(page);
 
@@ -2246,7 +2245,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
        struct address_space *mapping;
        struct sg_table *st;
        struct scatterlist *sg;
-       struct sg_page_iter sg_iter;
+       struct sgt_iter sgt_iter;
        struct page *page;
        unsigned long last_pfn = 0;     /* suppress gcc warning */
        int ret;
@@ -2343,8 +2342,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 
 err_pages:
        sg_mark_end(sg);
-       for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
-               put_page(sg_page_iter_page(&sg_iter));
+       for_each_sgt_page(page, sgt_iter, st)
+               put_page(page);
        sg_free_table(st);
        kfree(st);
 
@@ -2403,7 +2402,8 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
 {
        unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
        struct sg_table *sgt = obj->pages;
-       struct sg_page_iter sg_iter;
+       struct sgt_iter sgt_iter;
+       struct page *page;
        struct page *stack_pages[32];
        struct page **pages = stack_pages;
        unsigned long i = 0;
@@ -2420,8 +2420,8 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
                        return NULL;
        }
 
-       for_each_sg_page(sgt->sgl, &sg_iter, sgt->nents, 0)
-               pages[i++] = sg_page_iter_page(&sg_iter);
+       for_each_sgt_page(page, sgt_iter, sgt)
+               pages[i++] = page;
 
        /* Check that we have the expected number of pages */
        GEM_BUG_ON(i != n_pages);
index a2b938ec01a7ca6d4854a83213c56aa3f02f35ef..2b6bdc267fb5ed590cd4935a65a7484dda0eec10 100644 (file)
@@ -745,15 +745,15 @@ i915_gem_swizzle_page(struct page *page)
 void
 i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
 {
-       struct sg_page_iter sg_iter;
+       struct sgt_iter sgt_iter;
+       struct page *page;
        int i;
 
        if (obj->bit_17 == NULL)
                return;
 
        i = 0;
-       for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
-               struct page *page = sg_page_iter_page(&sg_iter);
+       for_each_sgt_page(page, sgt_iter, obj->pages) {
                char new_bit_17 = page_to_phys(page) >> 17;
                if ((new_bit_17 & 0x1) !=
                    (test_bit(i, obj->bit_17) != 0)) {
@@ -775,7 +775,8 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
 void
 i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
 {
-       struct sg_page_iter sg_iter;
+       struct sgt_iter sgt_iter;
+       struct page *page;
        int page_count = obj->base.size >> PAGE_SHIFT;
        int i;
 
@@ -790,8 +791,9 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
        }
 
        i = 0;
-       for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
-               if (page_to_phys(sg_page_iter_page(&sg_iter)) & (1 << 17))
+
+       for_each_sgt_page(page, sgt_iter, obj->pages) {
+               if (page_to_phys(page) & (1 << 17))
                        __set_bit(i, obj->bit_17);
                else
                        __clear_bit(i, obj->bit_17);
index 7eab619a3eb2c4cf4e68c9340844e979cd73f783..46684779d4d6f13ff1409663c3644e9ad6b2dd4c 100644 (file)
@@ -1839,20 +1839,19 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
                                      enum i915_cache_level cache_level, u32 flags)
 {
        struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-       gen6_pte_t *pt_vaddr;
        unsigned first_entry = start >> PAGE_SHIFT;
        unsigned act_pt = first_entry / GEN6_PTES;
        unsigned act_pte = first_entry % GEN6_PTES;
-       struct sg_page_iter sg_iter;
+       gen6_pte_t *pt_vaddr = NULL;
+       struct sgt_iter sgt_iter;
+       dma_addr_t addr;
 
-       pt_vaddr = NULL;
-       for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
+       for_each_sgt_dma(addr, sgt_iter, pages) {
                if (pt_vaddr == NULL)
                        pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
 
                pt_vaddr[act_pte] =
-                       vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
-                                      cache_level, true, flags);
+                       vm->pte_encode(addr, cache_level, true, flags);
 
                if (++act_pte == GEN6_PTES) {
                        kunmap_px(ppgtt, pt_vaddr);
@@ -1861,6 +1860,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
                        act_pte = 0;
                }
        }
+
        if (pt_vaddr)
                kunmap_px(ppgtt, pt_vaddr);
 }
@@ -2362,22 +2362,20 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 {
        struct drm_i915_private *dev_priv = to_i915(vm->dev);
        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-       unsigned first_entry = start >> PAGE_SHIFT;
-       gen8_pte_t __iomem *gtt_entries =
-               (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
-       int i = 0;
-       struct sg_page_iter sg_iter;
-       dma_addr_t addr = 0; /* shut up gcc */
+       struct sgt_iter sgt_iter;
+       gen8_pte_t __iomem *gtt_entries;
+       gen8_pte_t gtt_entry;
+       dma_addr_t addr;
        int rpm_atomic_seq;
+       int i = 0;
 
        rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
-       for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
-               addr = sg_dma_address(sg_iter.sg) +
-                       (sg_iter.sg_pgoffset << PAGE_SHIFT);
-               gen8_set_pte(&gtt_entries[i],
-                            gen8_pte_encode(addr, level, true));
-               i++;
+       gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
+
+       for_each_sgt_dma(addr, sgt_iter, st) {
+               gtt_entry = gen8_pte_encode(addr, level, true);
+               gen8_set_pte(&gtt_entries[i++], gtt_entry);
        }
 
        /*
@@ -2388,8 +2386,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
         * hardware should work, we must keep this posting read for paranoia.
         */
        if (i != 0)
-               WARN_ON(readq(&gtt_entries[i-1])
-                       != gen8_pte_encode(addr, level, true));
+               WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry);
 
        /* This next bit makes the above posting read even more important. We
         * want to flush the TLBs only after we're certain all the PTE updates
@@ -2440,20 +2437,20 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 {
        struct drm_i915_private *dev_priv = to_i915(vm->dev);
        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-       unsigned first_entry = start >> PAGE_SHIFT;
-       gen6_pte_t __iomem *gtt_entries =
-               (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
-       int i = 0;
-       struct sg_page_iter sg_iter;
-       dma_addr_t addr = 0;
+       struct sgt_iter sgt_iter;
+       gen6_pte_t __iomem *gtt_entries;
+       gen6_pte_t gtt_entry;
+       dma_addr_t addr;
        int rpm_atomic_seq;
+       int i = 0;
 
        rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
-       for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
-               addr = sg_page_iter_dma_address(&sg_iter);
-               iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
-               i++;
+       gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
+
+       for_each_sgt_dma(addr, sgt_iter, st) {
+               gtt_entry = vm->pte_encode(addr, level, true, flags);
+               iowrite32(gtt_entry, &gtt_entries[i++]);
        }
 
        /* XXX: This serves as a posting read to make sure that the PTE has
@@ -2462,10 +2459,8 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
         * of NUMA access patterns. Therefore, even with the way we assume
         * hardware should work, we must keep this posting read for paranoia.
         */
-       if (i != 0) {
-               unsigned long gtt = readl(&gtt_entries[i-1]);
-               WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
-       }
+       if (i != 0)
+               WARN_ON(readl(&gtt_entries[i-1]) != gtt_entry);
 
        /* This next bit makes the above posting read even more important. We
         * want to flush the TLBs only after we're certain all the PTE updates
@@ -3399,9 +3394,11 @@ static struct sg_table *
 intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
                          struct drm_i915_gem_object *obj)
 {
+       const size_t n_pages = obj->base.size / PAGE_SIZE;
        unsigned int size_pages = rot_info->plane[0].width * rot_info->plane[0].height;
        unsigned int size_pages_uv;
-       struct sg_page_iter sg_iter;
+       struct sgt_iter sgt_iter;
+       dma_addr_t dma_addr;
        unsigned long i;
        dma_addr_t *page_addr_list;
        struct sg_table *st;
@@ -3410,7 +3407,7 @@ intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
        int ret = -ENOMEM;
 
        /* Allocate a temporary list of source pages for random access. */
-       page_addr_list = drm_malloc_gfp(obj->base.size / PAGE_SIZE,
+       page_addr_list = drm_malloc_gfp(n_pages,
                                        sizeof(dma_addr_t),
                                        GFP_TEMPORARY);
        if (!page_addr_list)
@@ -3433,11 +3430,10 @@ intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
 
        /* Populate source page list from the object. */
        i = 0;
-       for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
-               page_addr_list[i] = sg_page_iter_dma_address(&sg_iter);
-               i++;
-       }
+       for_each_sgt_dma(dma_addr, sgt_iter, obj->pages)
+               page_addr_list[i++] = dma_addr;
 
+       GEM_BUG_ON(i != n_pages);
        st->nents = 0;
        sg = st->sgl;
 
index a84625b712267bd8b07e39f0777e83dd267bb289..2314c88323e39861152485ccfd1b8643ce8bc41f 100644 (file)
@@ -706,7 +706,8 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 static void
 i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
 {
-       struct sg_page_iter sg_iter;
+       struct sgt_iter sgt_iter;
+       struct page *page;
 
        BUG_ON(obj->userptr.work != NULL);
        __i915_gem_userptr_set_active(obj, false);
@@ -716,9 +717,7 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
 
        i915_gem_gtt_finish_object(obj);
 
-       for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
-               struct page *page = sg_page_iter_page(&sg_iter);
-
+       for_each_sgt_page(page, sgt_iter, obj->pages) {
                if (obj->dirty)
                        set_page_dirty(page);