From 63d15326168d54cfef082f6899c86f8260db4037 Mon Sep 17 00:00:00 2001
From: Dave Gordon
Date: Fri, 20 May 2016 11:54:07 +0100
Subject: [PATCH] drm/i915: Inline sg_next() for the optimised SGL iterator

Avoiding the out-of-line call to sg_next() reduces the kernel execution
overhead by 10% in some workloads (for example the Unreal Engine 4 demo
Atlantis on 2GiB GTTs) which are dominated by the cost of inserting PTEs
due to texture thrashing. We can demonstrate this in a microbenchmark
that forces us to rebind the object on every execbuf, where we can
measure a 25% improvement, in the time required to execute an execbuf
requiring a texture to be rebound, for inlining the sg_next() for large
texture sizes.

Benchmark: igt/benchmarks/gem_exec_fault
Benchmark: igt/benchmarks/gem_exec_trace/Atlantis
Signed-off-by: Dave Gordon
Cc: Chris Wilson
Reviewed-by: Chris Wilson
Signed-off-by: Chris Wilson
Link: http://patchwork.freedesktop.org/patch/msgid/1463741647-15666-5-git-send-email-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_drv.h | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 63ff5fa2b2bd..e2abd9eb352b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2277,6 +2277,25 @@ static __always_inline struct sgt_iter {
 	return s;
 }
 
+/**
+ * __sg_next - return the next scatterlist entry in a list
+ * @sg:		The current sg entry
+ *
+ * Description:
+ *   If the entry is the last, return NULL; otherwise, step to the next
+ *   element in the array (@sg@+1). If that's a chain pointer, follow it;
+ *   otherwise just return the pointer to the current element.
+ **/
+static inline struct scatterlist *__sg_next(struct scatterlist *sg)
+{
+#ifdef CONFIG_DEBUG_SG
+	BUG_ON(sg->sg_magic != SG_MAGIC);
+#endif
+	return sg_is_last(sg) ? NULL :
+		likely(!sg_is_chain(++sg)) ? sg :
+		sg_chain_ptr(sg);
+}
+
 /**
  * for_each_sgt_dma - iterate over the DMA addresses of the given sg_table
  * @__dmap:	DMA address (output)
@@ -2287,7 +2306,7 @@ static __always_inline struct sgt_iter {
 	for ((__iter) = __sgt_iter((__sgt)->sgl, true);			\
 	     ((__dmap) = (__iter).dma + (__iter).curr);			\
 	     (((__iter).curr += PAGE_SIZE) < (__iter).max) ||		\
-	     ((__iter) = __sgt_iter(sg_next((__iter).sgp), true), 0))
+	     ((__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0))
 
 /**
  * for_each_sgt_page - iterate over the pages of the given sg_table
@@ -2300,7 +2319,7 @@ static __always_inline struct sgt_iter {
 	     ((__pp) = (__iter).pfn == 0 ? NULL :			\
 	     pfn_to_page((__iter).pfn + ((__iter).curr >> PAGE_SHIFT))); \
 	     (((__iter).curr += PAGE_SIZE) < (__iter).max) ||		\
-	     ((__iter) = __sgt_iter(sg_next((__iter).sgp), false), 0))
+	     ((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0))
 
 /**
  * Request queue structure.
-- 
2.20.1