From eed39c1918f1803948d736c444bfacba2a482ad0 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 19 Oct 2021 13:27:10 +0100 Subject: [PATCH] drm/i915: Flush TLBs before releasing backing store commit 7938d61591d33394a21bdd7797a245b65428f44c upstream. We need to flush TLBs before releasing backing store otherwise userspace is able to encounter stale entries if a) it is not declaring access to certain buffers and b) it races with the backing store release from a such undeclared execution already executing on the GPU in parallel. The approach taken is to mark any buffer objects which were ever bound to the GPU and to trigger a serialized TLB flush when their backing store is released. Alternatively the flushing could be done on VMA unbind, at which point we would be able to ascertain whether there is potential a parallel GPU execution (which could race), but essentially it boils down to paying the cost of TLB flushes potentially needlessly at VMA unbind time (when the backing store is not known to be going away so not needed for safety), versus potentially needlessly at backing store relase time (since we at that point cannot tell whether there is anything executing on the GPU which uses that object). Thereforce simplicity of implementation has been chosen for now with scope to benchmark and refine later as required. Signed-off-by: Tvrtko Ursulin Reported-by: Sushma Venkatesh Reddy Reviewed-by: Daniel Vetter Acked-by: Dave Airlie Cc: Daniel Vetter Cc: Jon Bloomfield Cc: Joonas Lahtinen Cc: Jani Nikula Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gem.c | 84 +++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_gem_object.h | 1 + drivers/gpu/drm/i915/i915_reg.h | 6 ++ drivers/gpu/drm/i915/i915_vma.c | 4 ++ 5 files changed, 96 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 816781f209d6..f8d13ee2d538 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2166,6 +2166,8 @@ struct drm_i915_private { struct intel_uncore uncore; + struct mutex tlb_invalidate_lock; + struct i915_virtual_gpu vgpu; struct intel_gvt *gvt; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9263b65720bc..08d31744e2d9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2220,6 +2220,76 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) rcu_read_unlock(); } +struct reg_and_bit { + i915_reg_t reg; + u32 bit; +}; + +static struct reg_and_bit +get_reg_and_bit(const struct intel_engine_cs *engine, + const i915_reg_t *regs, const unsigned int num) +{ + const unsigned int class = engine->class; + struct reg_and_bit rb = { .bit = 1 }; + + if (WARN_ON_ONCE(class >= num || !regs[class].reg)) + return rb; + + rb.reg = regs[class]; + if (class == VIDEO_DECODE_CLASS) + rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */ + + return rb; +} + +static void invalidate_tlbs(struct drm_i915_private *dev_priv) +{ + static const i915_reg_t gen8_regs[] = { + [RENDER_CLASS] = GEN8_RTCR, + [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */ + [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR, + [COPY_ENGINE_CLASS] = GEN8_BTCR, + }; + const unsigned int num = ARRAY_SIZE(gen8_regs); + const i915_reg_t *regs = gen8_regs; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + if (INTEL_GEN(dev_priv) < 8) + return; + + assert_rpm_wakelock_held(dev_priv); + + mutex_lock(&dev_priv->tlb_invalidate_lock); + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + for_each_engine(engine, dev_priv, id) { + /* + * HW architecture suggest typical invalidation time at 40us, + * with pessimistic cases up to 100us and a recommendation to + * cap at 1ms. We go a bit higher just in case. + */ + const unsigned int timeout_us = 100; + const unsigned int timeout_ms = 4; + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + I915_WRITE_FW(rb.reg, rb.bit); + if (__intel_wait_for_register_fw(dev_priv, + rb.reg, rb.bit, 0, + timeout_us, timeout_ms, + NULL)) + DRM_ERROR_RATELIMITED("%s TLB invalidation did not complete in %ums!\n", + engine->name, timeout_ms); + } + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + mutex_unlock(&dev_priv->tlb_invalidate_lock); +} + void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, enum i915_mm_subclass subclass) { @@ -2257,8 +2327,18 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, __i915_gem_object_reset_page_iter(obj); - if (!IS_ERR(pages)) + if (!IS_ERR(pages)) { + if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + + if (intel_runtime_pm_get_if_in_use(i915)) { + invalidate_tlbs(i915); + intel_runtime_pm_put(i915); + } + } + obj->ops->put_pages(obj, pages); + } unlock: mutex_unlock(&obj->mm.lock); @@ -4972,6 +5052,8 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) spin_lock_init(&dev_priv->fb_tracking.lock); + mutex_init(&dev_priv->tlb_invalidate_lock); + return 0; err_priorities: diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 39cfe04dcdb8..180a8bf24791 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -135,6 +135,7 @@ struct drm_i915_gem_object { * activity? */ #define I915_BO_ACTIVE_REF 0 +#define I915_BO_WAS_BOUND_BIT 1 /* * Is the object to be mapped as read-only to the GPU diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 1db70350af0b..333e94381789 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2380,6 +2380,12 @@ enum i915_power_well_id { #define GAMT_CHKN_BIT_REG _MMIO(0x4ab8) #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1<<28) +#define GEN8_RTCR _MMIO(0x4260) +#define GEN8_M1TCR _MMIO(0x4264) +#define GEN8_M2TCR _MMIO(0x4268) +#define GEN8_BTCR _MMIO(0x426c) +#define GEN8_VTCR _MMIO(0x4270) + #if 0 #define PRB0_TAIL _MMIO(0x2030) #define PRB0_HEAD _MMIO(0x2034) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 76eed1fdac09..5653e7bac914 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -272,6 +272,10 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, return ret; vma->flags |= bind_flags; + + if (vma->obj) + set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags); + return 0; } -- 2.20.1