drm/i915: Keep a global seqno per-engine
authorChris Wilson <chris@chris-wilson.co.uk>
Thu, 23 Feb 2017 07:44:08 +0000 (07:44 +0000)
committerChris Wilson <chris@chris-wilson.co.uk>
Thu, 23 Feb 2017 14:49:26 +0000 (14:49 +0000)
Replace the global device seqno with one for each engine, and account
for in-flight seqno on each separately. This is consistent with
dma-fence as each timeline has separate fence-contexts for each engine
and a seqno is only ordered within a fence-context (i.e.  seqno do not
need to be ordered wrt to other engines, just ordered within a single
engine). This is required to enable request rewinding for preemption on
individual engines (we have to rewind the global seqno to avoid
overflow, and we do not have to rewind all engines just to preempt one.)

v2: Rename active_seqno to inflight_seqnos to more clearly indicate that
it is a counter and not equivalent to the existing seqno. Update
functions that operated on active_seqno similarly.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-3-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_gem_request.c
drivers/gpu/drm/i915/i915_gem_request.h
drivers/gpu/drm/i915/i915_gem_timeline.h
drivers/gpu/drm/i915/intel_breadcrumbs.c
drivers/gpu/drm/i915/intel_engine_cs.c
drivers/gpu/drm/i915/intel_ringbuffer.h

index 655e60d609c25a8a2113186fb754bb1a5402a239..1a28b5279bec6cf220bd1ea1bfeb3e13d5facf6b 100644 (file)
@@ -1079,15 +1079,6 @@ static const struct file_operations i915_error_state_fops = {
 };
 #endif
 
-static int
-i915_next_seqno_get(void *data, u64 *val)
-{
-       struct drm_i915_private *dev_priv = data;
-
-       *val = 1 + atomic_read(&dev_priv->gt.global_timeline.seqno);
-       return 0;
-}
-
 static int
 i915_next_seqno_set(void *data, u64 val)
 {
@@ -1106,7 +1097,7 @@ i915_next_seqno_set(void *data, u64 val)
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops,
-                       i915_next_seqno_get, i915_next_seqno_set,
+                       NULL, i915_next_seqno_set,
                        "0x%llx\n");
 
 static int i915_frequency_info(struct seq_file *m, void *unused)
index 12b06e05549020b1e1d1c39ad105879c828aaf91..b94ae6cf9837f638bedbd934d13ad254bcdd4994 100644 (file)
@@ -198,6 +198,12 @@ i915_priotree_init(struct i915_priotree *pt)
        pt->priority = INT_MIN;
 }
 
+static void unreserve_seqno(struct intel_engine_cs *engine)
+{
+       GEM_BUG_ON(!engine->timeline->inflight_seqnos);
+       engine->timeline->inflight_seqnos--;
+}
+
 void i915_gem_retire_noop(struct i915_gem_active *active,
                          struct drm_i915_gem_request *request)
 {
@@ -237,6 +243,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
                                 &request->i915->gt.idle_work,
                                 msecs_to_jiffies(100));
        }
+       unreserve_seqno(request->engine);
 
        /* Walk through the active list, calling retire on each. This allows
         * objects to track their GPU activity and mark themselves as idle
@@ -307,7 +314,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
        } while (tmp != req);
 }
 
-static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
+static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
 {
        struct i915_gem_timeline *timeline = &i915->gt.global_timeline;
        struct intel_engine_cs *engine;
@@ -325,15 +332,19 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
        GEM_BUG_ON(i915->gt.active_requests > 1);
 
        /* If the seqno wraps around, we need to clear the breadcrumb rbtree */
-       if (!i915_seqno_passed(seqno, atomic_read(&timeline->seqno))) {
-               while (intel_breadcrumbs_busy(i915))
-                       cond_resched(); /* spin until threads are complete */
-       }
-       atomic_set(&timeline->seqno, seqno);
+       for_each_engine(engine, i915, id) {
+               struct intel_timeline *tl = &timeline->engine[id];
 
-       /* Finally reset hw state */
-       for_each_engine(engine, i915, id)
+               if (!i915_seqno_passed(seqno, tl->seqno)) {
+                       /* spin until threads are complete */
+                       while (intel_breadcrumbs_busy(engine))
+                               cond_resched();
+               }
+
+               /* Finally reset hw state */
+               tl->seqno = seqno;
                intel_engine_init_global_seqno(engine, seqno);
+       }
 
        list_for_each_entry(timeline, &i915->gt.timelines, link) {
                for_each_engine(engine, i915, id) {
@@ -358,37 +369,38 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
        /* HWS page needs to be set less than what we
         * will inject to ring
         */
-       return i915_gem_init_global_seqno(dev_priv, seqno - 1);
+       return reset_all_global_seqno(dev_priv, seqno - 1);
 }
 
-static int reserve_global_seqno(struct drm_i915_private *i915)
+static int reserve_seqno(struct intel_engine_cs *engine)
 {
-       u32 active_requests = ++i915->gt.active_requests;
-       u32 seqno = atomic_read(&i915->gt.global_timeline.seqno);
+       u32 active = ++engine->timeline->inflight_seqnos;
+       u32 seqno = engine->timeline->seqno;
        int ret;
 
        /* Reservation is fine until we need to wrap around */
-       if (likely(seqno + active_requests > seqno))
+       if (likely(!add_overflows(seqno, active)))
                return 0;
 
-       ret = i915_gem_init_global_seqno(i915, 0);
+       /* Even though we are tracking inflight seqno individually on each
+        * engine, other engines may be observing us using hw semaphores and
+        * so we need to idle all engines before wrapping around this engine.
+        * As all engines are then idle, we can reset the seqno on all, so
+        * we don't stall in quick succession if each engine is being
+        * similarly utilized.
+        */
+       ret = reset_all_global_seqno(engine->i915, 0);
        if (ret) {
-               i915->gt.active_requests--;
+               engine->timeline->inflight_seqnos--;
                return ret;
        }
 
        return 0;
 }
 
-static u32 __timeline_get_seqno(struct i915_gem_timeline *tl)
-{
-       /* seqno only incremented under a mutex */
-       return ++tl->seqno.counter;
-}
-
-static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
+static u32 timeline_get_seqno(struct intel_timeline *tl)
 {
-       return atomic_inc_return(&tl->seqno);
+       return ++tl->seqno;
 }
 
 void __i915_gem_request_submit(struct drm_i915_gem_request *request)
@@ -402,14 +414,10 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
        GEM_BUG_ON(timeline == request->timeline);
        assert_spin_locked(&timeline->lock);
 
-       seqno = timeline_get_seqno(timeline->common);
+       seqno = timeline_get_seqno(timeline);
        GEM_BUG_ON(!seqno);
        GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));
 
-       GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno));
-       request->previous_seqno = timeline->last_submitted_seqno;
-       timeline->last_submitted_seqno = seqno;
-
        /* We may be recursing from the signal callback of another i915 fence */
        spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
        request->global_seqno = seqno;
@@ -516,7 +524,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
        if (ret)
                return ERR_PTR(ret);
 
-       ret = reserve_global_seqno(dev_priv);
+       ret = reserve_seqno(engine);
        if (ret)
                goto err_unpin;
 
@@ -568,7 +576,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
                       &i915_fence_ops,
                       &req->lock,
                       req->timeline->fence_context,
-                      __timeline_get_seqno(req->timeline->common));
+                      timeline_get_seqno(req->timeline));
 
        /* We bump the ref for the fence chain */
        i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify);
@@ -613,6 +621,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
         */
        req->head = req->ring->tail;
 
+       /* Check that we didn't interrupt ourselves with a new request */
+       GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
        return req;
 
 err_ctx:
@@ -623,7 +633,7 @@ err_ctx:
 
        kmem_cache_free(dev_priv->requests, req);
 err_unreserve:
-       dev_priv->gt.active_requests--;
+       unreserve_seqno(engine);
 err_unpin:
        engine->context_unpin(engine, ctx);
        return ERR_PTR(ret);
@@ -836,8 +846,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
         * our i915_gem_request_alloc() and called __i915_add_request() before
         * us, the timeline will hold its seqno which is later than ours.
         */
-       GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
-                                    request->fence.seqno));
+       GEM_BUG_ON(timeline->seqno != request->fence.seqno);
 
        /*
         * To ensure that this call will not fail, space for its emissions
@@ -891,16 +900,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
        list_add_tail(&request->link, &timeline->requests);
        spin_unlock_irq(&timeline->lock);
 
-       GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
-                                    request->fence.seqno));
-
-       timeline->last_submitted_seqno = request->fence.seqno;
+       GEM_BUG_ON(timeline->seqno != request->fence.seqno);
        i915_gem_active_set(&timeline->last_request, request);
 
        list_add_tail(&request->ring_link, &ring->request_list);
        request->emitted_jiffies = jiffies;
 
-       i915_gem_mark_busy(engine);
+       if (!request->i915->gt.active_requests++)
+               i915_gem_mark_busy(engine);
 
        /* Let the backend know a new request has arrived that may need
         * to adjust the existing execution schedule due to a high priority
index ea511f06efaf52b6e1faa806bdf5f7730b72d3b1..9049936c571ca1fb3d89ae107d56992c3b3ba82c 100644 (file)
@@ -145,12 +145,6 @@ struct drm_i915_gem_request {
 
        u32 global_seqno;
 
-       /** GEM sequence number associated with the previous request,
-        * when the HWS breadcrumb is equal to this the GPU is processing
-        * this request.
-        */
-       u32 previous_seqno;
-
        /** Position in the ring of the start of the request */
        u32 head;
 
@@ -287,7 +281,7 @@ __i915_gem_request_started(const struct drm_i915_gem_request *req)
 {
        GEM_BUG_ON(!req->global_seqno);
        return i915_seqno_passed(intel_engine_get_seqno(req->engine),
-                                req->previous_seqno);
+                                req->global_seqno - 1);
 }
 
 static inline bool
index f2e51f42cc2f276536f3462ecaf218abef6d795b..6c53e14cab2a4d307b7eb4e13aafea0a71ed458f 100644 (file)
@@ -33,7 +33,13 @@ struct i915_gem_timeline;
 
 struct intel_timeline {
        u64 fence_context;
-       u32 last_submitted_seqno;
+       u32 seqno;
+
+       /**
+        * Count of outstanding requests, from the time they are constructed
+        * to the moment they are retired. Loosely coupled to hardware.
+        */
+       u32 inflight_seqnos;
 
        spinlock_t lock;
 
@@ -56,7 +62,6 @@ struct intel_timeline {
 
 struct i915_gem_timeline {
        struct list_head link;
-       atomic_t seqno;
 
        struct drm_i915_private *i915;
        const char *name;
index 5932e2dc0c6f7447008f24268270f203f1c7b8eb..4f4e703d1b147e7a0b7869e1cb5928da52deb645 100644 (file)
@@ -676,31 +676,26 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
        cancel_fake_irq(engine);
 }
 
-unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915)
+bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
 {
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
-       unsigned int mask = 0;
-
-       for_each_engine(engine, i915, id) {
-               struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-               spin_lock_irq(&b->lock);
+       struct intel_breadcrumbs *b = &engine->breadcrumbs;
+       bool busy = false;
 
-               if (b->first_wait) {
-                       wake_up_process(b->first_wait->tsk);
-                       mask |= intel_engine_flag(engine);
-               }
+       spin_lock_irq(&b->lock);
 
-               if (b->first_signal) {
-                       wake_up_process(b->signaler);
-                       mask |= intel_engine_flag(engine);
-               }
+       if (b->first_wait) {
+               wake_up_process(b->first_wait->tsk);
+               busy |= intel_engine_flag(engine);
+       }
 
-               spin_unlock_irq(&b->lock);
+       if (b->first_signal) {
+               wake_up_process(b->signaler);
+               busy |= intel_engine_flag(engine);
        }
 
-       return mask;
+       spin_unlock_irq(&b->lock);
+
+       return busy;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
index 46c94740be534217a3d094b55b0352a71aae0857..c4d4698f98e7cc6f69e76d651063a9cbb9210fd2 100644 (file)
@@ -252,8 +252,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
                engine->irq_seqno_barrier(engine);
 
        GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));
-       engine->timeline->last_submitted_seqno = seqno;
-
        engine->hangcheck.seqno = seqno;
 
        /* After manually advancing the seqno, fake the interrupt in case
index 4091c97be6ecc73d84ce75f5409f446afa3c9e31..b91c2c7ef5a625a6d178c9f2a7b252818f9122df 100644 (file)
@@ -556,7 +556,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
         * wtih serialising this hint with anything, so document it as
         * a hint and nothing more.
         */
-       return READ_ONCE(engine->timeline->last_submitted_seqno);
+       return READ_ONCE(engine->timeline->seqno);
 }
 
 int init_workarounds_ring(struct intel_engine_cs *engine);
@@ -630,7 +630,7 @@ static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine)
 
 void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
-unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915);
+bool intel_breadcrumbs_busy(struct intel_engine_cs *engine);
 
 static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
 {