* Note this requires that we are always called in request
* completion order.
*/
- if (list_is_last(&request->ring_link, &request->ring->request_list))
- tail = request->ring->tail;
- else
+ if (list_is_last(&request->ring_link, &request->ring->request_list)) {
+ /* We may race here with execlists resubmitting this request
+ * as we retire it. The resubmission will move the ring->tail
+ * forwards (to request->wa_tail). We either read the
+ * current value that was written to hw, or the value that
+ * is just about to be. Either works, if we miss the last two
+ * noops - they are safe to be replayed on a reset.
+ */
+ tail = READ_ONCE(request->ring->tail);
+ } else {
tail = request->postfix;
+ }
list_del(&request->ring_link);
request->ring->head = tail;
* GPU processing the request, we never over-estimate the
* position of the head.
*/
- req->head = req->ring->tail;
+ req->head = req->ring->emit;
/* Check that we didn't interrupt ourselves with a new request */
GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
GEM_BUG_ON(freespace < wqi_size);
/* The GuC firmware wants the tail index in QWords, not bytes */
- tail = rq->tail;
- assert_ring_tail_valid(rq->ring, rq->tail);
- tail >>= 3;
+ tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3;
GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
/* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
u32 *reg_state = ce->lrc_reg_state;
- assert_ring_tail_valid(rq->ring, rq->tail);
- reg_state[CTX_RING_TAIL+1] = rq->tail;
+ reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
/* True 32b PPGTT with dynamic page allocation: update PDP
* registers and point the unallocated PDPs to scratch page.
ce->state->obj->mm.dirty = true;
i915_gem_object_unpin_map(ce->state->obj);
- ce->ring->head = ce->ring->tail = 0;
- intel_ring_update_space(ce->ring);
+ intel_ring_reset(ce->ring, 0);
}
}
}
void intel_ring_update_space(struct intel_ring *ring)
{
- ring->space = __intel_ring_space(ring->head, ring->tail, ring->size);
+ ring->space = __intel_ring_space(ring->head, ring->emit, ring->size);
}
static int
i915_gem_request_submit(request);
- assert_ring_tail_valid(request->ring, request->tail);
- I915_WRITE_TAIL(request->engine, request->tail);
+ I915_WRITE_TAIL(request->engine,
+ intel_ring_set_tail(request->ring, request->tail));
}
static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
return PTR_ERR(addr);
}
+void intel_ring_reset(struct intel_ring *ring, u32 tail)
+{
+ GEM_BUG_ON(!list_empty(&ring->request_list));
+ ring->tail = tail;
+ ring->head = tail;
+ ring->emit = tail;
+ intel_ring_update_space(ring);
+}
+
void intel_ring_unpin(struct intel_ring *ring)
{
GEM_BUG_ON(!ring->vma);
GEM_BUG_ON(!ring->vaddr);
+ /* Discard any unused bytes beyond that submitted to hw. */
+ intel_ring_reset(ring, ring->tail);
+
if (i915_vma_is_map_and_fenceable(ring->vma))
i915_vma_unpin_iomap(ring->vma);
else
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ /* Restart from the beginning of the rings for convenience */
for_each_engine(engine, dev_priv, id)
- engine->buffer->head = engine->buffer->tail;
+ intel_ring_reset(engine->buffer, 0);
}
static int ring_request_alloc(struct drm_i915_gem_request *request)
unsigned space;
/* Would completion of this request free enough space? */
- space = __intel_ring_space(target->postfix, ring->tail,
+ space = __intel_ring_space(target->postfix, ring->emit,
ring->size);
if (space >= bytes)
break;
u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
{
struct intel_ring *ring = req->ring;
- int remain_actual = ring->size - ring->tail;
- int remain_usable = ring->effective_size - ring->tail;
+ int remain_actual = ring->size - ring->emit;
+ int remain_usable = ring->effective_size - ring->emit;
int bytes = num_dwords * sizeof(u32);
int total_bytes, wait_bytes;
bool need_wrap = false;
if (unlikely(need_wrap)) {
GEM_BUG_ON(remain_actual > ring->space);
- GEM_BUG_ON(ring->tail + remain_actual > ring->size);
+ GEM_BUG_ON(ring->emit + remain_actual > ring->size);
/* Fill the tail with MI_NOOP */
- memset(ring->vaddr + ring->tail, 0, remain_actual);
- ring->tail = 0;
+ memset(ring->vaddr + ring->emit, 0, remain_actual);
+ ring->emit = 0;
ring->space -= remain_actual;
}
- GEM_BUG_ON(ring->tail > ring->size - bytes);
- cs = ring->vaddr + ring->tail;
- ring->tail += bytes;
+ GEM_BUG_ON(ring->emit > ring->size - bytes);
+ cs = ring->vaddr + ring->emit;
+ ring->emit += bytes;
ring->space -= bytes;
GEM_BUG_ON(ring->space < 0);
int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
{
int num_dwords =
- (req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
+ (req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
u32 *cs;
if (num_dwords == 0)
u32 head;
u32 tail;
+ u32 emit;
int space;
int size;
int intel_ring_pin(struct intel_ring *ring,
struct drm_i915_private *i915,
unsigned int offset_bias);
+void intel_ring_reset(struct intel_ring *ring, u32 tail);
+void intel_ring_update_space(struct intel_ring *ring);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_free(struct intel_ring *ring);
* reserved for the command packet (i.e. the value passed to
* intel_ring_begin()).
*/
- GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs);
+ GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs);
}
static inline u32
GEM_BUG_ON(tail >= ring->size);
}
-void intel_ring_update_space(struct intel_ring *ring);
+static inline unsigned int
+intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
+{
+ /* Whilst writes to the tail are strictly order, there is no
+ * serialisation between readers and the writers. The tail may be
+ * read by i915_gem_request_retire() just as it is being updated
+ * by execlists, as although the breadcrumb is complete, the context
+ * switch hasn't been seen.
+ */
+ assert_ring_tail_valid(ring, tail);
+ ring->tail = tail;
+ return tail;
+}
void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);