drm/i915: Enable lockless lookup of request tracking via RCU

author Chris Wilson <chris@chris-wilson.co.uk>

Thu, 4 Aug 2016 15:32:41 +0000 (16:32 +0100)

committer Chris Wilson <chris@chris-wilson.co.uk>

Thu, 4 Aug 2016 19:20:05 +0000 (20:20 +0100)
author Chris Wilson <chris@chris-wilson.co.uk>
Thu, 4 Aug 2016 15:32:41 +0000 (16:32 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Thu, 4 Aug 2016 19:20:05 +0000 (20:20 +0100)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c

index 16b388e2c9c584adf18f5f278de56347dffeaa0a..bc414789ed3ded4c7d795b288eb4044aba5ed033 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4431,7 +4431,9 @@ i915_gem_load_init(struct drm_device *dev)
         dev_priv->requests =
                 kmem_cache_create("i915_gem_request",
                                   sizeof(struct drm_i915_gem_request), 0,
-                                 SLAB_HWCACHE_ALIGN,
+                                 SLAB_HWCACHE_ALIGN |
+                                 SLAB_RECLAIM_ACCOUNT |
+                                 SLAB_DESTROY_BY_RCU,
                                   NULL);
  
         INIT_LIST_HEAD(&dev_priv->context_list);
@@ -4467,6 +4469,9 @@ void i915_gem_load_cleanup(struct drm_device *dev)
         kmem_cache_destroy(dev_priv->requests);
         kmem_cache_destroy(dev_priv->vmas);
         kmem_cache_destroy(dev_priv->objects);
+
+       /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
+       rcu_barrier();
  }
  
  int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c

index 773b942a3a5f8d1cdbb822cc0f062737c94cd195..3fecb8f0e0412034e16745f51ff4757bf305e1c8 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -205,7 +205,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
                 prefetchw(next);
  
                 INIT_LIST_HEAD(&active->link);
-               active->request = NULL;
+               RCU_INIT_POINTER(active->request, NULL);
  
                 active->retire(active, request);
         }
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h

index 26ca697f5af721931ac8d21cfef42b288222e66d..6002adc435239a80e90be00a5c99b5d0bc251cad 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -183,6 +183,12 @@ i915_gem_request_get(struct drm_i915_gem_request *req)
         return to_request(fence_get(&req->fence));
  }
  
+static inline struct drm_i915_gem_request *
+i915_gem_request_get_rcu(struct drm_i915_gem_request *req)
+{
+       return to_request(fence_get_rcu(&req->fence));
+}
+
  static inline void
  i915_gem_request_put(struct drm_i915_gem_request *req)
  {
@@ -286,7 +292,7 @@ typedef void (*i915_gem_retire_fn)(struct i915_gem_active *,
                                    struct drm_i915_gem_request *);
  
  struct i915_gem_active {
-       struct drm_i915_gem_request *request;
+       struct drm_i915_gem_request __rcu *request;
         struct list_head link;
         i915_gem_retire_fn retire;
  };
@@ -327,13 +333,19 @@ i915_gem_active_set(struct i915_gem_active *active,
                     struct drm_i915_gem_request *request)
  {
         list_move(&active->link, &request->active_list);
-       active->request = request;
+       rcu_assign_pointer(active->request, request);
  }
  
  static inline struct drm_i915_gem_request *
  __i915_gem_active_peek(const struct i915_gem_active *active)
  {
-       return active->request;
+       /* Inside the error capture (running with the driver in an unknown
+        * state), we want to bend the rules slightly (a lot).
+        *
+        * Work is in progress to make it safer, in the meantime this keeps
+        * the known issue from spamming the logs.
+        */
+       return rcu_dereference_protected(active->request, 1);
  }
  
  /**
@@ -349,7 +361,29 @@ i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex)
  {
         struct drm_i915_gem_request *request;
  
-       request = active->request;
+       request = rcu_dereference_protected(active->request,
+                                           lockdep_is_held(mutex));
+       if (!request || i915_gem_request_completed(request))
+               return NULL;
+
+       return request;
+}
+
+/**
+ * i915_gem_active_peek_rcu - report the active request being monitored
+ * @active - the active tracker
+ *
+ * i915_gem_active_peek_rcu() returns the current request being tracked if
+ * still active, or NULL. It does not obtain a reference on the request
+ * for the caller, and inspection of the request is only valid under
+ * the RCU lock.
+ */
+static inline struct drm_i915_gem_request *
+i915_gem_active_peek_rcu(const struct i915_gem_active *active)
+{
+       struct drm_i915_gem_request *request;
+
+       request = rcu_dereference(active->request);
         if (!request || i915_gem_request_completed(request))
                 return NULL;
  
@@ -369,6 +403,119 @@ i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex)
         return i915_gem_request_get(i915_gem_active_peek(active, mutex));
  }
  
+/**
+ * __i915_gem_active_get_rcu - return a reference to the active request
+ * @active - the active tracker
+ *
+ * __i915_gem_active_get() returns a reference to the active request, or NULL
+ * if the active tracker is idle. The caller must hold the RCU read lock, but
+ * the returned pointer is safe to use outside of RCU.
+ */
+static inline struct drm_i915_gem_request *
+__i915_gem_active_get_rcu(const struct i915_gem_active *active)
+{
+       /* Performing a lockless retrieval of the active request is super
+        * tricky. SLAB_DESTROY_BY_RCU merely guarantees that the backing
+        * slab of request objects will not be freed whilst we hold the
+        * RCU read lock. It does not guarantee that the request itself
+        * will not be freed and then *reused*. Viz,
+        *
+        * Thread A                     Thread B
+        *
+        * req = active.request
+        *                              retire(req) -> free(req);
+        *                              (req is now first on the slab freelist)
+        *                              active.request = NULL
+        *
+        *                              req = new submission on a new object
+        * ref(req)
+        *
+        * To prevent the request from being reused whilst the caller
+        * uses it, we take a reference like normal. Whilst acquiring
+        * the reference we check that it is not in a destroyed state
+        * (refcnt == 0). That prevents the request being reallocated
+        * whilst the caller holds on to it. To check that the request
+        * was not reallocated as we acquired the reference we have to
+        * check that our request remains the active request across
+        * the lookup, in the same manner as a seqlock. The visibility
+        * of the pointer versus the reference counting is controlled
+        * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
+        *
+        * In the middle of all that, we inspect whether the request is
+        * complete. Retiring is lazy so the request may be completed long
+        * before the active tracker is updated. Querying whether the
+        * request is complete is far cheaper (as it involves no locked
+        * instructions setting cachelines to exclusive) than acquiring
+        * the reference, so we do it first. The RCU read lock ensures the
+        * pointer dereference is valid, but does not ensure that the
+        * seqno nor HWS is the right one! However, if the request was
+        * reallocated, that means the active tracker's request was complete.
+        * If the new request is also complete, then both are and we can
+        * just report the active tracker is idle. If the new request is
+        * incomplete, then we acquire a reference on it and check that
+        * it remained the active request.
+        */
+       do {
+               struct drm_i915_gem_request *request;
+
+               request = rcu_dereference(active->request);
+               if (!request || i915_gem_request_completed(request))
+                       return NULL;
+
+               request = i915_gem_request_get_rcu(request);
+
+               /* What stops the following rcu_access_pointer() from occurring
+                * before the above i915_gem_request_get_rcu()? If we were
+                * to read the value before pausing to get the reference to
+                * the request, we may not notice a change in the active
+                * tracker.
+                *
+                * The rcu_access_pointer() is a mere compiler barrier, which
+                * means both the CPU and compiler are free to perform the
+                * memory read without constraint. The compiler only has to
+                * ensure that any operations after the rcu_access_pointer()
+                * occur afterwards in program order. This means the read may
+                * be performed earlier by an out-of-order CPU, or adventurous
+                * compiler.
+                *
+                * The atomic operation at the heart of
+                * i915_gem_request_get_rcu(), see fence_get_rcu(), is
+                * atomic_inc_not_zero() which is only a full memory barrier
+                * when successful. That is, if i915_gem_request_get_rcu()
+                * returns the request (and so with the reference counted
+                * incremented) then the following read for rcu_access_pointer()
+                * must occur after the atomic operation and so confirm
+                * that this request is the one currently being tracked.
+                */
+               if (!request || request == rcu_access_pointer(active->request))
+                       return rcu_pointer_handoff(request);
+
+               i915_gem_request_put(request);
+       } while (1);
+}
+
+/**
+ * i915_gem_active_get_unlocked - return a reference to the active request
+ * @active - the active tracker
+ *
+ * i915_gem_active_get_unlocked() returns a reference to the active request,
+ * or NULL if the active tracker is idle. The reference is obtained under RCU,
+ * so no locking is required by the caller.
+ *
+ * The reference should be freed with i915_gem_request_put().
+ */
+static inline struct drm_i915_gem_request *
+i915_gem_active_get_unlocked(const struct i915_gem_active *active)
+{
+       struct drm_i915_gem_request *request;
+
+       rcu_read_lock();
+       request = __i915_gem_active_get_rcu(active);
+       rcu_read_unlock();
+
+       return request;
+}
+
  /**
   * i915_gem_active_isset - report whether the active tracker is assigned
   * @active - the active tracker
@@ -380,7 +527,7 @@ i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex)
  static inline bool
  i915_gem_active_isset(const struct i915_gem_active *active)
  {
-       return active->request;
+       return rcu_access_pointer(active->request);
  }
  
  /**
@@ -437,7 +584,8 @@ i915_gem_active_retire(struct i915_gem_active *active,
         struct drm_i915_gem_request *request;
         int ret;
  
-       request = active->request;
+       request = rcu_dereference_protected(active->request,
+                                           lockdep_is_held(mutex));
         if (!request)
                 return 0;
  
@@ -446,7 +594,8 @@ i915_gem_active_retire(struct i915_gem_active *active,
                 return ret;
  
         list_del_init(&active->link);
-       active->request = NULL;
+       RCU_INIT_POINTER(active->request, NULL);
+
         active->retire(active, request);
  
         return 0;
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c

index bcd85bdbc25f981370ebe17c7a062e040f98e077..1341cb55b6f170cbe0b886f5634f647e202ee459 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -205,6 +205,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
                 intel_runtime_pm_put(dev_priv);
  
         i915_gem_retire_requests(dev_priv);
+       /* expedite the RCU grace period to free some request slabs */
+       synchronize_rcu_expedited();
  
         return count;
  }
@@ -225,10 +227,15 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
   */
  unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
  {
-       return i915_gem_shrink(dev_priv, -1UL,
-                              I915_SHRINK_BOUND |
-                              I915_SHRINK_UNBOUND |
-                              I915_SHRINK_ACTIVE);
+       unsigned long freed;
+
+       freed = i915_gem_shrink(dev_priv, -1UL,
+                               I915_SHRINK_BOUND |
+                               I915_SHRINK_UNBOUND |
+                               I915_SHRINK_ACTIVE);
+       rcu_barrier(); /* wait until our RCU delayed slab frees are completed */
+
+       return freed;
  }
  
  static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
author	Chris Wilson <chris@chris-wilson.co.uk>
	Thu, 4 Aug 2016 15:32:41 +0000 (16:32 +0100)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Thu, 4 Aug 2016 19:20:05 +0000 (20:20 +0100)
drivers/gpu/drm/i915/i915_gem.c		patch \| blob \| blame \| history
drivers/gpu/drm/i915/i915_gem_request.c		patch \| blob \| blame \| history
drivers/gpu/drm/i915/i915_gem_request.h		patch \| blob \| blame \| history
drivers/gpu/drm/i915/i915_gem_shrinker.c		patch \| blob \| blame \| history