drm/i915: Change GEM throttling to be 20ms like the comment says.
authorEric Anholt <eric@anholt.net>
Wed, 3 Jun 2009 07:27:35 +0000 (07:27 +0000)
committerEric Anholt <eric@anholt.net>
Thu, 4 Jun 2009 11:44:22 +0000 (11:44 +0000)
keithp didn't like the original 20ms plan because a cooperative client could
be starved by an uncooperative client.  There may even have been problems
with cooperative clients versus cooperative clients.  So keithp changed
throttle to just wait for the second to last seqno emitted by that client.
It worked well, until we started getting more round-trips to the server
due to DRI2 -- the server throttles in BlockHandler, and so if you did more
than one round trip after finishing your frame, you'd end up unintentionally
syncing to the swap.

Fix this by keeping track of the client's requests, so the client can wait
when it has an outstanding request over 20ms old.  This should have
non-starving behavior, good behavior in the presence of restarts, and less
waiting.  Improves high-settings openarena performance on my GM45 by 50%.

Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
drivers/gpu/drm/i915/i915_dma.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c

index 53d5445526253138a2e572c64e9698054ee5fe8f..0c222c28b8c1d02f2d610448f3795b28cbdadd2f 100644 (file)
@@ -1273,8 +1273,7 @@ int i915_driver_open(struct drm_device *dev, struct drm_file *file_priv)
 
        file_priv->driver_priv = i915_file_priv;
 
-       i915_file_priv->mm.last_gem_seqno = 0;
-       i915_file_priv->mm.last_gem_throttle_seqno = 0;
+       INIT_LIST_HEAD(&i915_file_priv->mm.request_list);
 
        return 0;
 }
@@ -1311,6 +1310,7 @@ void i915_driver_lastclose(struct drm_device * dev)
 void i915_driver_preclose(struct drm_device * dev, struct drm_file *file_priv)
 {
        drm_i915_private_t *dev_priv = dev->dev_private;
+       i915_gem_release(dev, file_priv);
        if (!drm_core_check_feature(dev, DRIVER_MODESET))
                i915_mem_release(dev, file_priv, dev_priv->agp_heap);
 }
index fcaa5444daa04e93d93a3c186021c1ec46d9ed55..e0fac5f62c69b787724ee8c7aa18de731e9d4cfe 100644 (file)
@@ -498,13 +498,16 @@ struct drm_i915_gem_request {
        /** Time at which this request was emitted, in jiffies. */
        unsigned long emitted_jiffies;
 
+       /** global list entry for this request */
        struct list_head list;
+
+       /** file_priv list entry for this request */
+       struct list_head client_list;
 };
 
 struct drm_i915_file_private {
        struct {
-               uint32_t last_gem_seqno;
-               uint32_t last_gem_throttle_seqno;
+               struct list_head request_list;
        } mm;
 };
 
index 39f5c658ef5e7c9754444672e86c166cd12a0191..3fbd8a0c40d124a2a47171e60467336c3a158c9f 100644 (file)
@@ -1481,14 +1481,19 @@ i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
  * Returned sequence numbers are nonzero on success.
  */
 static uint32_t
-i915_add_request(struct drm_device *dev, uint32_t flush_domains)
+i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
+                uint32_t flush_domains)
 {
        drm_i915_private_t *dev_priv = dev->dev_private;
+       struct drm_i915_file_private *i915_file_priv = NULL;
        struct drm_i915_gem_request *request;
        uint32_t seqno;
        int was_empty;
        RING_LOCALS;
 
+       if (file_priv != NULL)
+               i915_file_priv = file_priv->driver_priv;
+
        request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER);
        if (request == NULL)
                return 0;
@@ -1515,6 +1520,12 @@ i915_add_request(struct drm_device *dev, uint32_t flush_domains)
        request->emitted_jiffies = jiffies;
        was_empty = list_empty(&dev_priv->mm.request_list);
        list_add_tail(&request->list, &dev_priv->mm.request_list);
+       if (i915_file_priv) {
+               list_add_tail(&request->client_list,
+                             &i915_file_priv->mm.request_list);
+       } else {
+               INIT_LIST_HEAD(&request->client_list);
+       }
 
        /* Associate any objects on the flushing list matching the write
         * domain we're flushing with our flush.
@@ -1664,6 +1675,7 @@ i915_gem_retire_requests(struct drm_device *dev)
                        i915_gem_retire_request(dev, request);
 
                        list_del(&request->list);
+                       list_del(&request->client_list);
                        drm_free(request, sizeof(*request), DRM_MEM_DRIVER);
                } else
                        break;
@@ -1977,7 +1989,7 @@ i915_gem_evict_something(struct drm_device *dev)
                        i915_gem_flush(dev,
                                       obj->write_domain,
                                       obj->write_domain);
-                       i915_add_request(dev, obj->write_domain);
+                       i915_add_request(dev, NULL, obj->write_domain);
 
                        obj = NULL;
                        continue;
@@ -2248,7 +2260,7 @@ try_again:
                                i915_gem_flush(dev,
                                               I915_GEM_GPU_DOMAINS,
                                               I915_GEM_GPU_DOMAINS);
-                               seqno = i915_add_request(dev,
+                               seqno = i915_add_request(dev, NULL,
                                                         I915_GEM_GPU_DOMAINS);
                                if (seqno == 0)
                                        return -ENOMEM;
@@ -2452,7 +2464,7 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
 
        /* Queue the GPU write cache flushing we need. */
        i915_gem_flush(dev, 0, obj->write_domain);
-       seqno = i915_add_request(dev, obj->write_domain);
+       seqno = i915_add_request(dev, NULL, obj->write_domain);
        obj->write_domain = 0;
        i915_gem_object_move_to_active(obj, seqno);
 }
@@ -3089,6 +3101,10 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev,
 /* Throttle our rendering by waiting until the ring has completed our requests
  * emitted over 20 msec ago.
  *
+ * Note that if we were to use the current jiffies each time around the loop,
+ * we wouldn't escape the function with any frames outstanding if the time to
+ * render a frame was over 20ms.
+ *
  * This should get us reasonable parallelism between CPU and GPU but also
  * relatively low latency when blocking on a particular request to finish.
  */
@@ -3097,15 +3113,25 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
 {
        struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
        int ret = 0;
-       uint32_t seqno;
+       unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
 
        mutex_lock(&dev->struct_mutex);
-       seqno = i915_file_priv->mm.last_gem_throttle_seqno;
-       i915_file_priv->mm.last_gem_throttle_seqno =
-               i915_file_priv->mm.last_gem_seqno;
-       if (seqno)
-               ret = i915_wait_request(dev, seqno);
+       while (!list_empty(&i915_file_priv->mm.request_list)) {
+               struct drm_i915_gem_request *request;
+
+               request = list_first_entry(&i915_file_priv->mm.request_list,
+                                          struct drm_i915_gem_request,
+                                          client_list);
+
+               if (time_after_eq(request->emitted_jiffies, recent_enough))
+                       break;
+
+               ret = i915_wait_request(dev, request->seqno);
+               if (ret != 0)
+                       break;
+       }
        mutex_unlock(&dev->struct_mutex);
+
        return ret;
 }
 
@@ -3187,7 +3213,6 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
                    struct drm_file *file_priv)
 {
        drm_i915_private_t *dev_priv = dev->dev_private;
-       struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
        struct drm_i915_gem_execbuffer *args = data;
        struct drm_i915_gem_exec_object *exec_list = NULL;
        struct drm_gem_object **object_list = NULL;
@@ -3363,7 +3388,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
                               dev->invalidate_domains,
                               dev->flush_domains);
                if (dev->flush_domains)
-                       (void)i915_add_request(dev, dev->flush_domains);
+                       (void)i915_add_request(dev, file_priv,
+                                              dev->flush_domains);
        }
 
        for (i = 0; i < args->buffer_count; i++) {
@@ -3412,9 +3438,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
         * *some* interrupts representing completion of buffers that we can
         * wait on when trying to clear up gtt space).
         */
-       seqno = i915_add_request(dev, flush_domains);
+       seqno = i915_add_request(dev, file_priv, flush_domains);
        BUG_ON(seqno == 0);
-       i915_file_priv->mm.last_gem_seqno = seqno;
        for (i = 0; i < args->buffer_count; i++) {
                struct drm_gem_object *obj = object_list[i];
 
@@ -3802,7 +3827,7 @@ i915_gem_idle(struct drm_device *dev)
         */
        i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT),
                       ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
-       seqno = i915_add_request(dev, ~I915_GEM_DOMAIN_CPU);
+       seqno = i915_add_request(dev, NULL, ~I915_GEM_DOMAIN_CPU);
 
        if (seqno == 0) {
                mutex_unlock(&dev->struct_mutex);
@@ -4352,3 +4377,17 @@ i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
        drm_agp_chipset_flush(dev);
        return 0;
 }
+
+void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
+{
+       struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
+
+       /* Clean up our request list when the client is going away, so that
+        * later retire_requests won't dereference our soon-to-be-gone
+        * file_priv.
+        */
+       mutex_lock(&dev->struct_mutex);
+       while (!list_empty(&i915_file_priv->mm.request_list))
+               list_del_init(i915_file_priv->mm.request_list.next);
+       mutex_unlock(&dev->struct_mutex);
+}