drm/ttm/radeon/nouveau: Kill the bo lock in favour of a bo device fence_lock
author Thomas Hellstrom <thellstrom@vmware.com>
Wed, 17 Nov 2010 12:28:29 +0000 (12:28 +0000)
committer Dave Airlie <airlied@redhat.com>
Mon, 22 Nov 2010 03:25:18 +0000 (13:25 +1000)
The bo lock was used only to protect the bo sync object members, and since it
is a per-bo lock, fencing a buffer list will see a lot of locks and unlocks.
Replace it with a per-device lock that protects the sync object members on
*all* bos. Reading and setting these members will always be very quick, so
the risk of heavy lock contention is microscopic. Note that waiting for
sync objects will always take place outside of this lock.

The bo device fence lock will eventually be replaced with a seqlock /
rcu mechanism so we can determine that a bo is idle under a
rcu / read seqlock.

However this change will allow us to batch fencing and unreserving of
buffers with a minimal amount of locking.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Jerome Glisse <j.glisse@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
drivers/gpu/drm/nouveau/nouveau_gem.c
drivers/gpu/drm/radeon/radeon_object.c
drivers/gpu/drm/radeon/radeon_object.h
drivers/gpu/drm/ttm/ttm_bo.c
drivers/gpu/drm/ttm/ttm_bo_util.c
drivers/gpu/drm/ttm/ttm_bo_vm.c
drivers/gpu/drm/ttm/ttm_execbuf_util.c
include/drm/ttm/ttm_bo_api.h
include/drm/ttm/ttm_bo_driver.h

index 9a1fdcf400c2fea235544f062fe8b33cd1b01fd4..1f2301d26c0a226f5fde5d21e2e4bbc8d8025c4b 100644 (file)
@@ -234,10 +234,10 @@ validate_fini_list(struct list_head *list, struct nouveau_fence *fence)
                if (likely(fence)) {
                        struct nouveau_fence *prev_fence;
 
-                       spin_lock(&nvbo->bo.lock);
+                       spin_lock(&nvbo->bo.bdev->fence_lock);
                        prev_fence = nvbo->bo.sync_obj;
                        nvbo->bo.sync_obj = nouveau_fence_ref(fence);
-                       spin_unlock(&nvbo->bo.lock);
+                       spin_unlock(&nvbo->bo.bdev->fence_lock);
                        nouveau_fence_unref((void *)&prev_fence);
                }
 
@@ -557,9 +557,9 @@ nouveau_gem_pushbuf_reloc_apply(struct drm_device *dev,
                                data |= r->vor;
                }
 
-               spin_lock(&nvbo->bo.lock);
+               spin_lock(&nvbo->bo.bdev->fence_lock);
                ret = ttm_bo_wait(&nvbo->bo, false, false, false);
-               spin_unlock(&nvbo->bo.lock);
+               spin_unlock(&nvbo->bo.bdev->fence_lock);
                if (ret) {
                        NV_ERROR(dev, "reloc wait_idle failed: %d\n", ret);
                        break;
@@ -791,9 +791,9 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
        }
 
        if (req->flags & NOUVEAU_GEM_CPU_PREP_NOBLOCK) {
-               spin_lock(&nvbo->bo.lock);
+               spin_lock(&nvbo->bo.bdev->fence_lock);
                ret = ttm_bo_wait(&nvbo->bo, false, false, no_wait);
-               spin_unlock(&nvbo->bo.lock);
+               spin_unlock(&nvbo->bo.bdev->fence_lock);
        } else {
                ret = ttm_bo_synccpu_write_grab(&nvbo->bo, no_wait);
                if (ret == 0)
index 1d067743fee068174e3a5e85eb0e2d47b5b712ee..e939cb6a91ccf2cc1996498e7b2167e05b01fd76 100644 (file)
@@ -369,11 +369,11 @@ void radeon_bo_list_fence(struct list_head *head, void *fence)
 
        list_for_each_entry(lobj, head, list) {
                bo = lobj->bo;
-               spin_lock(&bo->tbo.lock);
+               spin_lock(&bo->tbo.bdev->fence_lock);
                old_fence = (struct radeon_fence *)bo->tbo.sync_obj;
                bo->tbo.sync_obj = radeon_fence_ref(fence);
                bo->tbo.sync_obj_arg = NULL;
-               spin_unlock(&bo->tbo.lock);
+               spin_unlock(&bo->tbo.bdev->fence_lock);
                if (old_fence) {
                        radeon_fence_unref(&old_fence);
                }
index d143702b244a4febdd51aa69ca7023e19d692a62..fd536751f8658ccc3da86b7c9d1468836e645c86 100644 (file)
@@ -126,12 +126,12 @@ static inline int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
        r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
        if (unlikely(r != 0))
                return r;
-       spin_lock(&bo->tbo.lock);
+       spin_lock(&bo->tbo.bdev->fence_lock);
        if (mem_type)
                *mem_type = bo->tbo.mem.mem_type;
        if (bo->tbo.sync_obj)
                r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
-       spin_unlock(&bo->tbo.lock);
+       spin_unlock(&bo->tbo.bdev->fence_lock);
        ttm_bo_unreserve(&bo->tbo);
        return r;
 }
index 5d8750830dc37adb02694148cee1106efc4e1749..d93c73b1c47121e517c2bd5fb633fdb32dfb0d7b 100644 (file)
@@ -427,11 +427,9 @@ moved:
        }
 
        if (bo->mem.mm_node) {
-               spin_lock(&bo->lock);
                bo->offset = (bo->mem.start << PAGE_SHIFT) +
                    bdev->man[bo->mem.mem_type].gpu_offset;
                bo->cur_placement = bo->mem.placement;
-               spin_unlock(&bo->lock);
        } else
                bo->offset = 0;
 
@@ -485,14 +483,14 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
        int put_count;
        int ret;
 
-       spin_lock(&bo->lock);
+       spin_lock(&bdev->fence_lock);
        (void) ttm_bo_wait(bo, false, false, true);
        if (!bo->sync_obj) {
 
                spin_lock(&glob->lru_lock);
 
                /**
-                * Lock inversion between bo::reserve and bo::lock here,
+                * Lock inversion between bo::reserve and bdev::fence_lock here,
                 * but that's OK, since we're only trylocking.
                 */
 
@@ -501,7 +499,7 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
                if (unlikely(ret == -EBUSY))
                        goto queue;
 
-               spin_unlock(&bo->lock);
+               spin_unlock(&bdev->fence_lock);
                put_count = ttm_bo_del_from_lru(bo);
 
                spin_unlock(&glob->lru_lock);
@@ -522,7 +520,7 @@ queue:
        kref_get(&bo->list_kref);
        list_add_tail(&bo->ddestroy, &bdev->ddestroy);
        spin_unlock(&glob->lru_lock);
-       spin_unlock(&bo->lock);
+       spin_unlock(&bdev->fence_lock);
 
        if (sync_obj) {
                driver->sync_obj_flush(sync_obj, sync_obj_arg);
@@ -547,14 +545,15 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
                               bool no_wait_reserve,
                               bool no_wait_gpu)
 {
+       struct ttm_bo_device *bdev = bo->bdev;
        struct ttm_bo_global *glob = bo->glob;
        int put_count;
        int ret = 0;
 
 retry:
-       spin_lock(&bo->lock);
+       spin_lock(&bdev->fence_lock);
        ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-       spin_unlock(&bo->lock);
+       spin_unlock(&bdev->fence_lock);
 
        if (unlikely(ret != 0))
                return ret;
@@ -707,9 +706,9 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
        struct ttm_placement placement;
        int ret = 0;
 
-       spin_lock(&bo->lock);
+       spin_lock(&bdev->fence_lock);
        ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-       spin_unlock(&bo->lock);
+       spin_unlock(&bdev->fence_lock);
 
        if (unlikely(ret != 0)) {
                if (ret != -ERESTARTSYS) {
@@ -1044,6 +1043,7 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 {
        int ret = 0;
        struct ttm_mem_reg mem;
+       struct ttm_bo_device *bdev = bo->bdev;
 
        BUG_ON(!atomic_read(&bo->reserved));
 
@@ -1052,9 +1052,9 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
         * Have the driver move function wait for idle when necessary,
         * instead of doing it here.
         */
-       spin_lock(&bo->lock);
+       spin_lock(&bdev->fence_lock);
        ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-       spin_unlock(&bo->lock);
+       spin_unlock(&bdev->fence_lock);
        if (ret)
                return ret;
        mem.num_pages = bo->num_pages;
@@ -1171,7 +1171,6 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
        }
        bo->destroy = destroy;
 
-       spin_lock_init(&bo->lock);
        kref_init(&bo->kref);
        kref_init(&bo->list_kref);
        atomic_set(&bo->cpu_writers, 0);
@@ -1535,7 +1534,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
        bdev->dev_mapping = NULL;
        bdev->glob = glob;
        bdev->need_dma32 = need_dma32;
-
+       spin_lock_init(&bdev->fence_lock);
        mutex_lock(&glob->device_list_mutex);
        list_add_tail(&bdev->device_list, &glob->device_list);
        mutex_unlock(&glob->device_list_mutex);
@@ -1659,6 +1658,7 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
                bool lazy, bool interruptible, bool no_wait)
 {
        struct ttm_bo_driver *driver = bo->bdev->driver;
+       struct ttm_bo_device *bdev = bo->bdev;
        void *sync_obj;
        void *sync_obj_arg;
        int ret = 0;
@@ -1672,9 +1672,9 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
                        void *tmp_obj = bo->sync_obj;
                        bo->sync_obj = NULL;
                        clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
-                       spin_unlock(&bo->lock);
+                       spin_unlock(&bdev->fence_lock);
                        driver->sync_obj_unref(&tmp_obj);
-                       spin_lock(&bo->lock);
+                       spin_lock(&bdev->fence_lock);
                        continue;
                }
 
@@ -1683,29 +1683,29 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
 
                sync_obj = driver->sync_obj_ref(bo->sync_obj);
                sync_obj_arg = bo->sync_obj_arg;
-               spin_unlock(&bo->lock);
+               spin_unlock(&bdev->fence_lock);
                ret = driver->sync_obj_wait(sync_obj, sync_obj_arg,
                                            lazy, interruptible);
                if (unlikely(ret != 0)) {
                        driver->sync_obj_unref(&sync_obj);
-                       spin_lock(&bo->lock);
+                       spin_lock(&bdev->fence_lock);
                        return ret;
                }
-               spin_lock(&bo->lock);
+               spin_lock(&bdev->fence_lock);
                if (likely(bo->sync_obj == sync_obj &&
                           bo->sync_obj_arg == sync_obj_arg)) {
                        void *tmp_obj = bo->sync_obj;
                        bo->sync_obj = NULL;
                        clear_bit(TTM_BO_PRIV_FLAG_MOVING,
                                  &bo->priv_flags);
-                       spin_unlock(&bo->lock);
+                       spin_unlock(&bdev->fence_lock);
                        driver->sync_obj_unref(&sync_obj);
                        driver->sync_obj_unref(&tmp_obj);
-                       spin_lock(&bo->lock);
+                       spin_lock(&bdev->fence_lock);
                } else {
-                       spin_unlock(&bo->lock);
+                       spin_unlock(&bdev->fence_lock);
                        driver->sync_obj_unref(&sync_obj);
-                       spin_lock(&bo->lock);
+                       spin_lock(&bdev->fence_lock);
                }
        }
        return 0;
@@ -1714,6 +1714,7 @@ EXPORT_SYMBOL(ttm_bo_wait);
 
 int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait)
 {
+       struct ttm_bo_device *bdev = bo->bdev;
        int ret = 0;
 
        /*
@@ -1723,9 +1724,9 @@ int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait)
        ret = ttm_bo_reserve(bo, true, no_wait, false, 0);
        if (unlikely(ret != 0))
                return ret;
-       spin_lock(&bo->lock);
+       spin_lock(&bdev->fence_lock);
        ret = ttm_bo_wait(bo, false, true, no_wait);
-       spin_unlock(&bo->lock);
+       spin_unlock(&bdev->fence_lock);
        if (likely(ret == 0))
                atomic_inc(&bo->cpu_writers);
        ttm_bo_unreserve(bo);
@@ -1797,9 +1798,9 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
         * Wait for GPU, then move to system cached.
         */
 
-       spin_lock(&bo->lock);
+       spin_lock(&bo->bdev->fence_lock);
        ret = ttm_bo_wait(bo, false, false, false);
-       spin_unlock(&bo->lock);
+       spin_unlock(&bo->bdev->fence_lock);
 
        if (unlikely(ret != 0))
                goto out;
index 3106d5bcce32d908f0991ad07eca6936c5c01cde..4b75133d6606f8caca8c02127e4046fa0b48f68a 100644 (file)
@@ -337,7 +337,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
         * TODO: Explicit member copy would probably be better here.
         */
 
-       spin_lock_init(&fbo->lock);
        init_waitqueue_head(&fbo->event_queue);
        INIT_LIST_HEAD(&fbo->ddestroy);
        INIT_LIST_HEAD(&fbo->lru);
@@ -520,7 +519,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
        struct ttm_buffer_object *ghost_obj;
        void *tmp_obj = NULL;
 
-       spin_lock(&bo->lock);
+       spin_lock(&bdev->fence_lock);
        if (bo->sync_obj) {
                tmp_obj = bo->sync_obj;
                bo->sync_obj = NULL;
@@ -529,7 +528,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
        bo->sync_obj_arg = sync_obj_arg;
        if (evict) {
                ret = ttm_bo_wait(bo, false, false, false);
-               spin_unlock(&bo->lock);
+               spin_unlock(&bdev->fence_lock);
                if (tmp_obj)
                        driver->sync_obj_unref(&tmp_obj);
                if (ret)
@@ -552,7 +551,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
                 */
 
                set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
-               spin_unlock(&bo->lock);
+               spin_unlock(&bdev->fence_lock);
                if (tmp_obj)
                        driver->sync_obj_unref(&tmp_obj);
 
index fe6cb77899f4b22ffc388190fb08e909fc4a298a..8dd446cb778e2e1858892859551bbeef57de347e 100644 (file)
@@ -118,17 +118,17 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
         * move.
         */
 
-       spin_lock(&bo->lock);
+       spin_lock(&bdev->fence_lock);
        if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) {
                ret = ttm_bo_wait(bo, false, true, false);
-               spin_unlock(&bo->lock);
+               spin_unlock(&bdev->fence_lock);
                if (unlikely(ret != 0)) {
                        retval = (ret != -ERESTARTSYS) ?
                            VM_FAULT_SIGBUS : VM_FAULT_NOPAGE;
                        goto out_unlock;
                }
        } else
-               spin_unlock(&bo->lock);
+               spin_unlock(&bdev->fence_lock);
 
 
        ret = ttm_mem_io_reserve(bdev, &bo->mem);
index 7dcc6470e2f59ef36921c904804e9fe8d0926c85..c3a2100bace62546d5a0a2e8eba7a9dfff48122e 100644 (file)
@@ -203,14 +203,15 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
 
        list_for_each_entry(entry, list, head) {
                struct ttm_buffer_object *bo = entry->bo;
-               struct ttm_bo_driver *driver = bo->bdev->driver;
+               struct ttm_bo_device *bdev = bo->bdev;
+               struct ttm_bo_driver *driver = bdev->driver;
                void *old_sync_obj;
 
-               spin_lock(&bo->lock);
+               spin_lock(&bdev->fence_lock);
                old_sync_obj = bo->sync_obj;
                bo->sync_obj = driver->sync_obj_ref(sync_obj);
                bo->sync_obj_arg = entry->new_sync_obj_arg;
-               spin_unlock(&bo->lock);
+               spin_unlock(&bdev->fence_lock);
                ttm_bo_unreserve(bo);
                entry->reserved = false;
                if (old_sync_obj)
index b0fc9c12554be0cd8e443b55b4761654960deae0..edacd483c59cb396921dfbf7681fbe5fd1310e0d 100644 (file)
@@ -154,7 +154,6 @@ struct ttm_tt;
  * keeps one refcount. When this refcount reaches zero,
  * the object is destroyed.
  * @event_queue: Queue for processes waiting on buffer object status change.
- * @lock: spinlock protecting mostly synchronization members.
  * @mem: structure describing current placement.
  * @persistant_swap_storage: Usually the swap storage is deleted for buffers
  * pinned in physical memory. If this behaviour is not desired, this member
@@ -213,7 +212,6 @@ struct ttm_buffer_object {
        struct kref kref;
        struct kref list_kref;
        wait_queue_head_t event_queue;
-       spinlock_t lock;
 
        /**
         * Members protected by the bo::reserved lock.
@@ -248,10 +246,10 @@ struct ttm_buffer_object {
        atomic_t reserved;
 
        /**
-        * Members protected by the bo::lock
+        * Members protected by struct ttm_bo_device::fence_lock
         * In addition, setting sync_obj to anything else
         * than NULL requires bo::reserved to be held. This allows for
-        * checking NULL while reserved but not holding bo::lock.
+        * checking NULL while reserved but not holding the mentioned lock.
         */
 
        void *sync_obj_arg;
index 1e25a40c688e7aaaef2b2640d4b025b262377cf5..ca8131e9830043cd9fdd729fdae6dfefa3008df5 100644 (file)
@@ -510,6 +510,8 @@ struct ttm_bo_global {
  *
  * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver.
  * @man: An array of mem_type_managers.
+ * @fence_lock: Protects the synchronizing members on *all* bos belonging
+ * to this device.
  * @addr_space_mm: Range manager for the device address space.
  * lru_lock: Spinlock that protects the buffer+device lru lists and
  * ddestroy lists.
@@ -531,6 +533,7 @@ struct ttm_bo_device {
        struct ttm_bo_driver *driver;
        rwlock_t vm_lock;
        struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES];
+       spinlock_t fence_lock;
        /*
         * Protected by the vm lock.
         */