drm/radeon: use an interval tree to manage the VMA v2
author Alex Deucher <alexander.deucher@amd.com>
Wed, 30 Jul 2014 15:49:56 +0000 (11:49 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 5 Aug 2014 12:53:51 +0000 (08:53 -0400)
Scales much better than scanning the address range linearly.

v2: store pfn instead of address

Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/Kconfig
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_gem.c
drivers/gpu/drm/radeon/radeon_trace.h
drivers/gpu/drm/radeon/radeon_vm.c

index 31894c8c1773d6f95684f31731e0e02543e3502e..b066bb3ca01ace734e16abd61fccfb88cdd46c0c 100644 (file)
@@ -114,6 +114,7 @@ config DRM_RADEON
        select POWER_SUPPLY
        select HWMON
        select BACKLIGHT_CLASS_DEVICE
+       select INTERVAL_TREE
        help
          Choose this option if you have an ATI Radeon graphics card.  There
          are both PCI and AGP versions.  You don't need to choose this to
index 31dda41394d8b38dafb7b3728130e97961d14926..56fc7d2da1494d939dbba66a156cc6e4da19dc46 100644 (file)
@@ -64,6 +64,7 @@
 #include <linux/wait.h>
 #include <linux/list.h>
 #include <linux/kref.h>
+#include <linux/interval_tree.h>
 
 #include <ttm/ttm_bo_api.h>
 #include <ttm/ttm_bo_driver.h>
@@ -447,14 +448,12 @@ struct radeon_mman {
 struct radeon_bo_va {
        /* protected by bo being reserved */
        struct list_head                bo_list;
-       uint64_t                        soffset;
-       uint64_t                        eoffset;
        uint32_t                        flags;
        uint64_t                        addr;
        unsigned                        ref_count;
 
        /* protected by vm mutex */
-       struct list_head                vm_list;
+       struct interval_tree_node       it;
        struct list_head                vm_status;
 
        /* constant after initialization */
@@ -877,7 +876,7 @@ struct radeon_vm_pt {
 };
 
 struct radeon_vm {
-       struct list_head                va;
+       struct rb_root                  va;
        unsigned                        id;
 
        /* BOs moved, but not yet updated in the PT */
index 99e4e0cd72a604e7c6a0243c41f6b31fe751cba5..bfd7e1b0ff3f88b19a6e2d4ec636c3c8dd9c95a4 100644 (file)
@@ -496,9 +496,9 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
 
        switch (args->operation) {
        case RADEON_VA_MAP:
-               if (bo_va->soffset) {
+               if (bo_va->it.start) {
                        args->operation = RADEON_VA_RESULT_VA_EXIST;
-                       args->offset = bo_va->soffset;
+                       args->offset = bo_va->it.start * RADEON_GPU_PAGE_SIZE;
                        goto out;
                }
                r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags);
index cd781f34bd8df02d07147d430d0f2eea34a5c4e5..9db74a96ef617d7d66292023d8250ca3821e236a 100644 (file)
@@ -72,8 +72,8 @@ TRACE_EVENT(radeon_vm_bo_update,
                             ),
 
            TP_fast_assign(
-                          __entry->soffset = bo_va->soffset;
-                          __entry->eoffset = bo_va->eoffset;
+                          __entry->soffset = bo_va->it.start;
+                          __entry->eoffset = bo_va->it.last + 1;
                           __entry->flags = bo_va->flags;
                           ),
            TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x",
index 906c8ae867acc288dc0ef9d2b3df92c361116c57..39bc5c2b02d1ff9bf1f26264023bfd812a329424 100644 (file)
@@ -326,17 +326,15 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
        }
        bo_va->vm = vm;
        bo_va->bo = bo;
-       bo_va->soffset = 0;
-       bo_va->eoffset = 0;
+       bo_va->it.start = 0;
+       bo_va->it.last = 0;
        bo_va->flags = 0;
        bo_va->addr = 0;
        bo_va->ref_count = 1;
        INIT_LIST_HEAD(&bo_va->bo_list);
-       INIT_LIST_HEAD(&bo_va->vm_list);
        INIT_LIST_HEAD(&bo_va->vm_status);
 
        mutex_lock(&vm->mutex);
-       list_add(&bo_va->vm_list, &vm->va);
        list_add_tail(&bo_va->bo_list, &bo->va);
        mutex_unlock(&vm->mutex);
 
@@ -420,11 +418,9 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
                          uint32_t flags)
 {
        uint64_t size = radeon_bo_size(bo_va->bo);
-       uint64_t eoffset, last_offset = 0;
        struct radeon_vm *vm = bo_va->vm;
-       struct radeon_bo_va *tmp;
-       struct list_head *head;
        unsigned last_pfn, pt_idx;
+       uint64_t eoffset;
        int r;
 
        if (soffset) {
@@ -446,51 +442,48 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
        }
 
        mutex_lock(&vm->mutex);
-       head = &vm->va;
-       last_offset = 0;
-       list_for_each_entry(tmp, &vm->va, vm_list) {
-               if (bo_va == tmp) {
-                       /* skip over currently modified bo */
-                       continue;
+       if (bo_va->it.start || bo_va->it.last) {
+               if (bo_va->addr) {
+                       /* add a clone of the bo_va to clear the old address */
+                       struct radeon_bo_va *tmp;
+                       tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
+                       tmp->it.start = bo_va->it.start;
+                       tmp->it.last = bo_va->it.last;
+                       tmp->vm = vm;
+                       tmp->addr = bo_va->addr;
+                       list_add(&tmp->vm_status, &vm->freed);
                }
 
-               if (soffset >= last_offset && eoffset <= tmp->soffset) {
-                       /* bo can be added before this one */
-                       break;
-               }
-               if (eoffset > tmp->soffset && soffset < tmp->eoffset) {
-                       /* bo and tmp overlap, invalid offset */
-                       dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
-                               bo_va->bo, (unsigned)bo_va->soffset, tmp->bo,
-                               (unsigned)tmp->soffset, (unsigned)tmp->eoffset);
-                       mutex_unlock(&vm->mutex);
-                       return -EINVAL;
-               }
-               last_offset = tmp->eoffset;
-               head = &tmp->vm_list;
+               interval_tree_remove(&bo_va->it, &vm->va);
+               bo_va->it.start = 0;
+               bo_va->it.last = 0;
        }
 
-       if (bo_va->soffset) {
-               /* add a clone of the bo_va to clear the old address */
-               tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
-               if (!tmp) {
+       soffset /= RADEON_GPU_PAGE_SIZE;
+       eoffset /= RADEON_GPU_PAGE_SIZE;
+       if (soffset || eoffset) {
+               struct interval_tree_node *it;
+               it = interval_tree_iter_first(&vm->va, soffset, eoffset - 1);
+               if (it) {
+                       struct radeon_bo_va *tmp;
+                       tmp = container_of(it, struct radeon_bo_va, it);
+                       /* bo and tmp overlap, invalid offset */
+                       dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with "
+                               "(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo,
+                               soffset, tmp->bo, tmp->it.start, tmp->it.last);
                        mutex_unlock(&vm->mutex);
-                       return -ENOMEM;
+                       return -EINVAL;
                }
-               tmp->soffset = bo_va->soffset;
-               tmp->eoffset = bo_va->eoffset;
-               tmp->vm = vm;
-               list_add(&tmp->vm_status, &vm->freed);
+               bo_va->it.start = soffset;
+               bo_va->it.last = eoffset - 1;
+               interval_tree_insert(&bo_va->it, &vm->va);
        }
 
-       bo_va->soffset = soffset;
-       bo_va->eoffset = eoffset;
        bo_va->flags = flags;
        bo_va->addr = 0;
-       list_move(&bo_va->vm_list, head);
 
-       soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size;
-       eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size;
+       soffset >>= radeon_vm_block_size;
+       eoffset >>= radeon_vm_block_size;
 
        BUG_ON(eoffset >= radeon_vm_num_pdes(rdev));
 
@@ -778,9 +771,6 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
        unsigned count = 0;
        uint64_t addr;
 
-       start = start / RADEON_GPU_PAGE_SIZE;
-       end = end / RADEON_GPU_PAGE_SIZE;
-
        /* walk over the address space and update the page tables */
        for (addr = start; addr < end; ) {
                uint64_t pt_idx = addr >> radeon_vm_block_size;
@@ -847,7 +837,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
        uint64_t addr;
        int r;
 
-       if (!bo_va->soffset) {
+       if (!bo_va->it.start) {
                dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n",
                        bo_va->bo, vm);
                return -EINVAL;
@@ -881,7 +871,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 
        trace_radeon_vm_bo_update(bo_va);
 
-       nptes = (bo_va->eoffset - bo_va->soffset) / RADEON_GPU_PAGE_SIZE;
+       nptes = bo_va->it.last - bo_va->it.start + 1;
 
        /* padding, etc. */
        ndw = 64;
@@ -906,8 +896,9 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
                return r;
        ib.length_dw = 0;
 
-       radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
-                             addr, radeon_vm_page_flags(bo_va->flags));
+       radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
+                             bo_va->it.last + 1, addr,
+                             radeon_vm_page_flags(bo_va->flags));
 
        radeon_semaphore_sync_to(ib.semaphore, vm->fence);
        r = radeon_ib_schedule(rdev, &ib, NULL);
@@ -993,7 +984,7 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev,
        list_del(&bo_va->bo_list);
 
        mutex_lock(&vm->mutex);
-       list_del(&bo_va->vm_list);
+       interval_tree_remove(&bo_va->it, &vm->va);
        list_del(&bo_va->vm_status);
 
        if (bo_va->addr) {
@@ -1051,7 +1042,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
        vm->last_flush = NULL;
        vm->last_id_use = NULL;
        mutex_init(&vm->mutex);
-       INIT_LIST_HEAD(&vm->va);
+       vm->va = RB_ROOT;
        INIT_LIST_HEAD(&vm->invalidated);
        INIT_LIST_HEAD(&vm->freed);
 
@@ -1096,11 +1087,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
        struct radeon_bo_va *bo_va, *tmp;
        int i, r;
 
-       if (!list_empty(&vm->va)) {
+       if (!RB_EMPTY_ROOT(&vm->va)) {
                dev_err(rdev->dev, "still active bo inside vm\n");
        }
-       list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) {
-               list_del_init(&bo_va->vm_list);
+       rbtree_postorder_for_each_entry_safe(bo_va, tmp, &vm->va, it.rb) {
+               interval_tree_remove(&bo_va->it, &vm->va);
                r = radeon_bo_reserve(bo_va->bo, false);
                if (!r) {
                        list_del_init(&bo_va->bo_list);