drm/radeon: use IBs for VM page table updates v2
author Alex Deucher <alexander.deucher@amd.com>
Fri, 1 Feb 2013 16:32:42 +0000 (17:32 +0100)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 1 Feb 2013 18:57:10 +0000 (13:57 -0500)
For very large page table updates, we can exceed the
size of the ring.  To avoid this, use an IB to perform
the page table update.

v2(ck): clean up the IB infrastructure and then use it instead
        of filling the struct ourselves.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
drivers/gpu/drm/radeon/ni.c
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_asic.h
drivers/gpu/drm/radeon/radeon_cs.c
drivers/gpu/drm/radeon/radeon_gart.c
drivers/gpu/drm/radeon/radeon_ring.c
drivers/gpu/drm/radeon/si.c

index 170bd03d4dd8d4c45fa2406ccd3a7471bd85253a..7cead763be9ec760ad05919ae11d4e5a6b504d85 100644 (file)
@@ -1946,19 +1946,21 @@ uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags)
  * cayman_vm_set_page - update the page tables using the CP
  *
  * @rdev: radeon_device pointer
+ * @ib: indirect buffer to fill with commands
  * @pe: addr of the page entry
  * @addr: dst addr to write into pe
  * @count: number of page entries to update
  * @incr: increase next addr by incr bytes
  * @flags: access flags
  *
- * Update the page tables using the CP (cayman-si).
+ * Update the page tables using the CP (cayman/TN).
  */
-void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
+void cayman_vm_set_page(struct radeon_device *rdev,
+                       struct radeon_ib *ib,
+                       uint64_t pe,
                        uint64_t addr, unsigned count,
                        uint32_t incr, uint32_t flags)
 {
-       struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
        uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
        uint64_t value;
        unsigned ndw;
@@ -1969,9 +1971,9 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
                        if (ndw > 0x3FFF)
                                ndw = 0x3FFF;
 
-                       radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw));
-                       radeon_ring_write(ring, pe);
-                       radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
+                       ib->ptr[ib->length_dw++] = PACKET3(PACKET3_ME_WRITE, ndw);
+                       ib->ptr[ib->length_dw++] = pe;
+                       ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                        for (; ndw > 1; ndw -= 2, --count, pe += 8) {
                                if (flags & RADEON_VM_PAGE_SYSTEM) {
                                        value = radeon_vm_map_gart(rdev, addr);
@@ -1983,8 +1985,8 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
                                }
                                addr += incr;
                                value |= r600_flags;
-                               radeon_ring_write(ring, value);
-                               radeon_ring_write(ring, upper_32_bits(value));
+                               ib->ptr[ib->length_dw++] = value;
+                               ib->ptr[ib->length_dw++] = upper_32_bits(value);
                        }
                }
        } else {
@@ -1994,9 +1996,9 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
                                ndw = 0xFFFFE;
 
                        /* for non-physically contiguous pages (system) */
-                       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw));
-                       radeon_ring_write(ring, pe);
-                       radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
+                       ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
+                       ib->ptr[ib->length_dw++] = pe;
+                       ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                        for (; ndw > 0; ndw -= 2, --count, pe += 8) {
                                if (flags & RADEON_VM_PAGE_SYSTEM) {
                                        value = radeon_vm_map_gart(rdev, addr);
@@ -2008,10 +2010,12 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
                                }
                                addr += incr;
                                value |= r600_flags;
-                               radeon_ring_write(ring, value);
-                               radeon_ring_write(ring, upper_32_bits(value));
+                               ib->ptr[ib->length_dw++] = value;
+                               ib->ptr[ib->length_dw++] = upper_32_bits(value);
                        }
                }
+               while (ib->length_dw & 0x7)
+                       ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
        }
 }
 
index 6539d6cb4bc7577f3d36c37a38a1838a451936b0..307d681b34434ff87ab38a194c8c6a10675f2c23 100644 (file)
@@ -780,6 +780,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
                  struct radeon_ib *ib, struct radeon_vm *vm,
                  unsigned size);
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib);
+void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence);
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
                       struct radeon_ib *const_ib);
 int radeon_ib_pool_init(struct radeon_device *rdev);
@@ -1188,7 +1189,9 @@ struct radeon_asic {
                void (*fini)(struct radeon_device *rdev);
 
                u32 pt_ring_index;
-               void (*set_page)(struct radeon_device *rdev, uint64_t pe,
+               void (*set_page)(struct radeon_device *rdev,
+                                struct radeon_ib *ib,
+                                uint64_t pe,
                                 uint64_t addr, unsigned count,
                                 uint32_t incr, uint32_t flags);
        } vm;
@@ -1810,7 +1813,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
 #define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart.set_page((rdev), (i), (p))
 #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev))
 #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev))
-#define radeon_asic_vm_set_page(rdev, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_page((rdev), (pe), (addr), (count), (incr), (flags)))
+#define radeon_asic_vm_set_page(rdev, ib, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_page((rdev), (ib), (pe), (addr), (count), (incr), (flags)))
 #define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)].ring_start((rdev), (cp))
 #define radeon_ring_test(rdev, r, cp) (rdev)->asic->ring[(r)].ring_test((rdev), (cp))
 #define radeon_ib_test(rdev, r, cp) (rdev)->asic->ring[(r)].ib_test((rdev), (cp))
index e429e2574cae463a5f0974e57f595014fc4f2f7d..f4134a823958e305a5701458ef8599e07f87402a 100644 (file)
@@ -474,7 +474,9 @@ int cayman_vm_init(struct radeon_device *rdev);
 void cayman_vm_fini(struct radeon_device *rdev);
 void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
 uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags);
-void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
+void cayman_vm_set_page(struct radeon_device *rdev,
+                       struct radeon_ib *ib,
+                       uint64_t pe,
                        uint64_t addr, unsigned count,
                        uint32_t incr, uint32_t flags);
 int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
@@ -506,7 +508,9 @@ int si_irq_set(struct radeon_device *rdev);
 int si_irq_process(struct radeon_device *rdev);
 int si_vm_init(struct radeon_device *rdev);
 void si_vm_fini(struct radeon_device *rdev);
-void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
+void si_vm_set_page(struct radeon_device *rdev,
+                   struct radeon_ib *ib,
+                   uint64_t pe,
                    uint64_t addr, unsigned count,
                    uint32_t incr, uint32_t flags);
 void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
index 1d214b66650ff5a3865b04074ee7e7ebd632f2a4..70d38241b083afeb7e8a2d32053b2cc0f33086a7 100644 (file)
@@ -125,18 +125,6 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
        return 0;
 }
 
-static void radeon_cs_sync_to(struct radeon_cs_parser *p,
-                             struct radeon_fence *fence)
-{
-       struct radeon_fence *other;
-
-       if (!fence)
-               return;
-
-       other = p->ib.sync_to[fence->ring];
-       p->ib.sync_to[fence->ring] = radeon_fence_later(fence, other);
-}
-
 static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
 {
        int i;
@@ -145,7 +133,7 @@ static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
                if (!p->relocs[i].robj)
                        continue;
 
-               radeon_cs_sync_to(p, p->relocs[i].robj->tbo.sync_obj);
+               radeon_ib_sync_to(&p->ib, p->relocs[i].robj->tbo.sync_obj);
        }
 }
 
@@ -472,8 +460,9 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
                goto out;
        }
        radeon_cs_sync_rings(parser);
-       radeon_cs_sync_to(parser, vm->fence);
-       radeon_cs_sync_to(parser, radeon_vm_grab_id(rdev, vm, parser->ring));
+       radeon_ib_sync_to(&parser->ib, vm->fence);
+       radeon_ib_sync_to(&parser->ib, radeon_vm_grab_id(
+               rdev, vm, parser->ring));
 
        if ((rdev->family >= CHIP_TAHITI) &&
            (parser->chunk_const_ib_idx != -1)) {
index 6e24f84755b526567e44dc78f8b4abbce28f7670..2c1341f63dc5afa19e94bc4aac51e5986b4b3cdf 100644 (file)
@@ -929,6 +929,7 @@ uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
  */
 static int radeon_vm_update_pdes(struct radeon_device *rdev,
                                 struct radeon_vm *vm,
+                                struct radeon_ib *ib,
                                 uint64_t start, uint64_t end)
 {
        static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
@@ -971,7 +972,7 @@ retry:
                    ((last_pt + incr * count) != pt)) {
 
                        if (count) {
-                               radeon_asic_vm_set_page(rdev, last_pde,
+                               radeon_asic_vm_set_page(rdev, ib, last_pde,
                                                        last_pt, count, incr,
                                                        RADEON_VM_PAGE_VALID);
                        }
@@ -985,7 +986,7 @@ retry:
        }
 
        if (count) {
-               radeon_asic_vm_set_page(rdev, last_pde, last_pt, count,
+               radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count,
                                        incr, RADEON_VM_PAGE_VALID);
 
        }
@@ -1009,6 +1010,7 @@ retry:
  */
 static void radeon_vm_update_ptes(struct radeon_device *rdev,
                                  struct radeon_vm *vm,
+                                 struct radeon_ib *ib,
                                  uint64_t start, uint64_t end,
                                  uint64_t dst, uint32_t flags)
 {
@@ -1038,7 +1040,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
                if ((last_pte + 8 * count) != pte) {
 
                        if (count) {
-                               radeon_asic_vm_set_page(rdev, last_pte,
+                               radeon_asic_vm_set_page(rdev, ib, last_pte,
                                                        last_dst, count,
                                                        RADEON_GPU_PAGE_SIZE,
                                                        flags);
@@ -1056,7 +1058,8 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
        }
 
        if (count) {
-               radeon_asic_vm_set_page(rdev, last_pte, last_dst, count,
+               radeon_asic_vm_set_page(rdev, ib, last_pte,
+                                       last_dst, count,
                                        RADEON_GPU_PAGE_SIZE, flags);
        }
 }
@@ -1080,8 +1083,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
                            struct ttm_mem_reg *mem)
 {
        unsigned ridx = rdev->asic->vm.pt_ring_index;
-       struct radeon_ring *ring = &rdev->ring[ridx];
-       struct radeon_semaphore *sem = NULL;
+       struct radeon_ib ib;
        struct radeon_bo_va *bo_va;
        unsigned nptes, npdes, ndw;
        uint64_t addr;
@@ -1124,25 +1126,13 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
                bo_va->valid = false;
        }
 
-       if (vm->fence && radeon_fence_signaled(vm->fence)) {
-               radeon_fence_unref(&vm->fence);
-       }
-
-       if (vm->fence && vm->fence->ring != ridx) {
-               r = radeon_semaphore_create(rdev, &sem);
-               if (r) {
-                       return r;
-               }
-       }
-
        nptes = radeon_bo_ngpu_pages(bo);
 
        /* assume two extra pdes in case the mapping overlaps the borders */
        npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2;
 
-       /* estimate number of dw needed */
-       /* semaphore, fence and padding */
-       ndw = 32;
+       /* padding, etc. */
+       ndw = 64;
 
        if (RADEON_VM_BLOCK_SIZE > 11)
                /* reserve space for one header for every 2k dwords */
@@ -1161,33 +1151,31 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
        /* reserve space for pde addresses */
        ndw += npdes * 2;
 
-       r = radeon_ring_lock(rdev, ring, ndw);
-       if (r) {
-               return r;
-       }
+       /* update too big for an IB */
+       if (ndw > 0xfffff)
+               return -ENOMEM;
 
-       if (sem && radeon_fence_need_sync(vm->fence, ridx)) {
-               radeon_semaphore_sync_rings(rdev, sem, vm->fence->ring, ridx);
-               radeon_fence_note_sync(vm->fence, ridx);
-       }
+       r = radeon_ib_get(rdev, ridx, &ib, NULL, ndw * 4);
+       ib.length_dw = 0;
 
-       r = radeon_vm_update_pdes(rdev, vm, bo_va->soffset, bo_va->eoffset);
+       r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset);
        if (r) {
-               radeon_ring_unlock_undo(rdev, ring);
+               radeon_ib_free(rdev, &ib);
                return r;
        }
 
-       radeon_vm_update_ptes(rdev, vm, bo_va->soffset, bo_va->eoffset,
+       radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
                              addr, bo_va->flags);
 
-       radeon_fence_unref(&vm->fence);
-       r = radeon_fence_emit(rdev, &vm->fence, ridx);
+       radeon_ib_sync_to(&ib, vm->fence);
+       r = radeon_ib_schedule(rdev, &ib, NULL);
        if (r) {
-               radeon_ring_unlock_undo(rdev, ring);
+               radeon_ib_free(rdev, &ib);
                return r;
        }
-       radeon_ring_unlock_commit(rdev, ring);
-       radeon_semaphore_free(rdev, &sem, vm->fence);
+       radeon_fence_unref(&vm->fence);
+       vm->fence = radeon_fence_ref(ib.fence);
+       radeon_ib_free(rdev, &ib);
        radeon_fence_unref(&vm->last_flush);
 
        return 0;
index cd72062d5a9134bd596c8a6388b78fd45bb02425..8d58e268ff6d5afdfe93f6df0656fb60aef02acf 100644 (file)
@@ -108,6 +108,25 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
        radeon_fence_unref(&ib->fence);
 }
 
+/**
+ * radeon_ib_sync_to - sync to fence before executing the IB
+ *
+ * @ib: IB object to add fence to
+ * @fence: fence to sync to
+ *
+ * Sync to the fence before executing the IB
+ */
+void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence)
+{
+       struct radeon_fence *other;
+
+       if (!fence)
+               return;
+
+       other = ib->sync_to[fence->ring];
+       ib->sync_to[fence->ring] = radeon_fence_later(fence, other);
+}
+
 /**
  * radeon_ib_schedule - schedule an IB (Indirect Buffer) on the ring
  *
index cd83bc5bd235b38dfcde5777423e740002d43342..a910cb92cfd00680830e7c4ab6d2c420947c8609 100644 (file)
@@ -3043,19 +3043,21 @@ void si_vm_fini(struct radeon_device *rdev)
  * si_vm_set_page - update the page tables using the CP
  *
  * @rdev: radeon_device pointer
+ * @ib: indirect buffer to fill with commands
  * @pe: addr of the page entry
  * @addr: dst addr to write into pe
  * @count: number of page entries to update
  * @incr: increase next addr by incr bytes
  * @flags: access flags
  *
- * Update the page tables using the CP (cayman-si).
+ * Update the page tables using the CP (SI).
  */
-void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
+void si_vm_set_page(struct radeon_device *rdev,
+                   struct radeon_ib *ib,
+                   uint64_t pe,
                    uint64_t addr, unsigned count,
                    uint32_t incr, uint32_t flags)
 {
-       struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
        uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
        uint64_t value;
        unsigned ndw;
@@ -3066,11 +3068,11 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
                        if (ndw > 0x3FFE)
                                ndw = 0x3FFE;
 
-                       radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw));
-                       radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-                                                WRITE_DATA_DST_SEL(1)));
-                       radeon_ring_write(ring, pe);
-                       radeon_ring_write(ring, upper_32_bits(pe));
+                       ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
+                       ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
+                                       WRITE_DATA_DST_SEL(1));
+                       ib->ptr[ib->length_dw++] = pe;
+                       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
                        for (; ndw > 2; ndw -= 2, --count, pe += 8) {
                                if (flags & RADEON_VM_PAGE_SYSTEM) {
                                        value = radeon_vm_map_gart(rdev, addr);
@@ -3082,8 +3084,8 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
                                }
                                addr += incr;
                                value |= r600_flags;
-                               radeon_ring_write(ring, value);
-                               radeon_ring_write(ring, upper_32_bits(value));
+                               ib->ptr[ib->length_dw++] = value;
+                               ib->ptr[ib->length_dw++] = upper_32_bits(value);
                        }
                }
        } else {
@@ -3095,9 +3097,9 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
                                        ndw = 0xFFFFE;
 
                                /* for non-physically contiguous pages (system) */
-                               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw));
-                               radeon_ring_write(ring, pe);
-                               radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
+                               ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
+                               ib->ptr[ib->length_dw++] = pe;
+                               ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                                for (; ndw > 0; ndw -= 2, --count, pe += 8) {
                                        if (flags & RADEON_VM_PAGE_SYSTEM) {
                                                value = radeon_vm_map_gart(rdev, addr);
@@ -3109,8 +3111,8 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
                                        }
                                        addr += incr;
                                        value |= r600_flags;
-                                       radeon_ring_write(ring, value);
-                                       radeon_ring_write(ring, upper_32_bits(value));
+                                       ib->ptr[ib->length_dw++] = value;
+                                       ib->ptr[ib->length_dw++] = upper_32_bits(value);
                                }
                        }
                } else {
@@ -3124,20 +3126,22 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
                                else
                                        value = 0;
                                /* for physically contiguous pages (vram) */
-                               radeon_ring_write(ring, DMA_PTE_PDE_PACKET(ndw));
-                               radeon_ring_write(ring, pe); /* dst addr */
-                               radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
-                               radeon_ring_write(ring, r600_flags); /* mask */
-                               radeon_ring_write(ring, 0);
-                               radeon_ring_write(ring, value); /* value */
-                               radeon_ring_write(ring, upper_32_bits(value));
-                               radeon_ring_write(ring, incr); /* increment size */
-                               radeon_ring_write(ring, 0);
+                               ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
+                               ib->ptr[ib->length_dw++] = pe; /* dst addr */
+                               ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+                               ib->ptr[ib->length_dw++] = r600_flags; /* mask */
+                               ib->ptr[ib->length_dw++] = 0;
+                               ib->ptr[ib->length_dw++] = value; /* value */
+                               ib->ptr[ib->length_dw++] = upper_32_bits(value);
+                               ib->ptr[ib->length_dw++] = incr; /* increment size */
+                               ib->ptr[ib->length_dw++] = 0;
                                pe += ndw * 4;
                                addr += (ndw / 2) * incr;
                                count -= ndw / 2;
                        }
                }
+               while (ib->length_dw & 0x7)
+                       ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
        }
 }