drm/radeon: use one VMID for each ring
author	Christian König <christian.koenig@amd.com>
Wed, 19 Nov 2014 13:01:25 +0000 (14:01 +0100)
committer	Alex Deucher <alexander.deucher@amd.com>
Thu, 20 Nov 2014 18:00:18 +0000 (13:00 -0500)
Use multiple VMIDs for each VM, one for each ring. That allows
us to execute flushes separately on each ring; still not ideal,
because in a lot of cases rings can share IDs.

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/radeon/cik.c
drivers/gpu/drm/radeon/cik_sdma.c
drivers/gpu/drm/radeon/ni.c
drivers/gpu/drm/radeon/ni_dma.c
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_vm.c
drivers/gpu/drm/radeon/si.c

index 755923bc67867e5e03c25e281c2d8c616ae6a51c..3deeed33322faa8eae3efd7ed012593a7b7e5e50 100644 (file)
@@ -4066,6 +4066,7 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
        struct radeon_ring *ring = &rdev->ring[ib->ring];
+       unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
        u32 header, control = INDIRECT_BUFFER_VALID;
 
        if (ib->is_const_ib) {
@@ -4094,8 +4095,7 @@ void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
        }
 
-       control |= ib->length_dw |
-               (ib->vm ? (ib->vm->id << 24) : 0);
+       control |= ib->length_dw | (vm_id << 24);
 
        radeon_ring_write(ring, header);
        radeon_ring_write(ring,
index 604e2e77095112917c645fa35de79fccb5aac866..54b98379188d090e918a1423cef7d738b5031849 100644 (file)
@@ -134,7 +134,7 @@ void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
                              struct radeon_ib *ib)
 {
        struct radeon_ring *ring = &rdev->ring[ib->ring];
-       u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
+       u32 extra_bits = (ib->vm ? ib->vm->ids[ib->ring].id : 0) & 0xf;
 
        if (rdev->wb.enabled) {
                u32 next_rptr = ring->wptr + 5;
index bee432d3dd3041f8c3bd86d4f4a89c90487a1188..360de9f1f4914079d3de3ad0022a50862b934395 100644 (file)
@@ -1373,6 +1373,7 @@ void cayman_fence_ring_emit(struct radeon_device *rdev,
 void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
        struct radeon_ring *ring = &rdev->ring[ib->ring];
+       unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
        u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA |
                PACKET3_SH_ACTION_ENA;
 
@@ -1395,15 +1396,14 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 #endif
                          (ib->gpu_addr & 0xFFFFFFFC));
        radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
-       radeon_ring_write(ring, ib->length_dw | 
-                         (ib->vm ? (ib->vm->id << 24) : 0));
+       radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
 
        /* flush read cache over gart for this vmid */
        radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
        radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl);
        radeon_ring_write(ring, 0xFFFFFFFF);
        radeon_ring_write(ring, 0);
-       radeon_ring_write(ring, ((ib->vm ? ib->vm->id : 0) << 24) | 10); /* poll interval */
+       radeon_ring_write(ring, (vm_id << 24) | 10); /* poll interval */
 }
 
 static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
index 5a72404c9d5e3f1a71b100fb740d0025cbf780ed..50f88611ff60c832dc59e767543f91f8baf859eb 100644 (file)
@@ -123,6 +123,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
                                struct radeon_ib *ib)
 {
        struct radeon_ring *ring = &rdev->ring[ib->ring];
+       unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
 
        if (rdev->wb.enabled) {
                u32 next_rptr = ring->wptr + 4;
@@ -140,7 +141,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
         */
        while ((ring->wptr & 7) != 5)
                radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
-       radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
+       radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
        radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
        radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
 
index 61b2eeabf7a4c162f68278ea25e4fe8cacb04fe2..79f5f5bf4c0c2e43bf45212f38224d16f8a02589 100644 (file)
@@ -905,33 +905,39 @@ struct radeon_vm_pt {
        uint64_t                        addr;
 };
 
+struct radeon_vm_id {
+       unsigned                id;
+       uint64_t                pd_gpu_addr;
+       /* last flushed PD/PT update */
+       struct radeon_fence     *flushed_updates;
+       /* last use of vmid */
+       struct radeon_fence     *last_id_use;
+};
+
 struct radeon_vm {
-       struct rb_root                  va;
-       unsigned                        id;
+       struct rb_root          va;
 
        /* BOs moved, but not yet updated in the PT */
-       struct list_head                invalidated;
+       struct list_head        invalidated;
 
        /* BOs freed, but not yet updated in the PT */
-       struct list_head                freed;
+       struct list_head        freed;
 
        /* contains the page directory */
-       struct radeon_bo                *page_directory;
-       uint64_t                        pd_gpu_addr;
-       unsigned                        max_pde_used;
+       struct radeon_bo        *page_directory;
+       unsigned                max_pde_used;
 
        /* array of page tables, one for each page directory entry */
-       struct radeon_vm_pt             *page_tables;
+       struct radeon_vm_pt     *page_tables;
 
-       struct radeon_bo_va             *ib_bo_va;
+       struct radeon_bo_va     *ib_bo_va;
 
-       struct mutex                    mutex;
+       struct mutex            mutex;
        /* last fence for cs using this vm */
-       struct radeon_fence             *fence;
-       /* last flushed PD/PT update */
-       struct radeon_fence             *flushed_updates;
-       /* last use of vmid */
-       struct radeon_fence             *last_id_use;
+       struct radeon_fence     *fence;
+
+       /* for id and flush management per ring */
+       struct radeon_vm_id     ids[RADEON_NUM_RINGS];
 };
 
 struct radeon_vm_manager {
index 6ff5741ea403b68a93fa50c2ec9aaf8326959d43..e38efe4962f3411e4e98f4659b6aeb34d4457ed7 100644 (file)
@@ -182,15 +182,18 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
                                       struct radeon_vm *vm, int ring)
 {
        struct radeon_fence *best[RADEON_NUM_RINGS] = {};
+       struct radeon_vm_id *vm_id = &vm->ids[ring];
+
        unsigned choices[2] = {};
        unsigned i;
 
        /* check if the id is still valid */
-       if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id])
+       if (vm_id->id && vm_id->last_id_use &&
+           vm_id->last_id_use == rdev->vm_manager.active[vm_id->id])
                return NULL;
 
        /* we definately need to flush */
-       vm->pd_gpu_addr = ~0ll;
+       vm_id->pd_gpu_addr = ~0ll;
 
        /* skip over VMID 0, since it is the system VM */
        for (i = 1; i < rdev->vm_manager.nvm; ++i) {
@@ -198,8 +201,8 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
 
                if (fence == NULL) {
                        /* found a free one */
-                       vm->id = i;
-                       trace_radeon_vm_grab_id(vm->id, ring);
+                       vm_id->id = i;
+                       trace_radeon_vm_grab_id(i, ring);
                        return NULL;
                }
 
@@ -211,8 +214,8 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
 
        for (i = 0; i < 2; ++i) {
                if (choices[i]) {
-                       vm->id = choices[i];
-                       trace_radeon_vm_grab_id(vm->id, ring);
+                       vm_id->id = choices[i];
+                       trace_radeon_vm_grab_id(choices[i], ring);
                        return rdev->vm_manager.active[choices[i]];
                }
        }
@@ -239,16 +242,18 @@ void radeon_vm_flush(struct radeon_device *rdev,
                     int ring, struct radeon_fence *updates)
 {
        uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
+       struct radeon_vm_id *vm_id = &vm->ids[ring];
 
-       if (pd_addr != vm->pd_gpu_addr || !vm->flushed_updates ||
-           radeon_fence_is_earlier(vm->flushed_updates, updates)) {
+       if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates ||
+           radeon_fence_is_earlier(vm_id->flushed_updates, updates)) {
 
-               trace_radeon_vm_flush(pd_addr, ring, vm->id);
-               radeon_fence_unref(&vm->flushed_updates);
-               vm->flushed_updates = radeon_fence_ref(updates);
-               vm->pd_gpu_addr = pd_addr;
+               trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id);
+               radeon_fence_unref(&vm_id->flushed_updates);
+               vm_id->flushed_updates = radeon_fence_ref(updates);
+               vm_id->pd_gpu_addr = pd_addr;
                radeon_ring_vm_flush(rdev, &rdev->ring[ring],
-                                    vm->id, vm->pd_gpu_addr);
+                                    vm_id->id, vm_id->pd_gpu_addr);
+
        }
 }
 
@@ -268,14 +273,16 @@ void radeon_vm_fence(struct radeon_device *rdev,
                     struct radeon_vm *vm,
                     struct radeon_fence *fence)
 {
+       unsigned vm_id = vm->ids[fence->ring].id;
+
        radeon_fence_unref(&vm->fence);
        vm->fence = radeon_fence_ref(fence);
 
-       radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
-       rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);
+       radeon_fence_unref(&rdev->vm_manager.active[vm_id]);
+       rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence);
 
-       radeon_fence_unref(&vm->last_id_use);
-       vm->last_id_use = radeon_fence_ref(fence);
+       radeon_fence_unref(&vm->ids[fence->ring].last_id_use);
+       vm->ids[fence->ring].last_id_use = radeon_fence_ref(fence);
 }
 
 /**
@@ -1120,13 +1127,16 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
        const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE,
                RADEON_VM_PTE_COUNT * 8);
        unsigned pd_size, pd_entries, pts_size;
-       int r;
+       int i, r;
 
-       vm->id = 0;
        vm->ib_bo_va = NULL;
        vm->fence = NULL;
-       vm->flushed_updates = NULL;
-       vm->last_id_use = NULL;
+
+       for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+               vm->ids[i].id = 0;
+               vm->ids[i].flushed_updates = NULL;
+               vm->ids[i].last_id_use = NULL;
+       }
        mutex_init(&vm->mutex);
        vm->va = RB_ROOT;
        INIT_LIST_HEAD(&vm->invalidated);
@@ -1197,8 +1207,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
        radeon_bo_unref(&vm->page_directory);
 
        radeon_fence_unref(&vm->fence);
-       radeon_fence_unref(&vm->flushed_updates);
-       radeon_fence_unref(&vm->last_id_use);
+
+       for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+               radeon_fence_unref(&vm->ids[i].flushed_updates);
+               radeon_fence_unref(&vm->ids[i].last_id_use);
+       }
 
        mutex_destroy(&vm->mutex);
 }
index e91968b04154a30284325fd532d3578a37ae8740..14896ce76324f4b9a436b6835beea3a584d95fac 100644 (file)
@@ -3362,6 +3362,7 @@ void si_fence_ring_emit(struct radeon_device *rdev,
 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
        struct radeon_ring *ring = &rdev->ring[ib->ring];
+       unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
        u32 header;
 
        if (ib->is_const_ib) {
@@ -3397,14 +3398,13 @@ void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 #endif
                          (ib->gpu_addr & 0xFFFFFFFC));
        radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
-       radeon_ring_write(ring, ib->length_dw |
-                         (ib->vm ? (ib->vm->id << 24) : 0));
+       radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
 
        if (!ib->is_const_ib) {
                /* flush read cache over gart for this vmid */
                radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
                radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
-               radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
+               radeon_ring_write(ring, vm_id);
                radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
                radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
                                  PACKET3_TC_ACTION_ENA |