drm/amdgpu: use SDMA round robin for VM updates v3
author Christian König <christian.koenig@amd.com>
Mon, 8 Feb 2016 16:37:38 +0000 (17:37 +0100)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 12 Feb 2016 20:38:16 +0000 (15:38 -0500)
Distribute the load on both rings.

v2: use a loop for the initialization
v3: agd: rebase on upstream

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/cik_sdma.c
drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c

index 76aa7898eeb20ab3a31031d60823765fc80f4c4c..55fcc929b476af24d97863c0ad2fc773482c572c 100644 (file)
@@ -942,7 +942,9 @@ struct amdgpu_vm_manager {
        bool                                    enabled;
        /* vm pte handling */
        const struct amdgpu_vm_pte_funcs        *vm_pte_funcs;
-       struct amdgpu_ring                      *vm_pte_funcs_ring;
+       struct amdgpu_ring                      *vm_pte_rings[AMDGPU_MAX_RINGS];
+       unsigned                                vm_pte_num_rings;
+       atomic_t                                vm_pte_next_ring;
 };
 
 void amdgpu_vm_manager_init(struct amdgpu_device *adev);
index f0fb938457d9f1b778e783b731c26f96e7f757b7..6bdb891b9ddc2e1433c42850794ff9245ad95c03 100644 (file)
@@ -1403,7 +1403,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        adev->mman.buffer_funcs = NULL;
        adev->mman.buffer_funcs_ring = NULL;
        adev->vm_manager.vm_pte_funcs = NULL;
-       adev->vm_manager.vm_pte_funcs_ring = NULL;
+       adev->vm_manager.vm_pte_num_rings = 0;
        adev->gart.gart_funcs = NULL;
        adev->fence_context = fence_context_alloc(AMDGPU_MAX_RINGS);
 
index 5e38b344d56b566313e6a827e5b9a17d7c88026f..264c5968a1d399bd3cd8dbb6aee4d954f6f4e5a8 100644 (file)
@@ -325,13 +325,15 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
                              struct amdgpu_vm *vm,
                              struct amdgpu_bo *bo)
 {
-       struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
+       struct amdgpu_ring *ring;
        struct fence *fence = NULL;
        struct amdgpu_job *job;
        unsigned entries;
        uint64_t addr;
        int r;
 
+       ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+
        r = reservation_object_reserve_shared(bo->tbo.resv);
        if (r)
                return r;
@@ -413,7 +415,7 @@ uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
 int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
                                    struct amdgpu_vm *vm)
 {
-       struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
+       struct amdgpu_ring *ring;
        struct amdgpu_bo *pd = vm->page_directory;
        uint64_t pd_addr = amdgpu_bo_gpu_offset(pd);
        uint32_t incr = AMDGPU_VM_PTE_COUNT * 8;
@@ -425,6 +427,8 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 
        int r;
 
+       ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+
        /* padding, etc. */
        ndw = 64;
 
@@ -670,7 +674,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
                                       uint32_t flags, uint64_t addr,
                                       struct fence **fence)
 {
-       struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
+       struct amdgpu_ring *ring;
        void *owner = AMDGPU_FENCE_OWNER_VM;
        unsigned nptes, ncmds, ndw;
        struct amdgpu_job *job;
@@ -678,6 +682,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
        struct fence *f = NULL;
        int r;
 
+       ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+
        /* sync to everything on unmapping */
        if (!(flags & AMDGPU_PTE_VALID))
                owner = AMDGPU_FENCE_OWNER_UNDEFINED;
@@ -1269,10 +1275,11 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
  */
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
-       struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
        const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
                AMDGPU_VM_PTE_COUNT * 8);
        unsigned pd_size, pd_entries;
+       unsigned ring_instance;
+       struct amdgpu_ring *ring;
        struct amd_sched_rq *rq;
        int i, r;
 
@@ -1298,6 +1305,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        }
 
        /* create scheduler entity for page table updates */
+
+       ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
+       ring_instance %= adev->vm_manager.vm_pte_num_rings;
+       ring = adev->vm_manager.vm_pte_rings[ring_instance];
        rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
        r = amd_sched_entity_init(&ring->sched, &vm->entity,
                                  rq, amdgpu_sched_jobs);
@@ -1345,11 +1356,10 @@ error_free_sched_entity:
  */
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
-       struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
        struct amdgpu_bo_va_mapping *mapping, *tmp;
        int i;
 
-       amd_sched_entity_fini(&ring->sched, &vm->entity);
+       amd_sched_entity_fini(vm->entity.sched, &vm->entity);
 
        if (!RB_EMPTY_ROOT(&vm->va)) {
                dev_err(adev->dev, "still active bo inside vm\n");
@@ -1397,6 +1407,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
        for (i = 1; i < adev->vm_manager.num_ids; ++i)
                list_add_tail(&adev->vm_manager.ids[i].list,
                              &adev->vm_manager.ids_lru);
+
+       atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
 }
 
 /**
index 5e8566a224259eadf26fa7a9e6da3b3345a12b91..7ba34522c833ac4ab2c16747ab55b1d707b69285 100644 (file)
@@ -1371,8 +1371,14 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
 
 static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev)
 {
+       unsigned i;
+
        if (adev->vm_manager.vm_pte_funcs == NULL) {
                adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
-               adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring;
+               for (i = 0; i < adev->sdma.num_instances; i++)
+                       adev->vm_manager.vm_pte_rings[i] =
+                               &adev->sdma.instance[i].ring;
+
+               adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
        }
 }
index 4ae1058c96c6f34a18dd86c0e966bfb58c714d48..c895af79d4bbec44d763916e9912a0a867028262 100644 (file)
@@ -1376,8 +1376,14 @@ static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
 
 static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
 {
+       unsigned i;
+
        if (adev->vm_manager.vm_pte_funcs == NULL) {
                adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
-               adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring;
+               for (i = 0; i < adev->sdma.num_instances; i++)
+                       adev->vm_manager.vm_pte_rings[i] =
+                               &adev->sdma.instance[i].ring;
+
+               adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
        }
 }
index c45f7926091dad97670d51f553acb8a954b072e9..dcb3efdd1bd9047a1ba78d0844f0052b725902f4 100644 (file)
@@ -1643,8 +1643,14 @@ static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
 
 static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev)
 {
+       unsigned i;
+
        if (adev->vm_manager.vm_pte_funcs == NULL) {
                adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
-               adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring;
+               for (i = 0; i < adev->sdma.num_instances; i++)
+                       adev->vm_manager.vm_pte_rings[i] =
+                               &adev->sdma.instance[i].ring;
+
+               adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
        }
 }