drm/amdgpu: fix VM faults caused by vm_grab_id() v4
author     Christian König <christian.koenig@amd.com>
           Fri, 26 Feb 2016 15:18:26 +0000 (16:18 +0100)
committer  Alex Deucher <alexander.deucher@amd.com>
           Mon, 29 Feb 2016 16:33:46 +0000 (11:33 -0500)
The owner must be tracked per ring as long as
we don't support sharing VMIDs per process.
Also move the assigned VMID and page directory
address into the IB structure.

v3: assign the VMID to all IBs, not just the first one.
v4: use correct pointer for owner

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/cik_sdma.c
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
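
A minimal standalone sketch of why the owner token must be per ring (plain
C with hypothetical stand-in types, not the kernel structures; in the patch
the token is the address stored in the VM manager's owner field):

#include <stdio.h>

/* Model: one cached VM ID slot per ring, as in struct amdgpu_vm. */
struct vm_id_model { int unused; };
struct vm_model { struct vm_id_model ids[2]; };

int main(void)
{
	struct vm_model vm;

	/* Ring 0 grabs a hardware VMID and records an owner token. */
	long owner_per_vm   = (long)&vm;        /* before this patch */
	long owner_per_ring = (long)&vm.ids[0]; /* after this patch  */

	/*
	 * Ring 1 of the same process then asks whether it may reuse
	 * that VMID without flushing.  The per-VM token matches even
	 * though ring 1 never flushed for it, which can leave stale
	 * translations live and fault; the per-ring token does not
	 * match, so ring 1 grabs an ID and flushes properly.
	 */
	printf("per-VM token matches for ring 1:   %d\n",
	       owner_per_vm == (long)&vm);
	printf("per-ring token matches for ring 1: %d\n",
	       owner_per_ring == (long)&vm.ids[1]);
	return 0;
}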

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f5bac97a438b09123cd1446ce118ed45f979d6ff..0c42a85ca5a51aef9d30f18da5460047dcf2d9b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -769,8 +769,9 @@ struct amdgpu_ib {
        uint32_t                        *ptr;
        struct amdgpu_fence             *fence;
        struct amdgpu_user_fence        *user;
-       bool                            grabbed_vmid;
        struct amdgpu_vm                *vm;
+       unsigned                        vm_id;
+       uint64_t                        vm_pd_addr;
        struct amdgpu_ctx               *ctx;
        uint32_t                        gds_base, gds_size;
        uint32_t                        gws_base, gws_size;
@@ -877,10 +878,10 @@ struct amdgpu_vm_pt {
 };
 
 struct amdgpu_vm_id {
-       unsigned                id;
-       uint64_t                pd_gpu_addr;
+       struct amdgpu_vm_manager_id     *mgr_id;
+       uint64_t                        pd_gpu_addr;
        /* last flushed PD/PT update */
-       struct fence            *flushed_updates;
+       struct fence                    *flushed_updates;
 };
 
 struct amdgpu_vm {
@@ -954,10 +955,11 @@ void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates);
 void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
                                  struct amdgpu_vm *vm);
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
-                     struct amdgpu_sync *sync, struct fence *fence);
+                     struct amdgpu_sync *sync, struct fence *fence,
+                     unsigned *vm_id, uint64_t *vm_pd_addr);
 void amdgpu_vm_flush(struct amdgpu_ring *ring,
-                    struct amdgpu_vm *vm,
-                    struct fence *updates);
+                    unsigned vmid,
+                    uint64_t pd_addr);
 uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
 int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
                                    struct amdgpu_vm *vm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index b5bdd5d59b584ea2951f6eb4e5f7a916c7ac6aed..db14a7bbb8f41a2a1744a02bd719d2635c6867f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -75,6 +75,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        }
 
        ib->vm = vm;
+       ib->vm_id = 0;
 
        return 0;
 }
@@ -139,7 +140,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
                return -EINVAL;
        }
 
-       if (vm && !ibs->grabbed_vmid) {
+       if (vm && !ibs->vm_id) {
                dev_err(adev->dev, "VM IB without ID\n");
                return -EINVAL;
        }
@@ -152,10 +153,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 
        if (vm) {
                /* do context switch */
-               amdgpu_vm_flush(ring, vm, last_vm_update);
+               amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr);
 
                if (ring->funcs->emit_gds_switch)
-                       amdgpu_ring_emit_gds_switch(ring, ib->vm->ids[ring->idx].id,
+                       amdgpu_ring_emit_gds_switch(ring, ib->vm_id,
                                                    ib->gds_base, ib->gds_size,
                                                    ib->gws_base, ib->gws_size,
                                                    ib->oa_base, ib->oa_size);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index f29bbb96a88187cdc8e8a82ec611645721c2cb44..90e52f7e17a0f39a033827e102a8f964e14ab445 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -105,16 +105,23 @@ static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
 
        struct fence *fence = amdgpu_sync_get_fence(&job->sync);
 
-       if (fence == NULL && vm && !job->ibs->grabbed_vmid) {
+       if (fence == NULL && vm && !job->ibs->vm_id) {
                struct amdgpu_ring *ring = job->ring;
+               unsigned i, vm_id;
+               uint64_t vm_pd_addr;
                int r;
 
                r = amdgpu_vm_grab_id(vm, ring, &job->sync,
-                                     &job->base.s_fence->base);
+                                     &job->base.s_fence->base,
+                                     &vm_id, &vm_pd_addr);
                if (r)
                        DRM_ERROR("Error getting VM ID (%d)\n", r);
-               else
-                       job->ibs->grabbed_vmid = true;
+               else {
+                       for (i = 0; i < job->num_ibs; ++i) {
+                               job->ibs[i].vm_id = vm_id;
+                               job->ibs[i].vm_pd_addr = vm_pd_addr;
+                       }
+               }
 
                fence = amdgpu_sync_get_fence(&job->sync);
        }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 264c5968a1d399bd3cd8dbb6aee4d954f6f4e5a8..ba909245fef5027d052365fb8356669262a543e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -50,6 +50,9 @@
  * SI supports 16.
  */
 
+/* Special value that no flush is necessary */
+#define AMDGPU_VM_NO_FLUSH (~0ll)
+
 /**
  * amdgpu_vm_num_pde - return the number of page directory entries
  *
@@ -157,50 +160,69 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
  * Allocate an id for the vm, adding fences to the sync obj as necessary.
  */
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
-                     struct amdgpu_sync *sync, struct fence *fence)
+                     struct amdgpu_sync *sync, struct fence *fence,
+                     unsigned *vm_id, uint64_t *vm_pd_addr)
 {
-       struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
+       uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
        struct amdgpu_device *adev = ring->adev;
-       struct amdgpu_vm_manager_id *id;
+       struct amdgpu_vm_id *id = &vm->ids[ring->idx];
+       struct fence *updates = sync->last_vm_update;
        int r;
 
        mutex_lock(&adev->vm_manager.lock);
 
        /* check if the id is still valid */
-       if (vm_id->id) {
+       if (id->mgr_id) {
+               struct fence *flushed = id->flushed_updates;
+               bool is_later;
                long owner;
 
-               id = &adev->vm_manager.ids[vm_id->id];
-               owner = atomic_long_read(&id->owner);
-               if (owner == (long)vm) {
-                       list_move_tail(&id->list, &adev->vm_manager.ids_lru);
-                       trace_amdgpu_vm_grab_id(vm, vm_id->id, ring->idx);
+               if (!flushed)
+                       is_later = true;
+               else if (!updates)
+                       is_later = false;
+               else
+                       is_later = fence_is_later(updates, flushed);
+
+               owner = atomic_long_read(&id->mgr_id->owner);
+               if (!is_later && owner == (long)id &&
+                   pd_addr == id->pd_gpu_addr) {
+
+                       fence_put(id->mgr_id->active);
+                       id->mgr_id->active = fence_get(fence);
+
+                       list_move_tail(&id->mgr_id->list,
+                                      &adev->vm_manager.ids_lru);
 
-                       fence_put(id->active);
-                       id->active = fence_get(fence);
+                       *vm_id = id->mgr_id - adev->vm_manager.ids;
+                       *vm_pd_addr = AMDGPU_VM_NO_FLUSH;
+                       trace_amdgpu_vm_grab_id(vm, *vm_id, ring->idx);
 
                        mutex_unlock(&adev->vm_manager.lock);
                        return 0;
                }
        }
 
-       /* we definately need to flush */
-       vm_id->pd_gpu_addr = ~0ll;
+       id->mgr_id = list_first_entry(&adev->vm_manager.ids_lru,
+                                     struct amdgpu_vm_manager_id,
+                                     list);
 
-       id = list_first_entry(&adev->vm_manager.ids_lru,
-                             struct amdgpu_vm_manager_id,
-                             list);
-       list_move_tail(&id->list, &adev->vm_manager.ids_lru);
-       atomic_long_set(&id->owner, (long)vm);
+       r = amdgpu_sync_fence(ring->adev, sync, id->mgr_id->active);
+       if (!r) {
+               fence_put(id->mgr_id->active);
+               id->mgr_id->active = fence_get(fence);
 
-       vm_id->id = id - adev->vm_manager.ids;
-       trace_amdgpu_vm_grab_id(vm, vm_id->id, ring->idx);
+               fence_put(id->flushed_updates);
+               id->flushed_updates = fence_get(updates);
 
-       r = amdgpu_sync_fence(ring->adev, sync, id->active);
+               id->pd_gpu_addr = pd_addr;
 
-       if (!r) {
-               fence_put(id->active);
-               id->active = fence_get(fence);
+               list_move_tail(&id->mgr_id->list, &adev->vm_manager.ids_lru);
+               atomic_long_set(&id->mgr_id->owner, (long)id);
+
+               *vm_id = id->mgr_id - adev->vm_manager.ids;
+               *vm_pd_addr = pd_addr;
+               trace_amdgpu_vm_grab_id(vm, *vm_id, ring->idx);
        }
 
        mutex_unlock(&adev->vm_manager.lock);
@@ -211,35 +233,18 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
  * amdgpu_vm_flush - hardware flush the vm
  *
  * @ring: ring to use for flush
- * @vm: vm we want to flush
- * @updates: last vm update that we waited for
+ * @vmid: vmid number to use
+ * @pd_addr: address of the page directory
  *
- * Flush the vm.
+ * Emit a VM flush when it is necessary.
  */
 void amdgpu_vm_flush(struct amdgpu_ring *ring,
-                    struct amdgpu_vm *vm,
-                    struct fence *updates)
+                    unsigned vmid,
+                    uint64_t pd_addr)
 {
-       uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
-       struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
-       struct fence *flushed_updates = vm_id->flushed_updates;
-       bool is_later;
-
-       if (!flushed_updates)
-               is_later = true;
-       else if (!updates)
-               is_later = false;
-       else
-               is_later = fence_is_later(updates, flushed_updates);
-
-       if (pd_addr != vm_id->pd_gpu_addr || is_later) {
-               trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id);
-               if (is_later) {
-                       vm_id->flushed_updates = fence_get(updates);
-                       fence_put(flushed_updates);
-               }
-               vm_id->pd_gpu_addr = pd_addr;
-               amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr);
+       if (pd_addr != AMDGPU_VM_NO_FLUSH) {
+               trace_amdgpu_vm_flush(pd_addr, ring->idx, vmid);
+               amdgpu_ring_emit_vm_flush(ring, vmid, pd_addr);
        }
 }
 
@@ -1284,7 +1289,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        int i, r;
 
        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-               vm->ids[i].id = 0;
+               vm->ids[i].mgr_id = NULL;
                vm->ids[i].flushed_updates = NULL;
        }
        vm->va = RB_ROOT;
@@ -1381,13 +1386,13 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        amdgpu_bo_unref(&vm->page_directory);
        fence_put(vm->page_directory_fence);
        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-               unsigned id = vm->ids[i].id;
+               struct amdgpu_vm_id *id = &vm->ids[i];
 
-               atomic_long_cmpxchg(&adev->vm_manager.ids[id].owner,
-                                   (long)vm, 0);
-               fence_put(vm->ids[i].flushed_updates);
+               if (id->mgr_id)
+                       atomic_long_cmpxchg(&id->mgr_id->owner,
+                                           (long)id, 0);
+               fence_put(id->flushed_updates);
        }
-
 }
 
 /**
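
For reference, the reuse test introduced above reduces to the following
self-contained model; sequence numbers stand in for the struct fence
pointers (0 meaning NULL), all names are hypothetical, and only the
control flow mirrors the patch:

#include <stdint.h>
#include <stdio.h>

#define AMDGPU_VM_NO_FLUSH (~0ull)	/* same sentinel as the patch */

struct id_model {
	long owner;		/* token of the last per-ring user */
	long self;		/* this ring's own token */
	uint64_t pd_gpu_addr;	/* PD address at the last flush */
	uint64_t flushed;	/* last flushed PD/PT update, 0 = none */
};

/* Returns the vm_pd_addr the job must carry in its IBs. */
static uint64_t grab_id_model(const struct id_model *id,
			      uint64_t pd_addr, uint64_t updates)
{
	int is_later;

	if (!id->flushed)		/* never flushed: updates pending */
		is_later = 1;
	else if (!updates)		/* nothing new since the flush */
		is_later = 0;
	else				/* fence_is_later(updates, flushed) */
		is_later = updates > id->flushed;

	if (!is_later && id->owner == id->self &&
	    pd_addr == id->pd_gpu_addr)
		return AMDGPU_VM_NO_FLUSH;	/* reuse ID, skip flush */

	return pd_addr;				/* must flush */
}

int main(void)
{
	struct id_model id = { .owner = 1, .self = 1,
			       .pd_gpu_addr = 0x1000, .flushed = 41 };

	printf("unchanged:   flush=%d\n",
	       grab_id_model(&id, 0x1000, 41) != AMDGPU_VM_NO_FLUSH);
	printf("new updates: flush=%d\n",
	       grab_id_model(&id, 0x1000, 42) != AMDGPU_VM_NO_FLUSH);
	return 0;
}

The flush decision is thereby made once, under the VM manager lock, and
carried to the ring via ib->vm_pd_addr; amdgpu_vm_flush() then only emits
when the address is not the sentinel.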
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 675f34916aabf13f150a56a62519e34a81d73336..e4e4b2ac77b741c37bf3fc140b4476c14d546717 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -212,7 +212,7 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
                           struct amdgpu_ib *ib)
 {
-       u32 extra_bits = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf;
+       u32 extra_bits = ib->vm_id & 0xf;
        u32 next_rptr = ring->wptr + 5;
 
        while ((next_rptr & 7) != 4)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index bc5bdaf3d2bbc56ed12ada6f40e9489846d509cb..9cdf59518533863d13bc0eea57a0fe413fffdfcf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2043,8 +2043,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
        else
                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
 
-       control |= ib->length_dw |
-               (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
+       control |= ib->length_dw | (ib->vm_id << 24);
 
        amdgpu_ring_write(ring, header);
        amdgpu_ring_write(ring,
@@ -2072,8 +2071,7 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 
        header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
 
-       control |= ib->length_dw |
-                          (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
+       control |= ib->length_dw | (ib->vm_id << 24);
 
        amdgpu_ring_write(ring, header);
        amdgpu_ring_write(ring,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 71d536e595a2f8158914c95090eae9fe62c2ccf3..5f67a189bce9b1355048b6b0a2e8968f981129ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4619,8 +4619,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
        else
                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
 
-       control |= ib->length_dw |
-               (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
+       control |= ib->length_dw | (ib->vm_id << 24);
 
        amdgpu_ring_write(ring, header);
        amdgpu_ring_write(ring,
@@ -4649,8 +4648,7 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 
        header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
 
-       control |= ib->length_dw |
-                          (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
+       control |= ib->length_dw | (ib->vm_id << 24);
 
        amdgpu_ring_write(ring, header);
        amdgpu_ring_write(ring,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 29ec986dd6fce61cd4b821d48ce6bbe3cc771762..dddb8d6a81f3d41fd7be3ab56cbc5d9b7cb4782b 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -244,7 +244,7 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
                                   struct amdgpu_ib *ib)
 {
-       u32 vmid = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf;
+       u32 vmid = ib->vm_id & 0xf;
        u32 next_rptr = ring->wptr + 5;
 
        while ((next_rptr & 7) != 2)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 6f064d7076e68c98b8cefdb508bb0968d76b1866..19e02f7a06f3db97acf83e513bcec449320b2285 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -355,7 +355,7 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
                                   struct amdgpu_ib *ib)
 {
-       u32 vmid = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf;
+       u32 vmid = ib->vm_id & 0xf;
        u32 next_rptr = ring->wptr + 5;
 
        while ((next_rptr & 7) != 2)