void amdgpu_job_free(struct amdgpu_job *job);
int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
struct amd_sched_entity *entity, void *owner,
- struct fence **f);
+ struct dma_fence **f);
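
The prototype change above completes the switch to the core dma_fence type for the fence a submission hands back. A minimal caller sketch, assuming the existing amdgpu_job_alloc_with_ib() helper and an already initialised ring, scheduler entity and VM (the IB size and names are illustrative, error paths trimmed):

/* hypothetical caller: submit a job and take ownership of its dma_fence */
struct amdgpu_job *job;
struct dma_fence *fence;
int r;

r = amdgpu_job_alloc_with_ib(adev, 64, &job);   /* helper assumed from amdgpu.h */
if (r)
        return r;

/* ... fill job->ibs[0] with packets for this engine ... */

r = amdgpu_job_submit(job, ring, &vm->entity,
                      AMDGPU_FENCE_OWNER_VM, &fence);
if (r) {
        amdgpu_job_free(job);
        return r;
}

dma_fence_wait(fence, false);   /* optional: block until the job retires */
dma_fence_put(fence);           /* drop the reference returned by submit */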
-struct amdgpu_ring {
- struct amdgpu_device *adev;
- const struct amdgpu_ring_funcs *funcs;
- struct amdgpu_fence_driver fence_drv;
- struct amd_gpu_scheduler sched;
-
- struct amdgpu_bo *ring_obj;
- volatile uint32_t *ring;
- unsigned rptr_offs;
- unsigned wptr;
- unsigned wptr_old;
- unsigned ring_size;
- unsigned max_dw;
- int count_dw;
- uint64_t gpu_addr;
- uint32_t align_mask;
- uint32_t ptr_mask;
- bool ready;
- u32 nop;
- u32 idx;
- u32 me;
- u32 pipe;
- u32 queue;
- struct amdgpu_bo *mqd_obj;
- u32 doorbell_index;
- bool use_doorbell;
- unsigned wptr_offs;
- unsigned fence_offs;
- uint64_t current_ctx;
- enum amdgpu_ring_type type;
- char name[16];
- unsigned cond_exe_offs;
- u64 cond_exe_gpu_addr;
- volatile u32 *cond_exe_cpu_addr;
-#if defined(CONFIG_DEBUG_FS)
- struct dentry *ent;
-#endif
-};
-
-/*
- * VM
- */
-
-/* maximum number of VMIDs */
-#define AMDGPU_NUM_VM 16
-
-/* Maximum number of PTEs the hardware can write with one command */
-#define AMDGPU_VM_MAX_UPDATE_SIZE 0x3FFFF
-
-/* number of entries in page table */
-#define AMDGPU_VM_PTE_COUNT (1 << amdgpu_vm_block_size)
-
-/* PTBs (Page Table Blocks) need to be aligned to 32K */
-#define AMDGPU_VM_PTB_ALIGN_SIZE 32768
-
-/* LOG2 number of continuous pages for the fragment field */
-#define AMDGPU_LOG2_PAGES_PER_FRAG 4
-
-#define AMDGPU_PTE_VALID (1 << 0)
-#define AMDGPU_PTE_SYSTEM (1 << 1)
-#define AMDGPU_PTE_SNOOPED (1 << 2)
-
-/* VI only */
-#define AMDGPU_PTE_EXECUTABLE (1 << 4)
-
-#define AMDGPU_PTE_READABLE (1 << 5)
-#define AMDGPU_PTE_WRITEABLE (1 << 6)
-
-#define AMDGPU_PTE_FRAG(x) ((x & 0x1f) << 7)
-
-/* How to programm VM fault handling */
-#define AMDGPU_VM_FAULT_STOP_NEVER 0
-#define AMDGPU_VM_FAULT_STOP_FIRST 1
-#define AMDGPU_VM_FAULT_STOP_ALWAYS 2
-
-struct amdgpu_vm_pt {
- struct amdgpu_bo_list_entry entry;
- uint64_t addr;
- uint64_t shadow_addr;
-};
-
-struct amdgpu_vm {
- /* tree of virtual addresses mapped */
- struct rb_root va;
-
- /* protecting invalidated */
- spinlock_t status_lock;
-
- /* BOs moved, but not yet updated in the PT */
- struct list_head invalidated;
-
- /* BOs cleared in the PT because of a move */
- struct list_head cleared;
-
- /* BO mappings freed, but not yet updated in the PT */
- struct list_head freed;
-
- /* contains the page directory */
- struct amdgpu_bo *page_directory;
- unsigned max_pde_used;
- struct dma_fence *page_directory_fence;
- uint64_t last_eviction_counter;
-
- /* array of page tables, one for each page directory entry */
- struct amdgpu_vm_pt *page_tables;
-
- /* for id and flush management per ring */
- struct amdgpu_vm_id *ids[AMDGPU_MAX_RINGS];
-
- /* protecting freed */
- spinlock_t freed_lock;
-
- /* Scheduler entity for page table updates */
- struct amd_sched_entity entity;
-
- /* client id */
- u64 client_id;
-};
-
-struct amdgpu_vm_id {
- struct list_head list;
- struct dma_fence *first;
- struct amdgpu_sync active;
- struct dma_fence *last_flush;
- atomic64_t owner;
-
- uint64_t pd_gpu_addr;
- /* last flushed PD/PT update */
- struct dma_fence *flushed_updates;
-
- uint32_t current_gpu_reset_count;
-
- uint32_t gds_base;
- uint32_t gds_size;
- uint32_t gws_base;
- uint32_t gws_size;
- uint32_t oa_base;
- uint32_t oa_size;
-};
-
-struct amdgpu_vm_manager {
- /* Handling of VMIDs */
- struct mutex lock;
- unsigned num_ids;
- struct list_head ids_lru;
- struct amdgpu_vm_id ids[AMDGPU_NUM_VM];
-
- /* Handling of VM fences */
- u64 fence_context;
- unsigned seqno[AMDGPU_MAX_RINGS];
-
- uint32_t max_pfn;
- /* vram base address for page table entry */
- u64 vram_base_offset;
- /* is vm enabled? */
- bool enabled;
- /* vm pte handling */
- const struct amdgpu_vm_pte_funcs *vm_pte_funcs;
- struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS];
- unsigned vm_pte_num_rings;
- atomic_t vm_pte_next_ring;
- /* client id counter */
- atomic64_t client_counter;
-};
-
-void amdgpu_vm_manager_init(struct amdgpu_device *adev);
-void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
-void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
-void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
- struct list_head *validated,
- struct amdgpu_bo_list_entry *entry);
-void amdgpu_vm_get_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- struct list_head *duplicates);
-void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
- struct amdgpu_vm *vm);
-int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
- struct amdgpu_sync *sync, struct dma_fence *fence,
- struct amdgpu_job *job);
-int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
-void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
-int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
- struct amdgpu_vm *vm);
-int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
- struct amdgpu_vm *vm);
-int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- struct amdgpu_sync *sync);
-int amdgpu_vm_bo_update(struct amdgpu_device *adev,
- struct amdgpu_bo_va *bo_va,
- bool clear);
-void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
- struct amdgpu_bo *bo);
-struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
- struct amdgpu_bo *bo);
-struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_bo *bo);
-int amdgpu_vm_bo_map(struct amdgpu_device *adev,
- struct amdgpu_bo_va *bo_va,
- uint64_t addr, uint64_t offset,
- uint64_t size, uint32_t flags);
-int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
- struct amdgpu_bo_va *bo_va,
- uint64_t addr);
-void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
- struct amdgpu_bo_va *bo_va);
-
/*
* context related structures
*/
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+#ifndef __AMDGPU_RING_H__
+#define __AMDGPU_RING_H__
+
+#include "gpu_scheduler.h"
+
+/* max number of rings */
+#define AMDGPU_MAX_RINGS 16
+#define AMDGPU_MAX_GFX_RINGS 1
+#define AMDGPU_MAX_COMPUTE_RINGS 8
+#define AMDGPU_MAX_VCE_RINGS 3
+
+/* some special values for the owner field */
+#define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul)
+#define AMDGPU_FENCE_OWNER_VM ((void*)1ul)
+
+#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
+#define AMDGPU_FENCE_FLAG_INT (1 << 1)
+
+enum amdgpu_ring_type {
+ AMDGPU_RING_TYPE_GFX,
+ AMDGPU_RING_TYPE_COMPUTE,
+ AMDGPU_RING_TYPE_SDMA,
+ AMDGPU_RING_TYPE_UVD,
+ AMDGPU_RING_TYPE_VCE
+};
+
+struct amdgpu_device;
+struct amdgpu_ring;
+struct amdgpu_ib;
+struct amdgpu_cs_parser;
+
+/*
+ * Fences.
+ */
+struct amdgpu_fence_driver {
+ uint64_t gpu_addr;
+ volatile uint32_t *cpu_addr;
+ /* sync_seq is protected by ring emission lock */
+ uint32_t sync_seq;
+ atomic_t last_seq;
+ bool initialized;
+ struct amdgpu_irq_src *irq_src;
+ unsigned irq_type;
+ struct timer_list fallback_timer;
+ unsigned num_fences_mask;
+ spinlock_t lock;
- struct fence **fences;
++ struct dma_fence **fences;
+};
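
Note how fences and num_fences_mask form a power-of-two ring of pending fence pointers indexed by sequence number. A sketch of the lookup this enables (the helper name is made up, and the mask is assumed to be derived from num_hw_submission at init time):

/* illustrative: slot that holds the dma_fence for a given seqno */
static struct dma_fence **example_fence_slot(struct amdgpu_fence_driver *drv,
                                             uint32_t seq)
{
        return &drv->fences[seq & drv->num_fences_mask];
}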
+
+int amdgpu_fence_driver_init(struct amdgpu_device *adev);
+void amdgpu_fence_driver_fini(struct amdgpu_device *adev);
+void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev);
+
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
+ unsigned num_hw_submission);
+int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
+ struct amdgpu_irq_src *irq_src,
+ unsigned irq_type);
+void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
+void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
- int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **fence);
++int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence);
+void amdgpu_fence_process(struct amdgpu_ring *ring);
+int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
+unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
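
For reference, the usual bring-up order of these entry points per ring — a sketch assuming num_hw_submission, irq_src and irq_type are already in scope and the IRQ source is registered:

/* illustrative per-ring fence bring-up */
int r;

r = amdgpu_fence_driver_init_ring(ring, num_hw_submission);
if (r)
        return r;
r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
if (r)
        return r;

/* from the ring's interrupt handler, to retire signalled fences: */
amdgpu_fence_process(ring);

/* before shutting the engine down: */
r = amdgpu_fence_wait_empty(ring);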
+
+/*
+ * Rings.
+ */
+
+/* provided by hw blocks that expose a ring buffer for commands */
+struct amdgpu_ring_funcs {
+ enum amdgpu_ring_type type;
+ uint32_t align_mask;
+ u32 nop;
+
+ /* ring read/write ptr handling */
+ u32 (*get_rptr)(struct amdgpu_ring *ring);
+ u32 (*get_wptr)(struct amdgpu_ring *ring);
+ void (*set_wptr)(struct amdgpu_ring *ring);
+ /* validating and patching of IBs */
+ int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
+ /* constants to calculate how many DW are needed for an emit */
+ unsigned emit_frame_size;
+ unsigned emit_ib_size;
+ /* command emit functions */
+ void (*emit_ib)(struct amdgpu_ring *ring,
+ struct amdgpu_ib *ib,
+ unsigned vm_id, bool ctx_switch);
+ void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
+ uint64_t seq, unsigned flags);
+ void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
+ void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vm_id,
+ uint64_t pd_addr);
+ void (*emit_hdp_flush)(struct amdgpu_ring *ring);
+ void (*emit_hdp_invalidate)(struct amdgpu_ring *ring);
+ void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size);
+ /* testing functions */
+ int (*test_ring)(struct amdgpu_ring *ring);
+ int (*test_ib)(struct amdgpu_ring *ring, long timeout);
+ /* insert NOP packets */
+ void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
+ /* pad the indirect buffer to the necessary number of dw */
+ void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
+ unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
+ void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset);
+ /* note usage for clock and power gating */
+ void (*begin_use)(struct amdgpu_ring *ring);
+ void (*end_use)(struct amdgpu_ring *ring);
+ void (*emit_switch_buffer) (struct amdgpu_ring *ring);
+ void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
+};
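
The vtable keeps the submission path IP-agnostic; a hedged sketch of how common code might dispatch through it when emitting one frame (ib, vm_id, ctx_switch and seq are assumed to be in scope, the surrounding bookkeeping of the real submission path is omitted):

/* illustrative dispatch through the ring callbacks */
if (ring->funcs->emit_hdp_flush)
        ring->funcs->emit_hdp_flush(ring);

ring->funcs->emit_ib(ring, ib, vm_id, ctx_switch);

ring->funcs->emit_fence(ring, ring->fence_drv.gpu_addr, seq,
                        AMDGPU_FENCE_FLAG_INT);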
+
+struct amdgpu_ring {
+ struct amdgpu_device *adev;
+ const struct amdgpu_ring_funcs *funcs;
+ struct amdgpu_fence_driver fence_drv;
+ struct amd_gpu_scheduler sched;
+
+ struct amdgpu_bo *ring_obj;
+ volatile uint32_t *ring;
+ unsigned rptr_offs;
+ unsigned wptr;
+ unsigned wptr_old;
+ unsigned ring_size;
+ unsigned max_dw;
+ int count_dw;
+ uint64_t gpu_addr;
+ uint32_t ptr_mask;
+ bool ready;
+ u32 idx;
+ u32 me;
+ u32 pipe;
+ u32 queue;
+ struct amdgpu_bo *mqd_obj;
+ u32 doorbell_index;
+ bool use_doorbell;
+ unsigned wptr_offs;
+ unsigned fence_offs;
+ uint64_t current_ctx;
+ char name[16];
+ unsigned cond_exe_offs;
+ u64 cond_exe_gpu_addr;
+ volatile u32 *cond_exe_cpu_addr;
+#if defined(CONFIG_DEBUG_FS)
+ struct dentry *ent;
+#endif
+};
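
wptr, ptr_mask and count_dw implement the write cursor: each dword lands at wptr, which wraps through ptr_mask, while count_dw tracks the budget reserved by amdgpu_ring_alloc(). A sketch of that bookkeeping (the helper name is illustrative):

/* sketch of the per-dword write bookkeeping */
static void example_ring_write(struct amdgpu_ring *ring, uint32_t v)
{
        ring->ring[ring->wptr++] = v;   /* store into the mapped ring buffer */
        ring->wptr &= ring->ptr_mask;   /* wrap at the ring size */
        ring->count_dw--;               /* consume the reserved dword budget */
}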
+
+int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
+void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
+void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
+void amdgpu_ring_commit(struct amdgpu_ring *ring);
+void amdgpu_ring_undo(struct amdgpu_ring *ring);
+int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
+ unsigned ring_size, struct amdgpu_irq_src *irq_src,
+ unsigned irq_type);
+void amdgpu_ring_fini(struct amdgpu_ring *ring);
+
+#endif
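
Typical use of the allocation API above is reserve, write, commit, with undo on failure — a sketch in which the packet contents and the failure condition are placeholders:

/* illustrative reserve/commit pattern for direct ring writes */
int r;

r = amdgpu_ring_alloc(ring, 16);        /* reserve 16 dwords */
if (r)
        return r;

/* ... write at most 16 dwords of packets here ... */

if (failed) {                           /* placeholder condition */
        amdgpu_ring_undo(ring);         /* roll wptr back */
        return -EINVAL;
}

amdgpu_ring_commit(ring);               /* pad and publish the new wptr */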
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+#ifndef __AMDGPU_SYNC_H__
+#define __AMDGPU_SYNC_H__
+
+#include <linux/hashtable.h>
+
- struct fence;
++struct dma_fence;
+struct reservation_object;
+struct amdgpu_device;
+struct amdgpu_ring;
+
+/*
+ * Container for fences used to sync command submissions.
+ */
+struct amdgpu_sync {
+ DECLARE_HASHTABLE(fences, 4);
- struct fence *last_vm_update;
++ struct dma_fence *last_vm_update;
+};
+
+void amdgpu_sync_create(struct amdgpu_sync *sync);
+int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
- struct fence *f);
++ struct dma_fence *f);
+int amdgpu_sync_resv(struct amdgpu_device *adev,
+ struct amdgpu_sync *sync,
+ struct reservation_object *resv,
+ void *owner);
- struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
++struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+ struct amdgpu_ring *ring);
- struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
++struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
+void amdgpu_sync_free(struct amdgpu_sync *sync);
+int amdgpu_sync_init(void);
+void amdgpu_sync_fini(void);
+
+#endif
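
A sketch of how a submission path might use the container: gather implicit fences from a reservation object plus one explicit fence, then drain them before running the job (adev, resv and fence are assumed to be in scope, error handling trimmed):

struct amdgpu_sync sync;
struct dma_fence *f;
int r;

amdgpu_sync_create(&sync);

r = amdgpu_sync_resv(adev, &sync, resv, AMDGPU_FENCE_OWNER_UNDEFINED);
r = amdgpu_sync_fence(adev, &sync, fence);

while ((f = amdgpu_sync_get_fence(&sync))) {
        dma_fence_wait(f, false);
        dma_fence_put(f);
}

amdgpu_sync_free(&sync);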
return r;
case TTM_PL_VRAM:
- old_start = (u64)old_mem->start << PAGE_SHIFT;
- old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
+ *addr = mm_node->start << PAGE_SHIFT;
+ *addr += bo->bdev->man[mem->mem_type].gpu_offset;
break;
default:
- DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
+ DRM_ERROR("Unknown placement %d\n", mem->mem_type);
return -EINVAL;
}
- switch (new_mem->mem_type) {
- case TTM_PL_TT:
- r = amdgpu_ttm_bind(bo, new_mem);
- if (r)
- return r;
- case TTM_PL_VRAM:
- new_start = (u64)new_mem->start << PAGE_SHIFT;
- new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
- break;
- default:
- DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
- return -EINVAL;
- }
+ return 0;
+}
+
+static int amdgpu_move_blit(struct ttm_buffer_object *bo,
+ bool evict, bool no_wait_gpu,
+ struct ttm_mem_reg *new_mem,
+ struct ttm_mem_reg *old_mem)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+
+ struct drm_mm_node *old_mm, *new_mm;
+ uint64_t old_start, old_size, new_start, new_size;
+ unsigned long num_pages;
- struct fence *fence = NULL;
++ struct dma_fence *fence = NULL;
+ int r;
+
+ BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
+
if (!ring->ready) {
DRM_ERROR("Trying to move memory with ring turned off.\n");
return -EINVAL;
}
- BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
+ old_mm = old_mem->mm_node;
+ r = amdgpu_mm_node_addr(bo, old_mm, old_mem, &old_start);
+ if (r)
+ return r;
+ old_size = old_mm->size;
+
- r = amdgpu_copy_buffer(ring, old_start, new_start,
- new_mem->num_pages * PAGE_SIZE, /* bytes */
- bo->resv, &fence, false);
+ new_mm = new_mem->mm_node;
+ r = amdgpu_mm_node_addr(bo, new_mm, new_mem, &new_start);
if (r)
return r;
+ new_size = new_mm->size;
+
+ num_pages = new_mem->num_pages;
+ while (num_pages) {
+ unsigned long cur_pages = min(old_size, new_size);
- struct fence *next;
++ struct dma_fence *next;
+
+ r = amdgpu_copy_buffer(ring, old_start, new_start,
+ cur_pages * PAGE_SIZE,
+ bo->resv, &next, false);
+ if (r)
+ goto error;
+
- fence_put(fence);
++ dma_fence_put(fence);
+ fence = next;
+
+ num_pages -= cur_pages;
+ if (!num_pages)
+ break;
+
+ old_size -= cur_pages;
+ if (!old_size) {
+ r = amdgpu_mm_node_addr(bo, ++old_mm, old_mem,
+ &old_start);
+ if (r)
+ goto error;
+ old_size = old_mm->size;
+ } else {
+ old_start += cur_pages * PAGE_SIZE;
+ }
+
+ new_size -= cur_pages;
+ if (!new_size) {
+ r = amdgpu_mm_node_addr(bo, ++new_mm, new_mem,
+ &new_start);
+ if (r)
+ goto error;
+
+ new_size = new_mm->size;
+ } else {
+ new_start += cur_pages * PAGE_SIZE;
+ }
+ }
r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
- fence_put(fence);
+ dma_fence_put(fence);
return r;
+
+error:
+ if (fence)
- fence_wait(fence, false);
- fence_put(fence);
++ dma_fence_wait(fence, false);
++ dma_fence_put(fence);
+ return r;
}
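
For illustration of the loop above: with the source split into nodes of 4 + 4 pages and the destination into 2 + 6 pages, the copies are issued as 2, 2 and 4 pages, advancing whichever node is exhausted after each step. All copies are queued on the same buffer_funcs_ring, so they complete in order; the intermediate fences are dropped and only the fence of the last copy is handed to ttm_bo_pipeline_move().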
static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo,
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data,
struct reservation_object *resv,
- struct fence **fence)
+ struct dma_fence **fence)
{
- struct amdgpu_device *adev = bo->adev;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_job *job;
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
int amdgpu_vce_suspend(struct amdgpu_device *adev);
int amdgpu_vce_resume(struct amdgpu_device *adev);
int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct fence **fence);
+ struct dma_fence **fence);
int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
- bool direct, struct fence **fence);
+ bool direct, struct dma_fence **fence);
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
+int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx);
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
unsigned vm_id, bool ctx_switch);
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
}
}
- if (count)
+ if (count) {
+ if (vm->page_directory->shadow)
+ amdgpu_vm_do_set_ptes(&params, last_shadow, last_pt,
+ count, incr, AMDGPU_PTE_VALID);
+
amdgpu_vm_do_set_ptes(&params, last_pde, last_pt,
count, incr, AMDGPU_PTE_VALID);
+ }
- if (params.ib->length_dw != 0) {
- amdgpu_ring_pad_ib(ring, params.ib);
- amdgpu_sync_resv(adev, &job->sync, pd->tbo.resv,
+ if (params.ib->length_dw == 0) {
+ amdgpu_job_free(job);
+ return 0;
+ }
+
+ amdgpu_ring_pad_ib(ring, params.ib);
+ amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv,
+ AMDGPU_FENCE_OWNER_VM);
+ if (shadow)
+ amdgpu_sync_resv(adev, &job->sync, shadow->tbo.resv,
AMDGPU_FENCE_OWNER_VM);
- WARN_ON(params.ib->length_dw > ndw);
- r = amdgpu_job_submit(job, ring, &vm->entity,
- AMDGPU_FENCE_OWNER_VM, &fence);
- if (r)
- goto error_free;
- amdgpu_bo_fence(pd, fence, true);
- dma_fence_put(vm->page_directory_fence);
- vm->page_directory_fence = dma_fence_get(fence);
- dma_fence_put(fence);
+ WARN_ON(params.ib->length_dw > ndw);
+ r = amdgpu_job_submit(job, ring, &vm->entity,
+ AMDGPU_FENCE_OWNER_VM, &fence);
+ if (r)
+ goto error_free;
- } else {
- amdgpu_job_free(job);
- }
+ amdgpu_bo_fence(vm->page_directory, fence, true);
- fence_put(vm->page_directory_fence);
- vm->page_directory_fence = fence_get(fence);
- fence_put(fence);
++ dma_fence_put(vm->page_directory_fence);
++ vm->page_directory_fence = dma_fence_get(fence);
++ dma_fence_put(fence);
return 0;
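
The tail of the hunk above is the usual fence hand-off when a page directory update is published; spelled out without the diff noise (this mirrors the lines already shown, not new behaviour):

amdgpu_bo_fence(vm->page_directory, fence, true);       /* add as shared fence on the PD's resv */

dma_fence_put(vm->page_directory_fence);                /* drop the previous update's fence */
vm->page_directory_fence = dma_fence_get(fence);        /* keep one long-lived reference */
dma_fence_put(fence);                                   /* release the local reference */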
dma_addr_t *pages_addr,
struct amdgpu_vm *vm,
struct amdgpu_bo_va_mapping *mapping,
- uint32_t flags, uint64_t addr,
+ uint32_t flags,
+ struct drm_mm_node *nodes,
- struct fence **fence)
+ struct dma_fence **fence)
{
- const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE;
-
- uint64_t src = 0, start = mapping->it.start;
+ uint64_t pfn, src = 0, start = mapping->it.start;
int r;
/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
dma_addr_t *pages_addr = NULL;
uint32_t gtt_flags, flags;
struct ttm_mem_reg *mem;
- struct fence *exclusive;
+ struct drm_mm_node *nodes;
- uint64_t addr;
+ struct dma_fence *exclusive;
int r;
if (clear) {
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+#ifndef __AMDGPU_VM_H__
+#define __AMDGPU_VM_H__
+
+#include <linux/rbtree.h>
+
+#include "gpu_scheduler.h"
+#include "amdgpu_sync.h"
+#include "amdgpu_ring.h"
+
+struct amdgpu_bo_va;
+struct amdgpu_job;
+struct amdgpu_bo_list_entry;
+
+/*
+ * GPUVM handling
+ */
+
+/* maximum number of VMIDs */
+#define AMDGPU_NUM_VM 16
+
+/* Maximum number of PTEs the hardware can write with one command */
+#define AMDGPU_VM_MAX_UPDATE_SIZE 0x3FFFF
+
+/* number of entries in page table */
+#define AMDGPU_VM_PTE_COUNT (1 << amdgpu_vm_block_size)
+
+/* PTBs (Page Table Blocks) need to be aligned to 32K */
+#define AMDGPU_VM_PTB_ALIGN_SIZE 32768
+
+/* LOG2 number of continuous pages for the fragment field */
+#define AMDGPU_LOG2_PAGES_PER_FRAG 4
+
+#define AMDGPU_PTE_VALID (1 << 0)
+#define AMDGPU_PTE_SYSTEM (1 << 1)
+#define AMDGPU_PTE_SNOOPED (1 << 2)
+
+/* VI only */
+#define AMDGPU_PTE_EXECUTABLE (1 << 4)
+
+#define AMDGPU_PTE_READABLE (1 << 5)
+#define AMDGPU_PTE_WRITEABLE (1 << 6)
+
+#define AMDGPU_PTE_FRAG(x) ((x & 0x1f) << 7)
+
+/* How to program VM fault handling */
+#define AMDGPU_VM_FAULT_STOP_NEVER 0
+#define AMDGPU_VM_FAULT_STOP_FIRST 1
+#define AMDGPU_VM_FAULT_STOP_ALWAYS 2
+
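
The PTE bits above combine into the value written for one mapping; for example, a snooped system-memory page the GPU may read and write, mapped in 2^AMDGPU_LOG2_PAGES_PER_FRAG-page (64KB) fragments, could be encoded as below (illustrative only, not taken from a specific caller):

uint64_t pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED |
                     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
                     AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG);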
+struct amdgpu_vm_pt {
+ struct amdgpu_bo *bo;
+ uint64_t addr;
+};
+
+struct amdgpu_vm {
+ /* tree of virtual addresses mapped */
+ struct rb_root va;
+
+ /* protecting invalidated */
+ spinlock_t status_lock;
+
+ /* BOs moved, but not yet updated in the PT */
+ struct list_head invalidated;
+
+ /* BOs cleared in the PT because of a move */
+ struct list_head cleared;
+
+ /* BO mappings freed, but not yet updated in the PT */
+ struct list_head freed;
+
+ /* contains the page directory */
+ struct amdgpu_bo *page_directory;
+ unsigned max_pde_used;
- struct fence *page_directory_fence;
++ struct dma_fence *page_directory_fence;
+ uint64_t last_eviction_counter;
+
+ /* array of page tables, one for each page directory entry */
+ struct amdgpu_vm_pt *page_tables;
+
+ /* for id and flush management per ring */
+ struct amdgpu_vm_id *ids[AMDGPU_MAX_RINGS];
+
+ /* protecting freed */
+ spinlock_t freed_lock;
+
+ /* Scheduler entity for page table updates */
+ struct amd_sched_entity entity;
+
+ /* client id */
+ u64 client_id;
+};
+
+struct amdgpu_vm_id {
+ struct list_head list;
- struct fence *first;
++ struct dma_fence *first;
+ struct amdgpu_sync active;
- struct fence *last_flush;
++ struct dma_fence *last_flush;
+ atomic64_t owner;
+
+ uint64_t pd_gpu_addr;
+ /* last flushed PD/PT update */
- struct fence *flushed_updates;
++ struct dma_fence *flushed_updates;
+
+ uint32_t current_gpu_reset_count;
+
+ uint32_t gds_base;
+ uint32_t gds_size;
+ uint32_t gws_base;
+ uint32_t gws_size;
+ uint32_t oa_base;
+ uint32_t oa_size;
+};
+
+struct amdgpu_vm_manager {
+ /* Handling of VMIDs */
+ struct mutex lock;
+ unsigned num_ids;
+ struct list_head ids_lru;
+ struct amdgpu_vm_id ids[AMDGPU_NUM_VM];
+
+ /* Handling of VM fences */
+ u64 fence_context;
+ unsigned seqno[AMDGPU_MAX_RINGS];
+
+ uint32_t max_pfn;
+ /* vram base address for page table entry */
+ u64 vram_base_offset;
+ /* is vm enabled? */
+ bool enabled;
+ /* vm pte handling */
+ const struct amdgpu_vm_pte_funcs *vm_pte_funcs;
+ struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS];
+ unsigned vm_pte_num_rings;
+ atomic_t vm_pte_next_ring;
+ /* client id counter */
+ atomic64_t client_counter;
+};
+
+void amdgpu_vm_manager_init(struct amdgpu_device *adev);
+void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
+ struct list_head *validated,
+ struct amdgpu_bo_list_entry *entry);
+int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ int (*callback)(void *p, struct amdgpu_bo *bo),
+ void *param);
+void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm);
+int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
- struct amdgpu_sync *sync, struct fence *fence,
++ struct amdgpu_sync *sync, struct dma_fence *fence,
+ struct amdgpu_job *job);
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
+void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
+int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm);
+int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm);
+int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_sync *sync);
+int amdgpu_vm_bo_update(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ bool clear);
+void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo);
+struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo);
+struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo);
+int amdgpu_vm_bo_map(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ uint64_t addr, uint64_t offset,
+ uint64_t size, uint32_t flags);
+int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ uint64_t addr);
+void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va);
+
+#endif
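
Taken together, the per-client flow through this interface is roughly create, add, map, update, tear down. A condensed sketch against the prototypes above (bo, gpu_va and map_size are placeholders; locking, validation and error handling omitted):

struct amdgpu_vm vm;
struct amdgpu_bo_va *bo_va;
int r;

r = amdgpu_vm_init(adev, &vm);

bo_va = amdgpu_vm_bo_add(adev, &vm, bo);
r = amdgpu_vm_bo_map(adev, bo_va, gpu_va, 0 /* offset */, map_size,
                     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE);

r = amdgpu_vm_update_page_directory(adev, &vm);
r = amdgpu_vm_bo_update(adev, bo_va, false);

/* ... command submission using this VM ... */

r = amdgpu_vm_bo_unmap(adev, bo_va, gpu_va);
amdgpu_vm_bo_rmv(adev, bo_va);
amdgpu_vm_fini(adev, &vm);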