From: Alexandre Courbot Date: Wed, 11 Nov 2015 08:07:51 +0000 (+0900) Subject: drm/nouveau/instmem/gk20a: use DMA API CPU mapping X-Git-Url: https://git.stricted.de/?a=commitdiff_plain;h=b306712d924af4dd63e44a08eb78c779e2d04154;p=GitHub%2Fmoto-9609%2Fandroid_kernel_motorola_exynos9610.git drm/nouveau/instmem/gk20a: use DMA API CPU mapping Commit 69c4938249fb ("drm/nouveau/instmem/gk20a: use direct CPU access") tried to be smart while using the DMA-API by managing the CPU mappings of buffers allocated with the DMA-API by itself. In doing so, it relied on dma_to_phys() which is an architecture-private function not available everywhere. This broke the build on several architectures. Since there is no reliable and portable way to obtain the physical address of a DMA-API buffer, stop trying to be smart and just use the CPU mapping that the DMA-API can provide. This means that buffers will be CPU-mapped for all their life as opposed to when we need them, but anyway using the DMA-API here is a fallback for when no IOMMU is available so we should not expect optimal behavior. This makes the IOMMU and DMA-API implementations of instmem diverge enough that we should maybe put them into separate files... Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c index 51fccd54f603..4c20fec64d96 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c @@ -56,9 +56,6 @@ struct gk20a_instobj { /* CPU mapping */ u32 *vaddr; - struct list_head vaddr_node; - /* How many clients are using vaddr? */ - u32 use_cpt; }; #define gk20a_instobj(p) container_of((p), struct gk20a_instobj, memory) @@ -68,7 +65,6 @@ struct gk20a_instobj { struct gk20a_instobj_dma { struct gk20a_instobj base; - u32 *cpuaddr; dma_addr_t handle; struct nvkm_mm_node r; }; @@ -81,6 +77,11 @@ struct gk20a_instobj_dma { struct gk20a_instobj_iommu { struct gk20a_instobj base; + /* to link into gk20a_instmem::vaddr_lru */ + struct list_head vaddr_node; + /* how many clients are using vaddr? */ + u32 use_cpt; + /* will point to the higher half of pages */ dma_addr_t *dma_addrs; /* array of base.mem->size pages (+ dma_addr_ts) */ @@ -109,8 +110,6 @@ struct gk20a_instmem { /* Only used by DMA API */ struct dma_attrs attrs; - - void __iomem * (*cpu_map)(struct nvkm_memory *); }; #define gk20a_instmem(p) container_of((p), struct gk20a_instmem, base) @@ -132,52 +131,19 @@ gk20a_instobj_size(struct nvkm_memory *memory) return (u64)gk20a_instobj(memory)->mem.size << 12; } -static void __iomem * -gk20a_instobj_cpu_map_dma(struct nvkm_memory *memory) -{ -#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) - struct gk20a_instobj_dma *node = gk20a_instobj_dma(memory); - struct device *dev = node->base.imem->base.subdev.device->dev; - int npages = nvkm_memory_size(memory) >> 12; - struct page *pages[npages]; - int i; - - /* we shouldn't see a gk20a on anything but arm/arm64 anyways */ - /* phys_to_page does not exist on all platforms... */ - pages[0] = pfn_to_page(dma_to_phys(dev, node->handle) >> PAGE_SHIFT); - for (i = 1; i < npages; i++) - pages[i] = pages[0] + i; - - return vmap(pages, npages, VM_MAP, pgprot_writecombine(PAGE_KERNEL)); -#else - BUG(); - return NULL; -#endif -} - -static void __iomem * -gk20a_instobj_cpu_map_iommu(struct nvkm_memory *memory) -{ - struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory); - int npages = nvkm_memory_size(memory) >> 12; - - return vmap(node->pages, npages, VM_MAP, - pgprot_writecombine(PAGE_KERNEL)); -} - /* * Recycle the vaddr of obj. Must be called with gk20a_instmem::lock held. */ static void -gk20a_instobj_recycle_vaddr(struct gk20a_instobj *obj) +gk20a_instobj_iommu_recycle_vaddr(struct gk20a_instobj_iommu *obj) { - struct gk20a_instmem *imem = obj->imem; + struct gk20a_instmem *imem = obj->base.imem; /* there should not be any user left... */ WARN_ON(obj->use_cpt); list_del(&obj->vaddr_node); - vunmap(obj->vaddr); - obj->vaddr = NULL; - imem->vaddr_use -= nvkm_memory_size(&obj->memory); + vunmap(obj->base.vaddr); + obj->base.vaddr = NULL; + imem->vaddr_use -= nvkm_memory_size(&obj->base.memory); nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n", imem->vaddr_use, imem->vaddr_max); } @@ -193,17 +159,30 @@ gk20a_instmem_vaddr_gc(struct gk20a_instmem *imem, const u64 size) if (list_empty(&imem->vaddr_lru)) break; - gk20a_instobj_recycle_vaddr(list_first_entry(&imem->vaddr_lru, - struct gk20a_instobj, vaddr_node)); + gk20a_instobj_iommu_recycle_vaddr( + list_first_entry(&imem->vaddr_lru, + struct gk20a_instobj_iommu, vaddr_node)); } } static void __iomem * -gk20a_instobj_acquire(struct nvkm_memory *memory) +gk20a_instobj_acquire_dma(struct nvkm_memory *memory) { struct gk20a_instobj *node = gk20a_instobj(memory); struct gk20a_instmem *imem = node->imem; struct nvkm_ltc *ltc = imem->base.subdev.device->ltc; + + nvkm_ltc_flush(ltc); + + return node->vaddr; +} + +static void __iomem * +gk20a_instobj_acquire_iommu(struct nvkm_memory *memory) +{ + struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory); + struct gk20a_instmem *imem = node->base.imem; + struct nvkm_ltc *ltc = imem->base.subdev.device->ltc; const u64 size = nvkm_memory_size(memory); unsigned long flags; @@ -211,7 +190,7 @@ gk20a_instobj_acquire(struct nvkm_memory *memory) spin_lock_irqsave(&imem->lock, flags); - if (node->vaddr) { + if (node->base.vaddr) { if (!node->use_cpt) { /* remove from LRU list since mapping in use again */ list_del(&node->vaddr_node); @@ -222,9 +201,10 @@ gk20a_instobj_acquire(struct nvkm_memory *memory) /* try to free some address space if we reached the limit */ gk20a_instmem_vaddr_gc(imem, size); - node->vaddr = imem->cpu_map(memory); - - if (!node->vaddr) { + /* map the pages */ + node->base.vaddr = vmap(node->pages, size >> PAGE_SHIFT, VM_MAP, + pgprot_writecombine(PAGE_KERNEL)); + if (!node->base.vaddr) { nvkm_error(&imem->base.subdev, "cannot map instobj - " "this is not going to end well...\n"); goto out; @@ -238,15 +218,25 @@ out: node->use_cpt++; spin_unlock_irqrestore(&imem->lock, flags); - return node->vaddr; + return node->base.vaddr; } static void -gk20a_instobj_release(struct nvkm_memory *memory) +gk20a_instobj_release_dma(struct nvkm_memory *memory) { struct gk20a_instobj *node = gk20a_instobj(memory); struct gk20a_instmem *imem = node->imem; struct nvkm_ltc *ltc = imem->base.subdev.device->ltc; + + nvkm_ltc_invalidate(ltc); +} + +static void +gk20a_instobj_release_iommu(struct nvkm_memory *memory) +{ + struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory); + struct gk20a_instmem *imem = node->base.imem; + struct nvkm_ltc *ltc = imem->base.subdev.device->ltc; unsigned long flags; spin_lock_irqsave(&imem->lock, flags); @@ -290,27 +280,6 @@ gk20a_instobj_map(struct nvkm_memory *memory, struct nvkm_vma *vma, u64 offset) nvkm_vm_map_at(vma, offset, &node->mem); } -/* - * Clear the CPU mapping of an instobj if it exists - */ -static void -gk20a_instobj_dtor(struct gk20a_instobj *node) -{ - struct gk20a_instmem *imem = node->imem; - unsigned long flags; - - spin_lock_irqsave(&imem->lock, flags); - - /* vaddr has already been recycled */ - if (!node->vaddr) - goto out; - - gk20a_instobj_recycle_vaddr(node); - -out: - spin_unlock_irqrestore(&imem->lock, flags); -} - static void * gk20a_instobj_dtor_dma(struct nvkm_memory *memory) { @@ -318,12 +287,10 @@ gk20a_instobj_dtor_dma(struct nvkm_memory *memory) struct gk20a_instmem *imem = node->base.imem; struct device *dev = imem->base.subdev.device->dev; - gk20a_instobj_dtor(&node->base); - - if (unlikely(!node->cpuaddr)) + if (unlikely(!node->base.vaddr)) goto out; - dma_free_attrs(dev, node->base.mem.size << PAGE_SHIFT, node->cpuaddr, + dma_free_attrs(dev, node->base.mem.size << PAGE_SHIFT, node->base.vaddr, node->handle, &imem->attrs); out: @@ -337,13 +304,20 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory) struct gk20a_instmem *imem = node->base.imem; struct device *dev = imem->base.subdev.device->dev; struct nvkm_mm_node *r; + unsigned long flags; int i; - gk20a_instobj_dtor(&node->base); - if (unlikely(list_empty(&node->base.mem.regions))) goto out; + spin_lock_irqsave(&imem->lock, flags); + + /* vaddr has already been recycled */ + if (node->base.vaddr) + gk20a_instobj_iommu_recycle_vaddr(node); + + spin_unlock_irqrestore(&imem->lock, flags); + r = list_first_entry(&node->base.mem.regions, struct nvkm_mm_node, rl_entry); @@ -374,8 +348,8 @@ gk20a_instobj_func_dma = { .target = gk20a_instobj_target, .addr = gk20a_instobj_addr, .size = gk20a_instobj_size, - .acquire = gk20a_instobj_acquire, - .release = gk20a_instobj_release, + .acquire = gk20a_instobj_acquire_dma, + .release = gk20a_instobj_release_dma, .rd32 = gk20a_instobj_rd32, .wr32 = gk20a_instobj_wr32, .map = gk20a_instobj_map, @@ -387,8 +361,8 @@ gk20a_instobj_func_iommu = { .target = gk20a_instobj_target, .addr = gk20a_instobj_addr, .size = gk20a_instobj_size, - .acquire = gk20a_instobj_acquire, - .release = gk20a_instobj_release, + .acquire = gk20a_instobj_acquire_iommu, + .release = gk20a_instobj_release_iommu, .rd32 = gk20a_instobj_rd32, .wr32 = gk20a_instobj_wr32, .map = gk20a_instobj_map, @@ -408,10 +382,10 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align, nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.memory); - node->cpuaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT, - &node->handle, GFP_KERNEL, - &imem->attrs); - if (!node->cpuaddr) { + node->base.vaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT, + &node->handle, GFP_KERNEL, + &imem->attrs); + if (!node->base.vaddr) { nvkm_error(subdev, "cannot allocate DMA memory\n"); return -ENOMEM; } @@ -617,18 +591,14 @@ gk20a_instmem_new(struct nvkm_device *device, int index, imem->mm = &tdev->iommu.mm; imem->domain = tdev->iommu.domain; imem->iommu_pgshift = tdev->iommu.pgshift; - imem->cpu_map = gk20a_instobj_cpu_map_iommu; imem->iommu_bit = tdev->func->iommu_bit; nvkm_info(&imem->base.subdev, "using IOMMU\n"); } else { init_dma_attrs(&imem->attrs); - /* We will access the memory through our own mapping */ dma_set_attr(DMA_ATTR_NON_CONSISTENT, &imem->attrs); dma_set_attr(DMA_ATTR_WEAK_ORDERING, &imem->attrs); dma_set_attr(DMA_ATTR_WRITE_COMBINE, &imem->attrs); - dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &imem->attrs); - imem->cpu_map = gk20a_instobj_cpu_map_dma; nvkm_info(&imem->base.subdev, "using DMA API\n"); }