arm64: Implement coherent DMA API based on swiotlb
authorCatalin Marinas <catalin.marinas@arm.com>
Tue, 21 May 2013 16:35:19 +0000 (17:35 +0100)
committerCatalin Marinas <catalin.marinas@arm.com>
Thu, 27 Feb 2014 17:16:59 +0000 (17:16 +0000)
This patch adds support for DMA API cache maintenance on SoCs without
hardware device cache coherency.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
arch/arm64/include/asm/cacheflush.h
arch/arm64/include/asm/dma-mapping.h
arch/arm64/mm/cache.S
arch/arm64/mm/dma-mapping.c

index 889324981aa4f569a77e6a5385897b6493c85526..4c60e64a801c5cf3a3c5e379a3270ea3ff31eea5 100644 (file)
@@ -84,6 +84,13 @@ static inline void flush_cache_page(struct vm_area_struct *vma,
 {
 }
 
+/*
+ * Cache maintenance functions used by the DMA API. No to be used directly.
+ */
+extern void __dma_map_area(const void *, size_t, int);
+extern void __dma_unmap_area(const void *, size_t, int);
+extern void __dma_flush_range(const void *, const void *);
+
 /*
  * Copy user data from/to a page which is mapped into a different
  * processes address space.  Really, we want to allow our "user
index fd0c0c0e447a657115c116f45bf9edd693b339a3..3a4572ec3273267c04334057b944739fe56d73e2 100644 (file)
@@ -30,6 +30,8 @@
 
 #define DMA_ERROR_CODE (~(dma_addr_t)0)
 extern struct dma_map_ops *dma_ops;
+extern struct dma_map_ops coherent_swiotlb_dma_ops;
+extern struct dma_map_ops noncoherent_swiotlb_dma_ops;
 
 static inline struct dma_map_ops *__generic_dma_ops(struct device *dev)
 {
@@ -47,6 +49,11 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
                return __generic_dma_ops(dev);
 }
 
+static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
+{
+       dev->archdata.dma_ops = ops;
+}
+
 #include <asm-generic/dma-mapping-common.h>
 
 static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
index 1ea9f26d1b703585537d82cf30aa44ddbe879917..97fcef535a8a4da382d3c0369979dfcf43dd6da4 100644 (file)
@@ -166,3 +166,81 @@ ENTRY(__flush_dcache_area)
        dsb     sy
        ret
 ENDPROC(__flush_dcache_area)
+
+/*
+ *     __dma_inv_range(start, end)
+ *     - start   - virtual start address of region
+ *     - end     - virtual end address of region
+ */
+__dma_inv_range:
+       dcache_line_size x2, x3
+       sub     x3, x2, #1
+       bic     x0, x0, x3
+       bic     x1, x1, x3
+1:     dc      ivac, x0                        // invalidate D / U line
+       add     x0, x0, x2
+       cmp     x0, x1
+       b.lo    1b
+       dsb     sy
+       ret
+ENDPROC(__dma_inv_range)
+
+/*
+ *     __dma_clean_range(start, end)
+ *     - start   - virtual start address of region
+ *     - end     - virtual end address of region
+ */
+__dma_clean_range:
+       dcache_line_size x2, x3
+       sub     x3, x2, #1
+       bic     x0, x0, x3
+1:     dc      cvac, x0                        // clean D / U line
+       add     x0, x0, x2
+       cmp     x0, x1
+       b.lo    1b
+       dsb     sy
+       ret
+ENDPROC(__dma_clean_range)
+
+/*
+ *     __dma_flush_range(start, end)
+ *     - start   - virtual start address of region
+ *     - end     - virtual end address of region
+ */
+ENTRY(__dma_flush_range)
+       dcache_line_size x2, x3
+       sub     x3, x2, #1
+       bic     x0, x0, x3
+1:     dc      civac, x0                       // clean & invalidate D / U line
+       add     x0, x0, x2
+       cmp     x0, x1
+       b.lo    1b
+       dsb     sy
+       ret
+ENDPROC(__dma_flush_range)
+
+/*
+ *     __dma_map_area(start, size, dir)
+ *     - start - kernel virtual start address
+ *     - size  - size of region
+ *     - dir   - DMA direction
+ */
+ENTRY(__dma_map_area)
+       add     x1, x1, x0
+       cmp     w2, #DMA_FROM_DEVICE
+       b.eq    __dma_inv_range
+       b       __dma_clean_range
+ENDPROC(__dma_map_area)
+
+/*
+ *     __dma_unmap_area(start, size, dir)
+ *     - start - kernel virtual start address
+ *     - size  - size of region
+ *     - dir   - DMA direction
+ */
+ENTRY(__dma_unmap_area)
+       add     x1, x1, x0
+       cmp     w2, #DMA_TO_DEVICE
+       b.ne    __dma_inv_range
+       ret
+ENDPROC(__dma_unmap_area)
index afa91a26a72b2c32c4155d70c1152cf497a3fb9c..88fbc5e5bae7e545b208abd8d5d70531dc3d9c85 100644 (file)
@@ -78,7 +78,166 @@ static void __dma_free_coherent(struct device *dev, size_t size,
        }
 }
 
-static struct dma_map_ops coherent_swiotlb_dma_ops = {
+static void *__dma_alloc_noncoherent(struct device *dev, size_t size,
+                                    dma_addr_t *dma_handle, gfp_t flags,
+                                    struct dma_attrs *attrs)
+{
+       struct page *page, **map;
+       void *ptr, *coherent_ptr;
+       int order, i;
+
+       size = PAGE_ALIGN(size);
+       order = get_order(size);
+
+       ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs);
+       if (!ptr)
+               goto no_mem;
+       map = kmalloc(sizeof(struct page *) << order, flags & ~GFP_DMA);
+       if (!map)
+               goto no_map;
+
+       /* remove any dirty cache lines on the kernel alias */
+       __dma_flush_range(ptr, ptr + size);
+
+       /* create a coherent mapping */
+       page = virt_to_page(ptr);
+       for (i = 0; i < (size >> PAGE_SHIFT); i++)
+               map[i] = page + i;
+       coherent_ptr = vmap(map, size >> PAGE_SHIFT, VM_MAP,
+                           pgprot_dmacoherent(pgprot_default));
+       kfree(map);
+       if (!coherent_ptr)
+               goto no_map;
+
+       return coherent_ptr;
+
+no_map:
+       __dma_free_coherent(dev, size, ptr, *dma_handle, attrs);
+no_mem:
+       *dma_handle = ~0;
+       return NULL;
+}
+
+static void __dma_free_noncoherent(struct device *dev, size_t size,
+                                  void *vaddr, dma_addr_t dma_handle,
+                                  struct dma_attrs *attrs)
+{
+       void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));
+
+       vunmap(vaddr);
+       __dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs);
+}
+
+static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
+                                    unsigned long offset, size_t size,
+                                    enum dma_data_direction dir,
+                                    struct dma_attrs *attrs)
+{
+       dma_addr_t dev_addr;
+
+       dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
+       __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+
+       return dev_addr;
+}
+
+
+static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
+                                size_t size, enum dma_data_direction dir,
+                                struct dma_attrs *attrs)
+{
+       __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+       swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
+}
+
+static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
+                                 int nelems, enum dma_data_direction dir,
+                                 struct dma_attrs *attrs)
+{
+       struct scatterlist *sg;
+       int i, ret;
+
+       ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
+       for_each_sg(sgl, sg, ret, i)
+               __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+                              sg->length, dir);
+
+       return ret;
+}
+
+static void __swiotlb_unmap_sg_attrs(struct device *dev,
+                                    struct scatterlist *sgl, int nelems,
+                                    enum dma_data_direction dir,
+                                    struct dma_attrs *attrs)
+{
+       struct scatterlist *sg;
+       int i;
+
+       for_each_sg(sgl, sg, nelems, i)
+               __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+                                sg->length, dir);
+       swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
+}
+
+static void __swiotlb_sync_single_for_cpu(struct device *dev,
+                                         dma_addr_t dev_addr, size_t size,
+                                         enum dma_data_direction dir)
+{
+       __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+       swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir);
+}
+
+static void __swiotlb_sync_single_for_device(struct device *dev,
+                                            dma_addr_t dev_addr, size_t size,
+                                            enum dma_data_direction dir)
+{
+       swiotlb_sync_single_for_device(dev, dev_addr, size, dir);
+       __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+}
+
+static void __swiotlb_sync_sg_for_cpu(struct device *dev,
+                                     struct scatterlist *sgl, int nelems,
+                                     enum dma_data_direction dir)
+{
+       struct scatterlist *sg;
+       int i;
+
+       for_each_sg(sgl, sg, nelems, i)
+               __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+                                sg->length, dir);
+       swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
+}
+
+static void __swiotlb_sync_sg_for_device(struct device *dev,
+                                        struct scatterlist *sgl, int nelems,
+                                        enum dma_data_direction dir)
+{
+       struct scatterlist *sg;
+       int i;
+
+       swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
+       for_each_sg(sgl, sg, nelems, i)
+               __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+                              sg->length, dir);
+}
+
+struct dma_map_ops noncoherent_swiotlb_dma_ops = {
+       .alloc = __dma_alloc_noncoherent,
+       .free = __dma_free_noncoherent,
+       .map_page = __swiotlb_map_page,
+       .unmap_page = __swiotlb_unmap_page,
+       .map_sg = __swiotlb_map_sg_attrs,
+       .unmap_sg = __swiotlb_unmap_sg_attrs,
+       .sync_single_for_cpu = __swiotlb_sync_single_for_cpu,
+       .sync_single_for_device = __swiotlb_sync_single_for_device,
+       .sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
+       .sync_sg_for_device = __swiotlb_sync_sg_for_device,
+       .dma_supported = swiotlb_dma_supported,
+       .mapping_error = swiotlb_dma_mapping_error,
+};
+EXPORT_SYMBOL(noncoherent_swiotlb_dma_ops);
+
+struct dma_map_ops coherent_swiotlb_dma_ops = {
        .alloc = __dma_alloc_coherent,
        .free = __dma_free_coherent,
        .map_page = swiotlb_map_page,
@@ -92,6 +251,7 @@ static struct dma_map_ops coherent_swiotlb_dma_ops = {
        .dma_supported = swiotlb_dma_supported,
        .mapping_error = swiotlb_dma_mapping_error,
 };
+EXPORT_SYMBOL(coherent_swiotlb_dma_ops);
 
 extern int swiotlb_late_init_with_default_size(size_t default_size);