gpu: host1x: Add IOMMU support
author Mikko Perttunen <mperttunen@nvidia.com>
Wed, 14 Dec 2016 11:16:14 +0000 (13:16 +0200)
committer Thierry Reding <treding@nvidia.com>
Wed, 5 Apr 2017 16:11:43 +0000 (18:11 +0200)
Add support for the Host1x unit to be located behind
an IOMMU. This is required when gather buffers may be
allocated non-contiguously in physical memory, as can
be the case when TegraDRM is also using the IOMMU.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
drivers/gpu/host1x/cdma.c
drivers/gpu/host1x/cdma.h
drivers/gpu/host1x/dev.c
drivers/gpu/host1x/dev.h
drivers/gpu/host1x/hw/cdma_hw.c
drivers/gpu/host1x/job.c
drivers/gpu/host1x/job.h

index c5d82a8a2ec9c2498bf9e7ed96823cb14db8afae..28541b28073907434e1f929b9adf0cdc5c9cf01b 100644 (file)
@@ -51,9 +51,15 @@ static void host1x_pushbuffer_destroy(struct push_buffer *pb)
        struct host1x_cdma *cdma = pb_to_cdma(pb);
        struct host1x *host1x = cdma_to_host1x(cdma);
 
-       if (pb->phys != 0)
-               dma_free_wc(host1x->dev, pb->size_bytes + 4, pb->mapped,
-                           pb->phys);
+       if (!pb->phys)
+               return;
+
+       if (host1x->domain) {
+               iommu_unmap(host1x->domain, pb->dma, pb->alloc_size);
+               free_iova(&host1x->iova, iova_pfn(&host1x->iova, pb->dma));
+       }
+
+       dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys);
 
        pb->mapped = NULL;
        pb->phys = 0;
@@ -66,28 +72,64 @@ static int host1x_pushbuffer_init(struct push_buffer *pb)
 {
        struct host1x_cdma *cdma = pb_to_cdma(pb);
        struct host1x *host1x = cdma_to_host1x(cdma);
+       struct iova *alloc;
+       u32 size;
+       int err;
 
        pb->mapped = NULL;
        pb->phys = 0;
-       pb->size_bytes = HOST1X_PUSHBUFFER_SLOTS * 8;
+       pb->size = HOST1X_PUSHBUFFER_SLOTS * 8;
+
+       size = pb->size + 4;
 
        /* initialize buffer pointers */
-       pb->fence = pb->size_bytes - 8;
+       pb->fence = pb->size - 8;
        pb->pos = 0;
 
-       /* allocate and map pushbuffer memory */
-       pb->mapped = dma_alloc_wc(host1x->dev, pb->size_bytes + 4, &pb->phys,
-                                 GFP_KERNEL);
-       if (!pb->mapped)
-               goto fail;
+       if (host1x->domain) {
+               unsigned long shift;
+
+               size = iova_align(&host1x->iova, size);
+
+               pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
+                                         GFP_KERNEL);
+               if (!pb->mapped)
+                       return -ENOMEM;
+
+               shift = iova_shift(&host1x->iova);
+               alloc = alloc_iova(&host1x->iova, size >> shift,
+                                  host1x->iova_end >> shift, true);
+               if (!alloc) {
+                       err = -ENOMEM;
+                       goto iommu_free_mem;
+               }
+
+               pb->dma = iova_dma_addr(&host1x->iova, alloc);
+               err = iommu_map(host1x->domain, pb->dma, pb->phys, size,
+                               IOMMU_READ);
+               if (err)
+                       goto iommu_free_iova;
+       } else {
+               pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
+                                         GFP_KERNEL);
+               if (!pb->mapped)
+                       return -ENOMEM;
+
+               pb->dma = pb->phys;
+       }
+
+       pb->alloc_size = size;
 
        host1x_hw_pushbuffer_init(host1x, pb);
 
        return 0;
 
-fail:
-       host1x_pushbuffer_destroy(pb);
-       return -ENOMEM;
+iommu_free_iova:
+       __free_iova(&host1x->iova, alloc);
+iommu_free_mem:
+       dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys);
+
+       return err;
 }
 
 /*
@@ -101,7 +143,7 @@ static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2)
        WARN_ON(pb->pos == pb->fence);
        *(p++) = op1;
        *(p++) = op2;
-       pb->pos = (pb->pos + 8) & (pb->size_bytes - 1);
+       pb->pos = (pb->pos + 8) & (pb->size - 1);
 }
 
 /*
@@ -111,7 +153,7 @@ static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2)
 static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots)
 {
        /* Advance the next write position */
-       pb->fence = (pb->fence + slots * 8) & (pb->size_bytes - 1);
+       pb->fence = (pb->fence + slots * 8) & (pb->size - 1);
 }
 
 /*
@@ -119,7 +161,7 @@ static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots)
  */
 static u32 host1x_pushbuffer_space(struct push_buffer *pb)
 {
-       return ((pb->fence - pb->pos) & (pb->size_bytes - 1)) / 8;
+       return ((pb->fence - pb->pos) & (pb->size - 1)) / 8;
 }
 
 /*
index 470087af8fe520681ba3cc8457646343f16f6aee..ec170a78f4e1a311a47d9da4a94dba866440568d 100644 (file)
@@ -43,10 +43,12 @@ struct host1x_job;
 
 struct push_buffer {
        void *mapped;                   /* mapped pushbuffer memory */
-       dma_addr_t phys;                /* physical address of pushbuffer */
+       dma_addr_t dma;                 /* device address of pushbuffer */
+       phys_addr_t phys;               /* physical address of pushbuffer */
        u32 fence;                      /* index we've written */
        u32 pos;                        /* index to write to */
-       u32 size_bytes;
+       u32 size;
+       u32 alloc_size;
 };
 
 struct buffer_timeout {
index a62317af76ad7b8b90593c2fda904a102e360701..b386a0bf828abf24457c442c8153e32e77387a07 100644 (file)
@@ -27,6 +27,7 @@
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/host1x.h>
+#undef CREATE_TRACE_POINTS
 
 #include "bus.h"
 #include "dev.h"
@@ -168,16 +169,37 @@ static int host1x_probe(struct platform_device *pdev)
                return err;
        }
 
+       if (iommu_present(&platform_bus_type)) {
+               struct iommu_domain_geometry *geometry;
+               unsigned long order;
+
+               host->domain = iommu_domain_alloc(&platform_bus_type);
+               if (!host->domain)
+                       return -ENOMEM;
+
+               err = iommu_attach_device(host->domain, &pdev->dev);
+               if (err)
+                       goto fail_free_domain;
+
+               geometry = &host->domain->geometry;
+
+               order = __ffs(host->domain->pgsize_bitmap);
+               init_iova_domain(&host->iova, 1UL << order,
+                                geometry->aperture_start >> order,
+                                geometry->aperture_end >> order);
+               host->iova_end = geometry->aperture_end;
+       }
+
        err = host1x_channel_list_init(host);
        if (err) {
                dev_err(&pdev->dev, "failed to initialize channel list\n");
-               return err;
+               goto fail_detach_device;
        }
 
        err = clk_prepare_enable(host->clk);
        if (err < 0) {
                dev_err(&pdev->dev, "failed to enable clock\n");
-               return err;
+               goto fail_detach_device;
        }
 
        err = host1x_syncpt_init(host);
@@ -206,6 +228,15 @@ fail_deinit_syncpt:
        host1x_syncpt_deinit(host);
 fail_unprepare_disable:
        clk_disable_unprepare(host->clk);
+fail_detach_device:
+       if (host->domain) {
+               put_iova_domain(&host->iova);
+               iommu_detach_device(host->domain, &pdev->dev);
+       }
+fail_free_domain:
+       if (host->domain)
+               iommu_domain_free(host->domain);
+
        return err;
 }
 
@@ -218,6 +249,12 @@ static int host1x_remove(struct platform_device *pdev)
        host1x_syncpt_deinit(host);
        clk_disable_unprepare(host->clk);
 
+       if (host->domain) {
+               put_iova_domain(&host->iova);
+               iommu_detach_device(host->domain, &pdev->dev);
+               iommu_domain_free(host->domain);
+       }
+
        return 0;
 }
 
index 06dd4f85125fb16a625b2355c55751cc3dbefed2..e5113acecd7ad3fb0d619bed19f4bccaf6858715 100644 (file)
@@ -19,6 +19,8 @@
 
 #include <linux/platform_device.h>
 #include <linux/device.h>
+#include <linux/iommu.h>
+#include <linux/iova.h>
 
 #include "channel.h"
 #include "syncpt.h"
@@ -108,6 +110,10 @@ struct host1x {
        struct device *dev;
        struct clk *clk;
 
+       struct iommu_domain *domain;
+       struct iova_domain iova;
+       dma_addr_t iova_end;
+
        struct mutex intr_mutex;
        int intr_syncpt_irq;
 
index 659c1bbfeeba2316df23976b2ce78217f578ffad..6b231119193ee4025abf4a1fd8aa56ee045cc288 100644 (file)
@@ -30,7 +30,7 @@
  */
 static void push_buffer_init(struct push_buffer *pb)
 {
-       *(u32 *)(pb->mapped + pb->size_bytes) = host1x_opcode_restart(0);
+       *(u32 *)(pb->mapped + pb->size) = host1x_opcode_restart(0);
 }
 
 /*
@@ -55,8 +55,8 @@ static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr,
                *(p++) = HOST1X_OPCODE_NOP;
                *(p++) = HOST1X_OPCODE_NOP;
                dev_dbg(host1x->dev, "%s: NOP at %pad+%#x\n", __func__,
-                       &pb->phys, getptr);
-               getptr = (getptr + 8) & (pb->size_bytes - 1);
+                       &pb->dma, getptr);
+               getptr = (getptr + 8) & (pb->size - 1);
        }
 
        wmb();
@@ -78,10 +78,9 @@ static void cdma_start(struct host1x_cdma *cdma)
                         HOST1X_CHANNEL_DMACTRL);
 
        /* set base, put and end pointer */
-       host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART);
+       host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART);
        host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT);
-       host1x_ch_writel(ch, cdma->push_buffer.phys +
-                        cdma->push_buffer.size_bytes + 4,
+       host1x_ch_writel(ch, cdma->push_buffer.dma + cdma->push_buffer.size + 4,
                         HOST1X_CHANNEL_DMAEND);
 
        /* reset GET */
@@ -115,9 +114,8 @@ static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr)
                         HOST1X_CHANNEL_DMACTRL);
 
        /* set base, end pointer (all of memory) */
-       host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART);
-       host1x_ch_writel(ch, cdma->push_buffer.phys +
-                        cdma->push_buffer.size_bytes,
+       host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART);
+       host1x_ch_writel(ch, cdma->push_buffer.dma + cdma->push_buffer.size,
                         HOST1X_CHANNEL_DMAEND);
 
        /* set GET, by loading the value in PUT (then reset GET) */
index 92c3df93330321f738542b5c67549f948d0ca3c7..5f5f8ee6143d156ab3dce6086037da7811f38816 100644 (file)
@@ -174,9 +174,10 @@ static int do_waitchks(struct host1x_job *job, struct host1x *host,
        return 0;
 }
 
-static unsigned int pin_job(struct host1x_job *job)
+static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 {
        unsigned int i;
+       int err;
 
        job->num_unpins = 0;
 
@@ -186,12 +187,16 @@ static unsigned int pin_job(struct host1x_job *job)
                dma_addr_t phys_addr;
 
                reloc->target.bo = host1x_bo_get(reloc->target.bo);
-               if (!reloc->target.bo)
+               if (!reloc->target.bo) {
+                       err = -EINVAL;
                        goto unpin;
+               }
 
                phys_addr = host1x_bo_pin(reloc->target.bo, &sgt);
-               if (!phys_addr)
+               if (!phys_addr) {
+                       err = -EINVAL;
                        goto unpin;
+               }
 
                job->addr_phys[job->num_unpins] = phys_addr;
                job->unpins[job->num_unpins].bo = reloc->target.bo;
@@ -201,28 +206,67 @@ static unsigned int pin_job(struct host1x_job *job)
 
        for (i = 0; i < job->num_gathers; i++) {
                struct host1x_job_gather *g = &job->gathers[i];
+               size_t gather_size = 0;
+               struct scatterlist *sg;
                struct sg_table *sgt;
                dma_addr_t phys_addr;
+               unsigned long shift;
+               struct iova *alloc;
+               unsigned int j;
 
                g->bo = host1x_bo_get(g->bo);
-               if (!g->bo)
+               if (!g->bo) {
+                       err = -EINVAL;
                        goto unpin;
+               }
 
                phys_addr = host1x_bo_pin(g->bo, &sgt);
-               if (!phys_addr)
+               if (!phys_addr) {
+                       err = -EINVAL;
                        goto unpin;
+               }
+
+               if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
+                       for_each_sg(sgt->sgl, sg, sgt->nents, j)
+                               gather_size += sg->length;
+                       gather_size = iova_align(&host->iova, gather_size);
+
+                       shift = iova_shift(&host->iova);
+                       alloc = alloc_iova(&host->iova, gather_size >> shift,
+                                          host->iova_end >> shift, true);
+                       if (!alloc) {
+                               err = -ENOMEM;
+                               goto unpin;
+                       }
+
+                       err = iommu_map_sg(host->domain,
+                                       iova_dma_addr(&host->iova, alloc),
+                                       sgt->sgl, sgt->nents, IOMMU_READ);
+                       if (err == 0) {
+                               __free_iova(&host->iova, alloc);
+                               err = -EINVAL;
+                               goto unpin;
+                       }
+
+                       job->addr_phys[job->num_unpins] =
+                               iova_dma_addr(&host->iova, alloc);
+                       job->unpins[job->num_unpins].size = gather_size;
+               } else {
+                       job->addr_phys[job->num_unpins] = phys_addr;
+               }
+
+               job->gather_addr_phys[i] = job->addr_phys[job->num_unpins];
 
-               job->addr_phys[job->num_unpins] = phys_addr;
                job->unpins[job->num_unpins].bo = g->bo;
                job->unpins[job->num_unpins].sgt = sgt;
                job->num_unpins++;
        }
 
-       return job->num_unpins;
+       return 0;
 
 unpin:
        host1x_job_unpin(job);
-       return 0;
+       return err;
 }
 
 static int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf)
@@ -525,8 +569,8 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
                host1x_syncpt_load(host->syncpt + i);
 
        /* pin memory */
-       err = pin_job(job);
-       if (!err)
+       err = pin_job(host, job);
+       if (err)
                goto out;
 
        /* patch gathers */
@@ -572,11 +616,19 @@ EXPORT_SYMBOL(host1x_job_pin);
 
 void host1x_job_unpin(struct host1x_job *job)
 {
+       struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
        unsigned int i;
 
        for (i = 0; i < job->num_unpins; i++) {
                struct host1x_job_unpin_data *unpin = &job->unpins[i];
 
+               if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
+                       iommu_unmap(host->domain, job->addr_phys[i],
+                                   unpin->size);
+                       free_iova(&host->iova,
+                               iova_pfn(&host->iova, job->addr_phys[i]));
+               }
+
                host1x_bo_unpin(unpin->bo, unpin->sgt);
                host1x_bo_put(unpin->bo);
        }
index 8b3c15df066085e88a33d6efddc74cfd774bc04d..878239c476d20b7ec464d8082bb3af0dafa603ff 100644 (file)
@@ -44,6 +44,7 @@ struct host1x_waitchk {
 struct host1x_job_unpin_data {
        struct host1x_bo *bo;
        struct sg_table *sgt;
+       size_t size;
 };
 
 /*