fpga zynq: Use the scatterlist interface
author Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Wed, 1 Feb 2017 19:48:45 +0000 (12:48 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 10 Feb 2017 14:20:44 +0000 (15:20 +0100)
This allows the driver to avoid a high-order coherent DMA allocation
and a memory copy. With this patch it can DMA directly from the kernel
pages that the bitfile is stored in.

Since this is now a scatter-gather DMA operation, the driver uses the
ISR to feed the chip's DMA queue with each entry from the SGL.

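How the pages reach the driver is up to the fpga-mgr core, but for
context, wrapping a bitfile that already sits in kernel pages looks
roughly like this (a minimal sketch, not part of this patch; the page
array, count, and length are placeholder parameters):

    #include <linux/scatterlist.h>

    /* Hypothetical helper: wrap existing bitfile pages in an sg_table
     * so the driver can DMA from them without a bounce copy.
     */
    static int bitfile_to_sgt(struct sg_table *sgt, struct page **pages,
                              unsigned int npages, size_t len)
    {
            /* merges physically contiguous pages into fewer entries */
            return sg_alloc_table_from_pages(sgt, pages, npages, 0, len,
                                             GFP_KERNEL);
    }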
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Acked-by: Moritz Fischer <moritz.fischer@ettus.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/fpga/zynq-fpga.c

index c3fc2a231e281044697b6e0650e3ecf69bfde60f..34cb98139442dfb93a7de248105b799f0ab982f7 100644
@@ -30,6 +30,7 @@
 #include <linux/pm.h>
 #include <linux/regmap.h>
 #include <linux/string.h>
+#include <linux/scatterlist.h>
 
 /* Offsets into SLCR regmap */
 
@@ -80,6 +81,7 @@
 
 /* FPGA init status */
 #define STATUS_DMA_Q_F                 BIT(31)
+#define STATUS_DMA_Q_E                 BIT(30)
 #define STATUS_PCFG_INIT_MASK          BIT(4)
 
 /* Interrupt Status/Mask Register Bit definitions */
 #define DMA_INVALID_ADDRESS            GENMASK(31, 0)
 /* Used to unlock the dev */
 #define UNLOCK_MASK                    0x757bdf0d
-/* Timeout for DMA to complete */
-#define DMA_DONE_TIMEOUT               msecs_to_jiffies(1000)
 /* Timeout for polling reset bits */
 #define INIT_POLL_TIMEOUT              2500000
 /* Delay for polling reset bits */
 #define INIT_POLL_DELAY                        20
+/* Signal this is the last DMA transfer; wait for both the AXI and PCAP
+ * transfers to complete before interrupting
+ */
+#define DMA_SRC_LAST_TRANSFER          1
+/* Timeout for DMA completion */
+#define DMA_TIMEOUT_MS                 5000
 
 /* Masks for controlling stuff in SLCR */
 /* Disable all Level shifters */
@@ -124,6 +130,11 @@ struct zynq_fpga_priv {
        void __iomem *io_base;
        struct regmap *slcr;
 
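+       /* Scatterlist walk state, fed into the hardware queue from the
+        * ISR: cur_sg is the next unqueued entry, dma_elm counts entries
+        * already queued, dma_nelms is the total from dma_map_sg().
+        */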
+       spinlock_t dma_lock;
+       unsigned int dma_elm;
+       unsigned int dma_nelms;
+       struct scatterlist *cur_sg;
+
        struct completion dma_done;
 };
 
@@ -149,13 +160,80 @@ static inline void zynq_fpga_set_irq(struct zynq_fpga_priv *priv, u32 enable)
        zynq_fpga_write(priv, INT_MASK_OFFSET, ~enable);
 }
 
+/* Must be called with dma_lock held */
+static void zynq_step_dma(struct zynq_fpga_priv *priv)
+{
+       u32 addr;
+       u32 len;
+       bool first;
+
+       first = priv->dma_elm == 0;
+       while (priv->cur_sg) {
+               /* Feed the DMA queue until it is full. */
+               if (zynq_fpga_read(priv, STATUS_OFFSET) & STATUS_DMA_Q_F)
+                       break;
+
+               addr = sg_dma_address(priv->cur_sg);
+               len = sg_dma_len(priv->cur_sg);
+               if (priv->dma_elm + 1 == priv->dma_nelms) {
+                       /* The last transfer waits for the PCAP to finish
+                        * too. Note this also changes the irq_mask to ignore
+                        * IXR_DMA_DONE_MASK, which ensures we do not trigger
+                        * the completion too early.
+                        */
+                       addr |= DMA_SRC_LAST_TRANSFER;
+                       priv->cur_sg = NULL;
+               } else {
+                       priv->cur_sg = sg_next(priv->cur_sg);
+                       priv->dma_elm++;
+               }
+
+               zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, addr);
+               zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, DMA_INVALID_ADDRESS);
+               zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, len / 4);
+               zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0);
+       }
+
+       /* Once the first transfer is queued we can turn on the ISR; future
+        * calls to zynq_step_dma will happen from ISR context. The dma_lock
+        * spinlock guarantees this handover is done coherently, and the ISR
+        * enable is done last to avoid another CPU spinning in the ISR on
+        * this lock.
+        */
+       if (first && priv->cur_sg) {
+               zynq_fpga_set_irq(priv,
+                                 IXR_DMA_DONE_MASK | IXR_ERROR_FLAGS_MASK);
+       } else if (!priv->cur_sg) {
+               /* The last transfer changes to DMA & PCAP mode since we do
+                * not want to continue until everything has been flushed into
+                * the PCAP.
+                */
+               zynq_fpga_set_irq(priv,
+                                 IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK);
+       }
+}
+
 static irqreturn_t zynq_fpga_isr(int irq, void *data)
 {
        struct zynq_fpga_priv *priv = data;
+       u32 intr_status;
 
-       /* disable DMA and error IRQs */
-       zynq_fpga_set_irq(priv, 0);
+       /* If anything other than a DMA completion is reported, something
+        * went wrong: stop and hand control back to zynq_fpga_ops_write.
+        * Otherwise keep feeding the DMA queue.
+        */
+       spin_lock(&priv->dma_lock);
+       intr_status = zynq_fpga_read(priv, INT_STS_OFFSET);
+       if (!(intr_status & IXR_ERROR_FLAGS_MASK) &&
+           (intr_status & IXR_DMA_DONE_MASK) && priv->cur_sg) {
+               zynq_fpga_write(priv, INT_STS_OFFSET, IXR_DMA_DONE_MASK);
+               zynq_step_dma(priv);
+               spin_unlock(&priv->dma_lock);
+               return IRQ_HANDLED;
+       }
+       spin_unlock(&priv->dma_lock);
 
+       zynq_fpga_set_irq(priv, 0);
        complete(&priv->dma_done);
 
        return IRQ_HANDLED;
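A locking note: the handler above can take the plain spin_lock because
it already runs in hard-IRQ context, while zynq_fpga_ops_write below
must use the irqsave variant so the ISR cannot interrupt it while it
holds the lock. The standard kernel pattern, sketched for clarity:

    /* process context: disable local interrupts while holding the lock */
    spin_lock_irqsave(&priv->dma_lock, flags);
    /* ... update cur_sg / dma_elm, program the queue ... */
    spin_unlock_irqrestore(&priv->dma_lock, flags);

    /* hard-IRQ context: interrupts are already masked on this CPU */
    spin_lock(&priv->dma_lock);
    /* ... step the DMA ... */
    spin_unlock(&priv->dma_lock);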
@@ -266,10 +344,11 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr,
        zynq_fpga_write(priv, CTRL_OFFSET,
                        (CTRL_PCAP_PR_MASK | CTRL_PCAP_MODE_MASK | ctrl));
 
-       /* check that we have room in the command queue */
+       /* We expect that the command queue is empty right now. */
        status = zynq_fpga_read(priv, STATUS_OFFSET);
-       if (status & STATUS_DMA_Q_F) {
-               dev_err(&mgr->dev, "DMA command queue full\n");
+       if ((status & STATUS_DMA_Q_F) ||
+           (status & STATUS_DMA_Q_E) != STATUS_DMA_Q_E) {
+               dev_err(&mgr->dev, "DMA command queue not empty\n");
                err = -EBUSY;
                goto out_err;
        }
@@ -288,27 +367,36 @@ out_err:
        return err;
 }
 
-static int zynq_fpga_ops_write(struct fpga_manager *mgr,
-                              const char *buf, size_t count)
+static int zynq_fpga_ops_write(struct fpga_manager *mgr, struct sg_table *sgt)
 {
        struct zynq_fpga_priv *priv;
        const char *why;
        int err;
-       char *kbuf;
-       size_t in_count;
-       dma_addr_t dma_addr;
-       u32 transfer_length;
        u32 intr_status;
+       unsigned long timeout;
+       unsigned long flags;
+       struct scatterlist *sg;
+       int i;
 
-       in_count = count;
        priv = mgr->priv;
 
-       kbuf =
-           dma_alloc_coherent(mgr->dev.parent, count, &dma_addr, GFP_KERNEL);
-       if (!kbuf)
-               return -ENOMEM;
+       /* The hardware can only DMA multiples of 4 bytes, and it requires the
+        * starting addresses to be aligned to 64 bits (UG585 pg 212).
+        */
+       for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+               if ((sg->offset % 8) || (sg->length % 4)) {
+                       dev_err(&mgr->dev,
+                           "Invalid bitstream, chunks must be aligned\n");
+                       return -EINVAL;
+               }
+       }
 
-       memcpy(kbuf, buf, count);
+       priv->dma_nelms =
+           dma_map_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
+       if (priv->dma_nelms == 0) {
+               dev_err(&mgr->dev, "Unable to DMA map (TO_DEVICE)\n");
+               return -ENOMEM;
+       }
 
        /* enable clock */
        err = clk_enable(priv->clk);
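A note on the mapping contract (general kernel DMA API behaviour, not
specific to this patch): dma_map_sg can return fewer elements than
nents when an IOMMU merges entries, which is why the SGL walk is
bounded by dma_nelms while dma_unmap_sg in the cleanup path is still
called with the original sgt->nents. The pattern in miniature:

    int nelms = dma_map_sg(dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE);

    if (nelms == 0)
            return -ENOMEM;         /* mapping failed */
    /* ... program the hardware with at most nelms entries ... */
    dma_unmap_sg(dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE);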
@@ -316,28 +404,31 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
                goto out_free;
 
        zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK);
-
        reinit_completion(&priv->dma_done);
 
-       /* enable DMA and error IRQs */
-       zynq_fpga_set_irq(priv, IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK);
-
-       /* the +1 in the src addr is used to hold off on DMA_DONE IRQ
-        * until both AXI and PCAP are done ...
-        */
-       zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, (u32)(dma_addr) + 1);
-       zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, (u32)DMA_INVALID_ADDRESS);
-
-       /* convert #bytes to #words */
-       transfer_length = (count + 3) / 4;
+       /* zynq_step_dma will turn on interrupts */
+       spin_lock_irqsave(&priv->dma_lock, flags);
+       priv->dma_elm = 0;
+       priv->cur_sg = sgt->sgl;
+       zynq_step_dma(priv);
+       spin_unlock_irqrestore(&priv->dma_lock, flags);
 
-       zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, transfer_length);
-       zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0);
+       timeout = wait_for_completion_timeout(&priv->dma_done,
+                                             msecs_to_jiffies(DMA_TIMEOUT_MS));
 
-       wait_for_completion(&priv->dma_done);
+       spin_lock_irqsave(&priv->dma_lock, flags);
+       zynq_fpga_set_irq(priv, 0);
+       priv->cur_sg = NULL;
+       spin_unlock_irqrestore(&priv->dma_lock, flags);
 
        intr_status = zynq_fpga_read(priv, INT_STS_OFFSET);
-       zynq_fpga_write(priv, INT_STS_OFFSET, intr_status);
+       zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK);
+
+       /* There doesn't seem to be a way to force cancel any DMA, so if
+        * something went wrong we are relying on the hardware to have halted
+        * the DMA before we get here. If there was a way to cancel it, we
+        * could also use wait_for_completion_interruptible.
+        */
 
        if (intr_status & IXR_ERROR_FLAGS_MASK) {
                why = "DMA reported error";
@@ -345,8 +436,12 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
                goto out_report;
        }
 
-       if (!((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) {
-               why = "DMA did not complete";
+       if (priv->cur_sg ||
+           !((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) {
+               if (timeout == 0)
+                       why = "DMA timed out";
+               else
+                       why = "DMA did not complete";
                err = -EIO;
                goto out_report;
        }
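For reference, wait_for_completion_timeout returns 0 on timeout and the
number of jiffies remaining otherwise, which is what the timeout == 0
test above distinguishes from a completion that fired but reported an
error. In miniature:

    unsigned long left;

    left = wait_for_completion_timeout(&priv->dma_done,
                                       msecs_to_jiffies(DMA_TIMEOUT_MS));
    if (left == 0)
            dev_err(dev, "DMA timed out\n");    /* never signalled */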
@@ -369,7 +464,7 @@ out_clk:
        clk_disable(priv->clk);
 
 out_free:
-       dma_free_coherent(mgr->dev.parent, count, kbuf, dma_addr);
+       dma_unmap_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
        return err;
 }
 
@@ -433,7 +528,7 @@ static const struct fpga_manager_ops zynq_fpga_ops = {
        .initial_header_size = 128,
        .state = zynq_fpga_ops_state,
        .write_init = zynq_fpga_ops_write_init,
-       .write = zynq_fpga_ops_write,
+       .write_sg = zynq_fpga_ops_write,
        .write_complete = zynq_fpga_ops_write_complete,
 };
 
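The write_sg op itself is wired up by a companion fpga-mgr core change
not shown in this patch; roughly, and only as a sketch of the dispatch
rather than the exact core code:

    /* core prefers the scatter-gather op when the driver provides one */
    if (mgr->mops->write_sg)
            err = mgr->mops->write_sg(mgr, sgt);    /* no bounce buffer */
    else
            err = mgr->mops->write(mgr, buf, count);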
@@ -447,6 +542,7 @@ static int zynq_fpga_probe(struct platform_device *pdev)
        priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;
+       spin_lock_init(&priv->dma_lock);
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        priv->io_base = devm_ioremap_resource(dev, res);