dmaengine: ioatdma: Add error handling to ioat driver
authorDave Jiang <dave.jiang@intel.com>
Wed, 20 Jul 2016 20:13:55 +0000 (13:13 -0700)
committerVinod Koul <vinod.koul@intel.com>
Mon, 8 Aug 2016 02:41:42 +0000 (08:11 +0530)
Adding error handling to the ioatdma driver so that when a
read/write error occurs the error results are reported back and
all the remaining descriptors are aborted. This utilizes the new
dmaengine callback function that allows reporting of results.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
drivers/dma/ioat/dma.c
drivers/dma/ioat/registers.h

index 6499de4b8e799ca0d43c0657fed0f2c5c22021d3..251f8be639c2dc48add48583f33b67428ffe0b32 100644 (file)
@@ -568,10 +568,14 @@ static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete)
 
                tx = &desc->txd;
                if (tx->cookie) {
+                       struct dmaengine_result res;
+
                        dma_cookie_complete(tx);
                        dma_descriptor_unmap(tx);
+                       res.result = DMA_TRANS_NOERROR;
                        dmaengine_desc_get_callback_invoke(tx, NULL);
                        tx->callback = NULL;
+                       tx->callback_result = NULL;
                }
 
                if (tx->phys == phys_complete)
@@ -620,7 +624,8 @@ static void ioat_cleanup(struct ioatdma_chan *ioat_chan)
        if (is_ioat_halted(*ioat_chan->completion)) {
                u32 chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
 
-               if (chanerr & IOAT_CHANERR_HANDLE_MASK) {
+               if (chanerr &
+                   (IOAT_CHANERR_HANDLE_MASK | IOAT_CHANERR_RECOVER_MASK)) {
                        mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
                        ioat_eh(ioat_chan);
                }
@@ -650,6 +655,61 @@ static void ioat_restart_channel(struct ioatdma_chan *ioat_chan)
        __ioat_restart_chan(ioat_chan);
 }
 
+
+static void ioat_abort_descs(struct ioatdma_chan *ioat_chan)
+{
+       struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+       struct ioat_ring_ent *desc;
+       u16 active;
+       int idx = ioat_chan->tail, i;
+
+       /*
+        * We assume that the failed descriptor has been processed.
+        * Now we are just returning all the remaining submitted
+        * descriptors to abort.
+        */
+       active = ioat_ring_active(ioat_chan);
+
+       /* we skip the failed descriptor that tail points to */
+       for (i = 1; i < active; i++) {
+               struct dma_async_tx_descriptor *tx;
+
+               smp_read_barrier_depends();
+               prefetch(ioat_get_ring_ent(ioat_chan, idx + i + 1));
+               desc = ioat_get_ring_ent(ioat_chan, idx + i);
+
+               tx = &desc->txd;
+               if (tx->cookie) {
+                       struct dmaengine_result res;
+
+                       dma_cookie_complete(tx);
+                       dma_descriptor_unmap(tx);
+                       res.result = DMA_TRANS_ABORTED;
+                       dmaengine_desc_get_callback_invoke(tx, &res);
+                       tx->callback = NULL;
+                       tx->callback_result = NULL;
+               }
+
+               /* skip extended descriptors */
+               if (desc_has_ext(desc)) {
+                       WARN_ON(i + 1 >= active);
+                       i++;
+               }
+
+               /* cleanup super extended descriptors */
+               if (desc->sed) {
+                       ioat_free_sed(ioat_dma, desc->sed);
+                       desc->sed = NULL;
+               }
+       }
+
+       smp_mb(); /* finish all descriptor reads before incrementing tail */
+       ioat_chan->tail = idx + active;
+
+       desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail);
+       ioat_chan->last_completion = *ioat_chan->completion = desc->txd.phys;
+}
+
 static void ioat_eh(struct ioatdma_chan *ioat_chan)
 {
        struct pci_dev *pdev = to_pdev(ioat_chan);
@@ -660,6 +720,8 @@ static void ioat_eh(struct ioatdma_chan *ioat_chan)
        u32 err_handled = 0;
        u32 chanerr_int;
        u32 chanerr;
+       bool abort = false;
+       struct dmaengine_result res;
 
        /* cleanup so tail points to descriptor that caused the error */
        if (ioat_cleanup_preamble(ioat_chan, &phys_complete))
@@ -695,28 +757,50 @@ static void ioat_eh(struct ioatdma_chan *ioat_chan)
                break;
        }
 
+       if (chanerr & IOAT_CHANERR_RECOVER_MASK) {
+               if (chanerr & IOAT_CHANERR_READ_DATA_ERR) {
+                       res.result = DMA_TRANS_READ_FAILED;
+                       err_handled |= IOAT_CHANERR_READ_DATA_ERR;
+               } else if (chanerr & IOAT_CHANERR_WRITE_DATA_ERR) {
+                       res.result = DMA_TRANS_WRITE_FAILED;
+                       err_handled |= IOAT_CHANERR_WRITE_DATA_ERR;
+               }
+
+               abort = true;
+       } else
+               res.result = DMA_TRANS_NOERROR;
+
        /* fault on unhandled error or spurious halt */
        if (chanerr ^ err_handled || chanerr == 0) {
                dev_err(to_dev(ioat_chan), "%s: fatal error (%x:%x)\n",
                        __func__, chanerr, err_handled);
                BUG();
-       } else { /* cleanup the faulty descriptor */
-               tx = &desc->txd;
-               if (tx->cookie) {
-                       dma_cookie_complete(tx);
-                       dma_descriptor_unmap(tx);
-                       dmaengine_desc_get_callback_invoke(tx, NULL);
-                       tx->callback = NULL;
-               }
        }
 
-       writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
-       pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int);
+       /* cleanup the faulty descriptor since we are continuing */
+       tx = &desc->txd;
+       if (tx->cookie) {
+               dma_cookie_complete(tx);
+               dma_descriptor_unmap(tx);
+               dmaengine_desc_get_callback_invoke(tx, &res);
+               tx->callback = NULL;
+               tx->callback_result = NULL;
+       }
 
        /* mark faulting descriptor as complete */
        *ioat_chan->completion = desc->txd.phys;
 
        spin_lock_bh(&ioat_chan->prep_lock);
+       /* we need abort all descriptors */
+       if (abort) {
+               ioat_abort_descs(ioat_chan);
+               /* clean up the channel, we could be in weird state */
+               ioat_reset_hw(ioat_chan);
+       }
+
+       writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+       pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int);
+
        ioat_restart_channel(ioat_chan);
        spin_unlock_bh(&ioat_chan->prep_lock);
 }
@@ -749,10 +833,25 @@ void ioat_timer_event(unsigned long data)
                chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
                dev_err(to_dev(ioat_chan), "%s: Channel halted (%x)\n",
                        __func__, chanerr);
-               if (test_bit(IOAT_RUN, &ioat_chan->state))
-                       BUG_ON(is_ioat_bug(chanerr));
-               else /* we never got off the ground */
-                       return;
+               if (test_bit(IOAT_RUN, &ioat_chan->state)) {
+                       spin_lock_bh(&ioat_chan->cleanup_lock);
+                       spin_lock_bh(&ioat_chan->prep_lock);
+                       set_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+                       spin_unlock_bh(&ioat_chan->prep_lock);
+
+                       ioat_abort_descs(ioat_chan);
+                       dev_warn(to_dev(ioat_chan), "Reset channel...\n");
+                       ioat_reset_hw(ioat_chan);
+                       dev_warn(to_dev(ioat_chan), "Restart channel...\n");
+                       ioat_restart_channel(ioat_chan);
+
+                       spin_lock_bh(&ioat_chan->prep_lock);
+                       clear_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+                       spin_unlock_bh(&ioat_chan->prep_lock);
+                       spin_unlock_bh(&ioat_chan->cleanup_lock);
+               }
+
+               return;
        }
 
        spin_lock_bh(&ioat_chan->cleanup_lock);
@@ -776,14 +875,23 @@ void ioat_timer_event(unsigned long data)
                u32 chanerr;
 
                chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
-               dev_warn(to_dev(ioat_chan), "Restarting channel...\n");
                dev_warn(to_dev(ioat_chan), "CHANSTS: %#Lx CHANERR: %#x\n",
                         status, chanerr);
                dev_warn(to_dev(ioat_chan), "Active descriptors: %d\n",
                         ioat_ring_active(ioat_chan));
 
                spin_lock_bh(&ioat_chan->prep_lock);
+               set_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+               spin_unlock_bh(&ioat_chan->prep_lock);
+
+               ioat_abort_descs(ioat_chan);
+               dev_warn(to_dev(ioat_chan), "Resetting channel...\n");
+               ioat_reset_hw(ioat_chan);
+               dev_warn(to_dev(ioat_chan), "Restarting channel...\n");
                ioat_restart_channel(ioat_chan);
+
+               spin_lock_bh(&ioat_chan->prep_lock);
+               clear_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
                spin_unlock_bh(&ioat_chan->prep_lock);
                spin_unlock_bh(&ioat_chan->cleanup_lock);
                return;
index 70534981a49bbf453be6b0bb90434402e66bb4f9..48fa4cf9f64aa7a6b839e4af26253a9717ac39ec 100644 (file)
 #define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR      0x40000
 
 #define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)
+#define IOAT_CHANERR_RECOVER_MASK (IOAT_CHANERR_READ_DATA_ERR | \
+                                  IOAT_CHANERR_WRITE_DATA_ERR)
 
 #define IOAT_CHANERR_MASK_OFFSET               0x2C    /* 32-bit Channel Error Register */