scsi: cxlflash: Handle AFU sync failures
author Uma Krishnan <ukrishn@linux.vnet.ibm.com>
Thu, 22 Jun 2017 02:14:30 +0000 (21:14 -0500)
committer Martin K. Petersen <martin.petersen@oracle.com>
Mon, 26 Jun 2017 19:01:08 +0000 (15:01 -0400)
AFU sync operations are not currently evaluated for failure. This is
acceptable for paths where there is not a dependency on the AFU being
consistent with the host. Examples include link reset events and LUN
cleanup operations. On paths where there is a dependency, such as a LUN
open, a sync failure should be acted upon.

In the event of AFU sync failures, either log or clean up as appropriate for
operations that are dependent on a successful sync completion.

Update documentation to reflect behavior in the event of an AFU sync
failure.

Signed-off-by: Uma Krishnan <ukrishn@linux.vnet.ibm.com>
Acked-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Documentation/powerpc/cxlflash.txt
drivers/scsi/cxlflash/superpipe.c
drivers/scsi/cxlflash/vlun.c

index 66b4496d6619a41f36cefc577afbb85d2ca43cc6..f9036cb0170d28d9f2fe8e5dea339bcaea6b8e61 100644 (file)
@@ -257,6 +257,12 @@ DK_CXLFLASH_VLUN_RESIZE
     operating in the virtual mode and used to program a LUN translation
     table that the AFU references when provided with a resource handle.
 
+    This ioctl can return -EAGAIN if an AFU sync operation takes too long.
+    In addition to returning a failure to the user, cxlflash will also schedule
+    an asynchronous AFU reset. Should the user choose to retry the operation,
+    it is expected to succeed. If this ioctl fails with -EAGAIN, the user
+    can either retry the operation or treat it as a failure.
+
 DK_CXLFLASH_RELEASE
 -------------------
     This ioctl is responsible for releasing a previously obtained
@@ -309,6 +315,12 @@ DK_CXLFLASH_VLUN_CLONE
     clone. This is to avoid a stale entry in the file descriptor table of the
     child process.
 
+    This ioctl can return -EAGAIN if an AFU sync operation takes too long.
+    In addition to returning a failure to the user, cxlflash will also schedule
+    an asynchronous AFU reset. Should the user choose to retry the operation,
+    it is expected to succeed. If this ioctl fails with -EAGAIN, the user
+    can either retry the operation or treat it as a failure.
+
 DK_CXLFLASH_VERIFY
 ------------------
     This ioctl is used to detect various changes such as the capacity of
index fe9f17a6268b6ab6d5a016d480066c7c86a76f31..ad0f9968ccfbffdbf8fa4d4e570a61dc896d04ff 100644 (file)
@@ -56,6 +56,19 @@ static void marshal_det_to_rele(struct dk_cxlflash_detach *detach,
        release->context_id = detach->context_id;
 }
 
+/**
+ * marshal_udir_to_rele() - copy the udirect fields a release request carries
+ * @udirect:   Source; its hdr, context_id and rsrc_handle are read.
+ * @release:   Destination; the same three fields are overwritten.
+ */
+static void marshal_udir_to_rele(struct dk_cxlflash_udirect *udirect,
+                                struct dk_cxlflash_release *release)
+{
+       release->hdr = udirect->hdr;
+       release->context_id = udirect->context_id;
+       release->rsrc_handle = udirect->rsrc_handle;
+}
+
 /**
  * cxlflash_free_errpage() - frees resources associated with global error page
  */
@@ -622,6 +635,7 @@ int _cxlflash_disk_release(struct scsi_device *sdev,
        res_hndl_t rhndl = release->rsrc_handle;
 
        int rc = 0;
+       int rcr = 0;
        u64 ctxid = DECODE_CTXID(release->context_id),
            rctxid = release->context_id;
 
@@ -686,8 +700,12 @@ int _cxlflash_disk_release(struct scsi_device *sdev,
                rhte_f1->dw = 0;
                dma_wmb(); /* Make RHT entry bottom-half clearing visible */
 
-               if (!ctxi->err_recovery_active)
-                       cxlflash_afu_sync(afu, ctxid, rhndl, AFU_HW_SYNC);
+               if (!ctxi->err_recovery_active) {
+                       rcr = cxlflash_afu_sync(afu, ctxid, rhndl, AFU_HW_SYNC);
+                       if (unlikely(rcr))
+                               dev_dbg(dev, "%s: AFU sync failed rc=%d\n",
+                                       __func__, rcr);
+               }
                break;
        default:
                WARN(1, "Unsupported LUN mode!");
@@ -1929,6 +1947,7 @@ static int cxlflash_disk_direct_open(struct scsi_device *sdev, void *arg)
        struct afu *afu = cfg->afu;
        struct llun_info *lli = sdev->hostdata;
        struct glun_info *gli = lli->parent;
+       struct dk_cxlflash_release rel = { { 0 }, 0 };
 
        struct dk_cxlflash_udirect *pphys = (struct dk_cxlflash_udirect *)arg;
 
@@ -1970,13 +1989,18 @@ static int cxlflash_disk_direct_open(struct scsi_device *sdev, void *arg)
        rsrc_handle = (rhte - ctxi->rht_start);
 
        rht_format1(rhte, lli->lun_id[sdev->channel], ctxi->rht_perms, port);
-       cxlflash_afu_sync(afu, ctxid, rsrc_handle, AFU_LW_SYNC);
 
        last_lba = gli->max_lba;
        pphys->hdr.return_flags = 0;
        pphys->last_lba = last_lba;
        pphys->rsrc_handle = rsrc_handle;
 
+       rc = cxlflash_afu_sync(afu, ctxid, rsrc_handle, AFU_LW_SYNC);
+       if (unlikely(rc)) {
+               dev_dbg(dev, "%s: AFU sync failed rc=%d\n", __func__, rc);
+               goto err2;
+       }
+
 out:
        if (likely(ctxi))
                put_context(ctxi);
@@ -1984,6 +2008,10 @@ out:
                __func__, rsrc_handle, rc, last_lba);
        return rc;
 
+err2:
+       marshal_udir_to_rele(pphys, &rel);
+       _cxlflash_disk_release(sdev, ctxi, &rel);
+       goto out;
 err1:
        cxlflash_lun_detach(gli);
        goto out;
index 90b5c19f81f01b93b910d2a1e31bb8ab377ff4fd..0800bcba5a003bf864ff1d87df0ecc56c3b60199 100644 (file)
@@ -594,7 +594,9 @@ static int grow_lxt(struct afu *afu,
        rhte->lxt_cnt = my_new_size;
        dma_wmb(); /* Make RHT entry's LXT table size update visible */
 
-       cxlflash_afu_sync(afu, ctxid, rhndl, AFU_LW_SYNC);
+       rc = cxlflash_afu_sync(afu, ctxid, rhndl, AFU_LW_SYNC);
+       if (unlikely(rc))
+               rc = -EAGAIN;
 
        /* free old lxt if reallocated */
        if (lxt != lxt_old)
@@ -673,8 +675,11 @@ static int shrink_lxt(struct afu *afu,
        rhte->lxt_start = lxt;
        dma_wmb(); /* Make RHT entry's LXT table update visible */
 
-       if (needs_sync)
-               cxlflash_afu_sync(afu, ctxid, rhndl, AFU_HW_SYNC);
+       if (needs_sync) {
+               rc = cxlflash_afu_sync(afu, ctxid, rhndl, AFU_HW_SYNC);
+               if (unlikely(rc))
+                       rc = -EAGAIN;
+       }
 
        if (needs_ws) {
                /*
@@ -792,6 +797,21 @@ int _cxlflash_vlun_resize(struct scsi_device *sdev,
                rc = grow_lxt(afu, sdev, ctxid, rhndl, rhte, &new_size);
        else if (new_size < rhte->lxt_cnt)
                rc = shrink_lxt(afu, sdev, rhndl, rhte, ctxi, &new_size);
+       else {
+               /*
+                * Rare case where there is already sufficient space, just
+                * need to perform a translation sync with the AFU. This
+                * scenario likely follows a previous sync failure during
+                * a resize operation. Accordingly, perform the heavyweight
+                * form of translation sync as it is unknown which type of
+                * resize failed previously.
+                */
+               rc = cxlflash_afu_sync(afu, ctxid, rhndl, AFU_HW_SYNC);
+               if (unlikely(rc)) {
+                       rc = -EAGAIN;
+                       goto out;
+               }
+       }
 
        resize->hdr.return_flags = 0;
        resize->last_lba = (new_size * MC_CHUNK_SIZE * gli->blk_len);
@@ -1084,10 +1104,13 @@ static int clone_lxt(struct afu *afu,
 {
        struct cxlflash_cfg *cfg = afu->parent;
        struct device *dev = &cfg->dev->dev;
-       struct sisl_lxt_entry *lxt;
+       struct sisl_lxt_entry *lxt = NULL;
+       bool locked = false;
        u32 ngrps;
        u64 aun;                /* chunk# allocated by block allocator */
-       int i, j;
+       int j;
+       int i = 0;
+       int rc = 0;
 
        ngrps = LXT_NUM_GROUPS(rhte_src->lxt_cnt);
 
@@ -1095,33 +1118,29 @@ static int clone_lxt(struct afu *afu,
                /* allocate new LXTs for clone */
                lxt = kzalloc((sizeof(*lxt) * LXT_GROUP_SIZE * ngrps),
                                GFP_KERNEL);
-               if (unlikely(!lxt))
-                       return -ENOMEM;
+               if (unlikely(!lxt)) {
+                       rc = -ENOMEM;
+                       goto out;
+               }
 
                /* copy over */
                memcpy(lxt, rhte_src->lxt_start,
                       (sizeof(*lxt) * rhte_src->lxt_cnt));
 
-               /* clone the LBAs in block allocator via ref_cnt */
+               /* clone the LBAs in block allocator via ref_cnt, note that the
+                * block allocator mutex must be held until it is established
+                * that this routine will complete without the need for a
+                * cleanup.
+                */
                mutex_lock(&blka->mutex);
+               locked = true;
                for (i = 0; i < rhte_src->lxt_cnt; i++) {
                        aun = (lxt[i].rlba_base >> MC_CHUNK_SHIFT);
                        if (ba_clone(&blka->ba_lun, aun) == -1ULL) {
-                               /* free the clones already made */
-                               for (j = 0; j < i; j++) {
-                                       aun = (lxt[j].rlba_base >>
-                                              MC_CHUNK_SHIFT);
-                                       ba_free(&blka->ba_lun, aun);
-                               }
-
-                               mutex_unlock(&blka->mutex);
-                               kfree(lxt);
-                               return -EIO;
+                               rc = -EIO;
+                               goto err;
                        }
                }
-               mutex_unlock(&blka->mutex);
-       } else {
-               lxt = NULL;
        }
 
        /*
@@ -1136,10 +1155,31 @@ static int clone_lxt(struct afu *afu,
        rhte->lxt_cnt = rhte_src->lxt_cnt;
        dma_wmb(); /* Make RHT entry's LXT table size update visible */
 
-       cxlflash_afu_sync(afu, ctxid, rhndl, AFU_LW_SYNC);
+       rc = cxlflash_afu_sync(afu, ctxid, rhndl, AFU_LW_SYNC);
+       if (unlikely(rc)) {
+               rc = -EAGAIN;
+               goto err2;
+       }
 
-       dev_dbg(dev, "%s: returning\n", __func__);
-       return 0;
+out:
+       if (locked)
+               mutex_unlock(&blka->mutex);
+       dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc);
+       return rc;
+err2:
+       /* Reset the RHTE */
+       rhte->lxt_cnt = 0;
+       dma_wmb();
+       rhte->lxt_start = NULL;
+       dma_wmb();
+err:
+       /* free the clones already made */
+       for (j = 0; j < i; j++) {
+               aun = (lxt[j].rlba_base >> MC_CHUNK_SHIFT);
+               ba_free(&blka->ba_lun, aun);
+       }
+       kfree(lxt);
+       goto out;
 }
 
 /**