scsi: libfc: quarantine timed out xids
authorHannes Reinecke <hare@suse.de>
Thu, 13 Oct 2016 13:10:50 +0000 (15:10 +0200)
committerMartin K. Petersen <martin.petersen@oracle.com>
Tue, 8 Nov 2016 22:29:52 +0000 (17:29 -0500)
When a sequence times out we have no idea what happened to the
frame. And we do not know if we will ever receive the frame.
Hence we cannot re-use the xid as we would risk data corruption
if the xid had been re-used and the timed out frame would be
received after that.
So we need to quarantine the xid until the lport is reset.
Yes, I know this will (eventually) deplete the xid pool.
But for now it's the safest method.

Signed-off-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/libfc/fc_exch.c
drivers/scsi/libfc/fc_fcp.c
include/scsi/libfc.h

index 7b47ab1389ca0d451b12f5c49d05be1c1a0dfc22..ca7d947dc427c4f8682a9da86b70aa59fe17f587 100644 (file)
@@ -94,6 +94,7 @@ struct fc_exch_pool {
 struct fc_exch_mgr {
        struct fc_exch_pool __percpu *pool;
        mempool_t       *ep_pool;
+       struct fc_lport *lport;
        enum fc_class   class;
        struct kref     kref;
        u16             min_xid;
@@ -408,6 +409,8 @@ static int fc_exch_done_locked(struct fc_exch *ep)
        return rc;
 }
 
+static struct fc_exch fc_quarantine_exch;
+
 /**
  * fc_exch_ptr_get() - Return an exchange from an exchange pool
  * @pool:  Exchange Pool to get an exchange from
@@ -452,14 +455,17 @@ static void fc_exch_delete(struct fc_exch *ep)
 
        /* update cache of free slot */
        index = (ep->xid - ep->em->min_xid) >> fc_cpu_order;
-       if (pool->left == FC_XID_UNKNOWN)
-               pool->left = index;
-       else if (pool->right == FC_XID_UNKNOWN)
-               pool->right = index;
-       else
-               pool->next_index = index;
-
-       fc_exch_ptr_set(pool, index, NULL);
+       if (!(ep->state & FC_EX_QUARANTINE)) {
+               if (pool->left == FC_XID_UNKNOWN)
+                       pool->left = index;
+               else if (pool->right == FC_XID_UNKNOWN)
+                       pool->right = index;
+               else
+                       pool->next_index = index;
+               fc_exch_ptr_set(pool, index, NULL);
+       } else {
+               fc_exch_ptr_set(pool, index, &fc_quarantine_exch);
+       }
        list_del(&ep->ex_list);
        spin_unlock_bh(&pool->lock);
        fc_exch_release(ep);    /* drop hold for exch in mp */
@@ -921,14 +927,14 @@ static struct fc_exch *fc_exch_alloc(struct fc_lport *lport,
  */
 static struct fc_exch *fc_exch_find(struct fc_exch_mgr *mp, u16 xid)
 {
+       struct fc_lport *lport = mp->lport;
        struct fc_exch_pool *pool;
        struct fc_exch *ep = NULL;
        u16 cpu = xid & fc_cpu_mask;
 
        if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
-               printk_ratelimited(KERN_ERR
-                       "libfc: lookup request for XID = %d, "
-                       "indicates invalid CPU %d\n", xid, cpu);
+               pr_err("host%u: lport %6.6x: xid %d invalid CPU %d\n:",
+                      lport->host->host_no, lport->port_id, xid, cpu);
                return NULL;
        }
 
@@ -936,6 +942,10 @@ static struct fc_exch *fc_exch_find(struct fc_exch_mgr *mp, u16 xid)
                pool = per_cpu_ptr(mp->pool, cpu);
                spin_lock_bh(&pool->lock);
                ep = fc_exch_ptr_get(pool, (xid - mp->min_xid) >> fc_cpu_order);
+               if (ep == &fc_quarantine_exch) {
+                       FC_LPORT_DBG(lport, "xid %x quarantined\n", xid);
+                       ep = NULL;
+               }
                if (ep) {
                        WARN_ON(ep->xid != xid);
                        fc_exch_hold(ep);
@@ -2434,6 +2444,7 @@ struct fc_exch_mgr *fc_exch_mgr_alloc(struct fc_lport *lport,
                return NULL;
 
        mp->class = class;
+       mp->lport = lport;
        /* adjust em exch xid range for offload */
        mp->min_xid = min_xid;
 
index f7700cccf793ed8d9e875ad7f3c8e5a610507de4..780d9f09a267cc0ec04b52f6d65a16723762ab54 100644 (file)
@@ -1529,13 +1529,14 @@ static void fc_fcp_rec_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg)
                                   fsp->rport->port_id, rjt->er_reason,
                                   rjt->er_explan);
                        /*
-                        * If no data transfer, the command frame got dropped
-                        * so we just retry.  If data was transferred, we
-                        * lost the response but the target has no record,
-                        * so we abort and retry.
+                        * If response got lost or is stuck in the
+                        * queue somewhere we have no idea if and when
+                        * the response will be received. So quarantine
+                        * the xid and retry the command.
                         */
-                       if (rjt->er_explan == ELS_EXPL_OXID_RXID &&
-                           fsp->xfer_len == 0) {
+                       if (rjt->er_explan == ELS_EXPL_OXID_RXID) {
+                               struct fc_exch *ep = fc_seq_exch(fsp->seq_ptr);
+                               ep->state |= FC_EX_QUARANTINE;
                                fsp->state |= FC_SRB_ABORTED;
                                fc_fcp_retry_cmd(fsp, FC_TRANS_RESET);
                                break;
index dc42d8070f6fe8417f60204bff47f4df0a5aaa53..8cb752f8d12bf77f50c241df0f5e1713694fba73 100644 (file)
@@ -390,6 +390,7 @@ struct fc_seq {
 
 #define FC_EX_DONE             (1 << 0) /* ep is completed */
 #define FC_EX_RST_CLEANUP      (1 << 1) /* reset is forcing completion */
+#define FC_EX_QUARANTINE       (1 << 2) /* exch is quarantined */
 
 /**
  * struct fc_exch - Fibre Channel Exchange