scsi: lpfc: Limit amount of work processed in IRQ
authorDick Kennedy <dick.kennedy@broadcom.com>
Wed, 23 Aug 2017 23:55:41 +0000 (16:55 -0700)
committerMartin K. Petersen <martin.petersen@oracle.com>
Fri, 25 Aug 2017 02:29:40 +0000 (22:29 -0400)
Various oops being seen on being in the ISR too long and cpu lockups,
when under heavy load.

The amount of work being posted off of completion queues kept the ISR
running almost all the time

Correct the issue by limiting the amount of work per iteration.

[mkp: typo]

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/lpfc/lpfc_sli.c
drivers/scsi/lpfc/lpfc_sli4.h

index 6569fffb8b714b610a2c9c9e135cedf7dfec1d94..d731979e4c7cdfbc1f6b75c0468ce6560a735d62 100644 (file)
@@ -80,8 +80,8 @@ static int lpfc_sli4_fp_handle_cqe(struct lpfc_hba *, struct lpfc_queue *,
                                    struct lpfc_cqe *);
 static int lpfc_sli4_post_sgl_list(struct lpfc_hba *, struct list_head *,
                                       int);
-static void lpfc_sli4_hba_handle_eqe(struct lpfc_hba *, struct lpfc_eqe *,
-                       uint32_t);
+static int lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba,
+                                   struct lpfc_eqe *eqe, uint32_t qidx);
 static bool lpfc_sli4_mbox_completions_pending(struct lpfc_hba *phba);
 static bool lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba);
 static int lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba,
@@ -13010,7 +13010,7 @@ lpfc_sli4_sp_handle_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
  * completion queue, and then return.
  *
  **/
-static void
+static int
 lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
        struct lpfc_queue *speq)
 {
@@ -13034,7 +13034,7 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
                        lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
                                        "0365 Slow-path CQ identifier "
                                        "(%d) does not exist\n", cqid);
-               return;
+               return 0;
        }
 
        /* Save EQ associated with this CQ */
@@ -13071,7 +13071,7 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
                lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
                                "0370 Invalid completion queue type (%d)\n",
                                cq->type);
-               return;
+               return 0;
        }
 
        /* Catch the no cq entry condition, log an error */
@@ -13086,6 +13086,8 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
        /* wake up worker thread if there are works to be done */
        if (workposted)
                lpfc_worker_wake_up(phba);
+
+       return ecount;
 }
 
 /**
@@ -13393,7 +13395,7 @@ lpfc_sli4_fp_handle_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
  * queue and process all the entries on the completion queue, rearm the
  * completion queue, and then return.
  **/
-static void
+static int
 lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
                        uint32_t qidx)
 {
@@ -13409,7 +13411,7 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
                                "event: majorcode=x%x, minorcode=x%x\n",
                                bf_get_le32(lpfc_eqe_major_code, eqe),
                                bf_get_le32(lpfc_eqe_minor_code, eqe));
-               return;
+               return 0;
        }
 
        /* Get the reference to the corresponding CQ */
@@ -13446,8 +13448,9 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
 
        /* Otherwise this is a Slow path event */
        if (cq == NULL) {
-               lpfc_sli4_sp_handle_eqe(phba, eqe, phba->sli4_hba.hba_eq[qidx]);
-               return;
+               ecount = lpfc_sli4_sp_handle_eqe(phba, eqe,
+                                                phba->sli4_hba.hba_eq[qidx]);
+               return ecount;
        }
 
 process_cq:
@@ -13456,7 +13459,7 @@ process_cq:
                                "0368 Miss-matched fast-path completion "
                                "queue identifier: eqcqid=%d, fcpcqid=%d\n",
                                cqid, cq->queue_id);
-               return;
+               return 0;
        }
 
        /* Save EQ associated with this CQ */
@@ -13486,6 +13489,8 @@ process_cq:
        /* wake up worker thread if there are works to be done */
        if (workposted)
                lpfc_worker_wake_up(phba);
+
+       return ecount;
 }
 
 static void
@@ -13706,6 +13711,7 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
        struct lpfc_eqe *eqe;
        unsigned long iflag;
        int ecount = 0;
+       int ccount = 0;
        int hba_eqidx;
 
        /* Get the driver's phba structure from the dev_id */
@@ -13757,8 +13763,9 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
                if (eqe == NULL)
                        break;
 
-               lpfc_sli4_hba_handle_eqe(phba, eqe, hba_eqidx);
-               if (!(++ecount % fpeq->entry_repost))
+               ccount += lpfc_sli4_hba_handle_eqe(phba, eqe, hba_eqidx);
+               if (!(++ecount % fpeq->entry_repost) ||
+                   ccount > LPFC_MAX_ISR_CQE)
                        break;
                fpeq->EQ_processed++;
        }
index 7a1d74e9e877057397921b644adbd5413e8419c3..540454b035877850c1bb1660fad75b5037c5c3da 100644 (file)
@@ -158,6 +158,7 @@ struct lpfc_queue {
 #define LPFC_MQ_REPOST         8
 #define LPFC_CQ_REPOST         64
 #define LPFC_RQ_REPOST         64
+#define LPFC_MAX_ISR_CQE       64
 #define LPFC_RELEASE_NOTIFICATION_INTERVAL     32  /* For WQs */
        uint32_t queue_id;      /* Queue ID assigned by the hardware */
        uint32_t assoc_qid;     /* Queue ID associated with, for CQ/WQ/MQ */