IB/ipath: Workaround problem of errormask register being overwritten
author: Dave Olson <dave.olson@qlogic.com>
Fri, 20 Jul 2007 21:41:26 +0000 (14:41 -0700)
committer: Roland Dreier <rolandd@cisco.com>
Mon, 30 Jul 2007 20:16:46 +0000 (13:16 -0700)
On some system hardware, we are seeing moderately common cases of the
chip errormask register being overwritten due to a chip bug in iba6120
that is triggered by a vendor-specific PCIe broadcast message.  This
patch merely checks periodically, and corrects it if needed (the
overwrite can cause us to not get error and hardware error
interrupts).  Also, make dd->ipath_errormask the one, true canonical
source for kr_errormask, and remove references to ipath_ignorederrs as
it is currently unused.

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: John Gregor <john.gregor@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
drivers/infiniband/hw/ipath/ipath_init_chip.c
drivers/infiniband/hw/ipath/ipath_intr.c
drivers/infiniband/hw/ipath/ipath_kernel.h
drivers/infiniband/hw/ipath/ipath_stats.c

index 71e6c9d4a7144f193b90c8f9c117597f147021e9..9dd0bacf84610847f94270b79e5f243e2dd1ceb3 100644 (file)
@@ -851,13 +851,14 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
        ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
                         dd->ipath_hwerrmask);
 
-       dd->ipath_maskederrs = dd->ipath_ignorederrs;
        /* clear all */
        ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
        /* enable errors that are masked, at least this first time. */
        ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
                         ~dd->ipath_maskederrs);
-       /* clear any interrups up to this point (ints still not enabled) */
+       dd->ipath_errormask = ipath_read_kreg64(dd,
+               dd->ipath_kregs->kr_errormask);
+       /* clear any interrupts up to this point (ints still not enabled) */
        ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
 
        /*
index 0c075cf8316ba1c0bd77b5ea043863c9c1e6a05b..b29fe7e9b11a598686052f3797b8f0b2c1c6f3d1 100644 (file)
@@ -517,10 +517,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 
        supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint);
 
-       /*
-        * don't report errors that are masked (includes those always
-        * ignored)
-        */
+       /* don't report errors that are masked */
        errs &= ~dd->ipath_maskederrs;
 
        /* do these first, they are most important */
@@ -566,19 +563,19 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
                 * ones on this particular interrupt, which also isn't great
                 */
                dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
+               dd->ipath_errormask &= ~dd->ipath_maskederrs;
                ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-                                ~dd->ipath_maskederrs);
+                       dd->ipath_errormask);
                s_iserr = ipath_decode_err(msg, sizeof msg,
-                                (dd->ipath_maskederrs & ~dd->
-                                 ipath_ignorederrs));
+                       dd->ipath_maskederrs);
 
-               if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
+               if (dd->ipath_maskederrs &
                        ~(INFINIPATH_E_RRCVEGRFULL |
                        INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
                        ipath_dev_err(dd, "Temporarily disabling "
                            "error(s) %llx reporting; too frequent (%s)\n",
-                               (unsigned long long) (dd->ipath_maskederrs &
-                               ~dd->ipath_ignorederrs), msg);
+                               (unsigned long long)dd->ipath_maskederrs,
+                               msg);
                else {
                        /*
                         * rcvegrfull and rcvhdrqfull are "normal",
@@ -793,6 +790,9 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
        /* disable error interrupts, to avoid confusion */
        ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
 
+       /* also disable interrupts; errormask is sometimes overwriten */
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
+
        /*
         * clear all sends, because they have may been
         * completed by usercode while in freeze mode, and
@@ -817,7 +817,7 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
        for (i = 0; i < dd->ipath_pioavregs; i++) {
                /* deal with 6110 chip bug */
                im = i > 3 ? ((i&1) ? i-1 : i+1) : i;
-               val = ipath_read_kreg64(dd, 0x1000+(im*sizeof(u64)));
+               val = ipath_read_kreg64(dd, (0x1000/sizeof(u64))+im);
                dd->ipath_pioavailregs_dma[i] = dd->ipath_pioavailshadow[i]
                        = le64_to_cpu(val);
        }
@@ -832,7 +832,8 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
        ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
                E_SPKT_ERRS_IGNORE);
        ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-               ~dd->ipath_maskederrs);
+               dd->ipath_errormask);
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL);
        ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
 }
 
index ef773298b80502b9de330df3b9c8fe8a17481abd..7a7966f7e4fff96cc5a2027072fb15893126d927 100644 (file)
@@ -261,18 +261,10 @@ struct ipath_devdata {
         * limiting of hwerror reporting
         */
        ipath_err_t ipath_lasthwerror;
-       /*
-        * errors masked because they occur too fast, also includes errors
-        * that are always ignored (ipath_ignorederrs)
-        */
+       /* errors masked because they occur too fast */
        ipath_err_t ipath_maskederrs;
        /* time in jiffies at which to re-enable maskederrs */
        unsigned long ipath_unmasktime;
-       /*
-        * errors always ignored (masked), at least for a given
-        * chip/device, because they are wrong or not useful
-        */
-       ipath_err_t ipath_ignorederrs;
        /* count of egrfull errors, combined for all ports */
        u64 ipath_last_tidfull;
        /* for ipath_qcheck() */
@@ -436,6 +428,7 @@ struct ipath_devdata {
        u64 ipath_lastibcstat;
        /* hwerrmask shadow */
        ipath_err_t ipath_hwerrmask;
+       ipath_err_t ipath_errormask; /* errormask shadow */
        /* interrupt config reg shadow */
        u64 ipath_intconfig;
        /* kr_sendpiobufbase value */
index 73ed17d031887cbf5da73d57ee92f0c8b6aeb457..bae4f56f7271f05b65660a84bf886a1ed19a1b6f 100644 (file)
@@ -196,6 +196,45 @@ static void ipath_qcheck(struct ipath_devdata *dd)
        }
 }
 
+static void ipath_chk_errormask(struct ipath_devdata *dd)
+{
+       static u32 fixed;
+       u32 ctrl;
+       unsigned long errormask;
+       unsigned long hwerrs;
+
+       if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED))
+               return;
+
+       errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
+
+       if (errormask == dd->ipath_errormask)
+               return;
+       fixed++;
+
+       hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
+       ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
+
+       ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
+               dd->ipath_errormask);
+
+       if ((hwerrs & dd->ipath_hwerrmask) ||
+               (ctrl & INFINIPATH_C_FREEZEMODE)) {
+               /* force re-interrupt of pending events, just in case */
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL);
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
+               dev_info(&dd->pcidev->dev,
+                       "errormask fixed(%u) %lx -> %lx, ctrl %x hwerr %lx\n",
+                       fixed, errormask, (unsigned long)dd->ipath_errormask,
+                       ctrl, hwerrs);
+       } else
+               ipath_dbg("errormask fixed(%u) %lx -> %lx, no freeze\n",
+                       fixed, errormask,
+                       (unsigned long)dd->ipath_errormask);
+}
+
+
 /**
  * ipath_get_faststats - get word counters from chip before they overflow
  * @opaque - contains a pointer to the infinipath device ipath_devdata
@@ -251,14 +290,13 @@ void ipath_get_faststats(unsigned long opaque)
                dd->ipath_lasterror = 0;
        if (dd->ipath_lasthwerror)
                dd->ipath_lasthwerror = 0;
-       if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs)
+       if (dd->ipath_maskederrs
            && time_after(jiffies, dd->ipath_unmasktime)) {
                char ebuf[256];
                int iserr;
                iserr = ipath_decode_err(ebuf, sizeof ebuf,
-                                (dd->ipath_maskederrs & ~dd->
-                                 ipath_ignorederrs));
-               if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
+                       dd->ipath_maskederrs);
+               if (dd->ipath_maskederrs &
                                ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
                                INFINIPATH_E_PKTERRS ))
                        ipath_dev_err(dd, "Re-enabling masked errors "
@@ -278,9 +316,12 @@ void ipath_get_faststats(unsigned long opaque)
                                ipath_cdbg(ERRPKT, "Re-enabling packet"
                                                " problem interrupt (%s)\n", ebuf);
                }
-               dd->ipath_maskederrs = dd->ipath_ignorederrs;
+
+               /* re-enable masked errors */
+               dd->ipath_errormask |= dd->ipath_maskederrs;
                ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-                                ~dd->ipath_maskederrs);
+                       dd->ipath_errormask);
+               dd->ipath_maskederrs = 0;
        }
 
        /* limit qfull messages to ~one per minute per port */
@@ -294,6 +335,7 @@ void ipath_get_faststats(unsigned long opaque)
                }
        }
 
+       ipath_chk_errormask(dd);
 done:
        mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
 }