~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
+
+ ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
+ ipath_update_eeprom_log(dd);
}
/**
* @len: number of bytes to receive
*/
-int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
- void *buffer, int len)
+static int ipath_eeprom_internal_read(struct ipath_devdata *dd,
+ u8 eeprom_offset, void *buffer, int len)
{
/* compiler complains unless initialized */
u8 single_byte = 0;
return ret;
}
+
/**
* ipath_eeprom_write - writes data to the eeprom via I2C
* @dd: the infinipath device
* @buffer: data to write
* @len: number of bytes to write
*/
-int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
- const void *buffer, int len)
+int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset,
+ const void *buffer, int len)
{
u8 single_byte;
int sub_len;
return ret;
}
+/*
+ * The public entry-points ipath_eeprom_read() and ipath_eeprom_write()
+ * are now just wrappers around the internal functions.
+ */
+int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
+ void *buff, int len)
+{
+ int ret;
+
+ ret = down_interruptible(&dd->ipath_eep_sem);
+ if (!ret) {
+ ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len);
+ up(&dd->ipath_eep_sem);
+ }
+
+ return ret;
+}
+
+int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
+ const void *buff, int len)
+{
+ int ret;
+
+ ret = down_interruptible(&dd->ipath_eep_sem);
+ if (!ret) {
+ ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len);
+ up(&dd->ipath_eep_sem);
+ }
+
+ return ret;
+}
+
static u8 flash_csum(struct ipath_flash *ifp, int adjust)
{
u8 *ip = (u8 *) ifp;
void *buf;
struct ipath_flash *ifp;
__be64 guid;
- int len;
+ int len, eep_stat;
u8 csum, *bguid;
int t = dd->ipath_unit;
struct ipath_devdata *dd0 = ipath_lookup(0);
goto bail;
}
- if (ipath_eeprom_read(dd, 0, buf, len)) {
+ down(&dd->ipath_eep_sem);
+ eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len);
+ up(&dd->ipath_eep_sem);
+
+ if (eep_stat) {
ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
goto done;
}
ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
(unsigned long long) be64_to_cpu(dd->ipath_guid));
+ memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT);
+ /*
+ * Power-on (actually "active") hours are kept as little-endian value
+ * in EEPROM, but as seconds in a (possibly as small as 24-bit)
+ * atomic_t while running.
+ */
+ atomic_set(&dd->ipath_active_time, 0);
+ dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8);
+
done:
vfree(buf);
bail:;
}
+
+/**
+ * ipath_update_eeprom_log - copy active-time and error counters to eeprom
+ * @dd: the infinipath device
+ *
+ * Although the time is kept as seconds in the ipath_devdata struct, it is
+ * rounded to hours for re-write, as we have only 16 bits in EEPROM.
+ * First-cut code reads whole (expected) struct ipath_flash, modifies,
+ * re-writes. Future direction: read/write only what we need, assuming
+ * that the EEPROM had to have been "good enough" for driver init, and
+ * if not, we aren't making it worse.
+ *
+ */
+
+int ipath_update_eeprom_log(struct ipath_devdata *dd)
+{
+ void *buf;
+ struct ipath_flash *ifp;
+ int len, hi_water;
+ uint32_t new_time, new_hrs;
+ u8 csum;
+ int ret, idx;
+ unsigned long flags;
+
+ /* first, check if we actually need to do anything. */
+ ret = 0;
+ for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
+ if (dd->ipath_eep_st_new_errs[idx]) {
+ ret = 1;
+ break;
+ }
+ }
+ new_time = atomic_read(&dd->ipath_active_time);
+
+ if (ret == 0 && new_time < 3600)
+ return 0;
+
+ /*
+ * The quick-check above determined that there is something worthy
+ * of logging, so get current contents and do a more detailed idea.
+ */
+ len = offsetof(struct ipath_flash, if_future);
+ buf = vmalloc(len);
+ ret = 1;
+ if (!buf) {
+ ipath_dev_err(dd, "Couldn't allocate memory to read %u "
+ "bytes from eeprom for logging\n", len);
+ goto bail;
+ }
+
+ /* Grab semaphore and read current EEPROM. If we get an
+ * error, let go, but if not, keep it until we finish write.
+ */
+ ret = down_interruptible(&dd->ipath_eep_sem);
+ if (ret) {
+ ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n");
+ goto free_bail;
+ }
+ ret = ipath_eeprom_internal_read(dd, 0, buf, len);
+ if (ret) {
+ up(&dd->ipath_eep_sem);
+ ipath_dev_err(dd, "Unable read EEPROM for logging\n");
+ goto free_bail;
+ }
+ ifp = (struct ipath_flash *)buf;
+
+ csum = flash_csum(ifp, 0);
+ if (csum != ifp->if_csum) {
+ up(&dd->ipath_eep_sem);
+ ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n",
+ csum, ifp->if_csum);
+ ret = 1;
+ goto free_bail;
+ }
+ hi_water = 0;
+ spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+ for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
+ int new_val = dd->ipath_eep_st_new_errs[idx];
+ if (new_val) {
+ /*
+ * If we have seen any errors, add to EEPROM values
+ * We need to saturate at 0xFF (255) and we also
+ * would need to adjust the checksum if we were
+ * trying to minimize EEPROM traffic
+ * Note that we add to actual current count in EEPROM,
+ * in case it was altered while we were running.
+ */
+ new_val += ifp->if_errcntp[idx];
+ if (new_val > 0xFF)
+ new_val = 0xFF;
+ if (ifp->if_errcntp[idx] != new_val) {
+ ifp->if_errcntp[idx] = new_val;
+ hi_water = offsetof(struct ipath_flash,
+ if_errcntp) + idx;
+ }
+ /*
+ * update our shadow (used to minimize EEPROM
+ * traffic), to match what we are about to write.
+ */
+ dd->ipath_eep_st_errs[idx] = new_val;
+ dd->ipath_eep_st_new_errs[idx] = 0;
+ }
+ }
+ /*
+ * now update active-time. We would like to round to the nearest hour
+ * but unless atomic_t are sure to be proper signed ints we cannot,
+ * because we need to account for what we "transfer" to EEPROM and
+ * if we log an hour at 31 minutes, then we would need to set
+ * active_time to -29 to accurately count the _next_ hour.
+ */
+ if (new_time > 3600) {
+ new_hrs = new_time / 3600;
+ atomic_sub((new_hrs * 3600), &dd->ipath_active_time);
+ new_hrs += dd->ipath_eep_hrs;
+ if (new_hrs > 0xFFFF)
+ new_hrs = 0xFFFF;
+ dd->ipath_eep_hrs = new_hrs;
+ if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) {
+ ifp->if_powerhour[0] = new_hrs & 0xFF;
+ hi_water = offsetof(struct ipath_flash, if_powerhour);
+ }
+ if ((new_hrs >> 8) != ifp->if_powerhour[1]) {
+ ifp->if_powerhour[1] = new_hrs >> 8;
+ hi_water = offsetof(struct ipath_flash, if_powerhour)
+ + 1;
+ }
+ }
+ /*
+ * There is a tiny possibility that we could somehow fail to write
+ * the EEPROM after updating our shadows, but problems from holding
+ * the spinlock too long are a much bigger issue.
+ */
+ spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
+ if (hi_water) {
+ /* we made some change to the data, uopdate cksum and write */
+ csum = flash_csum(ifp, 1);
+ ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1);
+ }
+ up(&dd->ipath_eep_sem);
+ if (ret)
+ ipath_dev_err(dd, "Failed updating EEPROM\n");
+
+free_bail:
+ vfree(buf);
+bail:
+ return ret;
+
+}
+
+/**
+ * ipath_inc_eeprom_err - increment one of the four error counters
+ * that are logged to EEPROM.
+ * @dd: the infinipath device
+ * @eidx: 0..3, the counter to increment
+ * @incr: how much to add
+ *
+ * Each counter is 8-bits, and saturates at 255 (0xFF). They
+ * are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log()
+ * is called, but it can only be called in a context that allows sleep.
+ * This function can be called even at interrupt level.
+ */
+
+void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr)
+{
+ uint new_val;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+ new_val = dd->ipath_eep_st_new_errs[eidx] + incr;
+ if (new_val > 255)
+ new_val = 255;
+ dd->ipath_eep_st_new_errs[eidx] = new_val;
+ spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
+ return;
+}
u32 bits, ctrl;
int isfatal = 0;
char bitsmsg[64];
+ int log_idx;
hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
hwerrs &= dd->ipath_hwerrmask;
+ /* We log some errors to EEPROM, check if we have any of those. */
+ for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
+ if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
+ ipath_inc_eeprom_err(dd, log_idx, 1);
+
/*
* make sure we get this much out, unless told to be quiet,
* it's a parity error we may recover from,
dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+
+ /*
+ * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
+ * 2 is Some Misc, 3 is reserved for future.
+ */
+ dd->ipath_eep_st_masks[0].hwerrs_to_log =
+ INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
+
+ dd->ipath_eep_st_masks[1].hwerrs_to_log =
+ INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
+
+ dd->ipath_eep_st_masks[2].errs_to_log =
+ INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET;
+
}
/**
u32 bits, ctrl;
int isfatal = 0;
char bitsmsg[64];
+ int log_idx;
hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
if (!hwerrs) {
hwerrs &= dd->ipath_hwerrmask;
+ /* We log some errors to EEPROM, check if we have any of those. */
+ for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
+ if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
+ ipath_inc_eeprom_err(dd, log_idx, 1);
+
/*
* make sure we get this much out, unless told to be quiet,
* or it's occurred within the last 5 seconds
dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+
+ /*
+ * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
+ * 2 is Some Misc, 3 is reserved for future.
+ */
+ dd->ipath_eep_st_masks[0].hwerrs_to_log =
+ INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
+
+ /* Ignore errors in PIO/PBC on systems with unordered write-combining */
+ if (ipath_unordered_wc())
+ dd->ipath_eep_st_masks[0].hwerrs_to_log &= ~TXE_PIO_PARITY;
+
+ dd->ipath_eep_st_masks[1].hwerrs_to_log =
+ INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
+
+ dd->ipath_eep_st_masks[2].errs_to_log =
+ INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET;
+
+
}
/* setup the MSI stuff again after a reset. I'd like to just call
spin_lock_init(&dd->ipath_tid_lock);
spin_lock_init(&dd->ipath_gpio_lock);
+ spin_lock_init(&dd->ipath_eep_st_lock);
+ sema_init(&dd->ipath_eep_sem, 1);
done:
*pdp = pd;
int i, iserr = 0;
int chkerrpkts = 0, noprint = 0;
unsigned supp_msgs;
+ int log_idx;
supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint);
if (errs & INFINIPATH_E_HARDWARE) {
/* reuse same msg buf */
dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
+ } else {
+ u64 mask;
+ for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) {
+ mask = dd->ipath_eep_st_masks[log_idx].errs_to_log;
+ if (errs & mask)
+ ipath_inc_eeprom_err(dd, log_idx, 1);
+ }
}
if (!noprint && (errs & ~dd->ipath_e_bitsextant))
extern struct infinipath_stats ipath_stats;
#define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ
+/*
+ * First-cut critierion for "device is active" is
+ * two thousand dwords combined Tx, Rx traffic per
+ * 5-second interval. SMA packets are 64 dwords,
+ * and occur "a few per second", presumably each way.
+ */
+#define IPATH_TRAFFIC_ACTIVE_THRESHOLD (2000)
+/*
+ * Struct used to indicate which errors are logged in each of the
+ * error-counters that are logged to EEPROM. A counter is incremented
+ * _once_ (saturating at 255) for each event with any bits set in
+ * the error or hwerror register masks below.
+ */
+#define IPATH_EEP_LOG_CNT (4)
+struct ipath_eep_log_mask {
+ u64 errs_to_log;
+ u64 hwerrs_to_log;
+};
struct ipath_portdata {
void **port_rcvegrbuf;
/* Used to flash LEDs in override mode */
struct timer_list ipath_led_override_timer;
+ /* Support (including locks) for EEPROM logging of errors and time */
+ /* control access to actual counters, timer */
+ spinlock_t ipath_eep_st_lock;
+ /* control high-level access to EEPROM */
+ struct semaphore ipath_eep_sem;
+ /* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */
+ uint64_t ipath_traffic_wds;
+ /* active time is kept in seconds, but logged in hours */
+ atomic_t ipath_active_time;
+ /* Below are nominal shadow of EEPROM, new since last EEPROM update */
+ uint8_t ipath_eep_st_errs[IPATH_EEP_LOG_CNT];
+ uint8_t ipath_eep_st_new_errs[IPATH_EEP_LOG_CNT];
+ uint16_t ipath_eep_hrs;
+ /*
+ * masks for which bits of errs, hwerrs that cause
+ * each of the counters to increment.
+ */
+ struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT];
};
/* Private data for file operations */
void ipath_init_iba6120_funcs(struct ipath_devdata *);
void ipath_init_iba6110_funcs(struct ipath_devdata *);
void ipath_get_eeprom_info(struct ipath_devdata *);
+int ipath_update_eeprom_log(struct ipath_devdata *dd);
+void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
u64 val64;
unsigned long t0, t1;
u64 ret;
+ unsigned long flags;
t0 = jiffies;
/* If fast increment counters are only 32 bits, snapshot them,
if (creg == dd->ipath_cregs->cr_wordsendcnt) {
if (val != dd->ipath_lastsword) {
dd->ipath_sword += val - dd->ipath_lastsword;
+ spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+ dd->ipath_traffic_wds += val - dd->ipath_lastsword;
+ spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
dd->ipath_lastsword = val;
}
val64 = dd->ipath_sword;
} else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
if (val != dd->ipath_lastrword) {
dd->ipath_rword += val - dd->ipath_lastrword;
+ spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+ dd->ipath_traffic_wds += val - dd->ipath_lastrword;
+ spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
dd->ipath_lastrword = val;
}
val64 = dd->ipath_rword;
struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
u32 val;
static unsigned cnt;
+ unsigned long flags;
/*
* don't access the chip while running diags, or memory diags can
/* but re-arm the timer, for diags case; won't hurt other */
goto done;
+ /*
+ * We now try to maintain a "active timer", based on traffic
+ * exceeding a threshold, so we need to check the word-counts
+ * even if they are 64-bit.
+ */
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
+ spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+ if (dd->ipath_traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
+ atomic_add(5, &dd->ipath_active_time); /* S/B #define */
+ dd->ipath_traffic_wds = 0;
+ spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
+
if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
}
return ret;
}
+static ssize_t show_logged_errs(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int idx, count;
+
+ /* force consistency with actual EEPROM */
+ if (ipath_update_eeprom_log(dd) != 0)
+ return -ENXIO;
+
+ count = 0;
+ for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
+ count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c",
+ dd->ipath_eep_st_errs[idx],
+ idx == (IPATH_EEP_LOG_CNT - 1) ? '\n' : ' ');
+ }
+
+ return count;
+}
static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override);
+static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
static struct attribute *dev_attributes[] = {
&dev_attr_guid.attr,
&dev_attr_enabled.attr,
&dev_attr_rx_pol_inv.attr,
&dev_attr_led_override.attr,
+ &dev_attr_logged_errors.attr,
NULL
};