[IA64] printing support for MCA/INIT
authorHidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Tue, 26 Sep 2006 21:44:37 +0000 (14:44 -0700)
committerTony Luck <tony.luck@intel.com>
Tue, 26 Sep 2006 21:44:37 +0000 (14:44 -0700)
Printing message to console from MCA/INIT handler is useful,
however doing oops_in_progress = 1 in them exactly makes
something in kernel wrong. Especially it sounds ugly if
system goes wrong after returning from recoverable MCA.

This patch adds ia64_mca_printk() function that collects
messages into temporary-not-so-large message buffer during
in MCA/INIT environment and print them out later, after
returning to normal context or when handlers determine to
down the system.

Also this print function is exported for use in extensional
MCA handler. It would be useful to describe detail about
recovery.

NOTE:
I don't think it is sane thing if temporary message buffer
is enlarged enough to hold whole stack dumps from INIT, so
buffering is disabled during stack dump from INIT-monarch
(= default_monarch_init_process). please fix it in future.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Acked-by: Russ Anderson <rja@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
arch/ia64/kernel/mca.c
arch/ia64/kernel/mca_drv.c
arch/ia64/kernel/mca_drv.h
arch/ia64/kernel/salinfo.c

index 2fbe4536fe181b5574d99a77add70e5330a5f490..98f3b26d7aff3d5d3918631f032cd9dfc3948a90 100644 (file)
@@ -54,6 +54,9 @@
  *
  * 2005-10-07 Keith Owens <kaos@sgi.com>
  *           Add notify_die() hooks.
+ *
+ * 2006-09-15 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+ *           Add printing support for MCA/INIT.
  */
 #include <linux/types.h>
 #include <linux/init.h>
@@ -136,11 +139,175 @@ extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe);
 
 static int mca_init __initdata;
 
+/*
+ * limited & delayed printing support for MCA/INIT handler
+ */
+
+#define mprintk(fmt...) ia64_mca_printk(fmt)
+
+#define MLOGBUF_SIZE (512+256*NR_CPUS)
+#define MLOGBUF_MSGMAX 256
+static char mlogbuf[MLOGBUF_SIZE];
+static DEFINE_SPINLOCK(mlogbuf_wlock); /* mca context only */
+static DEFINE_SPINLOCK(mlogbuf_rlock); /* normal context only */
+static unsigned long mlogbuf_start;
+static unsigned long mlogbuf_end;
+static unsigned int mlogbuf_finished = 0;
+static unsigned long mlogbuf_timestamp = 0;
+
+static int loglevel_save = -1;
+#define BREAK_LOGLEVEL(__console_loglevel)             \
+       oops_in_progress = 1;                           \
+       if (loglevel_save < 0)                          \
+               loglevel_save = __console_loglevel;     \
+       __console_loglevel = 15;
+
+#define RESTORE_LOGLEVEL(__console_loglevel)           \
+       if (loglevel_save >= 0) {                       \
+               __console_loglevel = loglevel_save;     \
+               loglevel_save = -1;                     \
+       }                                               \
+       mlogbuf_finished = 0;                           \
+       oops_in_progress = 0;
+
+/*
+ * Push messages into buffer, print them later if not urgent.
+ */
+void ia64_mca_printk(const char *fmt, ...)
+{
+       va_list args;
+       int printed_len;
+       char temp_buf[MLOGBUF_MSGMAX];
+       char *p;
+
+       va_start(args, fmt);
+       printed_len = vscnprintf(temp_buf, sizeof(temp_buf), fmt, args);
+       va_end(args);
+
+       /* Copy the output into mlogbuf */
+       if (oops_in_progress) {
+               /* mlogbuf was abandoned, use printk directly instead. */
+               printk(temp_buf);
+       } else {
+               spin_lock(&mlogbuf_wlock);
+               for (p = temp_buf; *p; p++) {
+                       unsigned long next = (mlogbuf_end + 1) % MLOGBUF_SIZE;
+                       if (next != mlogbuf_start) {
+                               mlogbuf[mlogbuf_end] = *p;
+                               mlogbuf_end = next;
+                       } else {
+                               /* buffer full */
+                               break;
+                       }
+               }
+               mlogbuf[mlogbuf_end] = '\0';
+               spin_unlock(&mlogbuf_wlock);
+       }
+}
+EXPORT_SYMBOL(ia64_mca_printk);
+
+/*
+ * Print buffered messages.
+ *  NOTE: call this after returning normal context. (ex. from salinfod)
+ */
+void ia64_mlogbuf_dump(void)
+{
+       char temp_buf[MLOGBUF_MSGMAX];
+       char *p;
+       unsigned long index;
+       unsigned long flags;
+       unsigned int printed_len;
+
+       /* Get output from mlogbuf */
+       while (mlogbuf_start != mlogbuf_end) {
+               temp_buf[0] = '\0';
+               p = temp_buf;
+               printed_len = 0;
+
+               spin_lock_irqsave(&mlogbuf_rlock, flags);
+
+               index = mlogbuf_start;
+               while (index != mlogbuf_end) {
+                       *p = mlogbuf[index];
+                       index = (index + 1) % MLOGBUF_SIZE;
+                       if (!*p)
+                               break;
+                       p++;
+                       if (++printed_len >= MLOGBUF_MSGMAX - 1)
+                               break;
+               }
+               *p = '\0';
+               if (temp_buf[0])
+                       printk(temp_buf);
+               mlogbuf_start = index;
+
+               mlogbuf_timestamp = 0;
+               spin_unlock_irqrestore(&mlogbuf_rlock, flags);
+       }
+}
+EXPORT_SYMBOL(ia64_mlogbuf_dump);
+
+/*
+ * Call this if system is going to down or if immediate flushing messages to
+ * console is required. (ex. recovery was failed, crash dump is going to be
+ * invoked, long-wait rendezvous etc.)
+ *  NOTE: this should be called from monarch.
+ */
+static void ia64_mlogbuf_finish(int wait)
+{
+       BREAK_LOGLEVEL(console_loglevel);
+
+       spin_lock_init(&mlogbuf_rlock);
+       ia64_mlogbuf_dump();
+       printk(KERN_EMERG "mlogbuf_finish: printing switched to urgent mode, "
+               "MCA/INIT might be dodgy or fail.\n");
+
+       if (!wait)
+               return;
+
+       /* wait for console */
+       printk("Delaying for 5 seconds...\n");
+       udelay(5*1000000);
+
+       mlogbuf_finished = 1;
+}
+EXPORT_SYMBOL(ia64_mlogbuf_finish);
+
+/*
+ * Print buffered messages from INIT context.
+ */
+static void ia64_mlogbuf_dump_from_init(void)
+{
+       if (mlogbuf_finished)
+               return;
+
+       if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) {
+               printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT "
+                       " and the system seems to be messed up.\n");
+               ia64_mlogbuf_finish(0);
+               return;
+       }
+
+       if (!spin_trylock(&mlogbuf_rlock)) {
+               printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT. "
+                       "Generated messages other than stack dump will be "
+                       "buffered to mlogbuf and will be printed later.\n");
+               printk(KERN_ERR "INIT: If messages would not printed after "
+                       "this INIT, wait 30sec and assert INIT again.\n");
+               if (!mlogbuf_timestamp)
+                       mlogbuf_timestamp = jiffies;
+               return;
+       }
+       spin_unlock(&mlogbuf_rlock);
+       ia64_mlogbuf_dump();
+}
 
 static void inline
 ia64_mca_spin(const char *func)
 {
-       printk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func);
+       if (monarch_cpu == smp_processor_id())
+               ia64_mlogbuf_finish(0);
+       mprintk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func);
        while (1)
                cpu_relax();
 }
@@ -988,18 +1155,22 @@ ia64_wait_for_slaves(int monarch, const char *type)
        }
        if (!missing)
                goto all_in;
-       printk(KERN_INFO "OS %s slave did not rendezvous on cpu", type);
+       /*
+        * Maybe slave(s) dead. Print buffered messages immediately.
+        */
+       ia64_mlogbuf_finish(0);
+       mprintk(KERN_INFO "OS %s slave did not rendezvous on cpu", type);
        for_each_online_cpu(c) {
                if (c == monarch)
                        continue;
                if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
-                       printk(" %d", c);
+                       mprintk(" %d", c);
        }
-       printk("\n");
+       mprintk("\n");
        return;
 
 all_in:
-       printk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type);
+       mprintk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type);
        return;
 }
 
@@ -1027,10 +1198,8 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
        struct ia64_mca_notify_die nd =
                { .sos = sos, .monarch_cpu = &monarch_cpu };
 
-       oops_in_progress = 1;   /* FIXME: make printk NMI/MCA/INIT safe */
-       console_loglevel = 15;  /* make sure printks make it to console */
-       printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n",
-               sos->proc_state_param, cpu, sos->monarch);
+       mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d "
+               "monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch);
 
        previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
        monarch_cpu = cpu;
@@ -1066,6 +1235,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
                rh->severity = sal_log_severity_corrected;
                ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA);
                sos->os_status = IA64_MCA_CORRECTED;
+       } else {
+               /* Dump buffered message to console */
+               ia64_mlogbuf_finish(1);
        }
        if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover)
                        == NOTIFY_STOP)
@@ -1305,6 +1477,15 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi
        struct task_struct *g, *t;
        if (val != DIE_INIT_MONARCH_PROCESS)
                return NOTIFY_DONE;
+
+       /*
+        * FIXME: mlogbuf will brim over with INIT stack dumps.
+        * To enable show_stack from INIT, we use oops_in_progress which should
+        * be used in real oops. This would cause something wrong after INIT.
+        */
+       BREAK_LOGLEVEL(console_loglevel);
+       ia64_mlogbuf_dump_from_init();
+
        printk(KERN_ERR "Processes interrupted by INIT -");
        for_each_online_cpu(c) {
                struct ia64_sal_os_state *s;
@@ -1326,6 +1507,8 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi
                } while_each_thread (g, t);
                read_unlock(&tasklist_lock);
        }
+       /* FIXME: This will not restore zapped printk locks. */
+       RESTORE_LOGLEVEL(console_loglevel);
        return NOTIFY_DONE;
 }
 
@@ -1357,12 +1540,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
        struct ia64_mca_notify_die nd =
                { .sos = sos, .monarch_cpu = &monarch_cpu };
 
-       oops_in_progress = 1;   /* FIXME: make printk NMI/MCA/INIT safe */
-       console_loglevel = 15;  /* make sure printks make it to console */
-
        (void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0);
 
-       printk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n",
+       mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n",
                sos->proc_state_param, cpu, sos->monarch);
        salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0);
 
@@ -1375,7 +1555,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
         * fix their proms and get their customers updated.
         */
        if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) {
-               printk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n",
+               mprintk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n",
                       __FUNCTION__, cpu);
                atomic_dec(&slaves);
                sos->monarch = 1;
@@ -1387,7 +1567,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
         * fix their proms and get their customers updated.
         */
        if (sos->monarch && atomic_add_return(1, &monarchs) > 1) {
-               printk(KERN_WARNING "%s: Demoting cpu %d to slave.\n",
+               mprintk(KERN_WARNING "%s: Demoting cpu %d to slave.\n",
                               __FUNCTION__, cpu);
                atomic_dec(&monarchs);
                sos->monarch = 0;
@@ -1408,7 +1588,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
                if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0)
                                == NOTIFY_STOP)
                        ia64_mca_spin(__FUNCTION__);
-               printk("Slave on cpu %d returning to normal service.\n", cpu);
+               mprintk("Slave on cpu %d returning to normal service.\n", cpu);
                set_curr_task(cpu, previous_current);
                ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
                atomic_dec(&slaves);
@@ -1426,7 +1606,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
         * same serial line, the user will need some time to switch out of the BMC before
         * the dump begins.
         */
-       printk("Delaying for 5 seconds...\n");
+       mprintk("Delaying for 5 seconds...\n");
        udelay(5*1000000);
        ia64_wait_for_slaves(cpu, "INIT");
        /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through
@@ -1439,7 +1619,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
        if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0)
                        == NOTIFY_STOP)
                ia64_mca_spin(__FUNCTION__);
-       printk("\nINIT dump complete.  Monarch on cpu %d returning to normal service.\n", cpu);
+       mprintk("\nINIT dump complete.  Monarch on cpu %d returning to normal service.\n", cpu);
        atomic_dec(&monarchs);
        set_curr_task(cpu, previous_current);
        monarch_cpu = -1;
index 8db6e0cedadcbc3d660bd6f5621cbc496e75b466..a45009d2bc90149e5199a97c9eff86a8d552f5b0 100644 (file)
@@ -79,14 +79,30 @@ static int
 fatal_mca(const char *fmt, ...)
 {
        va_list args;
+       char buf[256];
 
        va_start(args, fmt);
-       vprintk(fmt, args);
+       vsnprintf(buf, sizeof(buf), fmt, args);
        va_end(args);
+       ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf);
 
        return MCA_NOT_RECOVERED;
 }
 
+static int
+mca_recovered(const char *fmt, ...)
+{
+       va_list args;
+       char buf[256];
+
+       va_start(args, fmt);
+       vsnprintf(buf, sizeof(buf), fmt, args);
+       va_end(args);
+       ia64_mca_printk(KERN_INFO "MCA: %s\n", buf);
+
+       return MCA_RECOVERED;
+}
+
 /**
  * mca_page_isolate - isolate a poisoned page in order not to use it later
  * @paddr:     poisoned memory location
@@ -140,6 +156,7 @@ mca_page_isolate(unsigned long paddr)
 void
 mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr)
 {
+       ia64_mlogbuf_dump();
        printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, "
                "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n",
                raw_smp_processor_id(), current->pid, current->uid,
@@ -440,7 +457,7 @@ recover_from_read_error(slidx_table_t *slidx,
 
        /* Is target address valid? */
        if (!pbci->tv)
-               return fatal_mca(KERN_ALERT "MCA: target address not valid\n");
+               return fatal_mca("target address not valid");
 
        /*
         * cpu read or memory-mapped io read
@@ -458,7 +475,7 @@ recover_from_read_error(slidx_table_t *slidx,
 
        /* Is minstate valid? */
        if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate))
-               return fatal_mca(KERN_ALERT "MCA: minstate not valid\n");
+               return fatal_mca("minstate not valid");
        psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
        psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr);
 
@@ -492,13 +509,14 @@ recover_from_read_error(slidx_table_t *slidx,
                        psr2->bn  = 1;
                        psr2->i  = 0;
 
-                       return MCA_RECOVERED;
+                       return mca_recovered("user memory corruption. "
+                               "kill affected process - recovered.");
                }
 
        }
 
-       return fatal_mca(KERN_ALERT "MCA: kernel context not recovered,"
-                         " iip 0x%lx\n", pmsa->pmsa_iip);
+       return fatal_mca("kernel context not recovered, iip 0x%lx\n",
+                        pmsa->pmsa_iip);
 }
 
 /**
@@ -584,13 +602,13 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
         * The machine check is corrected.
         */
        if (psp->cm == 1)
-               return MCA_RECOVERED;
+               return mca_recovered("machine check is already corrected.");
 
        /*
         * The error was not contained.  Software must be reset.
         */
        if (psp->us || psp->ci == 0)
-               return fatal_mca(KERN_ALERT "MCA: error not contained\n");
+               return fatal_mca("error not contained");
 
        /*
         * The cache check and bus check bits have four possible states
@@ -601,22 +619,22 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
         *    1  1      Memory error, attempt recovery
         */
        if (psp->bc == 0 || pbci == NULL)
-               return fatal_mca(KERN_ALERT "MCA: No bus check\n");
+               return fatal_mca("No bus check");
 
        /*
         * Sorry, we cannot handle so many.
         */
        if (peidx_bus_check_num(peidx) > 1)
-               return fatal_mca(KERN_ALERT "MCA: Too many bus checks\n");
+               return fatal_mca("Too many bus checks");
        /*
         * Well, here is only one bus error.
         */
        if (pbci->ib)
-               return fatal_mca(KERN_ALERT "MCA: Internal Bus error\n");
+               return fatal_mca("Internal Bus error");
        if (pbci->cc)
-               return fatal_mca(KERN_ALERT "MCA: Cache-cache error\n");
+               return fatal_mca("Cache-cache error");
        if (pbci->eb && pbci->bsi > 0)
-               return fatal_mca(KERN_ALERT "MCA: External bus check fatal status\n");
+               return fatal_mca("External bus check fatal status");
 
        /*
         * This is a local MCA and estimated as recoverble external bus error.
@@ -628,7 +646,7 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
        /*
         * On account of strange SAL error record, we cannot recover.
         */
-       return fatal_mca(KERN_ALERT "MCA: Strange SAL record\n");
+       return fatal_mca("Strange SAL record");
 }
 
 /**
@@ -657,10 +675,10 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
 
         /* Now, OS can recover when there is one processor error section */
        if (n_proc_err > 1)
-               return fatal_mca(KERN_ALERT "MCA: Too Many Errors\n");
+               return fatal_mca("Too Many Errors");
        else if (n_proc_err == 0)
-               /* Weird SAL record ... We need not to recover */
-               return fatal_mca(KERN_ALERT "MCA: Weird SAL record\n");
+               /* Weird SAL record ... We can't do anything */
+               return fatal_mca("Weird SAL record");
 
        /* Make index of processor error section */
        mca_make_peidx((sal_log_processor_info_t*)
@@ -671,7 +689,7 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
 
        /* Check whether MCA is global or not */
        if (is_mca_global(&peidx, &pbci, sos))
-               return fatal_mca(KERN_ALERT "MCA: global MCA\n");
+               return fatal_mca("global MCA");
        
        /* Try to recover a processor error */
        return recover_from_processor_error(platform_err, &slidx, &peidx,
index 31a2e52bb16fc7c6b6af5bcfdb8a9caebd95cdb1..c85e943ba5fd93017a305b34aef00da0b56689f9 100644 (file)
@@ -118,3 +118,7 @@ struct mca_table_entry {
 
 extern const struct mca_table_entry *search_mca_tables (unsigned long addr);
 extern int mca_recover_range(unsigned long);
+extern void ia64_mca_printk(const char * fmt, ...)
+        __attribute__ ((format (printf, 1, 2)));
+extern void ia64_mlogbuf_dump(void);
+
index 9065f0f01ba3e7f70b48abf36af96cf46b205514..e63b8ca5344a202d85b8cb1d25a4edb08c486894 100644 (file)
@@ -266,6 +266,7 @@ salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe)
 /* Check for outstanding MCA/INIT records every minute (arbitrary) */
 #define SALINFO_TIMER_DELAY (60*HZ)
 static struct timer_list salinfo_timer;
+extern void ia64_mlogbuf_dump(void);
 
 static void
 salinfo_timeout_check(struct salinfo_data *data)
@@ -283,6 +284,7 @@ salinfo_timeout_check(struct salinfo_data *data)
 static void
 salinfo_timeout (unsigned long arg)
 {
+       ia64_mlogbuf_dump();
        salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA);
        salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT);
        salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY;
@@ -332,6 +334,8 @@ retry:
        if (cpu == -1)
                goto retry;
 
+       ia64_mlogbuf_dump();
+
        /* for next read, start checking at next CPU */
        data->cpu_check = cpu;
        if (++data->cpu_check == NR_CPUS)