[Blackfin] arch: fix bug - trap_tests fails to recover on some tests.
author Robin Getz <robin.getz@analog.com>
Sun, 27 Jan 2008 07:38:56 +0000 (15:38 +0800)
committer Bryan Wu <bryan.wu@analog.com>
Sun, 27 Jan 2008 07:38:56 +0000 (15:38 +0800)
http://blackfin.uclinux.org/gf/project/uclinux-dist/tracker/?action=TrackerItemEdit&tracker_item_id=3719

When the CPLBs get a miss, we do:
  - find a victim in the HW table
  - remove the victim
  - find the replacement in the software table
  - put it into the HW table.

If we can't find a replacement in the software table, we accidentally
leave a duplicate in the HW table. This patch ensures that the duplicate
is marked as not valid.

What we should do is find the replacement in the software table before
we find a victim in the HW table - but it's too late in the release cycle
to do that much restructuring of this code.

Rather than duplicate code, connect Hardware Errors (irq5) into trap_c,
so user space processes get killed properly.

The rest of irq_panic() can be moved into traps.c (later)

There is still a small corner case that causes problems when a
peripheral interrupt goes off a single cycle before a user space
hardware error. This causes a kernel panic, rather than the user
space process being killed.

But, this checkin makes things work in 99.9% of the cases, and is a vast
improvement from what is there today (which fails 100% of the time).

Signed-off-by: Robin Getz <robin.getz@analog.com>
Signed-off-by: Bryan Wu <bryan.wu@analog.com>
arch/blackfin/kernel/traps.c
arch/blackfin/mach-common/cplbmgr.S
arch/blackfin/mach-common/interrupt.S
arch/blackfin/mach-common/irqpanic.c
include/asm-blackfin/traps.h

index 21a55ef19cbd3d89fe8213a6f1ba442156584178..4be5ff0be60f75ce6679481563ee1e12173cf48b 100644 (file)
@@ -433,6 +433,36 @@ asmlinkage void trap_c(struct pt_regs *fp)
        /* 0x3D - Reserved, Caught by default */
        /* 0x3E - Reserved, Caught by default */
        /* 0x3F - Reserved, Caught by default */
+       case VEC_HWERR:
+               info.si_code = BUS_ADRALN;
+               sig = SIGBUS;
+               switch (fp->seqstat & SEQSTAT_HWERRCAUSE) {
+               /* System MMR Error */
+               case (SEQSTAT_HWERRCAUSE_SYSTEM_MMR):
+                       info.si_code = BUS_ADRALN;
+                       sig = SIGBUS;
+                       printk(KERN_NOTICE HWC_x2(KERN_NOTICE));
+                       break;
+               /* External Memory Addressing Error */
+               case (SEQSTAT_HWERRCAUSE_EXTERN_ADDR):
+                       info.si_code = BUS_ADRERR;
+                       sig = SIGBUS;
+                       printk(KERN_NOTICE HWC_x3(KERN_NOTICE));
+                       break;
+               /* Performance Monitor Overflow */
+               case (SEQSTAT_HWERRCAUSE_PERF_FLOW):
+                       printk(KERN_NOTICE HWC_x12(KERN_NOTICE));
+                       break;
+               /* RAISE 5 instruction */
+               case (SEQSTAT_HWERRCAUSE_RAISE_5):
+                       printk(KERN_NOTICE HWC_x18(KERN_NOTICE));
+                       break;
+               default:        /* Reserved */
+                       printk(KERN_NOTICE HWC_default(KERN_NOTICE));
+                       break;
+               }
+               CHK_DEBUGGER_TRAP();
+               break;
        default:
                info.si_code = TRAP_ILLTRAP;
                sig = SIGTRAP;
@@ -447,7 +477,11 @@ asmlinkage void trap_c(struct pt_regs *fp)
        if (sig != SIGTRAP) {
                unsigned long stack;
                dump_bfin_process(fp);
-               dump_bfin_mem((void *)fp->retx);
+               /* Is it an interrupt, or an exception? */
+               if (trapnr == VEC_HWERR)
+                       dump_bfin_mem((void *)fp->pc);
+               else
+                       dump_bfin_mem((void *)fp->retx);
                show_regs(fp);
 
                /* Print out the trace buffer if it makes sense */
@@ -672,12 +706,11 @@ void dump_bfin_mem(void *retaddr)
                         * context, which should mean an oops is happening
                         */
                        if (oops_in_progress && x >= 0x0040 && x <= 0x0047 && i <= 0)
-                               panic("\n\nWARNING : You should reconfigure"
+                               printk(KERN_EMERG "\n"
+                                       KERN_EMERG "WARNING : You should reconfigure"
                                        " the kernel to turn on\n"
-                                       " 'Hardware error interrupt"
-                                       " debugging'\n"
-                                       " The rest of this error"
-                                       " is meanless\n");
+                                       KERN_EMERG " 'Hardware error interrupt debugging'\n"
+                                       KERN_EMERG " The rest of this error is meanless\n");
 #endif
                        if (i == (unsigned int)retaddr)
                                printk("[%04x]", x);
@@ -698,6 +731,10 @@ void show_regs(struct pt_regs *fp)
        printk(KERN_NOTICE "\n" KERN_NOTICE "SEQUENCER STATUS:\n");
        printk(KERN_NOTICE " SEQSTAT: %08lx  IPEND: %04lx  SYSCFG: %04lx\n",
                (long)fp->seqstat, fp->ipend, fp->syscfg);
+       printk(KERN_NOTICE "  HWERRCAUSE: 0x%lx\n",
+               (fp->seqstat & SEQSTAT_HWERRCAUSE) >> 14);
+       printk(KERN_NOTICE "  EXCAUSE   : 0x%lx\n",
+               fp->seqstat & SEQSTAT_EXCAUSE);
 
        decode_address(buf, fp->rete);
        printk(KERN_NOTICE " RETE: %s\n", buf);
@@ -708,9 +745,10 @@ void show_regs(struct pt_regs *fp)
        decode_address(buf, fp->rets);
        printk(KERN_NOTICE " RETS: %s\n", buf);
        decode_address(buf, fp->pc);
-       printk(KERN_NOTICE " PC: %s\n", buf);
+       printk(KERN_NOTICE " PC  : %s\n", buf);
 
-       if ((long)fp->seqstat & SEQSTAT_EXCAUSE) {
+       if (((long)fp->seqstat &  SEQSTAT_EXCAUSE) &&
+           (((long)fp->seqstat & SEQSTAT_EXCAUSE) != VEC_HWERR)) {
                decode_address(buf, bfin_read_DCPLB_FAULT_ADDR());
                printk(KERN_NOTICE "DCPLB_FAULT_ADDR: %s\n", buf);
                decode_address(buf, bfin_read_ICPLB_FAULT_ADDR());
index faca1ab344d2069399e7c50cf657c8090cb0dcbe..f5cf3accef378fadae792b6660edf13eb76c284b 100644 (file)
@@ -190,7 +190,14 @@ ENTRY(_cplb_mgr)
        [P0 - 4] = R0;
        R0 = [P0 - 0x100];
        [P0-0x104] = R0;
-.Lie_move:P0+=4;
+.Lie_move:
+       P0+=4;
+
+       /* Clear ICPLB_DATA15, in case we don't find a replacement
+        * otherwise, we would have a duplicate entry, and will crash
+        */
+       R0 = 0;
+       [P0 - 4] = R0;
 
        /* We've made space in the ICPLB table, so that ICPLB15
         * is now free to be overwritten. Next, we have to determine
@@ -515,14 +522,23 @@ ENTRY(_cplb_mgr)
        R0 = [P0++];    /* move data */
        [P0 - 8] = R0;
        R0 = [P0-0x104] /* move address */
-.Lde_move: [P0-0x108] = R0;
+.Lde_move:
+        [P0-0x108] = R0;
+
+.Lde_moved:
+       NOP;
+
+       /* Clear DCPLB_DATA15, in case we don't find a replacement
+        * otherwise, we would have a duplicate entry, and will crash
+        */
+       R0 = 0;
+       [P0 - 0x4] = R0;
 
        /* We've now made space in DCPLB15 for the new CPLB to be
         * installed. The next stage is to locate a CPLB in the
         * config table that covers the faulting address.
         */
 
-.Lde_moved:NOP;
        R0 = I0;                /* Our faulting address */
 
        P2.L = _dpdt_table;
index 4de376418a18650d2e956c5337a351384e2fe977..f983ac7ea3520040e64eddba86b6587ad2eddc5b 100644 (file)
 #include <asm/entry.h>
 #include <asm/asm-offsets.h>
 #include <asm/trace.h>
+#include <asm/traps.h>
+#include <asm/thread_info.h>
 
 #include <asm/mach-common/context.S>
 
+.extern _ret_from_exception
+
 #ifdef CONFIG_I_ENTRY_L1
 .section .l1.text
 #else
@@ -134,10 +138,11 @@ __common_int_entry:
 
 /* interrupt routine for ivhw - 5 */
 ENTRY(_evt_ivhw)
-       SAVE_CONTEXT
+       SAVE_ALL_SYS
 #ifdef CONFIG_FRAME_POINTER
        fp = 0;
 #endif
+
 #if ANOMALY_05000283
        cc = r7 == r7;
        p5.h = 0xffc0;
@@ -147,13 +152,8 @@ ENTRY(_evt_ivhw)
 1:
 #endif
 
-       trace_buffer_stop(p0, r0);
-
-       r0 = IRQ_HWERR;
-       r1 = sp;
-
 #ifdef CONFIG_HARDWARE_PM
-       r7 = SEQSTAT;
+       r7 = [sp + PT_SEQSTAT];
        r7 = r7 >>> 0xe;
        r6 = 0x1F;
        r7 = r7 & r6;
@@ -161,11 +161,29 @@ ENTRY(_evt_ivhw)
        cc = r7 == r5;
        if cc jump .Lcall_do_ovf; /* deal with performance counter overflow */
 #endif
-
+       # We are going to dump something out, so make sure we print IPEND properly
+       p2.l = lo(IPEND);
+       p2.h = hi(IPEND);
+       r0 = [p2];
+       [sp + PT_IPEND] = r0;
+
+       /* set the EXCAUSE to HWERR for trap_c */
+       r0 = [sp + PT_SEQSTAT];
+       R1.L = LO(VEC_HWERR);
+       R1.H = HI(VEC_HWERR);
+       R0 = R0 | R1;
+       [sp + PT_SEQSTAT] = R0;
+
+       r0 = sp;        /* stack frame pt_regs pointer argument ==> r0 */
        SP += -12;
-       call _irq_panic;
+       call _trap_c;
        SP += 12;
+
+       call _ret_from_exception;
+.Lcommon_restore_all_sys:
+       RESTORE_ALL_SYS
        rti;
+
 #ifdef CONFIG_HARDWARE_PM
 .Lcall_do_ovf:
 
@@ -173,9 +191,11 @@ ENTRY(_evt_ivhw)
        call _pm_overflow;
        SP += 12;
 
-       jump .Lcommon_restore_context;
+       jump .Lcommon_restore_all_sys;
 #endif
 
+ENDPROC(_evt_ivhw)
+
 /* Interrupt routine for evt2 (NMI).
  * We don't actually use this, so just return.
  * For inner circle type details, please see:
index b22959b197e5c3e3d0643c19653092fa93238072..606ded9ff4e1963babcdec88999ef5767628451a 100644 (file)
@@ -46,9 +46,6 @@ void irq_panic(int reason, struct pt_regs *regs) __attribute__ ((l1_text));
  */
 asmlinkage void irq_panic(int reason, struct pt_regs *regs)
 {
-       int sig = 0;
-       siginfo_t info;
-
 #ifdef CONFIG_DEBUG_ICACHE_CHECK
        unsigned int cmd, tag, ca, cache_hi, cache_lo, *pa;
        unsigned short i, j, die;
@@ -136,53 +133,6 @@ asmlinkage void irq_panic(int reason, struct pt_regs *regs)
        }
 #endif
 
-       printk(KERN_EMERG "\n");
-       printk(KERN_EMERG "Exception: IRQ 0x%x entered\n", reason);
-       printk(KERN_EMERG " code=[0x%08lx],   stack frame=0x%08lx,  "
-           " bad PC=0x%08lx\n",
-           (unsigned long)regs->seqstat,
-           (unsigned long)regs,
-           (unsigned long)regs->pc);
-       if (reason == 0x5) {
-               printk(KERN_EMERG "----------- HARDWARE ERROR -----------\n");
-
-               /* There is only need to check for Hardware Errors, since other
-                * EXCEPTIONS are handled in TRAPS.c (MH)
-                */
-               switch (regs->seqstat & SEQSTAT_HWERRCAUSE) {
-               case (SEQSTAT_HWERRCAUSE_SYSTEM_MMR):   /* System MMR Error */
-                       info.si_code = BUS_ADRALN;
-                       sig = SIGBUS;
-                       printk(KERN_EMERG HWC_x2(KERN_EMERG));
-                       break;
-               case (SEQSTAT_HWERRCAUSE_EXTERN_ADDR):  /* External Memory Addressing Error */
-                       info.si_code = BUS_ADRERR;
-                       sig = SIGBUS;
-                       printk(KERN_EMERG HWC_x3(KERN_EMERG));
-                       break;
-               case (SEQSTAT_HWERRCAUSE_PERF_FLOW):    /* Performance Monitor Overflow */
-                       printk(KERN_EMERG HWC_x12(KERN_EMERG));
-                       break;
-               case (SEQSTAT_HWERRCAUSE_RAISE_5):      /* RAISE 5 instruction */
-                       printk(KERN_EMERG HWC_x18(KERN_EMERG));
-                       break;
-               default:        /* Reserved */
-                       printk(KERN_EMERG HWC_default(KERN_EMERG));
-                       break;
-               }
-       }
-
-       regs->ipend = bfin_read_IPEND();
-       dump_bfin_process(regs);
-       dump_bfin_mem((void *)regs->pc);
-       show_regs(regs);
-       if (0 == (info.si_signo = sig) || 0 == user_mode(regs)) /* in kernelspace */
-               panic("Unhandled IRQ or exceptions!\n");
-       else {                  /* in userspace */
-               info.si_errno = 0;
-               info.si_addr = (void *)regs->pc;
-               force_sig_info(sig, &info, current);
-       }
 }
 
 #ifdef CONFIG_HARDWARE_PM
index ee1cbf73a9ab2a8993eb8eec90531a7e90b70baf..f0e5f940d9cae503d55be15fb74982c15be47a2a 100644 (file)
 #define VEC_CPLB_I_M   (44)
 #define VEC_CPLB_I_MHIT        (45)
 #define VEC_ILL_RES    (46)    /* including unvalid supervisor mode insn */
+/* The hardware reserves (63) for future use - we use it to tell our
+ * normal exception handling code we have a hardware error
+ */
+#define VEC_HWERR      (63)
 
 #ifndef __ASSEMBLY__