From: Paul Mundt Date: Wed, 27 Sep 2006 05:57:44 +0000 (+0900) Subject: sh: Add control register barriers. X-Git-Url: https://git.stricted.de/?a=commitdiff_plain;h=298476220d1f793ca0ac6c9e5dc817e1ad3e9851;p=GitHub%2Fmoto-9609%2Fandroid_kernel_motorola_exynos9610.git sh: Add control register barriers. Currently when making changes to control registers, we typically need some time for changes to take effect (8 nops, generally). However, for sh4a we simply need to do an icbi.. This is a simple patch for implementing a general purpose ctrl_barrier() which functions as a control register write barrier. There's some additional documentation in the patch itself, but it's pretty self explanatory. There were also some places where we were not doing the barrier, which didn't seem to have any adverse effects on legacy parts, but certainly did on sh4a. It's safer to have the barrier in place for legacy parts as well in these cases, though this does make flush_tlb_all() more expensive (by an order of 8 nops). We can ifdef around the flush_tlb_all() case for now if it's clear that all legacy parts won't have a problem with this. Signed-off-by: Paul Mundt --- diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c index c036c2b4ac2b..2203bd6aadb3 100644 --- a/arch/sh/mm/cache-sh4.c +++ b/arch/sh/mm/cache-sh4.c @@ -269,6 +269,11 @@ static inline void flush_icache_all(void) ccr |= CCR_CACHE_ICI; ctrl_outl(ccr, CCR); + /* + * back_to_P1() will take care of the barrier for us, don't add + * another one! + */ + back_to_P1(); local_irq_restore(flags); } diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 775f86cd3fe8..364181f27b79 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -369,12 +369,13 @@ void flush_tlb_all(void) * Flush all the TLB. * * Write to the MMU control register's bit: - * TF-bit for SH-3, TI-bit for SH-4. + * TF-bit for SH-3, TI-bit for SH-4. * It's same position, bit #2. */ local_irq_save(flags); status = ctrl_inl(MMUCR); - status |= 0x04; + status |= 0x04; ctrl_outl(status, MMUCR); + ctrl_barrier(); local_irq_restore(flags); } diff --git a/include/asm-sh/mmu_context.h b/include/asm-sh/mmu_context.h index 6760d064bd02..87678ba8d6b6 100644 --- a/include/asm-sh/mmu_context.h +++ b/include/asm-sh/mmu_context.h @@ -174,9 +174,7 @@ static inline void enable_mmu(void) { /* Enable MMU */ ctrl_outl(MMU_CONTROL_INIT, MMUCR); - - /* The manual suggests doing some nops after turning on the MMU */ - __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop\n\t"); + ctrl_barrier(); if (mmu_context_cache == NO_CONTEXT) mmu_context_cache = MMU_CONTEXT_FIRST_VERSION; @@ -191,7 +189,8 @@ static inline void disable_mmu(void) cr = ctrl_inl(MMUCR); cr &= ~MMU_CONTROL_INIT; ctrl_outl(cr, MMUCR); - __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop\n\t"); + + ctrl_barrier(); } #else /* diff --git a/include/asm-sh/system.h b/include/asm-sh/system.h index eb4902ed920a..1630a5411e5f 100644 --- a/include/asm-sh/system.h +++ b/include/asm-sh/system.h @@ -67,8 +67,17 @@ static inline void sched_cacheflush(void) { } -#define nop() __asm__ __volatile__ ("nop") - +#ifdef CONFIG_CPU_SH4A +#define __icbi() \ +{ \ + unsigned long __addr; \ + __addr = 0xa8000000; \ + __asm__ __volatile__( \ + "icbi %0\n\t" \ + : /* no output */ \ + : "m" (__m(__addr))); \ +} +#endif #define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) @@ -84,15 +93,31 @@ static __inline__ unsigned long tas(volatile int *m) extern void __xchg_called_with_bad_pointer(void); +/* + * A brief note on ctrl_barrier(), the control register write barrier. + * + * Legacy SH cores typically require a sequence of 8 nops after + * modification of a control register in order for the changes to take + * effect. On newer cores (like the sh4a and sh5) this is accomplished + * with icbi. + * + * Also note that on sh4a in the icbi case we can forego a synco for the + * write barrier, as it's not necessary for control registers. + * + * Historically we have only done this type of barrier for the MMUCR, but + * it's also necessary for the CCR, so we make it generic here instead. + */ #ifdef CONFIG_CPU_SH4A -#define mb() __asm__ __volatile__ ("synco": : :"memory") -#define rmb() mb() -#define wmb() __asm__ __volatile__ ("synco": : :"memory") +#define mb() __asm__ __volatile__ ("synco": : :"memory") +#define rmb() mb() +#define wmb() __asm__ __volatile__ ("synco": : :"memory") +#define ctrl_barrier() __icbi() #define read_barrier_depends() do { } while(0) #else -#define mb() __asm__ __volatile__ ("": : :"memory") -#define rmb() mb() -#define wmb() __asm__ __volatile__ ("": : :"memory") +#define mb() __asm__ __volatile__ ("": : :"memory") +#define rmb() mb() +#define wmb() __asm__ __volatile__ ("": : :"memory") +#define ctrl_barrier() __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop") #define read_barrier_depends() do { } while(0) #endif @@ -218,8 +243,8 @@ do { \ #define back_to_P1() \ do { \ unsigned long __dummy; \ + ctrl_barrier(); \ __asm__ __volatile__( \ - "nop;nop;nop;nop;nop;nop;nop\n\t" \ "mov.l 1f, %0\n\t" \ "jmp @%0\n\t" \ " nop\n\t" \