From 5a5488d3bb9a23d9884572e5d85dfeefe8749d3d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 1 Apr 2009 01:47:10 -0700 Subject: [PATCH] sparc64: Store per-cpu offset in trap_block[] Surprisingly this actually makes LOAD_PER_CPU_BASE() a little more efficient. Signed-off-by: David S. Miller --- arch/sparc/include/asm/percpu_64.h | 6 +++--- arch/sparc/include/asm/trap_block.h | 14 +++++++------- arch/sparc/kernel/head_64.S | 22 ---------------------- arch/sparc/kernel/smp_64.c | 18 +++++++----------- arch/sparc/kernel/traps_64.c | 5 ++++- 5 files changed, 21 insertions(+), 44 deletions(-) diff --git a/arch/sparc/include/asm/percpu_64.h b/arch/sparc/include/asm/percpu_64.h index bee64593023e..c0ab102d11f6 100644 --- a/arch/sparc/include/asm/percpu_64.h +++ b/arch/sparc/include/asm/percpu_64.h @@ -7,12 +7,12 @@ register unsigned long __local_per_cpu_offset asm("g5"); #ifdef CONFIG_SMP +#include <asm/trap_block.h> + extern void real_setup_per_cpu_areas(void); -extern unsigned long __per_cpu_base; -extern unsigned long __per_cpu_shift; #define __per_cpu_offset(__cpu) \ - (__per_cpu_base + ((unsigned long)(__cpu) << __per_cpu_shift)) + (trap_block[(__cpu)].__per_cpu_base) #define per_cpu_offset(x) (__per_cpu_offset(x)) #define __my_cpu_offset __local_per_cpu_offset diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h index 68fd9ee3e8ae..7e26b2db6211 100644 --- a/arch/sparc/include/asm/trap_block.h +++ b/arch/sparc/include/asm/trap_block.h @@ -48,7 +48,7 @@ struct trap_per_cpu { unsigned int dev_mondo_qmask; unsigned int resum_qmask; unsigned int nonresum_qmask; - unsigned long __unused; + unsigned long __per_cpu_base; } __attribute__((aligned(64))); extern struct trap_per_cpu trap_block[NR_CPUS]; extern void init_cur_cpu_trap(struct thread_info *); @@ -101,6 +101,7 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, #define TRAP_PER_CPU_DEV_MONDO_QMASK 0xec #define TRAP_PER_CPU_RESUM_QMASK 0xf0 #define TRAP_PER_CPU_NONRESUM_QMASK 0xf4 
+#define TRAP_PER_CPU_PER_CPU_BASE 0xf8 #define TRAP_BLOCK_SZ_SHIFT 8 @@ -172,12 +173,11 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, */ #define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) \ lduh [THR + TI_CPU], REG1; \ - sethi %hi(__per_cpu_shift), REG3; \ - sethi %hi(__per_cpu_base), REG2; \ - ldx [REG3 + %lo(__per_cpu_shift)], REG3; \ - ldx [REG2 + %lo(__per_cpu_base)], REG2; \ - sllx REG1, REG3, REG3; \ - add REG3, REG2, DEST; + sethi %hi(trap_block), REG2; \ + sllx REG1, TRAP_BLOCK_SZ_SHIFT, REG1; \ + or REG2, %lo(trap_block), REG2; \ + add REG2, REG1, REG2; \ + ldx [REG2 + TRAP_PER_CPU_PER_CPU_BASE], DEST; #else diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 91bf4c7f79b9..f8f21050448b 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -641,28 +641,6 @@ tlb_fixup_done: /* Not reached... */ 1: - /* If we boot on a non-zero cpu, all of the per-cpu - * variable references we make before setting up the - * per-cpu areas will use a bogus offset. Put a - * compensating factor into __per_cpu_base to handle - * this cleanly. - * - * What the per-cpu code calculates is: - * - * __per_cpu_base + (cpu << __per_cpu_shift) - * - * These two variables are zero initially, so to - * make it all cancel out to zero we need to put - * "0 - (cpu << 0)" into __per_cpu_base so that the - * above formula evaluates to zero. - * - * We cannot even perform a printk() until this stuff - * is setup as that calls cpu_clock() which uses - * per-cpu variables. 
- */ - sub %g0, %o0, %o1 - sethi %hi(__per_cpu_base), %o2 - stx %o1, [%o2 + %lo(__per_cpu_base)] #else mov 0, %o0 #endif diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 4226d0ebaea5..b20f253857b7 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1371,23 +1371,17 @@ void smp_send_stop(void) { } -unsigned long __per_cpu_base __read_mostly; -unsigned long __per_cpu_shift __read_mostly; - -EXPORT_SYMBOL(__per_cpu_base); -EXPORT_SYMBOL(__per_cpu_shift); - void __init real_setup_per_cpu_areas(void) { - unsigned long paddr, goal, size, i; + unsigned long base, shift, paddr, goal, size, i; char *ptr; /* Copy section for each CPU (we discard the original) */ goal = PERCPU_ENOUGH_ROOM; - __per_cpu_shift = PAGE_SHIFT; + shift = PAGE_SHIFT; for (size = PAGE_SIZE; size < goal; size <<= 1UL) - __per_cpu_shift++; + shift++; paddr = lmb_alloc(size * NR_CPUS, PAGE_SIZE); if (!paddr) { @@ -1396,10 +1390,12 @@ void __init real_setup_per_cpu_areas(void) } ptr = __va(paddr); - __per_cpu_base = ptr - __per_cpu_start; + base = ptr - __per_cpu_start; - for (i = 0; i < NR_CPUS; i++, ptr += size) + for (i = 0; i < NR_CPUS; i++, ptr += size) { + __per_cpu_offset(i) = base + (i * size); memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + } /* Setup %g5 for the boot cpu. */ __local_per_cpu_offset = __per_cpu_offset(smp_processor_id()); diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index d809c4ebb48f..d073aabf65ed 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2509,6 +2509,7 @@ void do_getpsr(struct pt_regs *regs) } struct trap_per_cpu trap_block[NR_CPUS]; +EXPORT_SYMBOL(trap_block); /* This can get invoked before sched_init() so play it super safe * and use hard_smp_processor_id(). 
@@ -2592,7 +2593,9 @@ void __init trap_init(void) (TRAP_PER_CPU_RESUM_QMASK != offsetof(struct trap_per_cpu, resum_qmask)) || (TRAP_PER_CPU_NONRESUM_QMASK != - offsetof(struct trap_per_cpu, nonresum_qmask))) + offsetof(struct trap_per_cpu, nonresum_qmask)) || + (TRAP_PER_CPU_PER_CPU_BASE != + offsetof(struct trap_per_cpu, __per_cpu_base))) trap_per_cpu_offsets_are_bolixed_dave(); if ((TSB_CONFIG_TSB != -- 2.20.1