From: Ingo Molnar Date: Sun, 18 Jan 2009 17:37:14 +0000 (+0100) Subject: Merge branch 'core/percpu' into stackprotector X-Git-Url: https://git.stricted.de/?a=commitdiff_plain;h=b2b062b8163391c42b3219d466ca1ac9742b9c7b;p=GitHub%2Fmoto-9609%2Fandroid_kernel_motorola_exynos9610.git Merge branch 'core/percpu' into stackprotector Conflicts: arch/x86/include/asm/pda.h arch/x86/include/asm/system.h Also, moved include/asm-x86/stackprotector.h to arch/x86/include/asm. Signed-off-by: Ingo Molnar --- b2b062b8163391c42b3219d466ca1ac9742b9c7b diff --cc arch/x86/include/asm/pda.h index 3fea2fdb3302,c31ca048a901..5976cd803e9a --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@@ -9,114 -11,30 +11,28 @@@ /* Per processor datastructure. %gs points to it while the kernel runs */ struct x8664_pda { - struct task_struct *pcurrent; /* 0 Current process */ - unsigned long data_offset; /* 8 Per cpu data offset from linker - address */ - unsigned long kernelstack; /* 16 top of kernel stack for current */ - unsigned long oldrsp; /* 24 user rsp for system call */ - int irqcount; /* 32 Irq nesting counter. Starts -1 */ - unsigned int cpunumber; /* 36 Logical CPU number */ + unsigned long unused1; + unsigned long unused2; + unsigned long unused3; + unsigned long unused4; + int unused5; + unsigned int unused6; /* 36 was cpunumber */ -#ifdef CONFIG_CC_STACKPROTECTOR unsigned long stack_canary; /* 40 stack canary value */ /* gcc-ABI: this canary MUST be at offset 40!!! 
*/ - char *irqstackptr; - short nodenumber; /* number of current node (32k max) */ -#endif short in_bootmem; /* pda lives in bootmem */ - unsigned int __softirq_pending; - unsigned int __nmi_count; /* number of NMI on this CPUs */ - short mmu_state; - short isidle; - struct mm_struct *active_mm; - unsigned apic_timer_irqs; - unsigned irq0_irqs; - unsigned irq_resched_count; - unsigned irq_call_count; - unsigned irq_tlb_count; - unsigned irq_thermal_count; - unsigned irq_threshold_count; - unsigned irq_spurious_count; } ____cacheline_aligned_in_smp; - extern struct x8664_pda **_cpu_pda; + DECLARE_PER_CPU(struct x8664_pda, __pda); extern void pda_init(int); - #define cpu_pda(i) (_cpu_pda[i]) + #define cpu_pda(cpu) (&per_cpu(__pda, cpu)) - /* - * There is no fast way to get the base address of the PDA, all the accesses - * have to mention %fs/%gs. So it needs to be done this Torvaldian way. - */ - extern void __bad_pda_field(void) __attribute__((noreturn)); - - /* - * proxy_pda doesn't actually exist, but tell gcc it is accessed for - * all PDA accesses so it gets read/write dependencies right. 
- */ - extern struct x8664_pda _proxy_pda; - - #define pda_offset(field) offsetof(struct x8664_pda, field) - - #define pda_to_op(op, field, val) \ - do { \ - typedef typeof(_proxy_pda.field) T__; \ - if (0) { T__ tmp__; tmp__ = (val); } /* type checking */ \ - switch (sizeof(_proxy_pda.field)) { \ - case 2: \ - asm(op "w %1,%%gs:%c2" : \ - "+m" (_proxy_pda.field) : \ - "ri" ((T__)val), \ - "i"(pda_offset(field))); \ - break; \ - case 4: \ - asm(op "l %1,%%gs:%c2" : \ - "+m" (_proxy_pda.field) : \ - "ri" ((T__)val), \ - "i" (pda_offset(field))); \ - break; \ - case 8: \ - asm(op "q %1,%%gs:%c2": \ - "+m" (_proxy_pda.field) : \ - "ri" ((T__)val), \ - "i"(pda_offset(field))); \ - break; \ - default: \ - __bad_pda_field(); \ - } \ - } while (0) - - #define pda_from_op(op, field) \ - ({ \ - typeof(_proxy_pda.field) ret__; \ - switch (sizeof(_proxy_pda.field)) { \ - case 2: \ - asm(op "w %%gs:%c1,%0" : \ - "=r" (ret__) : \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - case 4: \ - asm(op "l %%gs:%c1,%0": \ - "=r" (ret__): \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - case 8: \ - asm(op "q %%gs:%c1,%0": \ - "=r" (ret__) : \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - default: \ - __bad_pda_field(); \ - } \ - ret__; \ - }) - - #define read_pda(field) pda_from_op("mov", field) - #define write_pda(field, val) pda_to_op("mov", field, val) - #define add_pda(field, val) pda_to_op("add", field, val) - #define sub_pda(field, val) pda_to_op("sub", field, val) - #define or_pda(field, val) pda_to_op("or", field, val) + #define read_pda(field) percpu_read(__pda.field) + #define write_pda(field, val) percpu_write(__pda.field, val) + #define add_pda(field, val) percpu_add(__pda.field, val) + #define sub_pda(field, val) percpu_sub(__pda.field, val) + #define or_pda(field, val) percpu_or(__pda.field, val) /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define 
test_and_clear_bit_pda(bit, field) \ @@@ -130,8 -42,4 +40,6 @@@ #endif - #define PDA_STACKOFFSET (5*8) - +#define refresh_stack_canary() write_pda(stack_canary, current->stack_canary) + #endif /* _ASM_X86_PDA_H */ diff --cc arch/x86/include/asm/stackprotector.h index 000000000000,000000000000..c7f0d10bae7b new file mode 100644 --- /dev/null +++ b/arch/x86/include/asm/stackprotector.h @@@ -1,0 -1,0 +1,39 @@@ ++#ifndef _ASM_STACKPROTECTOR_H ++#define _ASM_STACKPROTECTOR_H 1 ++ ++#include <asm/tsc.h> ++#include <asm/pda.h> ++ ++/* ++ * Initialize the stackprotector canary value. ++ * ++ * NOTE: this must only be called from functions that never return, ++ * and it must always be inlined. ++ */ ++static __always_inline void boot_init_stack_canary(void) ++{ ++ u64 canary; ++ u64 tsc; ++ ++ /* ++ * If we're the non-boot CPU, nothing set the PDA stack ++ * canary up for us - and if we are the boot CPU we have ++ * a 0 stack canary. This is a good place for updating ++ * it, as we won't ever return from this function (so the ++ * invalid canaries already on the stack won't ever ++ * trigger). ++ * ++ * We both use the random pool and the current TSC as a source ++ * of randomness. The TSC only matters for very early init, ++ * there it already has some randomness on most systems. Later ++ * on during the bootup the random pool has true entropy too. 
++ */ ++ get_random_bytes(&canary, sizeof(canary)); ++ tsc = __native_read_tsc(); ++ canary += tsc + (tsc << 32UL); ++ ++ current->stack_canary = canary; ++ write_pda(stack_canary, canary); ++} ++ ++#endif diff --cc arch/x86/include/asm/system.h index 2f6340a44291,d1dc27dba36d..8cadfe9b1194 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@@ -94,9 -94,7 +94,9 @@@ do { "call __switch_to\n\t" \ ".globl thread_return\n" \ "thread_return:\n\t" \ - "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ + "movq "__percpu_arg([current_task])",%%rsi\n\t" \ + "movq %P[task_canary](%%rsi),%%r8\n\t" \ + "movq %%r8,%%gs:%P[pda_canary]\n\t" \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ "movq %%rax,%%rdi\n\t" \ @@@ -108,9 -106,7 +108,9 @@@ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ - [current_task] "m" (per_cpu_var(current_task)) \ + [task_canary] "i" (offsetof(struct task_struct, stack_canary)),\ - [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)), \ ++ [current_task] "m" (per_cpu_var(current_task)), \ + [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary))\ : "memory", "cc" __EXTRA_CLOBBER) #endif diff --cc arch/x86/kernel/process_64.c index efb0396e19bf,4523ff88a69d..aa89eabf09e0 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@@ -625,14 -619,15 +631,14 @@@ __switch_to(struct task_struct *prev_p /* * Switch the PDA and FPU contexts. 
*/ - prev->usersp = read_pda(oldrsp); - write_pda(oldrsp, next->usersp); - write_pda(pcurrent, next_p); + prev->usersp = percpu_read(old_rsp); + percpu_write(old_rsp, next->usersp); + percpu_write(current_task, next_p); - write_pda(kernelstack, + percpu_write(kernel_stack, (unsigned long)task_stack_page(next_p) + - THREAD_SIZE - PDA_STACKOFFSET); + THREAD_SIZE - KERNEL_STACK_OFFSET); #ifdef CONFIG_CC_STACKPROTECTOR - write_pda(stack_canary, next_p->stack_canary); /* * Build time only check to make sure the stack_canary is at * offset 40 in the pda; this is a gcc ABI requirement diff --cc init/main.c index 07da4dea50c3,844209453c02..bfe4fb0c9842 --- a/init/main.c +++ b/init/main.c @@@ -561,15 -537,8 +538,14 @@@ asmlinkage void __init start_kernel(voi * Need to run as early as possible, to initialize the * lockdep hash: */ - unwind_init(); lockdep_init(); debug_objects_early_init(); + + /* + * Set up the initial canary ASAP: + */ + boot_init_stack_canary(); + cgroup_init_early(); local_irq_disable();