From 889a7a6a5d5e64063effd40056bdc7b8fb336bd1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Jan 2011 17:31:54 +0100 Subject: [PATCH] percpu, x86: Fix percpu_xchg_op() These recent percpu commits: 2485b6464cf8: x86,percpu: Move out of place 64 bit ops into X86_64 section 8270137a0d50: cpuops: Use cmpxchg for xchg to avoid lock semantics Caused this 'perf top' crash: Kernel panic - not syncing: Fatal exception in interrupt Pid: 0, comm: swapper Tainted: G D 2.6.38-rc2-00181-gef71723 #413 Call Trace: [] ? panic ? kmsg_dump ? kmsg_dump ? oops_end ? no_context ? __bad_area_nosemaphore ? perf_output_begin ? bad_area_nosemaphore ? do_page_fault ? __task_pid_nr_ns ? perf_event_tid ? __perf_event_header__init_id ? validate_chain ? perf_output_sample ? trace_hardirqs_off ? page_fault ? irq_work_run ? update_process_times ? tick_sched_timer ? tick_sched_timer ? __run_hrtimer ? hrtimer_interrupt ? account_system_vtime ? smp_apic_timer_interrupt ? apic_timer_interrupt ... Looking at assembly code, I found: list = this_cpu_xchg(irq_work_list, NULL); gives this wrong code : (gcc-4.1.2 cross compiler) ffffffff810bc45e: mov %gs:0xead0,%rax cmpxchg %rax,%gs:0xead0 jne ffffffff810bc45e test %rax,%rax je ffffffff810bc4aa Tell gcc we dirty eax/rax register in percpu_xchg_op() Compiler must use another register to store pxo_new__ We also dont need to reload percpu value after a jump, since a 'failed' cmpxchg already updated eax/rax Wrong generated code was : xor %rax,%rax /* load 0 into %rax */ 1: mov %gs:0xead0,%rax cmpxchg %rax,%gs:0xead0 jne 1b test %rax,%rax After patch : xor %rdx,%rdx /* load 0 into %rdx */ mov %gs:0xead0,%rax 1: cmpxchg %rdx,%gs:0xead0 jne 1b: test %rax,%rax Signed-off-by: Eric Dumazet Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Christoph Lameter Cc: Tejun Heo LKML-Reference: <1295973114.3588.312.camel@edumazet-laptop> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/percpu.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 3788f4649db4..7e172955ee57 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -273,34 +273,34 @@ do { \ typeof(var) pxo_new__ = (nval); \ switch (sizeof(var)) { \ case 1: \ - asm("\n1:mov "__percpu_arg(1)",%%al" \ - "\n\tcmpxchgb %2, "__percpu_arg(1) \ + asm("\n\tmov "__percpu_arg(1)",%%al" \ + "\n1:\tcmpxchgb %2, "__percpu_arg(1) \ "\n\tjnz 1b" \ - : "=a" (pxo_ret__), "+m" (var) \ + : "=&a" (pxo_ret__), "+m" (var) \ : "q" (pxo_new__) \ : "memory"); \ break; \ case 2: \ - asm("\n1:mov "__percpu_arg(1)",%%ax" \ - "\n\tcmpxchgw %2, "__percpu_arg(1) \ + asm("\n\tmov "__percpu_arg(1)",%%ax" \ + "\n1:\tcmpxchgw %2, "__percpu_arg(1) \ "\n\tjnz 1b" \ - : "=a" (pxo_ret__), "+m" (var) \ + : "=&a" (pxo_ret__), "+m" (var) \ : "r" (pxo_new__) \ : "memory"); \ break; \ case 4: \ - asm("\n1:mov "__percpu_arg(1)",%%eax" \ - "\n\tcmpxchgl %2, "__percpu_arg(1) \ + asm("\n\tmov "__percpu_arg(1)",%%eax" \ + "\n1:\tcmpxchgl %2, "__percpu_arg(1) \ "\n\tjnz 1b" \ - : "=a" (pxo_ret__), "+m" (var) \ + : "=&a" (pxo_ret__), "+m" (var) \ : "r" (pxo_new__) \ : "memory"); \ break; \ case 8: \ - asm("\n1:mov "__percpu_arg(1)",%%rax" \ - "\n\tcmpxchgq %2, "__percpu_arg(1) \ + asm("\n\tmov "__percpu_arg(1)",%%rax" \ + "\n1:\tcmpxchgq %2, "__percpu_arg(1) \ "\n\tjnz 1b" \ - : "=a" (pxo_ret__), "+m" (var) \ + : "=&a" (pxo_ret__), "+m" (var) \ : "r" (pxo_new__) \ : "memory"); \ break; \ -- 2.20.1