x86/asm: Fix inline asm call constraints for Clang
authorJosh Poimboeuf <jpoimboe@redhat.com>
Wed, 20 Sep 2017 21:24:33 +0000 (16:24 -0500)
committerIngo Molnar <mingo@kernel.org>
Sat, 23 Sep 2017 13:06:20 +0000 (15:06 +0200)
For inline asm statements which have a CALL instruction, we list the
stack pointer as a constraint to convince GCC to ensure the frame
pointer is set up first:

  static inline void foo()
  {
register void *__sp asm(_ASM_SP);
asm("call bar" : "+r" (__sp))
  }

Unfortunately, that pattern causes Clang to corrupt the stack pointer.

The fix is easy: convert the stack pointer register variable to a global
variable.

It should be noted that the end result is different based on the GCC
version.  With GCC 6.4, this patch has exactly the same result as
before:

defconfig defconfig-nofp distro distro-nofp
 before 9820389 9491555 8816046 8516940
 after 9820389 9491555 8816046 8516940

With GCC 7.2, however, GCC's behavior has changed.  It now changes its
behavior based on the conversion of the register variable to a global.
That somehow convinces it to *always* set up the frame pointer before
inserting *any* inline asm.  (Therefore, listing the variable as an
output constraint is a no-op and is no longer necessary.)  It's a bit
overkill, but the performance impact should be negligible.  And in fact,
there's a nice improvement with frame pointers disabled:

defconfig defconfig-nofp distro distro-nofp
 before 9796316 9468236 9076191 8790305
 after 9796957 9464267 9076381 8785949

So in summary, while listing the stack pointer as an output constraint
is no longer necessary for newer versions of GCC, it's still needed for
older versions.

Suggested-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Reported-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/3db862e970c432ae823cf515c52b54fec8270e0e.1505942196.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
13 files changed:
arch/x86/include/asm/alternative.h
arch/x86/include/asm/asm.h
arch/x86/include/asm/mshyperv.h
arch/x86/include/asm/paravirt_types.h
arch/x86/include/asm/preempt.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/rwsem.h
arch/x86/include/asm/uaccess.h
arch/x86/include/asm/xen/hypercall.h
arch/x86/kvm/emulate.c
arch/x86/kvm/vmx.c
arch/x86/mm/fault.c
tools/objtool/Documentation/stack-validation.txt

index 1b020381ab38965ae2a80d2d86df32c66c92e2e8..c096624137ae831fa4d0f9cebf528453598fdefb 100644 (file)
@@ -218,10 +218,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
 #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2,   \
                           output, input...)                                  \
 {                                                                            \
-       register void *__sp asm(_ASM_SP);                                     \
        asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
                "call %P[new2]", feature2)                                    \
-               : output, "+r" (__sp)                                         \
+               : output, ASM_CALL_CONSTRAINT                                 \
                : [old] "i" (oldfunc), [new1] "i" (newfunc1),                 \
                  [new2] "i" (newfunc2), ## input);                           \
 }
index 676ee5807d864d94538bf16d2ed7535655045128..c1eadbaf1115a4ad79cf55803e6c3bc72c15c4a4 100644 (file)
 /* For C file, we already have NOKPROBE_SYMBOL macro */
 #endif
 
+#ifndef __ASSEMBLY__
+/*
+ * This output constraint should be used for any inline asm which has a "call"
+ * instruction.  Otherwise the asm may be inserted before the frame pointer
+ * gets set up by the containing function.  If you forget to do this, objtool
+ * may print a "call without frame pointer save/setup" warning.
+ */
+register unsigned int __asm_call_sp asm("esp");
+#define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp)
+#endif
+
 #endif /* _ASM_X86_ASM_H */
index 63cc96f064dc4987813352eb8ceba869d3738856..738503e1f80c14f1b5720275dd57161509927ef8 100644 (file)
@@ -179,7 +179,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
        u64 input_address = input ? virt_to_phys(input) : 0;
        u64 output_address = output ? virt_to_phys(output) : 0;
        u64 hv_status;
-       register void *__sp asm(_ASM_SP);
 
 #ifdef CONFIG_X86_64
        if (!hv_hypercall_pg)
@@ -187,7 +186,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 
        __asm__ __volatile__("mov %4, %%r8\n"
                             "call *%5"
-                            : "=a" (hv_status), "+r" (__sp),
+                            : "=a" (hv_status), ASM_CALL_CONSTRAINT,
                               "+c" (control), "+d" (input_address)
                             :  "r" (output_address), "m" (hv_hypercall_pg)
                             : "cc", "memory", "r8", "r9", "r10", "r11");
@@ -202,7 +201,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 
        __asm__ __volatile__("call *%7"
                             : "=A" (hv_status),
-                              "+c" (input_address_lo), "+r" (__sp)
+                              "+c" (input_address_lo), ASM_CALL_CONSTRAINT
                             : "A" (control),
                               "b" (input_address_hi),
                               "D"(output_address_hi), "S"(output_address_lo),
@@ -224,12 +223,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 {
        u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
-       register void *__sp asm(_ASM_SP);
 
 #ifdef CONFIG_X86_64
        {
                __asm__ __volatile__("call *%4"
-                                    : "=a" (hv_status), "+r" (__sp),
+                                    : "=a" (hv_status), ASM_CALL_CONSTRAINT,
                                       "+c" (control), "+d" (input1)
                                     : "m" (hv_hypercall_pg)
                                     : "cc", "r8", "r9", "r10", "r11");
@@ -242,7 +240,7 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
                __asm__ __volatile__ ("call *%5"
                                      : "=A"(hv_status),
                                        "+c"(input1_lo),
-                                       "+r"(__sp)
+                                       ASM_CALL_CONSTRAINT
                                      : "A" (control),
                                        "b" (input1_hi),
                                        "m" (hv_hypercall_pg)
index 42873edd9f9d20cda2d1cfcb92b3798c4a75b3aa..280d94c36dada184fce42cd03d5d05fe7ebc8698 100644 (file)
@@ -459,8 +459,8 @@ int paravirt_disable_iospace(void);
  */
 #ifdef CONFIG_X86_32
 #define PVOP_VCALL_ARGS                                                        \
-       unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;      \
-       register void *__sp asm("esp")
+       unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;
+
 #define PVOP_CALL_ARGS                 PVOP_VCALL_ARGS
 
 #define PVOP_CALL_ARG1(x)              "a" ((unsigned long)(x))
@@ -480,8 +480,8 @@ int paravirt_disable_iospace(void);
 /* [re]ax isn't an arg, but the return val */
 #define PVOP_VCALL_ARGS                                                \
        unsigned long __edi = __edi, __esi = __esi,             \
-               __edx = __edx, __ecx = __ecx, __eax = __eax;    \
-       register void *__sp asm("rsp")
+               __edx = __edx, __ecx = __ecx, __eax = __eax;
+
 #define PVOP_CALL_ARGS         PVOP_VCALL_ARGS
 
 #define PVOP_CALL_ARG1(x)              "D" ((unsigned long)(x))
@@ -532,7 +532,7 @@ int paravirt_disable_iospace(void);
                        asm volatile(pre                                \
                                     paravirt_alt(PARAVIRT_CALL)        \
                                     post                               \
-                                    : call_clbr, "+r" (__sp)           \
+                                    : call_clbr, ASM_CALL_CONSTRAINT   \
                                     : paravirt_type(op),               \
                                       paravirt_clobber(clbr),          \
                                       ##__VA_ARGS__                    \
@@ -542,7 +542,7 @@ int paravirt_disable_iospace(void);
                        asm volatile(pre                                \
                                     paravirt_alt(PARAVIRT_CALL)        \
                                     post                               \
-                                    : call_clbr, "+r" (__sp)           \
+                                    : call_clbr, ASM_CALL_CONSTRAINT   \
                                     : paravirt_type(op),               \
                                       paravirt_clobber(clbr),          \
                                       ##__VA_ARGS__                    \
@@ -569,7 +569,7 @@ int paravirt_disable_iospace(void);
                asm volatile(pre                                        \
                             paravirt_alt(PARAVIRT_CALL)                \
                             post                                       \
-                            : call_clbr, "+r" (__sp)                   \
+                            : call_clbr, ASM_CALL_CONSTRAINT           \
                             : paravirt_type(op),                       \
                               paravirt_clobber(clbr),                  \
                               ##__VA_ARGS__                            \
index ec1f3c6511506ee1f0ff5240a9ff95d0e6fa68c1..4f44505dbf870e79d4689af9db2f537fe764b831 100644 (file)
@@ -100,19 +100,14 @@ static __always_inline bool should_resched(int preempt_offset)
 
 #ifdef CONFIG_PREEMPT
   extern asmlinkage void ___preempt_schedule(void);
-# define __preempt_schedule()                                  \
-({                                                             \
-       register void *__sp asm(_ASM_SP);                       \
-       asm volatile ("call ___preempt_schedule" : "+r"(__sp)); \
-})
+# define __preempt_schedule() \
+       asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT)
 
   extern asmlinkage void preempt_schedule(void);
   extern asmlinkage void ___preempt_schedule_notrace(void);
-# define __preempt_schedule_notrace()                                  \
-({                                                                     \
-       register void *__sp asm(_ASM_SP);                               \
-       asm volatile ("call ___preempt_schedule_notrace" : "+r"(__sp)); \
-})
+# define __preempt_schedule_notrace() \
+       asm volatile ("call ___preempt_schedule_notrace" : ASM_CALL_CONSTRAINT)
+
   extern asmlinkage void preempt_schedule_notrace(void);
 #endif
 
index 3fa26a61eabcef0dc95b19221ca5e76a6ba2a01a..b390ff76e58fd9f28c62ba5e3a8ab169ac18ad62 100644 (file)
@@ -677,8 +677,6 @@ static inline void sync_core(void)
         * Like all of Linux's memory ordering operations, this is a
         * compiler barrier as well.
         */
-       register void *__sp asm(_ASM_SP);
-
 #ifdef CONFIG_X86_32
        asm volatile (
                "pushfl\n\t"
@@ -686,7 +684,7 @@ static inline void sync_core(void)
                "pushl $1f\n\t"
                "iret\n\t"
                "1:"
-               : "+r" (__sp) : : "memory");
+               : ASM_CALL_CONSTRAINT : : "memory");
 #else
        unsigned int tmp;
 
@@ -703,7 +701,7 @@ static inline void sync_core(void)
                "iretq\n\t"
                UNWIND_HINT_RESTORE
                "1:"
-               : "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
+               : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
 #endif
 }
 
index a34e0d4b957d639afb5978863e57fefb74a173f2..7116b7931c7b807766fef6d820ec929da43940c7 100644 (file)
@@ -103,7 +103,6 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem)
 ({                                                     \
        long tmp;                                       \
        struct rw_semaphore* ret;                       \
-       register void *__sp asm(_ASM_SP);               \
                                                        \
        asm volatile("# beginning down_write\n\t"       \
                     LOCK_PREFIX "  xadd      %1,(%4)\n\t"      \
@@ -114,7 +113,8 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem)
                     "  call " slow_path "\n"           \
                     "1:\n"                             \
                     "# ending down_write"              \
-                    : "+m" (sem->count), "=d" (tmp), "=a" (ret), "+r" (__sp) \
+                    : "+m" (sem->count), "=d" (tmp),   \
+                      "=a" (ret), ASM_CALL_CONSTRAINT  \
                     : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \
                     : "memory", "cc");                 \
        ret;                                            \
index 184eb9894dae3f2cca10bfe1813a348614830305..78e8fcc87d4c62a686e27ab6c82451911e862496 100644 (file)
@@ -166,11 +166,11 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
 ({                                                                     \
        int __ret_gu;                                                   \
        register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX);            \
-       register void *__sp asm(_ASM_SP);                               \
        __chk_user_ptr(ptr);                                            \
        might_fault();                                                  \
        asm volatile("call __get_user_%P4"                              \
-                    : "=a" (__ret_gu), "=r" (__val_gu), "+r" (__sp)    \
+                    : "=a" (__ret_gu), "=r" (__val_gu),                \
+                       ASM_CALL_CONSTRAINT                             \
                     : "0" (ptr), "i" (sizeof(*(ptr))));                \
        (x) = (__force __typeof__(*(ptr))) __val_gu;                    \
        __builtin_expect(__ret_gu, 0);                                  \
index 9606688caa4bea8db0175fe71fe61d9706108aba..128a1a0b145050749e82a0096b7f24de7680ab29 100644 (file)
@@ -113,10 +113,9 @@ extern struct { char _entry[32]; } hypercall_page[];
        register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \
        register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \
        register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \
-       register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; \
-       register void *__sp asm(_ASM_SP);
+       register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5;
 
-#define __HYPERCALL_0PARAM     "=r" (__res), "+r" (__sp)
+#define __HYPERCALL_0PARAM     "=r" (__res), ASM_CALL_CONSTRAINT
 #define __HYPERCALL_1PARAM     __HYPERCALL_0PARAM, "+r" (__arg1)
 #define __HYPERCALL_2PARAM     __HYPERCALL_1PARAM, "+r" (__arg2)
 #define __HYPERCALL_3PARAM     __HYPERCALL_2PARAM, "+r" (__arg3)
index 16bf6655aa858e2815135ada11a462329a0df386..f23f13403f33049686de4cae3e546c0c53ca864d 100644 (file)
@@ -5296,7 +5296,6 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
 
 static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
 {
-       register void *__sp asm(_ASM_SP);
        ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
 
        if (!(ctxt->d & ByteOp))
@@ -5304,7 +5303,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
 
        asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
            : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
-             [fastop]"+S"(fop), "+r"(__sp)
+             [fastop]"+S"(fop), ASM_CALL_CONSTRAINT
            : "c"(ctxt->src2.val));
 
        ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
index 06c0c6d0541e9bf95eabbcaa8d20c8ec45f19496..6ee237f509dc7cdb757ef414993e7da18af8f08b 100644 (file)
@@ -9036,7 +9036,6 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 {
        u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-       register void *__sp asm(_ASM_SP);
 
        if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
                        == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
@@ -9065,7 +9064,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_X86_64
                        [sp]"=&r"(tmp),
 #endif
-                       "+r"(__sp)
+                       ASM_CALL_CONSTRAINT
                        :
                        [entry]"r"(entry),
                        [ss]"i"(__KERNEL_DS),
index b836a7274e123af88edd7b4f261da88609778be9..39567b5c33da3b09a8ab6a50509e7de92625d31b 100644 (file)
@@ -806,7 +806,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
        if (is_vmalloc_addr((void *)address) &&
            (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
             address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
-               register void *__sp asm("rsp");
                unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
                /*
                 * We're likely to be running with very little stack space
@@ -821,7 +820,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
                asm volatile ("movq %[stack], %%rsp\n\t"
                              "call handle_stack_overflow\n\t"
                              "1: jmp 1b"
-                             : "+r" (__sp)
+                             : ASM_CALL_CONSTRAINT
                              : "D" ("kernel stack overflow (page fault)"),
                                "S" (regs), "d" (address),
                                [stack] "rm" (stack));
index 6a1af43862df759239b8848921b0492e23b95e6b..3995735a878fe796f36513b2bb22b20deaf642d1 100644 (file)
@@ -194,10 +194,10 @@ they mean, and suggestions for how to fix them.
    If it's a GCC-compiled .c file, the error may be because the function
    uses an inline asm() statement which has a "call" instruction.  An
    asm() statement with a call instruction must declare the use of the
-   stack pointer in its output operand.  For example, on x86_64:
+   stack pointer in its output operand.  On x86_64, this means adding
+   the ASM_CALL_CONSTRAINT as an output constraint:
 
-     register void *__sp asm("rsp");
-     asm volatile("call func" : "+r" (__sp));
+     asm volatile("call func" : ASM_CALL_CONSTRAINT);
 
    Otherwise the stack frame may not get created before the call.