x86/paravirt: split sysret and sysexit
author Jeremy Fitzhardinge <jeremy@goop.org>
Wed, 25 Jun 2008 04:19:26 +0000 (00:19 -0400)
committer Ingo Molnar <mingo@elte.hu>
Tue, 8 Jul 2008 11:13:15 +0000 (13:13 +0200)
Don't conflate sysret and sysexit; they're different instructions with
different semantics, and may be in use at the same time (at least
within the same kernel, depending on whether it's an Intel or AMD
system).

sysexit - just returns to userspace and does no register restoration of
    any kind; interrupts must be explicitly and atomically enabled.

sysret - reloads flags from r11, so there is no need to explicitly enable
    interrupts on 64-bit; responsible for restoring usermode %gs.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citirx.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/kernel/asm-offsets_32.c
arch/x86/kernel/asm-offsets_64.c
arch/x86/kernel/entry_32.S
arch/x86/kernel/entry_64.S
arch/x86/kernel/paravirt.c
arch/x86/kernel/paravirt_patch_32.c
arch/x86/kernel/paravirt_patch_64.c
arch/x86/kernel/vmi_32.c
arch/x86/xen/enlighten.c
include/asm-x86/irqflags.h
include/asm-x86/paravirt.h

index 92588083950f8106bd08aafb081ebe46ce3023fd..6649d09ad88f8830df49427c7d8763196eb0a0f6 100644 (file)
@@ -111,7 +111,7 @@ void foo(void)
        OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
        OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
        OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
-       OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret);
+       OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
        OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
 #endif
 
index f126c05d6170a17e8fdd880b5973324eb8ebb7d0..27ac2deca465583870fc1c321ff398d2993b5d69 100644 (file)
@@ -62,7 +62,7 @@ int main(void)
        OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
        OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
        OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
-       OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret);
+       OFFSET(PV_CPU_usersp_sysret, pv_cpu_ops, usersp_sysret);
        OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
        OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
 #endif
index 159a1c76d2bdde81a2b055462af1fe0396d27f46..53393c306e11b553f1691c4ee607873f18c894af 100644 (file)
@@ -58,7 +58,7 @@
  * for paravirtualization.  The following will never clobber any registers:
  *   INTERRUPT_RETURN (aka. "iret")
  *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
- *   ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit").
+ *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
  *
  * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
  * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
@@ -349,7 +349,7 @@ sysenter_past_esp:
        xorl %ebp,%ebp
        TRACE_IRQS_ON
 1:     mov  PT_FS(%esp), %fs
-       ENABLE_INTERRUPTS_SYSCALL_RET
+       ENABLE_INTERRUPTS_SYSEXIT
        CFI_ENDPROC
 .pushsection .fixup,"ax"
 2:     movl $0,PT_FS(%esp)
@@ -874,10 +874,10 @@ ENTRY(native_iret)
 .previous
 END(native_iret)
 
-ENTRY(native_irq_enable_syscall_ret)
+ENTRY(native_irq_enable_sysexit)
        sti
        sysexit
-END(native_irq_enable_syscall_ret)
+END(native_irq_enable_sysexit)
 #endif
 
 KPROBE_ENTRY(int3)
index 6d1101469e9757f72311fa3230043dd5534433f7..0056bc4c61a9b22c1d713f2ea3b8924c792f00e3 100644 (file)
@@ -59,7 +59,7 @@
 #endif 
 
 #ifdef CONFIG_PARAVIRT
-ENTRY(native_irq_enable_syscall_ret)
+ENTRY(native_usersp_sysret)
        movq    %gs:pda_oldrsp,%rsp
        swapgs
        sysretq
@@ -275,7 +275,7 @@ sysret_check:
        CFI_REGISTER    rip,rcx
        RESTORE_ARGS 0,-ARG_SKIP,1
        /*CFI_REGISTER  rflags,r11*/
-       ENABLE_INTERRUPTS_SYSCALL_RET
+       USERSP_SYSRET
 
        CFI_RESTORE_STATE
        /* Handle reschedules */
index 78c9a1b9e6b011df976cd333d3bab2574c94af1b..565ee7a990ea85f3b63ac0b44624010fd3946865 100644 (file)
@@ -140,7 +140,8 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
                /* If the operation is a nop, then nop the callsite */
                ret = paravirt_patch_nop();
        else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
-                type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret))
+                type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
+                type == PARAVIRT_PATCH(pv_cpu_ops.usersp_sysret))
                /* If operation requires a jmp, then jmp */
                ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
        else
@@ -191,7 +192,8 @@ static void native_flush_tlb_single(unsigned long addr)
 
 /* These are in entry.S */
 extern void native_iret(void);
-extern void native_irq_enable_syscall_ret(void);
+extern void native_irq_enable_sysexit(void);
+extern void native_usersp_sysret(void);
 
 static int __init print_banner(void)
 {
@@ -327,7 +329,11 @@ struct pv_cpu_ops pv_cpu_ops = {
        .write_idt_entry = native_write_idt_entry,
        .load_sp0 = native_load_sp0,
 
-       .irq_enable_syscall_ret = native_irq_enable_syscall_ret,
+#ifdef CONFIG_X86_32
+       .irq_enable_sysexit = native_irq_enable_sysexit,
+#else
+       .usersp_sysret = native_usersp_sysret,
+#endif
        .iret = native_iret,
        .swapgs = native_swapgs,
 
index 82fc5fcab4f4eb7cba501db2e7d6a9a89d027020..58262218781bd3582b1b99f7d42986170e49e23c 100644 (file)
@@ -5,7 +5,7 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
 DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
 DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
 DEF_NATIVE(pv_cpu_ops, iret, "iret");
-DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "sti; sysexit");
+DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
 DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
@@ -29,7 +29,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
                PATCH_SITE(pv_irq_ops, restore_fl);
                PATCH_SITE(pv_irq_ops, save_fl);
                PATCH_SITE(pv_cpu_ops, iret);
-               PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret);
+               PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
                PATCH_SITE(pv_mmu_ops, read_cr2);
                PATCH_SITE(pv_mmu_ops, read_cr3);
                PATCH_SITE(pv_mmu_ops, write_cr3);
index 7d904e138d7e482cc9bb83e67db1b9718273a576..4a170552b852924f5a896eb688defbf177915fc5 100644 (file)
@@ -15,7 +15,7 @@ DEF_NATIVE(pv_cpu_ops, clts, "clts");
 DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
 
 /* the three commands give us more control to how to return from a syscall */
-DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "movq %gs:" __stringify(pda_oldrsp) ", %rsp; swapgs; sysretq;");
+DEF_NATIVE(pv_cpu_ops, usersp_sysret, "movq %gs:" __stringify(pda_oldrsp) ", %rsp; swapgs; sysretq;");
 DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
 
 unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
@@ -35,7 +35,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
                PATCH_SITE(pv_irq_ops, irq_enable);
                PATCH_SITE(pv_irq_ops, irq_disable);
                PATCH_SITE(pv_cpu_ops, iret);
-               PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret);
+               PATCH_SITE(pv_cpu_ops, usersp_sysret);
                PATCH_SITE(pv_cpu_ops, swapgs);
                PATCH_SITE(pv_mmu_ops, read_cr2);
                PATCH_SITE(pv_mmu_ops, read_cr3);
index 956f38927aa7c5533cc41fd3c2ba0ea807ccb6dd..946bf13b44abf9be798df19449edff6804efe43d 100644 (file)
@@ -151,7 +151,7 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
                                              insns, ip);
                case PARAVIRT_PATCH(pv_cpu_ops.iret):
                        return patch_internal(VMI_CALL_IRET, len, insns, ip);
-               case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret):
+               case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit):
                        return patch_internal(VMI_CALL_SYSEXIT, len, insns, ip);
                default:
                        break;
@@ -896,7 +896,7 @@ static inline int __init activate_vmi(void)
         * the backend.  They are performance critical anyway, so requiring
         * a patch is not a big problem.
         */
-       pv_cpu_ops.irq_enable_syscall_ret = (void *)0xfeedbab0;
+       pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0;
        pv_cpu_ops.iret = (void *)0xbadbab0;
 
 #ifdef CONFIG_SMP
index d62f14e2070808b5a8dfe2338c981060f02f89e5..119c88fa769d90bf605b7cbe6185ebe1755202de 100644 (file)
@@ -1089,7 +1089,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
        .read_pmc = native_read_pmc,
 
        .iret = xen_iret,
-       .irq_enable_syscall_ret = xen_sysexit,
+       .irq_enable_sysexit = xen_sysexit,
 
        .load_tr_desc = paravirt_nop,
        .set_ldt = xen_set_ldt,
index c242527f970e8e7f8ae33e6dc080216560fbaa96..99ee5256a7e3b4d54c2bcf47f19f6f800ec9215c 100644 (file)
@@ -112,13 +112,13 @@ static inline unsigned long __raw_local_irq_save(void)
 
 #ifdef CONFIG_X86_64
 #define INTERRUPT_RETURN       iretq
-#define ENABLE_INTERRUPTS_SYSCALL_RET                  \
+#define USERSP_SYSRET                                  \
                        movq    %gs:pda_oldrsp, %rsp;   \
                        swapgs;                         \
                        sysretq;
 #else
 #define INTERRUPT_RETURN               iret
-#define ENABLE_INTERRUPTS_SYSCALL_RET  sti; sysexit
+#define ENABLE_INTERRUPTS_SYSEXIT      sti; sysexit
 #define GET_CR0_INTO_EAX               movl %cr0, %eax
 #endif
 
index 82cdcde4b222f5f41d802f75629ee8b300f9328b..2668903b70f5e24d601b070d76fcf3905ab7a3ce 100644 (file)
@@ -141,8 +141,9 @@ struct pv_cpu_ops {
        u64 (*read_pmc)(int counter);
        unsigned long long (*read_tscp)(unsigned int *aux);
 
-       /* These two are jmp to, not actually called. */
-       void (*irq_enable_syscall_ret)(void);
+       /* These three are jmp to, not actually called. */
+       void (*irq_enable_sysexit)(void);
+       void (*usersp_sysret)(void);
        void (*iret)(void);
 
        void (*swapgs)(void);
@@ -1480,10 +1481,10 @@ static inline unsigned long __raw_local_irq_save(void)
                  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);     \
                  PV_RESTORE_REGS;)
 
-#define ENABLE_INTERRUPTS_SYSCALL_RET                                  \
-       PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_syscall_ret),\
+#define ENABLE_INTERRUPTS_SYSEXIT                                      \
+       PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),    \
                  CLBR_NONE,                                            \
-                 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_syscall_ret))
+                 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
 
 
 #ifdef CONFIG_X86_32
@@ -1504,6 +1505,10 @@ static inline unsigned long __raw_local_irq_save(void)
        movq %rax, %rcx;                                \
        xorq %rax, %rax;
 
+#define USERSP_SYSRET                                                  \
+       PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usersp_sysret),         \
+                 CLBR_NONE,                                            \
+                 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usersp_sysret))
 #endif
 
 #endif /* __ASSEMBLY__ */