[PATCH] x86: make IOPL explicit
author Zachary Amsden <zach@vmware.com>
Sat, 3 Sep 2005 22:56:44 +0000 (15:56 -0700)
committer Linus Torvalds <torvalds@evo.osdl.org>
Mon, 5 Sep 2005 07:06:12 +0000 (00:06 -0700)
The pushf/popf pair in switch_to is used ONLY to switch IOPL.  Making this
explicit in C code is clearer.  This pushf/popf pair was added as a
bugfix for leaking IOPL to unprivileged processes when using
sysenter/sysexit based system calls (sysexit does not restore flags).

When requesting an IOPL change in sys_iopl(), it is just as easy to change
the current flags and the flags in the stack image (in case an IRET is
required), but there is no reason to force an IRET if we came in from the
SYSENTER path.

This change is the minimal solution for supporting a paravirtualized Linux
kernel that allows user processes to run with I/O privilege.  Other
solutions require radical rewrites of part of the low level fault / system
call handling code, or do not fully support sysenter based system calls.

Unfortunately, this added one field to the thread_struct.  But as a bonus,
on P4, the fastest time measured for switch_to() went from 312 to 260
cycles, a win of about 17% in the fast case through this performance
critical path.

Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
arch/i386/kernel/ioport.c
arch/i386/kernel/process.c
include/asm-i386/processor.h
include/asm-i386/system.h

index 8b25160393c1f557d99f60f6ccab11d487b25a4b..f2b37654777f522af8231e14a6520a572dcb9cfb 100644 (file)
@@ -132,6 +132,7 @@ asmlinkage long sys_iopl(unsigned long unused)
        volatile struct pt_regs * regs = (struct pt_regs *) &unused;
        unsigned int level = regs->ebx;
        unsigned int old = (regs->eflags >> 12) & 3;
+       struct thread_struct *t = &current->thread;
 
        if (level > 3)
                return -EINVAL;
@@ -140,8 +141,8 @@ asmlinkage long sys_iopl(unsigned long unused)
                if (!capable(CAP_SYS_RAWIO))
                        return -EPERM;
        }
-       regs->eflags = (regs->eflags &~ 0x3000UL) | (level << 12);
-       /* Make sure we return the long way (not sysenter) */
-       set_thread_flag(TIF_IRET);
+       t->iopl = level << 12;
+       regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl;
+       set_iopl_mask(t->iopl);
        return 0;
 }
index 66099780039388c80367c99f01431c1c7957cd72..b45cbf93d4391b49411cfdaecf7c1e084e184c33 100644 (file)
@@ -711,6 +711,12 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
        if (prev->gs | next->gs)
                loadsegment(gs, next->gs);
 
+       /*
+        * Restore IOPL if needed.
+        */
+       if (unlikely(prev->iopl != next->iopl))
+               set_iopl_mask(next->iopl);
+
        /*
         * Now maybe reload the debug registers
         */
index 992224bff54953852fe36ff605f1da4f0bfa335f..37bef8ed7bed12bd33bf3670ccf7991516aa7484 100644 (file)
@@ -455,6 +455,7 @@ struct thread_struct {
        unsigned int            saved_fs, saved_gs;
 /* IO permissions */
        unsigned long   *io_bitmap_ptr;
+       unsigned long   iopl;
 /* max allowed port in the bitmap, in bytes: */
        unsigned long   io_bitmap_max;
 };
@@ -511,6 +512,21 @@ static inline void load_esp0(struct tss_struct *tss, struct thread_struct *threa
                        : /* no output */                       \
                        :"r" (value))
 
+/*
+ * Set IOPL bits in EFLAGS from given mask (IOPL level already shifted left by 12)
+ */
+static inline void set_iopl_mask(unsigned mask)
+{
+       unsigned int reg;
+       __asm__ __volatile__ ("pushfl;"         /* push current EFLAGS ... */
+                             "popl %0;"        /* ... and pop it into reg */
+                             "andl %1, %0;"    /* clear the old IOPL bits */
+                             "orl %2, %0;"     /* OR in the caller's mask */
+                             "pushl %0;"       /* push the updated value ... */
+                             "popfl"           /* ... and load it back into EFLAGS */
+                               : "=&r" (reg)   /* scratch; early-clobber, written before inputs are read */
+                               : "i" (~X86_EFLAGS_IOPL), "r" (mask));
+}
 
 /* Forward declaration, a strange C thing */
 struct task_struct;
index 37fd2f8c71963dc78153292459d62a0d6a9860d0..acd5c26b69ba68f9a2964b59cdf01b86daab9fda 100644 (file)
@@ -14,8 +14,7 @@ extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struc
 
 #define switch_to(prev,next,last) do {                                 \
        unsigned long esi,edi;                                          \
-       asm volatile("pushfl\n\t"                                       \
-                    "pushl %%ebp\n\t"                                  \
+       asm volatile("pushl %%ebp\n\t"                                  \
                     "movl %%esp,%0\n\t"        /* save ESP */          \
                     "movl %5,%%esp\n\t"        /* restore ESP */       \
                     "movl $1f,%1\n\t"          /* save EIP */          \
@@ -23,7 +22,6 @@ extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struc
                     "jmp __switch_to\n"                                \
                     "1:\t"                                             \
                     "popl %%ebp\n\t"                                   \
-                    "popfl"                                            \
                     :"=m" (prev->thread.esp),"=m" (prev->thread.eip),  \
                      "=a" (last),"=S" (esi),"=D" (edi)                 \
                     :"m" (next->thread.esp),"m" (next->thread.eip),    \