s390: restore address space when returning to user space
author Heiko Carstens <heiko.carstens@de.ibm.com>
Fri, 17 Feb 2017 07:13:28 +0000 (08:13 +0100)
committer Martin Schwidefsky <schwidefsky@de.ibm.com>
Thu, 23 Feb 2017 09:06:38 +0000 (10:06 +0100)
Unbalanced set_fs usages (e.g. early exit from a function and a
forgotten set_fs(USER_DS) call) may lead to a situation where the
secondary asce is the kernel space asce when returning to user
space. This would allow user space to modify kernel space at will.

This would only be possible with the above mentioned kernel bug,
however we can detect this and fix the secondary asce before returning
to user space.

Therefore add a new CIF_ASCE_SECONDARY flag which is set and cleared
within set_fs. When returning to user space check if CIF_ASCE_SECONDARY
is set, which would indicate a bug. If it is set, print a message to the
console, fix up the secondary asce, and then return to user space.

This is similar to what is being discussed for x86 and arm:
"[RFC] syscalls: Restore address limit after a syscall".

Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
arch/s390/include/asm/processor.h
arch/s390/include/asm/uaccess.h
arch/s390/kernel/entry.S
arch/s390/kernel/entry.h
arch/s390/kernel/process.c

index 3c8fae02ac2de3fdd7ad84b9b1818426b0e8a016..c57c5c2f2484ab821a4f8aec2d4cf05af3209f70 100644 (file)
 
 #define CIF_MCCK_PENDING       0       /* machine check handling is pending */
 #define CIF_ASCE_PRIMARY       1       /* primary asce needs fixup / uaccess */
-#define CIF_NOHZ_DELAY         2       /* delay HZ disable for a tick */
-#define CIF_FPU                        3       /* restore FPU registers */
-#define CIF_IGNORE_IRQ         4       /* ignore interrupt (for udelay) */
-#define CIF_ENABLED_WAIT       5       /* in enabled wait state */
+#define CIF_ASCE_SECONDARY     2       /* secondary asce needs fixup / uaccess */
+#define CIF_NOHZ_DELAY         3       /* delay HZ disable for a tick */
+#define CIF_FPU                        4       /* restore FPU registers */
+#define CIF_IGNORE_IRQ         5       /* ignore interrupt (for udelay) */
+#define CIF_ENABLED_WAIT       6       /* in enabled wait state */
 
 #define _CIF_MCCK_PENDING      _BITUL(CIF_MCCK_PENDING)
 #define _CIF_ASCE_PRIMARY      _BITUL(CIF_ASCE_PRIMARY)
+#define _CIF_ASCE_SECONDARY    _BITUL(CIF_ASCE_SECONDARY)
 #define _CIF_NOHZ_DELAY                _BITUL(CIF_NOHZ_DELAY)
 #define _CIF_FPU               _BITUL(CIF_FPU)
 #define _CIF_IGNORE_IRQ                _BITUL(CIF_IGNORE_IRQ)
@@ -200,10 +202,12 @@ struct stack_frame {
 struct task_struct;
 struct mm_struct;
 struct seq_file;
+struct pt_regs;
 
 typedef int (*dump_trace_func_t)(void *data, unsigned long address, int reliable);
 void dump_trace(dump_trace_func_t func, void *data,
                struct task_struct *task, unsigned long sp);
+void show_registers(struct pt_regs *regs);
 
 void show_cacheinfo(struct seq_file *m);
 
index b2988fc60f65e53db21815e5e3536ee178db7d06..136932ff42502027820a94702a924d65b3049622 100644 (file)
@@ -14,6 +14,7 @@
  */
 #include <linux/sched.h>
 #include <linux/errno.h>
+#include <asm/processor.h>
 #include <asm/ctl_reg.h>
 
 #define VERIFY_READ     0
 
 #define get_ds()        (KERNEL_DS)
 #define get_fs()        (current->thread.mm_segment)
-
-#define set_fs(x)                                                      \
-do {                                                                   \
-       unsigned long __pto;                                            \
-       current->thread.mm_segment = (x);                               \
-       __pto = current->thread.mm_segment.ar4 ?                        \
-               S390_lowcore.user_asce : S390_lowcore.kernel_asce;      \
-       __ctl_load(__pto, 7, 7);                                        \
-} while (0)
-
 #define segment_eq(a,b) ((a).ar4 == (b).ar4)
 
+static inline void set_fs(mm_segment_t fs)
+{
+       current->thread.mm_segment = fs;
+       if (segment_eq(fs, KERNEL_DS)) {
+               set_cpu_flag(CIF_ASCE_SECONDARY);
+               __ctl_load(S390_lowcore.kernel_asce, 7, 7);
+       } else {
+               clear_cpu_flag(CIF_ASCE_SECONDARY);
+               __ctl_load(S390_lowcore.user_asce, 7, 7);
+       }
+}
+
 static inline int __range_ok(unsigned long addr, unsigned long size)
 {
        return 1;
index ae7d1a230abf3dee033b3341b2e7c596933eda37..dff2152350a7ebaaf3df6c8b000eb36b03afd19e 100644 (file)
@@ -50,7 +50,8 @@ _TIF_WORK     = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
                   _TIF_UPROBE)
 _TIF_TRACE     = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
                   _TIF_SYSCALL_TRACEPOINT)
-_CIF_WORK      = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | _CIF_FPU)
+_CIF_WORK      = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \
+                  _CIF_ASCE_SECONDARY | _CIF_FPU)
 _PIF_WORK      = (_PIF_PER_TRAP)
 
 #define BASED(name) name-cleanup_critical(%r13)
@@ -339,8 +340,8 @@ ENTRY(system_call)
        jo      .Lsysc_notify_resume
        TSTMSK  __LC_CPU_FLAGS,_CIF_FPU
        jo      .Lsysc_vxrs
-       TSTMSK  __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY
-       jo      .Lsysc_asce_primary
+       TSTMSK  __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
+       jnz     .Lsysc_asce
        j       .Lsysc_return           # beware of critical section cleanup
 
 #
@@ -358,12 +359,15 @@ ENTRY(system_call)
        jg      s390_handle_mcck        # TIF bit will be cleared by handler
 
 #
-# _CIF_ASCE_PRIMARY is set, load user space asce
+# _CIF_ASCE_PRIMARY and/or CIF_ASCE_SECONDARY set, load user space asce
 #
-.Lsysc_asce_primary:
+.Lsysc_asce:
        ni      __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY
        lctlg   %c1,%c1,__LC_USER_ASCE          # load primary asce
-       j       .Lsysc_return
+       TSTMSK  __LC_CPU_FLAGS,_CIF_ASCE_SECONDARY
+       jz      .Lsysc_return
+       larl    %r14,.Lsysc_return
+       jg      set_fs_fixup
 
 #
 # CIF_FPU is set, restore floating-point controls and floating-point registers.
@@ -661,8 +665,8 @@ ENTRY(io_int_handler)
        jo      .Lio_notify_resume
        TSTMSK  __LC_CPU_FLAGS,_CIF_FPU
        jo      .Lio_vxrs
-       TSTMSK  __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY
-       jo      .Lio_asce_primary
+       TSTMSK  __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
+       jnz     .Lio_asce
        j       .Lio_return             # beware of critical section cleanup
 
 #
@@ -675,12 +679,15 @@ ENTRY(io_int_handler)
        j       .Lio_return
 
 #
-# _CIF_ASCE_PRIMARY is set, load user space asce
+# _CIF_ASCE_PRIMARY and/or CIF_ASCE_SECONDARY set, load user space asce
 #
-.Lio_asce_primary:
+.Lio_asce:
        ni      __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY
        lctlg   %c1,%c1,__LC_USER_ASCE          # load primary asce
-       j       .Lio_return
+       TSTMSK  __LC_CPU_FLAGS,_CIF_ASCE_SECONDARY
+       jz      .Lio_return
+       larl    %r14,.Lio_return
+       jg      set_fs_fixup
 
 #
 # CIF_FPU is set, restore floating-point controls and floating-point registers.
index e79f030dd276381bfeea25643077c607be951f1c..33f9018653261c33e819ecdfc7ad8645b965cbde 100644 (file)
@@ -80,5 +80,6 @@ long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
 DECLARE_PER_CPU(u64, mt_cycles[8]);
 
 void verify_facilities(void);
+void set_fs_fixup(void);
 
 #endif /* _ENTRY_H */
index c5b86b4a1a8b613716dbc7a8947ec31dbd8c147c..a49dc2bdeb17db67d597742d9a6bd4fc976e4dbf 100644 (file)
@@ -234,3 +234,16 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
        ret = PAGE_ALIGN(mm->brk + brk_rnd());
        return (ret > mm->brk) ? ret : mm->brk;
 }
+
+void set_fs_fixup(void)
+{
+       struct pt_regs *regs = current_pt_regs();
+       static bool warned;
+
+       set_fs(USER_DS);
+       if (warned)
+               return;
+       WARN(1, "Unbalanced set_fs - int code: 0x%x\n", regs->int_code);
+       show_registers(regs);
+       warned = true;
+}