s390: add a system call for guarded storage
author Martin Schwidefsky <schwidefsky@de.ibm.com>
Tue, 26 Jan 2016 13:10:34 +0000 (14:10 +0100)
committer Martin Schwidefsky <schwidefsky@de.ibm.com>
Wed, 22 Mar 2017 07:14:25 +0000 (08:14 +0100)
This adds a new system call to enable the use of guarded storage for
user space processes. The system call takes two arguments, a command
and a pointer to a guarded storage control block:

    s390_guarded_storage(int command, struct gs_cb *gs_cb);

The second argument is relevant only for the GS_SET_BC_CB command.

The commands in detail:

0 - GS_ENABLE
    Enable the guarded storage facility for the current task. The
    initial content of the guarded storage control block will be
    all zeros. After enablement, user space code can use the
    load-guarded-storage-controls instruction (LGSC) to load an
    arbitrary control block. While a task is enabled the kernel
    will save and restore the current content of the guarded
    storage registers on context switch.
1 - GS_DISABLE
    Disables the use of the guarded storage facility for the current
    task. The kernel will cease to save and restore the content of
    the guarded storage registers, the task specific content of
    these registers is lost.
2 - GS_SET_BC_CB
    Set a broadcast guarded storage control block. This is called
    per thread and stores a specific guarded storage control block
    in the task struct of the current task. This control block will
    be used for the broadcast event GS_BROADCAST.
3 - GS_CLEAR_BC_CB
    Clears the broadcast guarded storage control block. The guarded
    storage control block that was established by GS_SET_BC_CB is
    removed from the task struct.
4 - GS_BROADCAST
    Sends a broadcast to all thread siblings of the current task.
    Every sibling that has established a broadcast guarded storage
    control block will load this control block and will be enabled
    for guarded storage. The broadcast guarded storage control block
    is used up, a second broadcast without a refresh of the stored
    control block with GS_SET_BC_CB will not have any effect.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
27 files changed:
arch/s390/include/asm/elf.h
arch/s390/include/asm/lowcore.h
arch/s390/include/asm/nmi.h
arch/s390/include/asm/processor.h
arch/s390/include/asm/setup.h
arch/s390/include/asm/switch_to.h
arch/s390/include/asm/thread_info.h
arch/s390/include/uapi/asm/Kbuild
arch/s390/include/uapi/asm/guarded_storage.h [new file with mode: 0644]
arch/s390/include/uapi/asm/unistd.h
arch/s390/kernel/Makefile
arch/s390/kernel/asm-offsets.c
arch/s390/kernel/compat_wrapper.c
arch/s390/kernel/early.c
arch/s390/kernel/entry.S
arch/s390/kernel/entry.h
arch/s390/kernel/guarded_storage.c [new file with mode: 0644]
arch/s390/kernel/machine_kexec.c
arch/s390/kernel/nmi.c
arch/s390/kernel/process.c
arch/s390/kernel/processor.c
arch/s390/kernel/ptrace.c
arch/s390/kernel/setup.c
arch/s390/kernel/smp.c
arch/s390/kernel/syscalls.S
arch/s390/kvm/interrupt.c
include/uapi/linux/elf.h

index 1d48880b3cc14292bcf4b846fe2917b09bea388c..e8f62304176957ac217a5524c49e5b779c352b45 100644 (file)
 #define HWCAP_S390_VXRS                2048
 #define HWCAP_S390_VXRS_BCD    4096
 #define HWCAP_S390_VXRS_EXT    8192
+#define HWCAP_S390_GS          16384
 
 /* Internal bits, not exposed via elf */
 #define HWCAP_INT_SIE          1UL
index 61261e0e95c069abb7c8cb2a310f608af9ee045e..8a5b082797f89ad0e456211de630a06ab98140f8 100644 (file)
@@ -157,8 +157,8 @@ struct lowcore {
        __u64   stfle_fac_list[32];             /* 0x0f00 */
        __u8    pad_0x1000[0x11b0-0x1000];      /* 0x1000 */
 
-       /* Pointer to vector register save area */
-       __u64   vector_save_area_addr;          /* 0x11b0 */
+       /* Pointer to the machine check extended save area */
+       __u64   mcesad;                         /* 0x11b0 */
 
        /* 64 bit extparam used for pfault/diag 250: defined by architecture */
        __u64   ext_params2;                    /* 0x11B8 */
@@ -182,10 +182,7 @@ struct lowcore {
 
        /* Transaction abort diagnostic block */
        __u8    pgm_tdb[256];                   /* 0x1800 */
-       __u8    pad_0x1900[0x1c00-0x1900];      /* 0x1900 */
-
-       /* Software defined save area for vector registers */
-       __u8    vector_save_area[1024];         /* 0x1c00 */
+       __u8    pad_0x1900[0x2000-0x1900];      /* 0x1900 */
 } __packed;
 
 #define S390_lowcore (*((struct lowcore *) 0))
index b75fd910386ab81858c71c89f3abb5c40b70b394..e3e8895f5d3ea36860c183304d412172d15ea46a 100644 (file)
@@ -58,7 +58,9 @@ union mci {
                u64 ie :  1; /* 32 indirect storage error */
                u64 ar :  1; /* 33 access register validity */
                u64 da :  1; /* 34 delayed access exception */
-               u64    :  7; /* 35-41 */
+               u64    :  1; /* 35 */
+               u64 gs :  1; /* 36 guarded storage registers */
+               u64    :  5; /* 37-41 */
                u64 pr :  1; /* 42 tod programmable register validity */
                u64 fc :  1; /* 43 fp control register validity */
                u64 ap :  1; /* 44 ancillary report */
@@ -69,6 +71,14 @@ union mci {
        };
 };
 
+#define MCESA_ORIGIN_MASK      (~0x3ffUL)
+#define MCESA_LC_MASK          (0xfUL)
+/*
+ * Layout of the machine check extended save area: a 1024 byte vector
+ * register save area followed by a 32 byte guarded storage save area.
+ */
+struct mcesa {
+       u8 vector_save_area[1024];
+       u8 guarded_storage_save_area[32];
+};
+
 struct pt_regs;
 
 extern void s390_handle_mcck(void);
index e4988710aa86219a2e4e7e0d9f2338584ae0077a..cc101f9371cbc20dfb8dd3bfdec838e3d8cd76eb 100644 (file)
@@ -135,6 +135,8 @@ struct thread_struct {
        struct list_head list;
        /* cpu runtime instrumentation */
        struct runtime_instr_cb *ri_cb;
+       struct gs_cb *gs_cb;            /* Current guarded storage cb */
+       struct gs_cb *gs_bc_cb;         /* Broadcast guarded storage cb */
        unsigned char trap_tdb[256];    /* Transaction abort diagnose block */
        /*
         * Warning: 'fpu' is dynamically-sized. It *MUST* be at
@@ -215,6 +217,9 @@ void show_cacheinfo(struct seq_file *m);
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
+/* Free guarded storage control block for current */
+void exit_thread_gs(void);
+
 /*
  * Return saved PC of a blocked thread.
  */
index 30bdb5a027f3106b93638db082571b119a85ae6f..383bd8358a8c78d2c01095245071963d0133388f 100644 (file)
@@ -31,6 +31,7 @@
 #define MACHINE_FLAG_VX                _BITUL(13)
 #define MACHINE_FLAG_CAD       _BITUL(14)
 #define MACHINE_FLAG_NX                _BITUL(15)
+#define MACHINE_FLAG_GS                _BITUL(16)
 
 #define LPP_MAGIC              _BITUL(31)
 #define LPP_PFAULT_PID_MASK    _AC(0xffffffff, UL)
@@ -70,6 +71,7 @@ extern void detect_memory_memblock(void);
 #define MACHINE_HAS_VX         (S390_lowcore.machine_flags & MACHINE_FLAG_VX)
 #define MACHINE_HAS_CAD                (S390_lowcore.machine_flags & MACHINE_FLAG_CAD)
 #define MACHINE_HAS_NX         (S390_lowcore.machine_flags & MACHINE_FLAG_NX)
+#define MACHINE_HAS_GS         (S390_lowcore.machine_flags & MACHINE_FLAG_GS)
 
 /*
  * Console mode. Override with conmode=
index 12d45f0cfdd931e3d8049c4333b1f359e3539443..f6c2b5814ab0d14cf0f70977d8d29607a6ca7c00 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/thread_info.h>
 #include <asm/fpu/api.h>
 #include <asm/ptrace.h>
+#include <asm/guarded_storage.h>
 
 extern struct task_struct *__switch_to(void *, void *);
 extern void update_cr_regs(struct task_struct *task);
@@ -33,12 +34,14 @@ static inline void restore_access_regs(unsigned int *acrs)
                save_fpu_regs();                                        \
                save_access_regs(&prev->thread.acrs[0]);                \
                save_ri_cb(prev->thread.ri_cb);                         \
+               save_gs_cb(prev->thread.gs_cb);                         \
        }                                                               \
        if (next->mm) {                                                 \
                update_cr_regs(next);                                   \
                set_cpu_flag(CIF_FPU);                                  \
                restore_access_regs(&next->thread.acrs[0]);             \
                restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);  \
+               restore_gs_cb(next->thread.gs_cb);                      \
        }                                                               \
        prev = __switch_to(prev,next);                                  \
 } while (0)
index a5b54a445eb804f55ba57301a0b4ac8280ee96b9..f36e6e2b73f053c33da4d910fe0e89e3cfdfd746 100644 (file)
@@ -54,11 +54,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 #define TIF_NOTIFY_RESUME      0       /* callback before returning to user */
 #define TIF_SIGPENDING         1       /* signal pending */
 #define TIF_NEED_RESCHED       2       /* rescheduling necessary */
-#define TIF_SYSCALL_TRACE      3       /* syscall trace active */
-#define TIF_SYSCALL_AUDIT      4       /* syscall auditing active */
-#define TIF_SECCOMP            5       /* secure computing */
-#define TIF_SYSCALL_TRACEPOINT 6       /* syscall tracepoint instrumentation */
-#define TIF_UPROBE             7       /* breakpointed or single-stepping */
+#define TIF_UPROBE             3       /* breakpointed or single-stepping */
+#define TIF_GUARDED_STORAGE    4       /* load guarded storage control block */
+#define TIF_SYSCALL_TRACE      8       /* syscall trace active */
+#define TIF_SYSCALL_AUDIT      9       /* syscall auditing active */
+#define TIF_SECCOMP            10      /* secure computing */
+#define TIF_SYSCALL_TRACEPOINT 11      /* syscall tracepoint instrumentation */
 #define TIF_31BIT              16      /* 32bit process */
 #define TIF_MEMDIE             17      /* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK    18      /* restore signal mask in do_signal() */
@@ -76,5 +77,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 #define _TIF_UPROBE            _BITUL(TIF_UPROBE)
 #define _TIF_31BIT             _BITUL(TIF_31BIT)
 #define _TIF_SINGLE_STEP       _BITUL(TIF_SINGLE_STEP)
+#define _TIF_GUARDED_STORAGE   _BITUL(TIF_GUARDED_STORAGE)
 
 #endif /* _ASM_THREAD_INFO_H */
index 6848ba5c1454f347c8f4053c276a41bd919506ea..86b761e583e397d1d2b3fc5aac3ef2e22b70b48a 100644 (file)
@@ -12,6 +12,7 @@ header-y += dasd.h
 header-y += debug.h
 header-y += errno.h
 header-y += fcntl.h
+header-y += guarded_storage.h
 header-y += hypfs.h
 header-y += ioctl.h
 header-y += ioctls.h
diff --git a/arch/s390/include/uapi/asm/guarded_storage.h b/arch/s390/include/uapi/asm/guarded_storage.h
new file mode 100644 (file)
index 0000000..852850e
--- /dev/null
@@ -0,0 +1,77 @@
+#ifndef _GUARDED_STORAGE_H
+#define _GUARDED_STORAGE_H
+
+#include <linux/types.h>
+/*
+ * Guarded storage control block: four 64-bit words, used as the memory
+ * operand of load_gs_cb() and store_gs_cb() in this header.
+ * NOTE(review): field meanings are not visible here; the names suggest
+ * designation (gsd), section mask (gssm) and event parameter list
+ * address (gs_epl_a) - confirm against the architecture documentation.
+ */
+struct gs_cb {
+       __u64 reserved;
+       __u64 gsd;
+       __u64 gssm;
+       __u64 gs_epl_a;
+};
+/*
+ * Guarded storage event parameter list (going by the name; nothing in
+ * this header reads or writes it). Each of the three flag bytes gs_eam,
+ * gs_eci and gs_eai is a union of the raw byte with an anonymous
+ * bit-field struct that names individual bits of that byte; five 64-bit
+ * words follow the 32-bit pad.
+ */
+struct gs_epl {
+       __u8 pad1;
+       union {
+               __u8 gs_eam;
+               struct {
+                       __u8    : 6;
+                       __u8 e  : 1;
+                       __u8 b  : 1;
+               };
+       };
+       union {
+               __u8 gs_eci;
+               struct {
+                       __u8 tx : 1;
+                       __u8 cx : 1;
+                       __u8    : 5;
+                       __u8 in : 1;
+               };
+       };
+       union {
+               __u8 gs_eai;
+               struct {
+                       __u8    : 1;
+                       __u8 t  : 1;
+                       __u8 as : 2;
+                       __u8 ar : 4;
+               };
+       };
+       __u32 pad2;
+       __u64 gs_eha;
+       __u64 gs_eia;
+       __u64 gs_eoa;
+       __u64 gs_eir;
+       __u64 gs_era;
+};
+
+#define GS_ENABLE      0
+#define        GS_DISABLE      1
+#define GS_SET_BC_CB   2
+#define GS_CLEAR_BC_CB 3
+#define GS_BROADCAST   4
+/*
+ * Load the guarded storage controls from *gs_cb. The instruction is
+ * emitted via .insn with *gs_cb as its memory operand. gs_cb must not
+ * be NULL; restore_gs_cb() is the NULL-tolerant variant.
+ */
+static inline void load_gs_cb(struct gs_cb *gs_cb)
+{
+       asm volatile(".insn rxy,0xe3000000004d,0,%0" : : "Q" (*gs_cb));
+}
+/*
+ * Store the guarded storage controls to *gs_cb. gs_cb must not be
+ * NULL; save_gs_cb() is the NULL-tolerant variant.
+ * NOTE(review): *gs_cb is passed as an input operand ("Q") although
+ * the instruction is used to save state into it; an output constraint
+ * or a "memory" clobber may be needed - confirm.
+ */
+static inline void store_gs_cb(struct gs_cb *gs_cb)
+{
+       asm volatile(".insn rxy,0xe30000000049,0,%0" : : "Q" (*gs_cb));
+}
+/*
+ * Store the guarded storage controls to *gs_cb if gs_cb is non-NULL,
+ * otherwise do nothing.
+ */
+static inline void save_gs_cb(struct gs_cb *gs_cb)
+{
+       if (gs_cb)
+               store_gs_cb(gs_cb);
+}
+/*
+ * Load the guarded storage controls from *gs_cb if gs_cb is non-NULL,
+ * otherwise do nothing.
+ */
+static inline void restore_gs_cb(struct gs_cb *gs_cb)
+{
+       if (gs_cb)
+               load_gs_cb(gs_cb);
+}
+
+#endif /* _GUARDED_STORAGE_H */
index 152de9b796e149ed3745f41351a5cc5e637bb55e..ea42290e7d51f7673b194c32ebda31c2158b21e2 100644 (file)
 #define __NR_copy_file_range   375
 #define __NR_preadv2           376
 #define __NR_pwritev2          377
-/* Number 378 is reserved for guarded storage */
+#define __NR_s390_guarded_storage      378
 #define __NR_statx             379
 #define NR_syscalls 380
 
index 060ce548fe8b7d2ece352097130db635b7680c32..aa5adbdaf200773537706ca32472f485a054cdc6 100644 (file)
@@ -57,7 +57,7 @@ obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
 obj-y  += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
 obj-y  += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o
 obj-y  += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
-obj-y  += runtime_instr.o cache.o fpu.o dumpstack.o
+obj-y  += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o
 obj-y  += entry.o reipl.o relocate_kernel.o
 
 extra-y                                += head.o head64.o vmlinux.lds
index c4b3570ded5b358547527843cbd7ab25a2328a6c..6bb29633e1f1b701aa1b8737339c350f45e65464 100644 (file)
@@ -175,7 +175,7 @@ int main(void)
        /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
        OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
        /* hardware defined lowcore locations 0x1000 - 0x18ff */
-       OFFSET(__LC_VX_SAVE_AREA_ADDR, lowcore, vector_save_area_addr);
+       OFFSET(__LC_MCESAD, lowcore, mcesad);
        OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2);
        OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area);
        OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area);
index e89cc2e71db1693c4c03f6e6ccc37ba9297b4012..986642a3543bb0a1e670613ccc50a3804d452480 100644 (file)
@@ -178,4 +178,5 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
 COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
 COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
 COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
+COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb);
 COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer);
index 4e65c79cc5f2111cf0428ab2a876c9afe963d1d8..95298a41076f364a33b93dfd7b670629e45f5192 100644 (file)
@@ -358,6 +358,8 @@ static __init void detect_machine_facilities(void)
                S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
                __ctl_set_bit(0, 20);
        }
+       if (test_facility(133))
+               S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
 }
 
 static inline void save_vector_registers(void)
index 6a7d737d514c4c0064ddd8ef1ca80b824ae60c0c..fa8b8f28e08b514369bde5bf3396b213bb62efd8 100644 (file)
@@ -47,7 +47,7 @@ STACK_SIZE  = 1 << STACK_SHIFT
 STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
 
 _TIF_WORK      = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
-                  _TIF_UPROBE)
+                  _TIF_UPROBE | _TIF_GUARDED_STORAGE)
 _TIF_TRACE     = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
                   _TIF_SYSCALL_TRACEPOINT)
 _CIF_WORK      = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \
@@ -332,6 +332,8 @@ ENTRY(system_call)
        TSTMSK  __TI_flags(%r12),_TIF_UPROBE
        jo      .Lsysc_uprobe_notify
 #endif
+       TSTMSK  __TI_flags(%r12),_TIF_GUARDED_STORAGE
+       jo      .Lsysc_guarded_storage
        TSTMSK  __PT_FLAGS(%r11),_PIF_PER_TRAP
        jo      .Lsysc_singlestep
        TSTMSK  __TI_flags(%r12),_TIF_SIGPENDING
@@ -408,6 +410,14 @@ ENTRY(system_call)
        jg      uprobe_notify_resume
 #endif
 
+#
+# _TIF_GUARDED_STORAGE is set, call gs_load_bc_cb
+#
+.Lsysc_guarded_storage:
+       lgr     %r2,%r11                # pass pointer to pt_regs
+       larl    %r14,.Lsysc_return
+       jg      gs_load_bc_cb
+
 #
 # _PIF_PER_TRAP is set, call do_per_trap
 #
@@ -663,6 +673,8 @@ ENTRY(io_int_handler)
        jo      .Lio_sigpending
        TSTMSK  __TI_flags(%r12),_TIF_NOTIFY_RESUME
        jo      .Lio_notify_resume
+       TSTMSK  __TI_flags(%r12),_TIF_GUARDED_STORAGE
+       jo      .Lio_guarded_storage
        TSTMSK  __LC_CPU_FLAGS,_CIF_FPU
        jo      .Lio_vxrs
        TSTMSK  __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
@@ -696,6 +708,18 @@ ENTRY(io_int_handler)
        larl    %r14,.Lio_return
        jg      load_fpu_regs
 
+#
+# _TIF_GUARDED_STORAGE is set, call gs_load_bc_cb
+#
+.Lio_guarded_storage:
+       # TRACE_IRQS_ON already done at .Lio_return
+       ssm     __LC_SVC_NEW_PSW        # reenable interrupts
+       lgr     %r2,%r11                # pass pointer to pt_regs
+       brasl   %r14,gs_load_bc_cb
+       ssm     __LC_PGM_NEW_PSW        # disable I/O and ext. interrupts
+       TRACE_IRQS_OFF
+       j       .Lio_return
+
 #
 # _TIF_NEED_RESCHED is set, call schedule
 #
index 33f9018653261c33e819ecdfc7ad8645b965cbde..dbf5f7e182469da8754b6c407396798938bd839c 100644 (file)
@@ -74,12 +74,14 @@ long sys_sigreturn(void);
 
 long sys_s390_personality(unsigned int personality);
 long sys_s390_runtime_instr(int command, int signum);
+long sys_s390_guarded_storage(int command, struct gs_cb __user *);
 long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t);
 long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
 
 DECLARE_PER_CPU(u64, mt_cycles[8]);
 
 void verify_facilities(void);
+void gs_load_bc_cb(struct pt_regs *regs);
 void set_fs_fixup(void);
 
 #endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c
new file mode 100644 (file)
index 0000000..6f06474
--- /dev/null
@@ -0,0 +1,128 @@
+/*
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/guarded_storage.h>
+#include "entry.h"
+/*
+ * Free the current and the broadcast guarded storage control block of
+ * the current task and reset both pointers to NULL.
+ */
+void exit_thread_gs(void)
+{
+       kfree(current->thread.gs_cb);
+       kfree(current->thread.gs_bc_cb);
+       current->thread.gs_cb = current->thread.gs_bc_cb = NULL;
+}
+/*
+ * GS_ENABLE: if the current task has no guarded storage control block,
+ * allocate a zeroed one, set bit 4 in control register 2, load the
+ * block and record it in current->thread.gs_cb. Does nothing if a
+ * control block already exists.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails.
+ */
+static int gs_enable(void)
+{
+       struct gs_cb *gs_cb;
+
+       if (!current->thread.gs_cb) {
+               gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL);
+               if (!gs_cb)
+                       return -ENOMEM;
+               gs_cb->gsd = 25;        /* NOTE(review): meaning of 25 not evident here - confirm */
+               preempt_disable();      /* CR2 update, load and pointer store as one unit */
+               __ctl_set_bit(2, 4);
+               load_gs_cb(gs_cb);
+               current->thread.gs_cb = gs_cb;
+               preempt_enable();
+       }
+       return 0;
+}
+/*
+ * GS_DISABLE: if the current task has a guarded storage control block,
+ * free it, reset current->thread.gs_cb to NULL and clear bit 4 in
+ * control register 2. Always returns 0.
+ */
+static int gs_disable(void)
+{
+       if (current->thread.gs_cb) {
+               preempt_disable();
+               kfree(current->thread.gs_cb);
+               current->thread.gs_cb = NULL;
+               __ctl_clear_bit(2, 4);
+               preempt_enable();
+       }
+       return 0;
+}
+/*
+ * GS_SET_BC_CB: copy the control block at user address u_gs_cb into
+ * the broadcast control block of the current task, allocating the
+ * kernel copy on first use.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, -EFAULT if
+ * the user copy fails.
+ * NOTE(review): on -EFAULT the block stays installed in
+ * current->thread.gs_bc_cb with whatever content it holds at that
+ * point - confirm this is intended.
+ */
+static int gs_set_bc_cb(struct gs_cb __user *u_gs_cb)
+{
+       struct gs_cb *gs_cb;
+
+       gs_cb = current->thread.gs_bc_cb;
+       if (!gs_cb) {
+               gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL);
+               if (!gs_cb)
+                       return -ENOMEM;
+               current->thread.gs_bc_cb = gs_cb;
+       }
+       if (copy_from_user(gs_cb, u_gs_cb, sizeof(*gs_cb)))
+               return -EFAULT;
+       return 0;
+}
+/*
+ * GS_CLEAR_BC_CB: detach the broadcast control block from the current
+ * task and free it. The pointer is reset before the block is freed.
+ * Always returns 0.
+ */
+static int gs_clear_bc_cb(void)
+{
+       struct gs_cb *gs_cb;
+
+       gs_cb = current->thread.gs_bc_cb;
+       current->thread.gs_bc_cb = NULL;
+       kfree(gs_cb);
+       return 0;
+}
+/*
+ * Handle a pending TIF_GUARDED_STORAGE for the current task: clear the
+ * flag and, if a broadcast control block is present, free the current
+ * control block, set bit 4 in control register 2, load the broadcast
+ * block and make it the current one. The broadcast block is used up by
+ * this (gs_bc_cb is reset to NULL). The regs argument is not used.
+ */
+void gs_load_bc_cb(struct pt_regs *regs)
+{
+       struct gs_cb *gs_cb;
+
+       preempt_disable();
+       clear_thread_flag(TIF_GUARDED_STORAGE);
+       gs_cb = current->thread.gs_bc_cb;
+       if (gs_cb) {
+               kfree(current->thread.gs_cb);   /* old block is replaced below */
+               current->thread.gs_bc_cb = NULL;
+               __ctl_set_bit(2, 4);
+               load_gs_cb(gs_cb);
+               current->thread.gs_cb = gs_cb;
+       }
+       preempt_enable();
+}
+/*
+ * GS_BROADCAST: walk the threads of the current task's thread group
+ * under tasklist_lock and set TIF_GUARDED_STORAGE on every thread that
+ * has a broadcast control block. Always returns 0.
+ * NOTE(review): kick_process() is only reached when the flag was
+ * already set before this call (test_and_set returns the old value);
+ * confirm this is intended rather than kicking on the first set.
+ */
+static int gs_broadcast(void)
+{
+       struct task_struct *sibling;
+
+       read_lock(&tasklist_lock);
+       for_each_thread(current, sibling) {
+               if (!sibling->thread.gs_bc_cb)
+                       continue;       /* nothing to load for this thread */
+               if (test_and_set_tsk_thread_flag(sibling, TIF_GUARDED_STORAGE))
+                       kick_process(sibling);
+       }
+       read_unlock(&tasklist_lock);
+       return 0;
+}
+/*
+ * s390_guarded_storage system call: dispatch on command to one of the
+ * GS_* handlers above. gs_cb is only passed on to GS_SET_BC_CB.
+ *
+ * Returns the handler's result, -EOPNOTSUPP if the machine lacks the
+ * guarded storage facility, or -EINVAL for an unknown command.
+ */
+SYSCALL_DEFINE2(s390_guarded_storage, int, command,
+               struct gs_cb __user *, gs_cb)
+{
+       if (!MACHINE_HAS_GS)
+               return -EOPNOTSUPP;
+       switch (command) {
+       case GS_ENABLE:
+               return gs_enable();
+       case GS_DISABLE:
+               return gs_disable();
+       case GS_SET_BC_CB:
+               return gs_set_bc_cb(gs_cb);
+       case GS_CLEAR_BC_CB:
+               return gs_clear_bc_cb();
+       case GS_BROADCAST:
+               return gs_broadcast();
+       default:
+               return -EINVAL;
+       }
+}
index 3074c1d83829de3e7bbb3a07f2fc72698c340284..db5658daf9945702f634c714177825acda8fa44a 100644 (file)
@@ -27,6 +27,7 @@
 #include <asm/cacheflush.h>
 #include <asm/os_info.h>
 #include <asm/switch_to.h>
+#include <asm/nmi.h>
 
 typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long);
 
@@ -102,6 +103,8 @@ static void __do_machine_kdump(void *image)
  */
 static noinline void __machine_kdump(void *image)
 {
+       struct mcesa *mcesa;
+       unsigned long cr2_old, cr2_new;
        int this_cpu, cpu;
 
        lgr_info_log();
@@ -114,8 +117,16 @@ static noinline void __machine_kdump(void *image)
                        continue;
        }
        /* Store status of the boot CPU */
+       mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
        if (MACHINE_HAS_VX)
-               save_vx_regs((void *) &S390_lowcore.vector_save_area);
+               save_vx_regs((__vector128 *) mcesa->vector_save_area);
+       if (MACHINE_HAS_GS) {
+               __ctl_store(cr2_old, 2, 2);
+               cr2_new = cr2_old | (1UL << 4);
+               __ctl_load(cr2_new, 2, 2);
+               save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area);
+               __ctl_load(cr2_old, 2, 2);
+       }
        /*
         * To create a good backchain for this CPU in the dump store_status
         * is passed the address of a function. The address is saved into
index 9bf8327154eeee8442eafdbf7b2352b4d900922e..9855895239704f10deedaeff259edc5279a01519 100644 (file)
@@ -106,6 +106,7 @@ static int notrace s390_validate_registers(union mci mci, int umode)
        int kill_task;
        u64 zero;
        void *fpt_save_area;
+       struct mcesa *mcesa;
 
        kill_task = 0;
        zero = 0;
@@ -165,6 +166,7 @@ static int notrace s390_validate_registers(union mci mci, int umode)
                             : : "Q" (S390_lowcore.fpt_creg_save_area));
        }
 
+       mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
        if (!MACHINE_HAS_VX) {
                /* Validate floating point registers */
                asm volatile(
@@ -209,8 +211,8 @@ static int notrace s390_validate_registers(union mci mci, int umode)
                        "       la      1,%0\n"
                        "       .word   0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */
                        "       .word   0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */
-                       : : "Q" (*(struct vx_array *)
-                                &S390_lowcore.vector_save_area) : "1");
+                       : : "Q" (*(struct vx_array *) mcesa->vector_save_area)
+                       : "1");
                __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
        }
        /* Validate access registers */
@@ -224,6 +226,19 @@ static int notrace s390_validate_registers(union mci mci, int umode)
                 */
                kill_task = 1;
        }
+       /* Validate guarded storage registers */
+       if (MACHINE_HAS_GS && (S390_lowcore.cregs_save_area[2] & (1UL << 4))) {
+               if (!mci.gs)
+                       /*
+                        * The guarded storage registers can't be restored
+                        * and the current process uses guarded storage.
+                        * It has to be terminated.
+                        */
+                       kill_task = 1;
+               else
+                       load_gs_cb((struct gs_cb *)
+                                  mcesa->guarded_storage_save_area);
+       }
        /*
         * We don't even try to validate the TOD register, since we simply
         * can't write something sensible into that register.
index f29e41c5e2ecf6d28018463cf89a2db677dffccc..999d7154bbdcd0891f6e2d5e6c55ea4ab62d0554 100644 (file)
@@ -73,8 +73,10 @@ extern void kernel_thread_starter(void);
  */
 void exit_thread(struct task_struct *tsk)
 {
-       if (tsk == current)
+       if (tsk == current) {
                exit_thread_runtime_instr();
+               exit_thread_gs();
+       }
 }
 
 void flush_thread(void)
@@ -159,6 +161,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp,
        /* Don't copy runtime instrumentation info */
        p->thread.ri_cb = NULL;
        frame->childregs.psw.mask &= ~PSW_MASK_RI;
+       /* Don't copy guarded storage control block */
+       p->thread.gs_cb = NULL;
+       p->thread.gs_bc_cb = NULL;
 
        /* Set a new TLS ?  */
        if (clone_flags & CLONE_SETTLS) {
index 928b929a62614a7bffa8797036d3c88e7425433d..c73709869447fd474461d5083e0e7988b5103d16 100644 (file)
@@ -95,7 +95,7 @@ static void show_cpu_summary(struct seq_file *m, void *v)
 {
        static const char *hwcap_str[] = {
                "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
-               "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe"
+               "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs"
        };
        static const char * const int_hwcap_str[] = {
                "sie"
index c14df0a1ec3ca5f15ce34373c87438672611f622..c933e255b5d577009b8d2abcf753a400cbb727b2 100644 (file)
@@ -44,30 +44,42 @@ void update_cr_regs(struct task_struct *task)
        struct pt_regs *regs = task_pt_regs(task);
        struct thread_struct *thread = &task->thread;
        struct per_regs old, new;
-
+       unsigned long cr0_old, cr0_new;
+       unsigned long cr2_old, cr2_new;
+       int cr0_changed, cr2_changed;
+
+       __ctl_store(cr0_old, 0, 0);
+       __ctl_store(cr2_old, 2, 2);
+       cr0_new = cr0_old;
+       cr2_new = cr2_old;
        /* Take care of the enable/disable of transactional execution. */
        if (MACHINE_HAS_TE) {
-               unsigned long cr, cr_new;
-
-               __ctl_store(cr, 0, 0);
                /* Set or clear transaction execution TXC bit 8. */
-               cr_new = cr | (1UL << 55);
+               cr0_new |= (1UL << 55);
                if (task->thread.per_flags & PER_FLAG_NO_TE)
-                       cr_new &= ~(1UL << 55);
-               if (cr_new != cr)
-                       __ctl_load(cr_new, 0, 0);
+                       cr0_new &= ~(1UL << 55);
                /* Set or clear transaction execution TDC bits 62 and 63. */
-               __ctl_store(cr, 2, 2);
-               cr_new = cr & ~3UL;
+               cr2_new &= ~3UL;
                if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
                        if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
-                               cr_new |= 1UL;
+                               cr2_new |= 1UL;
                        else
-                               cr_new |= 2UL;
+                               cr2_new |= 2UL;
                }
-               if (cr_new != cr)
-                       __ctl_load(cr_new, 2, 2);
        }
+       /* Take care of enable/disable of guarded storage. */
+       if (MACHINE_HAS_GS) {
+               cr2_new &= ~(1UL << 4);
+               if (task->thread.gs_cb)
+                       cr2_new |= (1UL << 4);
+       }
+       /* Load control register 0/2 iff changed */
+       cr0_changed = cr0_new != cr0_old;
+       cr2_changed = cr2_new != cr2_old;
+       if (cr0_changed)
+               __ctl_load(cr0_new, 0, 0);
+       if (cr2_changed)
+               __ctl_load(cr2_new, 2, 2);
        /* Copy user specified PER registers */
        new.control = thread->per_user.control;
        new.start = thread->per_user.start;
@@ -1137,6 +1149,36 @@ static int s390_system_call_set(struct task_struct *target,
                                  data, 0, sizeof(unsigned int));
 }
 
+static int s390_gs_cb_get(struct task_struct *target,
+                         const struct user_regset *regset,
+                         unsigned int pos, unsigned int count,
+                         void *kbuf, void __user *ubuf)
+{
+       struct gs_cb *data = target->thread.gs_cb;      /* target's current control block */
+
+       if (!MACHINE_HAS_GS)
+               return -ENODEV;         /* facility not available on this machine */
+       if (!data)
+               return -ENODATA;        /* target has no control block */
+       return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                  data, 0, sizeof(struct gs_cb));
+}
+/*
+ * Regset handler that copies caller-supplied data into the target's
+ * current guarded storage control block (target->thread.gs_cb).
+ * Returns -ENODEV if the machine lacks the guarded storage facility,
+ * -ENODATA if the target has no control block, otherwise the result of
+ * user_regset_copyin().
+ */
+static int s390_gs_cb_set(struct task_struct *target,
+                         const struct user_regset *regset,
+                         unsigned int pos, unsigned int count,
+                         const void *kbuf, const void __user *ubuf)
+{
+       struct gs_cb *data = target->thread.gs_cb;
+
+       if (!MACHINE_HAS_GS)
+               return -ENODEV;
+       if (!data)
+               return -ENODATA;
+       return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                 data, 0, sizeof(struct gs_cb));
+}
+
 static const struct user_regset s390_regsets[] = {
        {
                .core_note_type = NT_PRSTATUS,
@@ -1194,6 +1236,14 @@ static const struct user_regset s390_regsets[] = {
                .get = s390_vxrs_high_get,
                .set = s390_vxrs_high_set,
        },
+       {
+               .core_note_type = NT_S390_GS_CB,
+               .n = sizeof(struct gs_cb) / sizeof(__u64),
+               .size = sizeof(__u64),
+               .align = sizeof(__u64),
+               .get = s390_gs_cb_get,
+               .set = s390_gs_cb_set,
+       },
 };
 
 static const struct user_regset_view user_s390_view = {
@@ -1422,6 +1472,14 @@ static const struct user_regset s390_compat_regsets[] = {
                .get = s390_compat_regs_high_get,
                .set = s390_compat_regs_high_set,
        },
+       {
+               .core_note_type = NT_S390_GS_CB,
+               .n = sizeof(struct gs_cb) / sizeof(__u64),
+               .size = sizeof(__u64),
+               .align = sizeof(__u64),
+               .get = s390_gs_cb_get,
+               .set = s390_gs_cb_set,
+       },
 };
 
 static const struct user_regset_view user_s390_compat_view = {
index 911dc0b49be05bbf8c1a450fc721a938cb0e1f3c..3ae756c0db3de276936283a5c3577fa264ba2aa9 100644 (file)
@@ -339,9 +339,15 @@ static void __init setup_lowcore(void)
        lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
        memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
               MAX_FACILITY_BIT/8);
-       if (MACHINE_HAS_VX)
-               lc->vector_save_area_addr =
-                       (unsigned long) &lc->vector_save_area;
+       if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+               unsigned long bits, size;
+
+               bits = MACHINE_HAS_GS ? 11 : 10;
+               size = 1UL << bits;
+               lc->mcesad = (__u64) memblock_virt_alloc(size, size);
+               if (MACHINE_HAS_GS)
+                       lc->mcesad |= bits;
+       }
        lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
        lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
        lc->async_enter_timer = S390_lowcore.async_enter_timer;
@@ -779,6 +785,12 @@ static int __init setup_hwcaps(void)
                        elf_hwcap |= HWCAP_S390_VXRS_BCD;
        }
 
+       /*
+        * Guarded storage support HWCAP_S390_GS is bit 12.
+        */
+       if (MACHINE_HAS_GS)
+               elf_hwcap |= HWCAP_S390_GS;
+
        get_cpu_id(&cpu_id);
        add_device_randomness(&cpu_id, sizeof(cpu_id));
        switch (cpu_id.machine) {
index 47a973b5b4f184adfa3855828d042bd73d33e61c..286bcee800f48a24b96e9512dd94cbc625cc9d72 100644 (file)
@@ -51,6 +51,7 @@
 #include <asm/os_info.h>
 #include <asm/sigp.h>
 #include <asm/idle.h>
+#include <asm/nmi.h>
 #include "entry.h"
 
 enum {
@@ -78,6 +79,8 @@ struct pcpu {
 static u8 boot_core_type;
 static struct pcpu pcpu_devices[NR_CPUS];
 
+static struct kmem_cache *pcpu_mcesa_cache;
+
 unsigned int smp_cpu_mt_shift;
 EXPORT_SYMBOL(smp_cpu_mt_shift);
 
@@ -188,8 +191,10 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
 static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 {
        unsigned long async_stack, panic_stack;
+       unsigned long mcesa_origin, mcesa_bits;
        struct lowcore *lc;
 
+       mcesa_origin = mcesa_bits = 0;
        if (pcpu != &pcpu_devices[0]) {
                pcpu->lowcore = (struct lowcore *)
                        __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
@@ -197,20 +202,27 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
                panic_stack = __get_free_page(GFP_KERNEL);
                if (!pcpu->lowcore || !panic_stack || !async_stack)
                        goto out;
+               if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+                       mcesa_origin = (unsigned long)
+                               kmem_cache_alloc(pcpu_mcesa_cache, GFP_KERNEL);
+                       if (!mcesa_origin)
+                               goto out;
+                       mcesa_bits = MACHINE_HAS_GS ? 11 : 0;
+               }
        } else {
                async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET;
                panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET;
+               mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK;
+               mcesa_bits = pcpu->lowcore->mcesad & MCESA_LC_MASK;
        }
        lc = pcpu->lowcore;
        memcpy(lc, &S390_lowcore, 512);
        memset((char *) lc + 512, 0, sizeof(*lc) - 512);
        lc->async_stack = async_stack + ASYNC_FRAME_OFFSET;
        lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET;
+       lc->mcesad = mcesa_origin | mcesa_bits;
        lc->cpu_nr = cpu;
        lc->spinlock_lockval = arch_spin_lockval(cpu);
-       if (MACHINE_HAS_VX)
-               lc->vector_save_area_addr =
-                       (unsigned long) &lc->vector_save_area;
        if (vdso_alloc_per_cpu(lc))
                goto out;
        lowcore_ptr[cpu] = lc;
@@ -218,6 +230,9 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
        return 0;
 out:
        if (pcpu != &pcpu_devices[0]) {
+               if (mcesa_origin)
+                       kmem_cache_free(pcpu_mcesa_cache,
+                                       (void *) mcesa_origin);
                free_page(panic_stack);
                free_pages(async_stack, ASYNC_ORDER);
                free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
@@ -229,11 +244,17 @@ out:
 
 static void pcpu_free_lowcore(struct pcpu *pcpu)
 {
+       unsigned long mcesa_origin;
+
        pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
        lowcore_ptr[pcpu - pcpu_devices] = NULL;
        vdso_free_per_cpu(pcpu->lowcore);
        if (pcpu == &pcpu_devices[0])
                return;
+       if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+               mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK;
+               kmem_cache_free(pcpu_mcesa_cache, (void *) mcesa_origin);
+       }
        free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET);
        free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER);
        free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
@@ -550,9 +571,11 @@ int smp_store_status(int cpu)
        if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
                              pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
                return -EIO;
-       if (!MACHINE_HAS_VX)
+       if (!MACHINE_HAS_VX && !MACHINE_HAS_GS)
                return 0;
-       pa = __pa(pcpu->lowcore->vector_save_area_addr);
+       pa = __pa(pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK);
+       if (MACHINE_HAS_GS)
+               pa |= pcpu->lowcore->mcesad & MCESA_LC_MASK;
        if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
                              pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
                return -EIO;
@@ -897,12 +920,22 @@ void __init smp_fill_possible_mask(void)
 
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
+       unsigned long size;
+
        /* request the 0x1201 emergency signal external interrupt */
        if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
                panic("Couldn't request external interrupt 0x1201");
        /* request the 0x1202 external call external interrupt */
        if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
                panic("Couldn't request external interrupt 0x1202");
+       /* create slab cache for the machine-check-extended-save-areas */
+       if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+               size = 1UL << (MACHINE_HAS_GS ? 11 : 10);
+               pcpu_mcesa_cache = kmem_cache_create("nmi_save_areas",
+                                                    size, size, 0, NULL);
+               if (!pcpu_mcesa_cache)
+                       panic("Couldn't create nmi save area cache");
+       }
 }
 
 void __init smp_prepare_boot_cpu(void)
index 2659b5cfeddba4cd294e71e356d1149cca68314f..54fce7b065de2610c762763764cf1b97f7805b02 100644 (file)
@@ -386,5 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2)
 SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */
 SYSCALL(sys_preadv2,compat_sys_preadv2)
 SYSCALL(sys_pwritev2,compat_sys_pwritev2)
-NI_SYSCALL
+SYSCALL(sys_s390_guarded_storage,compat_sys_s390_guarded_storage) /* 378 */
 SYSCALL(sys_statx,compat_sys_statx)
index 0f8f14199734e94bd96ae0158be5c4a9a3c0333d..169558dc7daf0d353857c544521ce26eddea62c8 100644 (file)
@@ -420,8 +420,8 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
        save_access_regs(vcpu->run->s.regs.acrs);
 
        /* Extended save area */
-       rc = read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR, &ext_sa_addr,
-                           sizeof(unsigned long));
+       rc = read_guest_lc(vcpu, __LC_MCESAD, &ext_sa_addr,
+                          sizeof(unsigned long));
        /* Only bits 0-53 are used for address formation */
        ext_sa_addr &= ~0x3ffUL;
        if (!rc && mci.vr && ext_sa_addr && test_kvm_facility(vcpu->kvm, 129)) {
index b59ee077a5964a888be764fdb5b817e3e377bcc0..8c6d3bdb9a00c5f7c7c4404e79ca35c8240e6e71 100644 (file)
@@ -409,6 +409,7 @@ typedef struct elf64_shdr {
 #define NT_S390_TDB    0x308           /* s390 transaction diagnostic block */
 #define NT_S390_VXRS_LOW       0x309   /* s390 vector registers 0-15 upper half */
 #define NT_S390_VXRS_HIGH      0x30a   /* s390 vector registers 16-31 */
+#define NT_S390_GS_CB  0x30b           /* s390 guarded storage registers */
 #define NT_ARM_VFP     0x400           /* ARM VFP/NEON registers */
 #define NT_ARM_TLS     0x401           /* ARM TLS register */
 #define NT_ARM_HW_BREAK        0x402           /* ARM hardware breakpoint registers */