s390: add no-execute support
authorMartin Schwidefsky <schwidefsky@de.ibm.com>
Tue, 22 Mar 2016 09:54:24 +0000 (10:54 +0100)
committerMartin Schwidefsky <schwidefsky@de.ibm.com>
Wed, 8 Feb 2017 13:13:25 +0000 (14:13 +0100)
Bit 0x100 of a page table, segment table or region table entry
can be used to disallow code execution for the virtual addresses
associated with the entry.

There is one tricky bit: the system call to return from a signal
is part of the signal frame written to the user stack. With a
non-executable stack this would stop working. To avoid breaking
things, the protection fault handler checks the opcode that caused
the fault for 0x0a77 (sys_sigreturn) and 0x0aad (sys_rt_sigreturn)
and injects a system call. This is preferable to the alternative
solution with a stub function in the vdso, because it works for
vdso=off and statically linked binaries as well.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
15 files changed:
arch/s390/include/asm/cacheflush.h
arch/s390/include/asm/pgtable.h
arch/s390/include/asm/setup.h
arch/s390/kernel/early.c
arch/s390/kernel/entry.S
arch/s390/kernel/kprobes.c
arch/s390/kernel/module.c
arch/s390/kernel/vmlinux.lds.S
arch/s390/mm/dump_pagetables.c
arch/s390/mm/fault.c
arch/s390/mm/hugetlbpage.c
arch/s390/mm/init.c
arch/s390/mm/pageattr.c
arch/s390/mm/pgtable.c
arch/s390/mm/vmem.c

index 58fae7d098cf0993f7484b3f2df7191fe1afe14d..0499334f9473a94e6a80e9144e9fd2946167f4d7 100644 (file)
@@ -4,9 +4,31 @@
 /* Caches aren't brain-dead on the s390. */
 #include <asm-generic/cacheflush.h>
 
-int set_memory_ro(unsigned long addr, int numpages);
-int set_memory_rw(unsigned long addr, int numpages);
-int set_memory_nx(unsigned long addr, int numpages);
-int set_memory_x(unsigned long addr, int numpages);
+#define SET_MEMORY_RO  1UL
+#define SET_MEMORY_RW  2UL
+#define SET_MEMORY_NX  4UL
+#define SET_MEMORY_X   8UL
+
+int __set_memory(unsigned long addr, int numpages, unsigned long flags);
+
+static inline int set_memory_ro(unsigned long addr, int numpages)
+{
+       return __set_memory(addr, numpages, SET_MEMORY_RO);
+}
+
+static inline int set_memory_rw(unsigned long addr, int numpages)
+{
+       return __set_memory(addr, numpages, SET_MEMORY_RW);
+}
+
+static inline int set_memory_nx(unsigned long addr, int numpages)
+{
+       return __set_memory(addr, numpages, SET_MEMORY_NX);
+}
+
+static inline int set_memory_x(unsigned long addr, int numpages)
+{
+       return __set_memory(addr, numpages, SET_MEMORY_X);
+}
 
 #endif /* _S390_CACHEFLUSH_H */
index 0362cd5fa187c5e580fd1c3ab250546873eca7ba..d03b60d53f99e331021b7504761be673a5700970 100644 (file)
@@ -200,6 +200,7 @@ static inline int is_module_addr(void *addr)
  */
 
 /* Hardware bits in the page table entry */
+#define _PAGE_NOEXEC   0x100           /* HW no-execute bit  */
 #define _PAGE_PROTECT  0x200           /* HW read-only bit  */
 #define _PAGE_INVALID  0x400           /* HW invalid bit    */
 #define _PAGE_LARGE    0x800           /* Bit to mark a large pte */
@@ -277,6 +278,7 @@ static inline int is_module_addr(void *addr)
 /* Bits in the region table entry */
 #define _REGION_ENTRY_ORIGIN   ~0xfffUL/* region/segment table origin      */
 #define _REGION_ENTRY_PROTECT  0x200   /* region protection bit            */
+#define _REGION_ENTRY_NOEXEC   0x100   /* region no-execute bit            */
 #define _REGION_ENTRY_OFFSET   0xc0    /* region table offset              */
 #define _REGION_ENTRY_INVALID  0x20    /* invalid region table entry       */
 #define _REGION_ENTRY_TYPE_MASK        0x0c    /* region/segment table type mask   */
@@ -316,6 +318,7 @@ static inline int is_module_addr(void *addr)
 #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address       */
 #define _SEGMENT_ENTRY_ORIGIN  ~0x7ffUL/* segment table origin             */
 #define _SEGMENT_ENTRY_PROTECT 0x200   /* page protection bit              */
+#define _SEGMENT_ENTRY_NOEXEC  0x100   /* segment no-execute bit           */
 #define _SEGMENT_ENTRY_INVALID 0x20    /* invalid segment table entry      */
 
 #define _SEGMENT_ENTRY         (0)
@@ -385,17 +388,23 @@ static inline int is_module_addr(void *addr)
  * Page protection definitions.
  */
 #define PAGE_NONE      __pgprot(_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT)
-#define PAGE_READ      __pgprot(_PAGE_PRESENT | _PAGE_READ | \
+#define PAGE_RO                __pgprot(_PAGE_PRESENT | _PAGE_READ | \
+                                _PAGE_NOEXEC  | _PAGE_INVALID | _PAGE_PROTECT)
+#define PAGE_RX                __pgprot(_PAGE_PRESENT | _PAGE_READ | \
                                 _PAGE_INVALID | _PAGE_PROTECT)
-#define PAGE_WRITE     __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+#define PAGE_RW                __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+                                _PAGE_NOEXEC  | _PAGE_INVALID | _PAGE_PROTECT)
+#define PAGE_RWX       __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
                                 _PAGE_INVALID | _PAGE_PROTECT)
 
 #define PAGE_SHARED    __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
-                                _PAGE_YOUNG | _PAGE_DIRTY)
+                                _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC)
 #define PAGE_KERNEL    __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
-                                _PAGE_YOUNG | _PAGE_DIRTY)
+                                _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC)
 #define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
-                                _PAGE_PROTECT)
+                                _PAGE_PROTECT | _PAGE_NOEXEC)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+                                 _PAGE_YOUNG | _PAGE_DIRTY)
 
 /*
  * On s390 the page table entry has an invalid bit and a read-only bit.
@@ -404,43 +413,51 @@ static inline int is_module_addr(void *addr)
  */
          /*xwr*/
 #define __P000 PAGE_NONE
-#define __P001 PAGE_READ
-#define __P010 PAGE_READ
-#define __P011 PAGE_READ
-#define __P100 PAGE_READ
-#define __P101 PAGE_READ
-#define __P110 PAGE_READ
-#define __P111 PAGE_READ
+#define __P001 PAGE_RO
+#define __P010 PAGE_RO
+#define __P011 PAGE_RO
+#define __P100 PAGE_RX
+#define __P101 PAGE_RX
+#define __P110 PAGE_RX
+#define __P111 PAGE_RX
 
 #define __S000 PAGE_NONE
-#define __S001 PAGE_READ
-#define __S010 PAGE_WRITE
-#define __S011 PAGE_WRITE
-#define __S100 PAGE_READ
-#define __S101 PAGE_READ
-#define __S110 PAGE_WRITE
-#define __S111 PAGE_WRITE
+#define __S001 PAGE_RO
+#define __S010 PAGE_RW
+#define __S011 PAGE_RW
+#define __S100 PAGE_RX
+#define __S101 PAGE_RX
+#define __S110 PAGE_RWX
+#define __S111 PAGE_RWX
 
 /*
  * Segment entry (large page) protection definitions.
  */
 #define SEGMENT_NONE   __pgprot(_SEGMENT_ENTRY_INVALID | \
                                 _SEGMENT_ENTRY_PROTECT)
-#define SEGMENT_READ   __pgprot(_SEGMENT_ENTRY_PROTECT | \
+#define SEGMENT_RO     __pgprot(_SEGMENT_ENTRY_PROTECT | \
+                                _SEGMENT_ENTRY_READ | \
+                                _SEGMENT_ENTRY_NOEXEC)
+#define SEGMENT_RX     __pgprot(_SEGMENT_ENTRY_PROTECT | \
                                 _SEGMENT_ENTRY_READ)
-#define SEGMENT_WRITE  __pgprot(_SEGMENT_ENTRY_READ | \
+#define SEGMENT_RW     __pgprot(_SEGMENT_ENTRY_READ | \
+                                _SEGMENT_ENTRY_WRITE | \
+                                _SEGMENT_ENTRY_NOEXEC)
+#define SEGMENT_RWX    __pgprot(_SEGMENT_ENTRY_READ | \
                                 _SEGMENT_ENTRY_WRITE)
 #define SEGMENT_KERNEL __pgprot(_SEGMENT_ENTRY |       \
                                 _SEGMENT_ENTRY_LARGE | \
                                 _SEGMENT_ENTRY_READ |  \
                                 _SEGMENT_ENTRY_WRITE | \
                                 _SEGMENT_ENTRY_YOUNG | \
-                                _SEGMENT_ENTRY_DIRTY)
+                                _SEGMENT_ENTRY_DIRTY | \
+                                _SEGMENT_ENTRY_NOEXEC)
 #define SEGMENT_KERNEL_RO __pgprot(_SEGMENT_ENTRY |    \
                                 _SEGMENT_ENTRY_LARGE | \
                                 _SEGMENT_ENTRY_READ |  \
                                 _SEGMENT_ENTRY_YOUNG | \
-                                _SEGMENT_ENTRY_PROTECT)
+                                _SEGMENT_ENTRY_PROTECT | \
+                                _SEGMENT_ENTRY_NOEXEC)
 
 /*
  * Region3 entry (large page) protection definitions.
@@ -451,12 +468,14 @@ static inline int is_module_addr(void *addr)
                                 _REGION3_ENTRY_READ |   \
                                 _REGION3_ENTRY_WRITE |  \
                                 _REGION3_ENTRY_YOUNG |  \
-                                _REGION3_ENTRY_DIRTY)
+                                _REGION3_ENTRY_DIRTY | \
+                                _REGION_ENTRY_NOEXEC)
 #define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \
                                   _REGION3_ENTRY_LARGE |  \
                                   _REGION3_ENTRY_READ |   \
                                   _REGION3_ENTRY_YOUNG |  \
-                                  _REGION_ENTRY_PROTECT)
+                                  _REGION_ENTRY_PROTECT | \
+                                  _REGION_ENTRY_NOEXEC)
 
 static inline int mm_has_pgste(struct mm_struct *mm)
 {
@@ -801,14 +820,14 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
        pte_val(pte) &= _PAGE_CHG_MASK;
        pte_val(pte) |= pgprot_val(newprot);
        /*
-        * newprot for PAGE_NONE, PAGE_READ and PAGE_WRITE has the
-        * invalid bit set, clear it again for readable, young pages
+        * newprot for PAGE_NONE, PAGE_RO, PAGE_RX, PAGE_RW and PAGE_RWX
+        * has the invalid bit set, clear it again for readable, young pages
         */
        if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ))
                pte_val(pte) &= ~_PAGE_INVALID;
        /*
-        * newprot for PAGE_READ and PAGE_WRITE has the page protection
-        * bit set, clear it again for writable, dirty pages
+        * newprot for PAGE_RO, PAGE_RX, PAGE_RW and PAGE_RWX has the page
+        * protection bit set, clear it again for writable, dirty pages
         */
        if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE))
                pte_val(pte) &= ~_PAGE_PROTECT;
@@ -1029,6 +1048,8 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep, pte_t entry)
 {
+       if (!MACHINE_HAS_NX)
+               pte_val(entry) &= ~_PAGE_NOEXEC;
        if (mm_has_pgste(mm))
                ptep_set_pte_at(mm, addr, ptep, entry);
        else
@@ -1173,14 +1194,18 @@ static inline pud_t pud_mkdirty(pud_t pud)
 static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
 {
        /*
-        * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx)
-        * Convert to segment table entry format.
+        * pgprot is PAGE_NONE, PAGE_RO, PAGE_RX, PAGE_RW or PAGE_RWX
+        * (see __Pxxx / __Sxxx). Convert to segment table entry format.
         */
        if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE))
                return pgprot_val(SEGMENT_NONE);
-       if (pgprot_val(pgprot) == pgprot_val(PAGE_READ))
-               return pgprot_val(SEGMENT_READ);
-       return pgprot_val(SEGMENT_WRITE);
+       if (pgprot_val(pgprot) == pgprot_val(PAGE_RO))
+               return pgprot_val(SEGMENT_RO);
+       if (pgprot_val(pgprot) == pgprot_val(PAGE_RX))
+               return pgprot_val(SEGMENT_RX);
+       if (pgprot_val(pgprot) == pgprot_val(PAGE_RW))
+               return pgprot_val(SEGMENT_RW);
+       return pgprot_val(SEGMENT_RWX);
 }
 
 static inline pmd_t pmd_mkyoung(pmd_t pmd)
@@ -1315,6 +1340,8 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
 static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
                              pmd_t *pmdp, pmd_t entry)
 {
+       if (!MACHINE_HAS_NX)
+               pmd_val(entry) &= ~_SEGMENT_ENTRY_NOEXEC;
        *pmdp = entry;
 }
 
index 5e8d57e1cc5ef19b685126341e83ad5335192dc5..040a4b49ab42a397eb501262352197c9e4ea8464 100644 (file)
@@ -30,6 +30,7 @@
 #define MACHINE_FLAG_TLB_LC    _BITUL(12)
 #define MACHINE_FLAG_VX                _BITUL(13)
 #define MACHINE_FLAG_CAD       _BITUL(14)
+#define MACHINE_FLAG_NX                _BITUL(15)
 
 #define LPP_MAGIC              _BITUL(31)
 #define LPP_PFAULT_PID_MASK    _AC(0xffffffff, UL)
@@ -71,6 +72,7 @@ extern void detect_memory_memblock(void);
 #define MACHINE_HAS_TLB_LC     (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
 #define MACHINE_HAS_VX         (S390_lowcore.machine_flags & MACHINE_FLAG_VX)
 #define MACHINE_HAS_CAD                (S390_lowcore.machine_flags & MACHINE_FLAG_CAD)
+#define MACHINE_HAS_NX         (S390_lowcore.machine_flags & MACHINE_FLAG_NX)
 
 /*
  * Console mode. Override with conmode=
index d756315b79851c55d7abef027ff99c278d925efb..4e65c79cc5f2111cf0428ab2a876c9afe963d1d8 100644 (file)
@@ -354,6 +354,10 @@ static __init void detect_machine_facilities(void)
                S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
                __ctl_set_bit(0, 17);
        }
+       if (test_facility(130)) {
+               S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
+               __ctl_set_bit(0, 20);
+       }
 }
 
 static inline void save_vector_registers(void)
@@ -384,6 +388,21 @@ static int __init disable_vector_extension(char *str)
 }
 early_param("novx", disable_vector_extension);
 
+static int __init noexec_setup(char *str)
+{
+       bool enabled;
+       int rc;
+
+       rc = kstrtobool(str, &enabled);
+       if (!rc && !enabled) {
+               /* Disable no-execute support */
+               S390_lowcore.machine_flags &= ~MACHINE_FLAG_NX;
+               __ctl_clear_bit(0, 20);
+       }
+       return rc;
+}
+early_param("noexec", noexec_setup);
+
 static int __init cad_setup(char *str)
 {
        int val;
index f687f168c94d50030082654373b35d178da4930b..34ab7e8d6a767d6ffb127b325673f321c8ff4736 100644 (file)
@@ -380,13 +380,11 @@ ENTRY(system_call)
        brasl   %r14,do_signal
        TSTMSK  __PT_FLAGS(%r11),_PIF_SYSCALL
        jno     .Lsysc_return
+.Lsysc_do_syscall:
+       lghi    %r13,__TASK_thread
        lmg     %r2,%r7,__PT_R2(%r11)   # load svc arguments
-       lghi    %r8,0                   # svc 0 returns -ENOSYS
-       llgh    %r1,__PT_INT_CODE+2(%r11)       # load new svc number
-       cghi    %r1,NR_syscalls
-       jnl     .Lsysc_nr_ok            # invalid svc number -> do svc 0
-       slag    %r8,%r1,2
-       j       .Lsysc_nr_ok            # restart svc
+       lghi    %r1,0                   # svc 0 returns -ENOSYS
+       j       .Lsysc_do_svc
 
 #
 # _TIF_NOTIFY_RESUME is set, call do_notify_resume
@@ -528,6 +526,8 @@ ENTRY(pgm_check_handler)
        LOCKDEP_SYS_EXIT
        tm      __PT_PSW+1(%r11),0x01   # returning to user ?
        jno     .Lsysc_restore
+       TSTMSK  __PT_FLAGS(%r11),_PIF_SYSCALL
+       jo      .Lsysc_do_syscall
        j       .Lsysc_tif
 
 #
index 84e0557b16fed77c14afe62396dc6c7a95fffd90..76f9eda1d7c0e8b5f0150a1b2fe0481241b1b0fa 100644 (file)
@@ -45,11 +45,17 @@ DEFINE_INSN_CACHE_OPS(dmainsn);
 
 static void *alloc_dmainsn_page(void)
 {
-       return (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
+       void *page;
+
+       page = (void *) __get_free_page(GFP_KERNEL | GFP_DMA);
+       if (page)
+               set_memory_x((unsigned long) page, 1);
+       return page;
 }
 
 static void free_dmainsn_page(void *page)
 {
+       set_memory_nx((unsigned long) page, 1);
        free_page((unsigned long)page);
 }
 
index fbc07891f9e7534abd8c150a49cb1539610dc232..1a27f307a92079e79ead66c5e50e4f54e992530c 100644 (file)
@@ -45,7 +45,8 @@ void *module_alloc(unsigned long size)
        if (PAGE_ALIGN(size) > MODULES_LEN)
                return NULL;
        return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-                                   GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
+                                   GFP_KERNEL, PAGE_KERNEL_EXEC,
+                                   0, NUMA_NO_NODE,
                                    __builtin_return_address(0));
 }
 
index 3667d20e997f3ccac943438ad2e03588795afb33..5ccf953962518294e2fc0d1b6e6b633e589f223b 100644 (file)
@@ -44,6 +44,7 @@ SECTIONS
                *(.gnu.warning)
        } :text = 0x0700
 
+       . = ALIGN(PAGE_SIZE);
        _etext = .;             /* End of text section */
 
        NOTES :text :note
@@ -79,7 +80,13 @@ SECTIONS
        . = ALIGN(PAGE_SIZE);   /* Init code and data */
        __init_begin = .;
 
-       INIT_TEXT_SECTION(PAGE_SIZE)
+       . = ALIGN(PAGE_SIZE);
+       .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+               VMLINUX_SYMBOL(_sinittext) = . ;
+               INIT_TEXT
+               . = ALIGN(PAGE_SIZE);
+               VMLINUX_SYMBOL(_einittext) = . ;
+       }
 
        /*
         * .exit.text is discarded at runtime, not link time,
index 861880df12c72933a7993b5b9fb257c9641e960b..5a46b1d7e57852ed44d17bc14aed56377041718a 100644 (file)
@@ -49,8 +49,8 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level)
                seq_printf(m, "I\n");
                return;
        }
-       seq_printf(m, "%s", pr & _PAGE_PROTECT ? "RO " : "RW ");
-       seq_putc(m, '\n');
+       seq_puts(m, (pr & _PAGE_PROTECT) ? "RO " : "RW ");
+       seq_puts(m, (pr & _PAGE_NOEXEC) ? "NX\n" : "X\n");
 }
 
 static void note_page(struct seq_file *m, struct pg_state *st,
@@ -117,7 +117,8 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st,
        for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) {
                st->current_address = addr;
                pte = pte_offset_kernel(pmd, addr);
-               prot = pte_val(*pte) & (_PAGE_PROTECT | _PAGE_INVALID);
+               prot = pte_val(*pte) &
+                       (_PAGE_PROTECT | _PAGE_INVALID | _PAGE_NOEXEC);
                note_page(m, st, prot, 4);
                addr += PAGE_SIZE;
        }
@@ -135,7 +136,9 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
                pmd = pmd_offset(pud, addr);
                if (!pmd_none(*pmd)) {
                        if (pmd_large(*pmd)) {
-                               prot = pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT;
+                               prot = pmd_val(*pmd) &
+                                       (_SEGMENT_ENTRY_PROTECT |
+                                        _SEGMENT_ENTRY_NOEXEC);
                                note_page(m, st, prot, 3);
                        } else
                                walk_pte_level(m, st, pmd, addr);
@@ -157,7 +160,9 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st,
                pud = pud_offset(pgd, addr);
                if (!pud_none(*pud))
                        if (pud_large(*pud)) {
-                               prot = pud_val(*pud) & _REGION_ENTRY_PROTECT;
+                               prot = pud_val(*pud) &
+                                       (_REGION_ENTRY_PROTECT |
+                                        _REGION_ENTRY_NOEXEC);
                                note_page(m, st, prot, 2);
                        } else
                                walk_pmd_level(m, st, pud, addr);
index d1faae5cdd122bbfefdfb18ca9874c3ed1c55952..bb5560eb2435ec936f3e027b9a344eae6ff7ee44 100644 (file)
@@ -311,12 +311,34 @@ static noinline void do_sigbus(struct pt_regs *regs)
        force_sig_info(SIGBUS, &si, tsk);
 }
 
-static noinline void do_fault_error(struct pt_regs *regs, int fault)
+static noinline int signal_return(struct pt_regs *regs)
+{
+       u16 instruction;
+       int rc;
+
+       rc = __get_user(instruction, (u16 __user *) regs->psw.addr);
+       if (rc)
+               return rc;
+       if (instruction == 0x0a77) {
+               set_pt_regs_flag(regs, PIF_SYSCALL);
+               regs->int_code = 0x00040077;
+               return 0;
+       } else if (instruction == 0x0aad) {
+               set_pt_regs_flag(regs, PIF_SYSCALL);
+               regs->int_code = 0x000400ad;
+               return 0;
+       }
+       return -EACCES;
+}
+
+static noinline void do_fault_error(struct pt_regs *regs, int access, int fault)
 {
        int si_code;
 
        switch (fault) {
        case VM_FAULT_BADACCESS:
+               if (access == VM_EXEC && signal_return(regs) == 0)
+                       break;
        case VM_FAULT_BADMAP:
                /* Bad memory access. Check if it is kernel or user space. */
                if (user_mode(regs)) {
@@ -324,7 +346,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
                        si_code = (fault == VM_FAULT_BADMAP) ?
                                SEGV_MAPERR : SEGV_ACCERR;
                        do_sigsegv(regs, si_code);
-                       return;
+                       break;
                }
        case VM_FAULT_BADCONTEXT:
        case VM_FAULT_PFAULT:
@@ -525,7 +547,7 @@ out:
 void do_protection_exception(struct pt_regs *regs)
 {
        unsigned long trans_exc_code;
-       int fault;
+       int access, fault;
 
        trans_exc_code = regs->int_parm_long;
        /*
@@ -544,9 +566,17 @@ void do_protection_exception(struct pt_regs *regs)
                do_low_address(regs);
                return;
        }
-       fault = do_exception(regs, VM_WRITE);
+       if (unlikely(MACHINE_HAS_NX && (trans_exc_code & 0x80))) {
+               regs->int_parm_long = (trans_exc_code & ~PAGE_MASK) |
+                                       (regs->psw.addr & PAGE_MASK);
+               access = VM_EXEC;
+               fault = VM_FAULT_BADACCESS;
+       } else {
+               access = VM_WRITE;
+               fault = do_exception(regs, access);
+       }
        if (unlikely(fault))
-               do_fault_error(regs, fault);
+               do_fault_error(regs, access, fault);
 }
 NOKPROBE_SYMBOL(do_protection_exception);
 
@@ -557,7 +587,7 @@ void do_dat_exception(struct pt_regs *regs)
        access = VM_READ | VM_EXEC | VM_WRITE;
        fault = do_exception(regs, access);
        if (unlikely(fault))
-               do_fault_error(regs, fault);
+               do_fault_error(regs, access, fault);
 }
 NOKPROBE_SYMBOL(do_dat_exception);
 
index 4a0c5bce3552b00ba4863622fcc63de7b711614c..a038162277195eaed7c65e92655096dd3ae36564 100644 (file)
@@ -59,6 +59,8 @@ static inline unsigned long __pte_to_rste(pte_t pte)
                rste |= move_set_bit(pte_val(pte), _PAGE_SOFT_DIRTY,
                                     _SEGMENT_ENTRY_SOFT_DIRTY);
 #endif
+               rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC,
+                                    _SEGMENT_ENTRY_NOEXEC);
        } else
                rste = _SEGMENT_ENTRY_INVALID;
        return rste;
@@ -113,6 +115,8 @@ static inline pte_t __rste_to_pte(unsigned long rste)
                pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY,
                                             _PAGE_DIRTY);
 #endif
+               pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC,
+                                            _PAGE_NOEXEC);
        } else
                pte_val(pte) = _PAGE_INVALID;
        return pte;
@@ -121,7 +125,11 @@ static inline pte_t __rste_to_pte(unsigned long rste)
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t pte)
 {
-       unsigned long rste = __pte_to_rste(pte);
+       unsigned long rste;
+
+       rste = __pte_to_rste(pte);
+       if (!MACHINE_HAS_NX)
+               rste &= ~_SEGMENT_ENTRY_NOEXEC;
 
        /* Set correct table type for 2G hugepages */
        if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
index b67454ad8408c94c799f3c953d6ba4ee0552e7b4..ba0c8d18e10d4d4164f2562c61c2763cebf452dd 100644 (file)
@@ -137,6 +137,9 @@ void __init mem_init(void)
 
 void free_initmem(void)
 {
+       __set_memory((unsigned long) _sinittext,
+                    (_einittext - _sinittext) >> PAGE_SHIFT,
+                    SET_MEMORY_RW | SET_MEMORY_NX);
        free_initmem_default(POISON_FREE_INITMEM);
 }
 
index 44f150312a16195b5a21821b2d8fe7c458d8cb09..a1543b74ee004c857b491f791ad5661813d97754 100644 (file)
@@ -81,24 +81,24 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
        }
 }
 
-struct cpa {
-       unsigned int set_ro     : 1;
-       unsigned int clear_ro   : 1;
-};
-
 static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
-                         struct cpa cpa)
+                         unsigned long flags)
 {
        pte_t *ptep, new;
 
        ptep = pte_offset(pmdp, addr);
        do {
-               if (pte_none(*ptep))
+               new = *ptep;
+               if (pte_none(new))
                        return -EINVAL;
-               if (cpa.set_ro)
-                       new = pte_wrprotect(*ptep);
-               else if (cpa.clear_ro)
-                       new = pte_mkwrite(pte_mkdirty(*ptep));
+               if (flags & SET_MEMORY_RO)
+                       new = pte_wrprotect(new);
+               else if (flags & SET_MEMORY_RW)
+                       new = pte_mkwrite(pte_mkdirty(new));
+               if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX)
+                       pte_val(new) |= _PAGE_NOEXEC;
+               else if (flags & SET_MEMORY_X)
+                       pte_val(new) &= ~_PAGE_NOEXEC;
                pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE);
                ptep++;
                addr += PAGE_SIZE;
@@ -112,14 +112,17 @@ static int split_pmd_page(pmd_t *pmdp, unsigned long addr)
        unsigned long pte_addr, prot;
        pte_t *pt_dir, *ptep;
        pmd_t new;
-       int i, ro;
+       int i, ro, nx;
 
        pt_dir = vmem_pte_alloc();
        if (!pt_dir)
                return -ENOMEM;
        pte_addr = pmd_pfn(*pmdp) << PAGE_SHIFT;
        ro = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT);
+       nx = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_NOEXEC);
        prot = pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
+       if (!nx)
+               prot &= ~_PAGE_NOEXEC;
        ptep = pt_dir;
        for (i = 0; i < PTRS_PER_PTE; i++) {
                pte_val(*ptep) = pte_addr | prot;
@@ -133,19 +136,24 @@ static int split_pmd_page(pmd_t *pmdp, unsigned long addr)
        return 0;
 }
 
-static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, struct cpa cpa)
+static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
+                           unsigned long flags)
 {
-       pmd_t new;
-
-       if (cpa.set_ro)
-               new = pmd_wrprotect(*pmdp);
-       else if (cpa.clear_ro)
-               new = pmd_mkwrite(pmd_mkdirty(*pmdp));
+       pmd_t new = *pmdp;
+
+       if (flags & SET_MEMORY_RO)
+               new = pmd_wrprotect(new);
+       else if (flags & SET_MEMORY_RW)
+               new = pmd_mkwrite(pmd_mkdirty(new));
+       if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX)
+               pmd_val(new) |= _SEGMENT_ENTRY_NOEXEC;
+       else if (flags & SET_MEMORY_X)
+               pmd_val(new) &= ~_SEGMENT_ENTRY_NOEXEC;
        pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
 }
 
 static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
-                         struct cpa cpa)
+                         unsigned long flags)
 {
        unsigned long next;
        pmd_t *pmdp;
@@ -163,9 +171,9 @@ static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
                                        return rc;
                                continue;
                        }
-                       modify_pmd_page(pmdp, addr, cpa);
+                       modify_pmd_page(pmdp, addr, flags);
                } else {
-                       rc = walk_pte_level(pmdp, addr, next, cpa);
+                       rc = walk_pte_level(pmdp, addr, next, flags);
                        if (rc)
                                return rc;
                }
@@ -181,14 +189,17 @@ static int split_pud_page(pud_t *pudp, unsigned long addr)
        unsigned long pmd_addr, prot;
        pmd_t *pm_dir, *pmdp;
        pud_t new;
-       int i, ro;
+       int i, ro, nx;
 
        pm_dir = vmem_pmd_alloc();
        if (!pm_dir)
                return -ENOMEM;
        pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT;
        ro = !!(pud_val(*pudp) & _REGION_ENTRY_PROTECT);
+       nx = !!(pud_val(*pudp) & _REGION_ENTRY_NOEXEC);
        prot = pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL);
+       if (!nx)
+               prot &= ~_SEGMENT_ENTRY_NOEXEC;
        pmdp = pm_dir;
        for (i = 0; i < PTRS_PER_PMD; i++) {
                pmd_val(*pmdp) = pmd_addr | prot;
@@ -202,19 +213,24 @@ static int split_pud_page(pud_t *pudp, unsigned long addr)
        return 0;
 }
 
-static void modify_pud_page(pud_t *pudp, unsigned long addr, struct cpa cpa)
+static void modify_pud_page(pud_t *pudp, unsigned long addr,
+                           unsigned long flags)
 {
-       pud_t new;
-
-       if (cpa.set_ro)
-               new = pud_wrprotect(*pudp);
-       else if (cpa.clear_ro)
-               new = pud_mkwrite(pud_mkdirty(*pudp));
+       pud_t new = *pudp;
+
+       if (flags & SET_MEMORY_RO)
+               new = pud_wrprotect(new);
+       else if (flags & SET_MEMORY_RW)
+               new = pud_mkwrite(pud_mkdirty(new));
+       if ((flags & SET_MEMORY_NX) && MACHINE_HAS_NX)
+               pud_val(new) |= _REGION_ENTRY_NOEXEC;
+       else if (flags & SET_MEMORY_X)
+               pud_val(new) &= ~_REGION_ENTRY_NOEXEC;
        pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
 }
 
 static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end,
-                         struct cpa cpa)
+                         unsigned long flags)
 {
        unsigned long next;
        pud_t *pudp;
@@ -232,9 +248,9 @@ static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end,
                                        break;
                                continue;
                        }
-                       modify_pud_page(pudp, addr, cpa);
+                       modify_pud_page(pudp, addr, flags);
                } else {
-                       rc = walk_pmd_level(pudp, addr, next, cpa);
+                       rc = walk_pmd_level(pudp, addr, next, flags);
                }
                pudp++;
                addr = next;
@@ -246,7 +262,7 @@ static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end,
 static DEFINE_MUTEX(cpa_mutex);
 
 static int change_page_attr(unsigned long addr, unsigned long end,
-                           struct cpa cpa)
+                           unsigned long flags)
 {
        unsigned long next;
        int rc = -EINVAL;
@@ -262,7 +278,7 @@ static int change_page_attr(unsigned long addr, unsigned long end,
                if (pgd_none(*pgdp))
                        break;
                next = pgd_addr_end(addr, end);
-               rc = walk_pud_level(pgdp, addr, next, cpa);
+               rc = walk_pud_level(pgdp, addr, next, flags);
                if (rc)
                        break;
                cond_resched();
@@ -271,35 +287,10 @@ static int change_page_attr(unsigned long addr, unsigned long end,
        return rc;
 }
 
-int set_memory_ro(unsigned long addr, int numpages)
+int __set_memory(unsigned long addr, int numpages, unsigned long flags)
 {
-       struct cpa cpa = {
-               .set_ro = 1,
-       };
-
        addr &= PAGE_MASK;
-       return change_page_attr(addr, addr + numpages * PAGE_SIZE, cpa);
-}
-
-int set_memory_rw(unsigned long addr, int numpages)
-{
-       struct cpa cpa = {
-               .clear_ro = 1,
-       };
-
-       addr &= PAGE_MASK;
-       return change_page_attr(addr, addr + numpages * PAGE_SIZE, cpa);
-}
-
-/* not possible */
-int set_memory_nx(unsigned long addr, int numpages)
-{
-       return 0;
-}
-
-int set_memory_x(unsigned long addr, int numpages)
-{
-       return 0;
+       return change_page_attr(addr, addr + numpages * PAGE_SIZE, flags);
 }
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
@@ -339,7 +330,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
                nr = min(numpages - i, nr);
                if (enable) {
                        for (j = 0; j < nr; j++) {
-                               pte_val(*pte) = address | pgprot_val(PAGE_KERNEL);
+                               pte_val(*pte) &= ~_PAGE_INVALID;
                                address += PAGE_SIZE;
                                pte++;
                        }
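diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 7a1897c51c5495f3f2b13d86ba8f6344e2234788..190d0c65904ae0051f133d05f253d4b8c3c99cec 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c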
@@ -274,6 +274,8 @@ void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
 {
        pgste_t pgste;
 
+       if (!MACHINE_HAS_NX)
+               pte_val(pte) &= ~_PAGE_NOEXEC;
        if (mm_has_pgste(mm)) {
                pgste = pgste_get(ptep);
                pgste_set_key(ptep, pgste, pte, mm);
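diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 45becc8a44ec660c8799ef13868db8b8036f1c9f..253046344b3c2c0a486b784e43ec4dc0b2dc4d50 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c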
@@ -79,6 +79,7 @@ pte_t __ref *vmem_pte_alloc(void)
  */
 static int vmem_add_mem(unsigned long start, unsigned long size)
 {
+       unsigned long pgt_prot, sgt_prot, r3_prot;
        unsigned long pages4k, pages1m, pages2g;
        unsigned long end = start + size;
        unsigned long address = start;
@@ -88,6 +89,14 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
        pte_t *pt_dir;
        int ret = -ENOMEM;
 
+       pgt_prot = pgprot_val(PAGE_KERNEL);
+       sgt_prot = pgprot_val(SEGMENT_KERNEL);
+       r3_prot = pgprot_val(REGION3_KERNEL);
+       if (!MACHINE_HAS_NX) {
+               pgt_prot &= ~_PAGE_NOEXEC;
+               sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
+               r3_prot &= ~_REGION_ENTRY_NOEXEC;
+       }
        pages4k = pages1m = pages2g = 0;
        while (address < end) {
                pg_dir = pgd_offset_k(address);
@@ -101,7 +110,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
                if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
                    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
                     !debug_pagealloc_enabled()) {
-                       pud_val(*pu_dir) = address | pgprot_val(REGION3_KERNEL);
+                       pud_val(*pu_dir) = address | r3_prot;
                        address += PUD_SIZE;
                        pages2g++;
                        continue;
@@ -116,7 +125,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
                if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
                    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) &&
                    !debug_pagealloc_enabled()) {
-                       pmd_val(*pm_dir) = address | pgprot_val(SEGMENT_KERNEL);
+                       pmd_val(*pm_dir) = address | sgt_prot;
                        address += PMD_SIZE;
                        pages1m++;
                        continue;
@@ -129,7 +138,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
                }
 
                pt_dir = pte_offset_kernel(pm_dir, address);
-               pte_val(*pt_dir) = address |  pgprot_val(PAGE_KERNEL);
+               pte_val(*pt_dir) = address | pgt_prot;
                address += PAGE_SIZE;
                pages4k++;
        }
@@ -200,6 +209,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
  */
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 {
+       unsigned long pgt_prot, sgt_prot;
        unsigned long address = start;
        pgd_t *pg_dir;
        pud_t *pu_dir;
@@ -207,6 +217,12 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
        pte_t *pt_dir;
        int ret = -ENOMEM;
 
+       pgt_prot = pgprot_val(PAGE_KERNEL);
+       sgt_prot = pgprot_val(SEGMENT_KERNEL);
+       if (!MACHINE_HAS_NX) {
+               pgt_prot &= ~_PAGE_NOEXEC;
+               sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
+       }
        for (address = start; address < end;) {
                pg_dir = pgd_offset_k(address);
                if (pgd_none(*pg_dir)) {
@@ -238,8 +254,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
                                new_page = vmemmap_alloc_block(PMD_SIZE, node);
                                if (!new_page)
                                        goto out;
-                               pmd_val(*pm_dir) = __pa(new_page) |
-                                       _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE;
+                               pmd_val(*pm_dir) = __pa(new_page) | sgt_prot;
                                address = (address + PMD_SIZE) & PMD_MASK;
                                continue;
                        }
@@ -259,8 +274,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
                        new_page = vmemmap_alloc_block(PAGE_SIZE, node);
                        if (!new_page)
                                goto out;
-                       pte_val(*pt_dir) =
-                               __pa(new_page) | pgprot_val(PAGE_KERNEL);
+                       pte_val(*pt_dir) = __pa(new_page) | pgt_prot;
                }
                address += PAGE_SIZE;
        }
@@ -372,13 +386,21 @@ out:
  */
 void __init vmem_map_init(void)
 {
-       unsigned long size = _eshared - _stext;
        struct memblock_region *reg;
 
        for_each_memblock(memory, reg)
                vmem_add_mem(reg->base, reg->size);
-       set_memory_ro((unsigned long)_stext, size >> PAGE_SHIFT);
-       pr_info("Write protected kernel read-only data: %luk\n", size >> 10);
+       __set_memory((unsigned long) _stext,
+                    (_etext - _stext) >> PAGE_SHIFT,
+                    SET_MEMORY_RO | SET_MEMORY_X);
+       __set_memory((unsigned long) _etext,
+                    (_eshared - _etext) >> PAGE_SHIFT,
+                    SET_MEMORY_RO);
+       __set_memory((unsigned long) _sinittext,
+                    (_einittext - _sinittext) >> PAGE_SHIFT,
+                    SET_MEMORY_RO | SET_MEMORY_X);
+       pr_info("Write protected kernel read-only data: %luk\n",
+               (_eshared - _stext) >> 10);
 }
 
 /*