s390/mm,tlb: correct tlb flush on page table upgrade
author Martin Schwidefsky <schwidefsky@de.ibm.com>
Mon, 28 Oct 2013 13:48:30 +0000 (14:48 +0100)
committer Martin Schwidefsky <schwidefsky@de.ibm.com>
Mon, 4 Nov 2013 12:51:47 +0000 (13:51 +0100)
The IDTE instruction used to flush TLB entries for a specific address
space uses the address-space-control element (ASCE) to identify
affected TLB entries. The upgrade of a page table adds a new top
level page table which changes the ASCE. The TLB entries associated
with the old ASCE need to be flushed and the ASCE for the address space
needs to be replaced synchronously on all CPUs which currently use it.
The concept of a lazy ASCE update with an exception handler is broken.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
arch/s390/include/asm/processor.h
arch/s390/kernel/entry.h
arch/s390/kernel/pgm_check.S
arch/s390/mm/fault.c
arch/s390/mm/mmap.c
arch/s390/mm/pgtable.c

index a56e63483e0f73bc36b8493c1a34b5cd1d3f5579..0a876bc543d36f3c121a39f4acf1a351233c1667 100644 (file)
@@ -144,9 +144,7 @@ struct stack_frame {
        regs->psw.mask  = PSW_USER_BITS | PSW_MASK_BA;                  \
        regs->psw.addr  = new_psw | PSW_ADDR_AMODE;                     \
        regs->gprs[15]  = new_stackp;                                   \
-       __tlb_flush_mm(current->mm);                                    \
        crst_table_downgrade(current->mm, 1UL << 31);                   \
-       update_mm(current->mm, current);                                \
        execve_tail();                                                  \
 } while (0)
 
index e9b04c33d38306781f1c5d3ead2c5a3ba3b0d009..cb533f78c09ed5795ee365eec00374c5db662a9c 100644 (file)
@@ -23,7 +23,6 @@ asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
 
 void do_protection_exception(struct pt_regs *regs);
 void do_dat_exception(struct pt_regs *regs);
-void do_asce_exception(struct pt_regs *regs);
 
 void addressing_exception(struct pt_regs *regs);
 void data_exception(struct pt_regs *regs);
index 14bdecb619238b6994f890e9263a10cd515189f1..4a460c44e17ec15763da0cf24207ed048fa929f9 100644 (file)
@@ -78,7 +78,7 @@ PGM_CHECK_DEFAULT                     /* 34 */
 PGM_CHECK_DEFAULT                      /* 35 */
 PGM_CHECK_DEFAULT                      /* 36 */
 PGM_CHECK_DEFAULT                      /* 37 */
-PGM_CHECK_64BIT(do_asce_exception)     /* 38 */
+PGM_CHECK_DEFAULT                      /* 38 */
 PGM_CHECK_64BIT(do_dat_exception)      /* 39 */
 PGM_CHECK_64BIT(do_dat_exception)      /* 3a */
 PGM_CHECK_64BIT(do_dat_exception)      /* 3b */
index 8f29762671cf171dc6a79d64f718e4b02d247cb9..d95265b2719f8e7fb0b15305b49a52130069aebd 100644 (file)
@@ -423,43 +423,6 @@ void __kprobes do_dat_exception(struct pt_regs *regs)
                do_fault_error(regs, fault);
 }
 
-#ifdef CONFIG_64BIT
-void __kprobes do_asce_exception(struct pt_regs *regs)
-{
-       struct mm_struct *mm = current->mm;
-       struct vm_area_struct *vma;
-       unsigned long trans_exc_code;
-
-       /*
-        * The instruction that caused the program check has
-        * been nullified. Don't signal single step via SIGTRAP.
-        */
-       clear_tsk_thread_flag(current, TIF_PER_TRAP);
-
-       trans_exc_code = regs->int_parm_long;
-       if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
-               goto no_context;
-
-       down_read(&mm->mmap_sem);
-       vma = find_vma(mm, trans_exc_code & __FAIL_ADDR_MASK);
-       up_read(&mm->mmap_sem);
-
-       if (vma) {
-               update_mm(mm, current);
-               return;
-       }
-
-       /* User mode accesses just cause a SIGSEGV */
-       if (user_mode(regs)) {
-               do_sigsegv(regs, SEGV_MAPERR);
-               return;
-       }
-
-no_context:
-       do_no_context(regs);
-}
-#endif
-
 int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write)
 {
        struct pt_regs regs;
index 40023290ee5b43dea2cf72b68c97f9495b6c59fd..6bcb045d2bd2f8ab3b1234e13f00be719dc0441e 100644 (file)
@@ -101,18 +101,12 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 
 int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
 {
-       int rc;
-
        if (is_compat_task() || (TASK_SIZE >= (1UL << 53)))
                return 0;
        if (!(flags & MAP_FIXED))
                addr = 0;
-       if ((addr + len) >= TASK_SIZE) {
-               rc = crst_table_upgrade(current->mm, 1UL << 53);
-               if (rc)
-                       return rc;
-               update_mm(current->mm, current);
-       }
+       if ((addr + len) >= TASK_SIZE)
+               return crst_table_upgrade(current->mm, 1UL << 53);
        return 0;
 }
 
@@ -132,7 +126,6 @@ s390_get_unmapped_area(struct file *filp, unsigned long addr,
                rc = crst_table_upgrade(mm, 1UL << 53);
                if (rc)
                        return (unsigned long) rc;
-               update_mm(mm, current);
                area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
        }
        return area;
@@ -155,7 +148,6 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
                rc = crst_table_upgrade(mm, 1UL << 53);
                if (rc)
                        return (unsigned long) rc;
-               update_mm(mm, current);
                area = arch_get_unmapped_area_topdown(filp, addr, len,
                                                      pgoff, flags);
        }
index a9be08899b0c7a1153974cb5146079077811547e..0a2e5e086749c00b98bbcba84446377935fd87df 100644 (file)
@@ -48,12 +48,23 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table)
 }
 
 #ifdef CONFIG_64BIT
+static void __crst_table_upgrade(void *arg)
+{
+       struct mm_struct *mm = arg;
+
+       if (current->active_mm == mm)
+               update_mm(mm, current);
+       __tlb_flush_local();
+}
+
 int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
 {
        unsigned long *table, *pgd;
        unsigned long entry;
+       int flush;
 
        BUG_ON(limit > (1UL << 53));
+       flush = 0;
 repeat:
        table = crst_table_alloc(mm);
        if (!table)
@@ -79,12 +90,15 @@ repeat:
                mm->pgd = (pgd_t *) table;
                mm->task_size = mm->context.asce_limit;
                table = NULL;
+               flush = 1;
        }
        spin_unlock_bh(&mm->page_table_lock);
        if (table)
                crst_table_free(mm, table);
        if (mm->context.asce_limit < limit)
                goto repeat;
+       if (flush)
+               on_each_cpu(__crst_table_upgrade, mm, 0);
        return 0;
 }
 
@@ -92,6 +106,8 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
 {
        pgd_t *pgd;
 
+       if (current->active_mm == mm)
+               __tlb_flush_mm(mm);
        while (mm->context.asce_limit > limit) {
                pgd = mm->pgd;
                switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
@@ -114,6 +130,8 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
                mm->task_size = mm->context.asce_limit;
                crst_table_free(mm, (unsigned long *) pgd);
        }
+       if (current->active_mm == mm)
+               update_mm(mm, current);
 }
 #endif