s390/uaccess: fix page table walk
author    Heiko Carstens <heiko.carstens@de.ibm.com>
          Thu, 21 Mar 2013 11:50:39 +0000 (12:50 +0100)
committer Martin Schwidefsky <schwidefsky@de.ibm.com>
          Tue, 2 Apr 2013 06:53:08 +0000 (08:53 +0200)
When translating user space addresses to kernel addresses, the follow_table()
function had two bugs:

- PROT_NONE mappings could be read via the kernel mapping. For example,
  putting a filename into a user page, protecting the page with PROT_NONE,
  and afterwards issuing the "open" syscall with a pointer to the filename
  would incorrectly succeed (see the reproducer sketch after this list).

- when walking the page tables, the pgd/pud/pmd/pte primitives were used,
  which with dynamic page tables give no indication of which real level of
  page tables is being walked (region2, region3, segment or page table). So
  in case of an exception the translation exception code passed to
  __handle_fault() is not necessarily correct.
  This is not a real problem, since __handle_fault() doesn't evaluate the
  code. Only in case of e.g. a SIGBUS does this code get passed to user
  space. Whether user space can do anything sane with the value is a
  different question though.
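
A minimal user space reproducer for the first bug could look like the sketch
below. The file name and page size are arbitrary, error handling is omitted,
and the bad behavior only shows up when the kernel goes through the software
page table walk in uaccess_pt.c:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>

    int main(void)
    {
            /* Put a file name into a fresh anonymous page... */
            char *page = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            strcpy(page, "/etc/hostname");  /* any readable file */

            /* ...then revoke all access to the page. */
            mprotect(page, 4096, PROT_NONE);

            /*
             * The kernel must not read the file name either, so open()
             * has to fail with EFAULT. Before the fix it succeeded,
             * because the page was read via the kernel mapping.
             */
            int fd = open(page, O_RDONLY);
            printf("open() = %d (%s)\n", fd,
                   fd < 0 ? "correct, fault" : "bug, access succeeded");
            return 0;
    }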

To fix these issues don't use any of the Linux page table primitives.
Instead, walk the page tables the same way the hardware would, although
quite a few checks are omitted, since we know that we only have full-size
page tables and each index is within bounds.

In theory this should fix all issues...
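
The shape of the new walk is easiest to see in isolation: dispatch once on
the topmost table level (named by the ASCE type in the real code) and then
deliberately fall through every lower level until the page table is reached.
The following self-contained sketch condenses this to three levels with
made-up masks and an explicit invalid bit; it only mimics the structure of
follow_table(), it is not the kernel code:

    #include <stdint.h>
    #include <stdio.h>

    #define INV 1UL                         /* fake "entry invalid" bit  */

    static uint64_t pg_table[256];          /* lowest level: page table  */
    static uint64_t seg_table[2048];        /* middle level: segment     */
    static uint64_t reg3_table[2048];       /* topmost level: region 3   */

    enum level { SEGMENT, REGION3 };

    /*
     * On an invalid entry return the (negative) per-level exception
     * code, as the patch does with -0x39/-0x3a/-0x3b (region 1/2/3),
     * -0x10 (segment) and -0x11 (page).
     */
    static long walk(enum level top, uint64_t *table, uint64_t addr)
    {
            switch (top) {
            case REGION3:
                    table += (addr >> 31) & 0x7ff;
                    if (*table & INV)
                            return -0x3b;
                    table = (uint64_t *)(uintptr_t)(*table & ~INV);
                    /* fall through */
            case SEGMENT:
                    table += (addr >> 20) & 0x7ff;
                    if (*table & INV)
                            return -0x10;
                    table = (uint64_t *)(uintptr_t)(*table & ~INV);
            }
            table += (addr >> 12) & 0xff;
            if (*table & INV)
                    return -0x11;
            return (long)((*table & ~0xfffUL) + (addr & 0xfffUL));
    }

    int main(void)
    {
            uint64_t addr = (3UL << 20) | (5UL << 12) | 0x123;

            /* Wire segment slot 3 to the page table, pte 5 to frame
             * 0x9000, and leave the region 3 table invalid. */
            seg_table[3] = (uint64_t)(uintptr_t)pg_table;
            pg_table[5] = 0x9000;
            reg3_table[0] = INV;

            printf("pa = 0x%lx\n",
                   (unsigned long)walk(SEGMENT, seg_table, addr)); /* 0x9123 */
            printf("rc = %ld\n", walk(REGION3, reg3_table, addr)); /* -0x3b  */
            return 0;
    }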

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
arch/s390/include/asm/pgtable.h
arch/s390/lib/uaccess_pt.c

index 4a2930844d43a078492732cf156d392ac10fc29e..1686d8f0f878ba306cb181599a8019bcf081a9a1 100644 (file)
@@ -344,6 +344,7 @@ extern unsigned long MODULES_END;
 #define _REGION3_ENTRY_CO      0x100   /* change-recording override        */
 
 /* Bits in the segment table entry */
+#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address       */
 #define _SEGMENT_ENTRY_ORIGIN  ~0x7ffUL/* segment table origin             */
 #define _SEGMENT_ENTRY_RO      0x200   /* page protection bit              */
 #define _SEGMENT_ENTRY_INV     0x20    /* invalid segment table entry      */
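
The new _SEGMENT_ENTRY_ORIGIN_LARGE mask keeps everything above the 1 MB
in-page offset: for a 1 MB large page the low 20 bits of the address are the
byte offset, and the masked segment table entry is the page frame origin.
A hedged illustration of how the mask is used (the function name is made up,
this is not kernel code):

    #define SEG_ORIGIN_LARGE (~0xfffffUL)   /* same value as the new define */

    /* Translate an address through a valid 1 MB large segment entry:
     * origin from the entry, low 20 bits straight from the address. */
    static unsigned long large_page_pa(unsigned long entry, unsigned long addr)
    {
            return (entry & SEG_ORIGIN_LARGE) + (addr & ~SEG_ORIGIN_LARGE);
    }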
index 6771fdd893772088a8c5cd664f1e5ce3eaa49ad6..466fb3383960442b7624e51990536d2cf0b8c65d 100644 (file)
@@ -77,42 +77,69 @@ static size_t copy_in_kernel(size_t count, void __user *to,
  * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occurred and the address
  * contains the (negative) exception code.
  */
-static __always_inline unsigned long follow_table(struct mm_struct *mm,
-                                                 unsigned long addr, int write)
+#ifdef CONFIG_64BIT
+static unsigned long follow_table(struct mm_struct *mm,
+                                 unsigned long address, int write)
 {
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *ptep;
+       unsigned long *table = (unsigned long *)__pa(mm->pgd);
+
+       switch (mm->context.asce_bits & _ASCE_TYPE_MASK) {
+       case _ASCE_TYPE_REGION1:
+               table = table + ((address >> 53) & 0x7ff);
+               if (unlikely(*table & _REGION_ENTRY_INV))
+                       return -0x39UL;
+               table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+       case _ASCE_TYPE_REGION2:
+               table = table + ((address >> 42) & 0x7ff);
+               if (unlikely(*table & _REGION_ENTRY_INV))
+                       return -0x3aUL;
+               table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+       case _ASCE_TYPE_REGION3:
+               table = table + ((address >> 31) & 0x7ff);
+               if (unlikely(*table & _REGION_ENTRY_INV))
+                       return -0x3bUL;
+               table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+       case _ASCE_TYPE_SEGMENT:
+               table = table + ((address >> 20) & 0x7ff);
+               if (unlikely(*table & _SEGMENT_ENTRY_INV))
+                       return -0x10UL;
+               if (unlikely(*table & _SEGMENT_ENTRY_LARGE)) {
+                       if (write && (*table & _SEGMENT_ENTRY_RO))
+                               return -0x04UL;
+                       return (*table & _SEGMENT_ENTRY_ORIGIN_LARGE) +
+                               (address & ~_SEGMENT_ENTRY_ORIGIN_LARGE);
+               }
+               table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+       }
+       table = table + ((address >> 12) & 0xff);
+       if (unlikely(*table & _PAGE_INVALID))
+               return -0x11UL;
+       if (write && (*table & _PAGE_RO))
+               return -0x04UL;
+       return (*table & PAGE_MASK) + (address & ~PAGE_MASK);
+}
 
-       pgd = pgd_offset(mm, addr);
-       if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
-               return -0x3aUL;
+#else /* CONFIG_64BIT */
 
-       pud = pud_offset(pgd, addr);
-       if (pud_none(*pud) || unlikely(pud_bad(*pud)))
-               return -0x3bUL;
+static unsigned long follow_table(struct mm_struct *mm,
+                                 unsigned long address, int write)
+{
+       unsigned long *table = (unsigned long *)__pa(mm->pgd);
 
-       pmd = pmd_offset(pud, addr);
-       if (pmd_none(*pmd))
+       table = table + ((address >> 20) & 0x7ff);
+       if (unlikely(*table & _SEGMENT_ENTRY_INV))
                return -0x10UL;
-       if (pmd_large(*pmd)) {
-               if (write && (pmd_val(*pmd) & _SEGMENT_ENTRY_RO))
-                       return -0x04UL;
-               return (pmd_val(*pmd) & HPAGE_MASK) + (addr & ~HPAGE_MASK);
-       }
-       if (unlikely(pmd_bad(*pmd)))
-               return -0x10UL;
-
-       ptep = pte_offset_map(pmd, addr);
-       if (!pte_present(*ptep))
+       table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+       table = table + ((address >> 12) & 0xff);
+       if (unlikely(*table & _PAGE_INVALID))
                return -0x11UL;
-       if (write && (!pte_write(*ptep) || !pte_dirty(*ptep)))
+       if (write && (*table & _PAGE_RO))
                return -0x04UL;
-
-       return (pte_val(*ptep) & PAGE_MASK) + (addr & ~PAGE_MASK);
+       return (*table & PAGE_MASK) + (address & ~PAGE_MASK);
 }
 
+#endif /* CONFIG_64BIT */
+
 static __always_inline size_t __user_copy_pt(unsigned long uaddr, void *kptr,
                                             size_t n, int write_user)
 {