index ffd7a813ad3d06ee1e1de4e996c72282c24e7039..e3abc8e3d58f7b2a3b0bc823780d61c5d52f3679 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -19,7 +19,7 @@
  * current->executable is only used by the procfs.  This allows a dispatch
  * table to check for several different types  of binary formats.  We keep
  * trying until we recognize the file or we run out of supported binary
- * formats. 
+ * formats.
  */
 
 #include <linux/slab.h>
@@ -196,8 +196,26 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 
        if (write) {
                unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
+               unsigned long ptr_size;
                struct rlimit *rlim;
 
+               /*
+                * Since the stack will hold pointers to the strings, we
+                * must account for them as well.
+                *
+                * The size calculation is the entire vma while each arg page is
+                * built, so each time we get here it's calculating how far it
+                * is currently (rather than each call being just the newly
+                * added size from the arg page).  As a result, we need to
+                * always add the entire size of the pointers, so that on the
+                * last call to get_arg_page() we'll actually have the entire
+                * correct size.
+                */
+               ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
+               if (ptr_size > ULONG_MAX - size)
+                       goto fail;
+               size += ptr_size;
+
                acct_arg_size(bprm, size / PAGE_SIZE);
 
                /*
@@ -215,13 +233,15 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
                 *    to work from.
                 */
                rlim = current->signal->rlim;
-               if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) {
-                       put_page(page);
-                       return NULL;
-               }
+               if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4)
+                       goto fail;
        }
 
        return page;
+
+fail:
+       put_page(page);
+       return NULL;
 }
 
 static void put_arg_page(struct page *page)
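The comment added to get_arg_page() explains why the argv/envp pointer bytes are added to the running stack size on every call, and the new check rejects an addition that would wrap an unsigned long. A minimal userspace sketch of the same overflow-safe pattern (the helper name is illustrative, not kernel API):

        #include <limits.h>

        /* Returns 0 on success, -1 if adding the argv/envp pointer bytes
         * would overflow.  Mirrors the "ptr_size > ULONG_MAX - size" test
         * used in get_arg_page().
         */
        static int account_arg_pointers(unsigned long *size, int argc, int envc)
        {
                unsigned long ptr_size = ((unsigned long)argc + envc) * sizeof(void *);

                if (ptr_size > ULONG_MAX - *size)
                        return -1;      /* sum would wrap around */
                *size += ptr_size;
                return 0;
        }

The test is written as a subtraction from ULONG_MAX rather than checking the sum, so the check itself cannot overflow.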
@@ -607,7 +627,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
                return -ENOMEM;
 
        lru_add_drain();
-       tlb_gather_mmu(&tlb, mm, 0);
+       tlb_gather_mmu(&tlb, mm, old_start, old_end);
        if (new_end > old_start) {
                /*
                 * when the old and new regions overlap clear from new_end.
@@ -624,7 +644,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
                free_pgd_range(&tlb, old_start, old_end, new_end,
                        vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
        }
-       tlb_finish_mmu(&tlb, new_end, old_end);
+       tlb_finish_mmu(&tlb, old_start, old_end);
 
        /*
         * Shrink the vma to just the new range.  Always succeeds.
@@ -654,10 +674,10 @@ int setup_arg_pages(struct linux_binprm *bprm,
        unsigned long rlim_stack;
 
 #ifdef CONFIG_STACK_GROWSUP
-       /* Limit stack size to 1GB */
+       /* Limit stack size */
        stack_base = rlimit_max(RLIMIT_STACK);
-       if (stack_base > (1 << 30))
-               stack_base = 1 << 30;
+       if (stack_base > STACK_SIZE_MAX)
+               stack_base = STACK_SIZE_MAX;
 
        /* Make sure we didn't let the argument array grow too large. */
        if (vma->vm_end - vma->vm_start > stack_base)
@@ -1091,6 +1111,13 @@ int flush_old_exec(struct linux_binprm * bprm)
        flush_thread();
        current->personality &= ~bprm->per_clear;
 
+       /*
+        * We have to apply CLOEXEC before we change whether the process is
+        * dumpable (in setup_new_exec) to avoid a race with a process in userspace
+        * trying to access the should-be-closed file descriptors of a process
+        * undergoing exec(2).
+        */
+       do_close_on_exec(current->files);
        return 0;
 
 out:
@@ -1141,7 +1168,6 @@ void setup_new_exec(struct linux_binprm * bprm)
        current->self_exec_id++;
                        
        flush_signal_handlers(current, 0);
-       do_close_on_exec(current->files);
 }
 EXPORT_SYMBOL(setup_new_exec);
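The comment added in flush_old_exec() gives the reason for moving do_close_on_exec() out of setup_new_exec(): descriptors marked close-on-exec must already be closed before the dumpability change, so another process cannot win a race and reach descriptors that should be gone. From userspace the flag in question is set with fcntl(); a small illustrative helper (not part of this kernel change, error handling trimmed):

        #include <fcntl.h>

        /* Mark an already-open descriptor close-on-exec; the kernel honours
         * this in do_close_on_exec() during execve(), which after this change
         * runs before the dumpability update in setup_new_exec().
         */
        static int set_cloexec(int fd)
        {
                int flags = fcntl(fd, F_GETFD);

                if (flags == -1)
                        return -1;
                return fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
        }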
 
@@ -1265,6 +1291,53 @@ static int check_unsafe_exec(struct linux_binprm *bprm)
        return res;
 }
 
+static void bprm_fill_uid(struct linux_binprm *bprm)
+{
+       struct inode *inode;
+       unsigned int mode;
+       kuid_t uid;
+       kgid_t gid;
+
+       /* clear any previous set[ug]id data from a previous binary */
+       bprm->cred->euid = current_euid();
+       bprm->cred->egid = current_egid();
+
+       if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
+               return;
+
+       if (current->no_new_privs)
+               return;
+
+       inode = file_inode(bprm->file);
+       mode = ACCESS_ONCE(inode->i_mode);
+       if (!(mode & (S_ISUID|S_ISGID)))
+               return;
+
+       /* Be careful if suid/sgid is set */
+       mutex_lock(&inode->i_mutex);
+
+       /* reload atomically mode/uid/gid now that lock held */
+       mode = inode->i_mode;
+       uid = inode->i_uid;
+       gid = inode->i_gid;
+       mutex_unlock(&inode->i_mutex);
+
+       /* We ignore suid/sgid if there are no mappings for them in the ns */
+       if (!kuid_has_mapping(bprm->cred->user_ns, uid) ||
+                !kgid_has_mapping(bprm->cred->user_ns, gid))
+               return;
+
+       if (mode & S_ISUID) {
+               bprm->per_clear |= PER_CLEAR_ON_SETID;
+               bprm->cred->euid = uid;
+       }
+
+       if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
+               bprm->per_clear |= PER_CLEAR_ON_SETID;
+               bprm->cred->egid = gid;
+       }
+}
+
 /* 
  * Fill the binprm structure from the inode. 
  * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
@@ -1273,39 +1346,12 @@ static int check_unsafe_exec(struct linux_binprm *bprm)
  */
 int prepare_binprm(struct linux_binprm *bprm)
 {
-       umode_t mode;
-       struct inode * inode = file_inode(bprm->file);
        int retval;
 
-       mode = inode->i_mode;
        if (bprm->file->f_op == NULL)
                return -EACCES;
 
-       /* clear any previous set[ug]id data from a previous binary */
-       bprm->cred->euid = current_euid();
-       bprm->cred->egid = current_egid();
-
-       if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
-           !current->no_new_privs &&
-           kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
-           kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
-               /* Set-uid? */
-               if (mode & S_ISUID) {
-                       bprm->per_clear |= PER_CLEAR_ON_SETID;
-                       bprm->cred->euid = inode->i_uid;
-               }
-
-               /* Set-gid? */
-               /*
-                * If setgid is set but no group execute bit then this
-                * is a candidate for mandatory locking, not a setgid
-                * executable.
-                */
-               if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
-                       bprm->per_clear |= PER_CLEAR_ON_SETID;
-                       bprm->cred->egid = inode->i_gid;
-               }
-       }
+       bprm_fill_uid(bprm);
 
        /* fill in binprm security blob */
        retval = security_bprm_set_creds(bprm);
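The new bprm_fill_uid() helper re-reads i_mode, i_uid and i_gid under i_mutex once the unlocked check sees a set[ug]id bit, so the credentials are computed from one consistent snapshot of the inode. The mode-bit logic it carries over, including the rule that S_ISGID without group execute marks a mandatory-locking candidate rather than a setgid executable, can be sketched in plain C against userspace mode bits (the helper name is made up for illustration):

        #include <stdbool.h>
        #include <sys/stat.h>

        /* Would executing a file with this mode change credentials?
         * S_ISGID with no group-execute bit is a mandatory-locking
         * candidate, not a setgid executable, so it does not count.
         */
        static bool mode_is_setid_exec(mode_t mode)
        {
                if (mode & S_ISUID)
                        return true;
                return (mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP);
        }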
@@ -1669,6 +1715,12 @@ int __get_dumpable(unsigned long mm_flags)
        return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret;
 }
 
+/*
+ * This returns the actual value of the suid_dumpable flag. For things
+ * that are using this for checking for privilege transitions, it must
+ * test against SUID_DUMP_USER rather than treating it as a boolean
+ * value.
+ */
 int get_dumpable(struct mm_struct *mm)
 {
        return __get_dumpable(mm->flags);
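The new comment on get_dumpable() warns callers checking for privilege transitions to compare against SUID_DUMP_USER rather than treating the return value as a boolean, since SUID_DUMP_ROOT is non-zero yet still denotes a restricted state. A hedged sketch of the intended usage (constants copied here for illustration; in-tree callers use the kernel's own definitions):

        /* Values mirror the kernel's suid_dumpable settings. */
        #define SUID_DUMP_DISABLE   0   /* no setuid dumping */
        #define SUID_DUMP_USER      1   /* dump as user of process */
        #define SUID_DUMP_ROOT      2   /* dump as root */

        /* Correct: anything other than SUID_DUMP_USER marks a privilege
         * transition.  The boolean pattern "if (!dumpable)" would wrongly
         * treat SUID_DUMP_ROOT as unrestricted.
         */
        static int is_priv_transition(int dumpable)
        {
                return dumpable != SUID_DUMP_USER;
        }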