New locking/refcounting for fs_struct
author: Al Viro <viro@zeniv.linux.org.uk>
Mon, 30 Mar 2009 11:20:30 +0000 (07:20 -0400)
committer: Al Viro <viro@zeniv.linux.org.uk>
Wed, 1 Apr 2009 03:00:26 +0000 (23:00 -0400)
* all changes of current->fs are done under task_lock and write_lock of
  old fs->lock
* refcount is not atomic anymore (same protection)
* its decrements are done when removing reference from current; at the
  same time we decide whether to free it.
* put_fs_struct() is gone
* new field - ->in_exec.  Set by check_unsafe_exec() if we are trying to do
  execve() and only subthreads share fs_struct.  Cleared when finishing exec
  (success and failure alike).  Makes CLONE_FS fail with -EAGAIN if set.
* check_unsafe_exec() may fail with -EAGAIN if another execve() from subthread
  is in progress.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
fs/compat.c
fs/exec.c
fs/fs_struct.c
fs/internal.h
fs/proc/task_nommu.c
include/linux/fs_struct.h
kernel/fork.c

index 55efdfebdf5ae14f5937add03075fdc8e716819d..baabf203b8479a771f7e702bc6f60cc934d7e7e3 100644 (file)
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -51,6 +51,7 @@
 #include <linux/poll.h>
 #include <linux/mm.h>
 #include <linux/eventpoll.h>
+#include <linux/fs_struct.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1441,12 +1442,15 @@ int compat_do_execve(char * filename,
        bprm->cred = prepare_exec_creds();
        if (!bprm->cred)
                goto out_unlock;
-       check_unsafe_exec(bprm);
+
+       retval = check_unsafe_exec(bprm);
+       if (retval)
+               goto out_unlock;
 
        file = open_exec(filename);
        retval = PTR_ERR(file);
        if (IS_ERR(file))
-               goto out_unlock;
+               goto out_unmark;
 
        sched_exec();
 
@@ -1488,6 +1492,9 @@ int compat_do_execve(char * filename,
                goto out;
 
        /* execve succeeded */
+       write_lock(&current->fs->lock);
+       current->fs->in_exec = 0;
+       write_unlock(&current->fs->lock);
        current->in_execve = 0;
        mutex_unlock(&current->cred_exec_mutex);
        acct_update_integrals(current);
@@ -1506,6 +1513,11 @@ out_file:
                fput(bprm->file);
        }
 
+out_unmark:
+       write_lock(&current->fs->lock);
+       current->fs->in_exec = 0;
+       write_unlock(&current->fs->lock);
+
 out_unlock:
        current->in_execve = 0;
        mutex_unlock(&current->cred_exec_mutex);
index c5128fbc9165235832a3851b663eb23a2ab8935d..07a059664b73084c59155ba5fe2ce69487e78210 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1056,16 +1056,18 @@ EXPORT_SYMBOL(install_exec_creds);
  * - the caller must hold current->cred_exec_mutex to protect against
  *   PTRACE_ATTACH
  */
-void check_unsafe_exec(struct linux_binprm *bprm)
+int check_unsafe_exec(struct linux_binprm *bprm)
 {
        struct task_struct *p = current, *t;
        unsigned long flags;
        unsigned n_fs, n_sighand;
+       int res = 0;
 
        bprm->unsafe = tracehook_unsafe_exec(p);
 
        n_fs = 1;
        n_sighand = 1;
+       write_lock(&p->fs->lock);
        lock_task_sighand(p, &flags);
        for (t = next_thread(p); t != p; t = next_thread(t)) {
                if (t->fs == p->fs)
@@ -1073,11 +1075,19 @@ void check_unsafe_exec(struct linux_binprm *bprm)
                n_sighand++;
        }
 
-       if (atomic_read(&p->fs->count) > n_fs ||
-           atomic_read(&p->sighand->count) > n_sighand)
+       if (p->fs->users > n_fs ||
+           atomic_read(&p->sighand->count) > n_sighand) {
                bprm->unsafe |= LSM_UNSAFE_SHARE;
+       } else {
+               if (p->fs->in_exec)
+                       res = -EAGAIN;
+               p->fs->in_exec = 1;
+       }
 
        unlock_task_sighand(p, &flags);
+       write_unlock(&p->fs->lock);
+
+       return res;
 }
 
 /* 
@@ -1296,12 +1306,15 @@ int do_execve(char * filename,
        bprm->cred = prepare_exec_creds();
        if (!bprm->cred)
                goto out_unlock;
-       check_unsafe_exec(bprm);
+
+       retval = check_unsafe_exec(bprm);
+       if (retval)
+               goto out_unlock;
 
        file = open_exec(filename);
        retval = PTR_ERR(file);
        if (IS_ERR(file))
-               goto out_unlock;
+               goto out_unmark;
 
        sched_exec();
 
@@ -1344,6 +1357,9 @@ int do_execve(char * filename,
                goto out;
 
        /* execve succeeded */
+       write_lock(&current->fs->lock);
+       current->fs->in_exec = 0;
+       write_unlock(&current->fs->lock);
        current->in_execve = 0;
        mutex_unlock(&current->cred_exec_mutex);
        acct_update_integrals(current);
@@ -1362,6 +1378,11 @@ out_file:
                fput(bprm->file);
        }
 
+out_unmark:
+       write_lock(&current->fs->lock);
+       current->fs->in_exec = 0;
+       write_unlock(&current->fs->lock);
+
 out_unlock:
        current->in_execve = 0;
        mutex_unlock(&current->cred_exec_mutex);
index 36e0a123bbf30a1116dc9c9cf10708c3687b9250..41cff72b377b450273876cdd03a850789a409560 100644 (file)
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -72,25 +72,27 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
                path_put(old_root);
 }
 
-void put_fs_struct(struct fs_struct *fs)
+void free_fs_struct(struct fs_struct *fs)
 {
-       /* No need to hold fs->lock if we are killing it */
-       if (atomic_dec_and_test(&fs->count)) {
-               path_put(&fs->root);
-               path_put(&fs->pwd);
-               kmem_cache_free(fs_cachep, fs);
-       }
+       path_put(&fs->root);
+       path_put(&fs->pwd);
+       kmem_cache_free(fs_cachep, fs);
 }
 
 void exit_fs(struct task_struct *tsk)
 {
-       struct fs_struct * fs = tsk->fs;
+       struct fs_struct *fs = tsk->fs;
 
        if (fs) {
+               int kill;
                task_lock(tsk);
+               write_lock(&fs->lock);
                tsk->fs = NULL;
+               kill = !--fs->users;
+               write_unlock(&fs->lock);
                task_unlock(tsk);
-               put_fs_struct(fs);
+               if (kill)
+                       free_fs_struct(fs);
        }
 }
 
@@ -99,7 +101,8 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
        struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
        /* We don't need to lock fs - think why ;-) */
        if (fs) {
-               atomic_set(&fs->count, 1);
+               fs->users = 1;
+               fs->in_exec = 0;
                rwlock_init(&fs->lock);
                fs->umask = old->umask;
                read_lock(&old->lock);
@@ -114,28 +117,54 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
 
 int unshare_fs_struct(void)
 {
-       struct fs_struct *fsp = copy_fs_struct(current->fs);
-       if (!fsp)
+       struct fs_struct *fs = current->fs;
+       struct fs_struct *new_fs = copy_fs_struct(fs);
+       int kill;
+
+       if (!new_fs)
                return -ENOMEM;
-       exit_fs(current);
-       current->fs = fsp;
+
+       task_lock(current);
+       write_lock(&fs->lock);
+       kill = !--fs->users;
+       current->fs = new_fs;
+       write_unlock(&fs->lock);
+       task_unlock(current);
+
+       if (kill)
+               free_fs_struct(fs);
+
        return 0;
 }
 EXPORT_SYMBOL_GPL(unshare_fs_struct);
 
 /* to be mentioned only in INIT_TASK */
 struct fs_struct init_fs = {
-       .count          = ATOMIC_INIT(1),
+       .users          = 1,
        .lock           = __RW_LOCK_UNLOCKED(init_fs.lock),
        .umask          = 0022,
 };
 
 void daemonize_fs_struct(void)
 {
-       struct fs_struct *fs;
+       struct fs_struct *fs = current->fs;
+
+       if (fs) {
+               int kill;
+
+               task_lock(current);
 
-       exit_fs(current);       /* current->fs->count--; */
-       fs = &init_fs;
-       current->fs = fs;
-       atomic_inc(&fs->count);
+               write_lock(&init_fs.lock);
+               init_fs.users++;
+               write_unlock(&init_fs.lock);
+
+               write_lock(&fs->lock);
+               current->fs = &init_fs;
+               kill = !--fs->users;
+               write_unlock(&fs->lock);
+
+               task_unlock(current);
+               if (kill)
+                       free_fs_struct(fs);
+       }
 }
index 477a105f8df370e7e3d0cd2f1f7ecf8757bd9491..b4dac4fb6b61fbff06d9357d9c75415bbab0a3ac 100644 (file)
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -44,7 +44,7 @@ extern void __init chrdev_init(void);
 /*
  * exec.c
  */
-extern void check_unsafe_exec(struct linux_binprm *);
+extern int check_unsafe_exec(struct linux_binprm *);
 
 /*
  * namespace.c
index 343ea1216bc8e05ddc84e75af47cfbab73fee529..6ca01052c5bc265540e62f148c4c043ff9258fac 100644 (file)
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -49,7 +49,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
        else
                bytes += kobjsize(mm);
        
-       if (current->fs && atomic_read(&current->fs->count) > 1)
+       if (current->fs && current->fs->users > 1)
                sbytes += kobjsize(current->fs);
        else
                bytes += kobjsize(current->fs);
index 298cef1c079331dda1469752e803b19f6f89423e..78a05bfcd8ebd6075b2add0da9da064804e645d6 100644 (file)
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -4,12 +4,10 @@
 #include <linux/path.h>
 
 struct fs_struct {
-       atomic_t count; /* This usage count is used by check_unsafe_exec() for
-                        * security checking purposes - therefore it may not be
-                        * incremented, except by clone(CLONE_FS).
-                        */
+       int users;
        rwlock_t lock;
        int umask;
+       int in_exec;
        struct path root, pwd;
 };
 
@@ -19,7 +17,7 @@ extern void exit_fs(struct task_struct *);
 extern void set_fs_root(struct fs_struct *, struct path *);
 extern void set_fs_pwd(struct fs_struct *, struct path *);
 extern struct fs_struct *copy_fs_struct(struct fs_struct *);
-extern void put_fs_struct(struct fs_struct *);
+extern void free_fs_struct(struct fs_struct *);
 extern void daemonize_fs_struct(void);
 extern int unshare_fs_struct(void);
 
index 05c02dc586b11ce0290220b22d3457057bbfde11..51f138a131defc3586195ff63b5589d1989ebd12 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -683,11 +683,19 @@ fail_nomem:
 
 static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
 {
+       struct fs_struct *fs = current->fs;
        if (clone_flags & CLONE_FS) {
-               atomic_inc(&current->fs->count);
+               /* tsk->fs is already what we want */
+               write_lock(&fs->lock);
+               if (fs->in_exec) {
+                       write_unlock(&fs->lock);
+                       return -EAGAIN;
+               }
+               fs->users++;
+               write_unlock(&fs->lock);
                return 0;
        }
-       tsk->fs = copy_fs_struct(current->fs);
+       tsk->fs = copy_fs_struct(fs);
        if (!tsk->fs)
                return -ENOMEM;
        return 0;
@@ -1518,12 +1526,16 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
 {
        struct fs_struct *fs = current->fs;
 
-       if ((unshare_flags & CLONE_FS) &&
-           (fs && atomic_read(&fs->count) > 1)) {
-               *new_fsp = copy_fs_struct(current->fs);
-               if (!*new_fsp)
-                       return -ENOMEM;
-       }
+       if (!(unshare_flags & CLONE_FS) || !fs)
+               return 0;
+
+       /* don't need lock here; in the worst case we'll do useless copy */
+       if (fs->users == 1)
+               return 0;
+
+       *new_fsp = copy_fs_struct(fs);
+       if (!*new_fsp)
+               return -ENOMEM;
 
        return 0;
 }
@@ -1639,8 +1651,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 
                if (new_fs) {
                        fs = current->fs;
+                       write_lock(&fs->lock);
                        current->fs = new_fs;
-                       new_fs = fs;
+                       if (--fs->users)
+                               new_fs = NULL;
+                       else
+                               new_fs = fs;
+                       write_unlock(&fs->lock);
                }
 
                if (new_mm) {
@@ -1679,7 +1696,7 @@ bad_unshare_cleanup_sigh:
 
 bad_unshare_cleanup_fs:
        if (new_fs)
-               put_fs_struct(new_fs);
+               free_fs_struct(new_fs);
 
 bad_unshare_cleanup_thread:
 bad_unshare_out: