userns: Better restrictions on when proc and sysfs can be mounted
author: Eric W. Biederman <ebiederm@xmission.com>
Sun, 31 Mar 2013 02:57:41 +0000 (19:57 -0700)
committer: Eric W. Biederman <ebiederm@xmission.com>
Tue, 27 Aug 2013 02:17:03 +0000 (19:17 -0700)
Rely on the fact that another flavor of the filesystem is already
mounted and do not rely on state in the user namespace.

Verify that the mounted filesystem is not covered in any significant
way.  I would love to verify that the previously mounted filesystem
has no mounts on top, but there are at least the directories
/proc/sys/fs/binfmt_misc and /sys/fs/cgroup/ that exist explicitly
for other filesystems to mount on top of.  So child mounts are
tolerated only when they cover empty directories.

Refactor the test into a function named fs_fully_visible and call that
function from the mount routines of proc and sysfs.  This makes the
test local to the filesystems involved and keeps its result current as
of the time the mount takes place, removing a weird threading of state
through the user namespace, the mount namespace and the filesystems
themselves.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
fs/namespace.c
fs/proc/root.c
fs/sysfs/mount.c
include/linux/fs.h
include/linux/user_namespace.h
kernel/user.c
kernel/user_namespace.c

index 64627f883bf2005c835eaa0fad791ebe566ec4a2..877e4277f4962cd1699a9dddcd2e2cec67982928 100644 (file)
@@ -2867,25 +2867,38 @@ bool current_chrooted(void)
        return chrooted;
 }
 
-void update_mnt_policy(struct user_namespace *userns)
+bool fs_fully_visible(struct file_system_type *type)
 {
        struct mnt_namespace *ns = current->nsproxy->mnt_ns;
        struct mount *mnt;
+       bool visible = false;
 
-       down_read(&namespace_sem);
+       if (unlikely(!ns))
+               return false;
+
+       namespace_lock();
        list_for_each_entry(mnt, &ns->list, mnt_list) {
-               switch (mnt->mnt.mnt_sb->s_magic) {
-               case SYSFS_MAGIC:
-                       userns->may_mount_sysfs = true;
-                       break;
-               case PROC_SUPER_MAGIC:
-                       userns->may_mount_proc = true;
-                       break;
+               struct mount *child;
+               if (mnt->mnt.mnt_sb->s_type != type)
+                       continue;
+
+               /* This mount is not fully visible if there are any child mounts
+                * that cover anything except for empty directories.
+                */
+               list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
+                       struct inode *inode = child->mnt_mountpoint->d_inode;
+                       if (!S_ISDIR(inode->i_mode))
+                               goto next;
+                       if (inode->i_nlink != 2)
+                               goto next;
                }
-               if (userns->may_mount_sysfs && userns->may_mount_proc)
-                       break;
+               visible = true;
+               goto found;
+       next:   ;
        }
-       up_read(&namespace_sem);
+found:
+       namespace_unlock();
+       return visible;
 }
 
 static void *mntns_get(struct task_struct *task)
index 38bd5d423fcd2d1cbc022baa8a9a242a133e8852..45e5fb7da09bdadd24e2805812cedb529c52a9e7 100644 (file)
@@ -110,8 +110,11 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
                ns = task_active_pid_ns(current);
                options = data;
 
-               if (!current_user_ns()->may_mount_proc ||
-                   !ns_capable(ns->user_ns, CAP_SYS_ADMIN))
+               if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
+                       return ERR_PTR(-EPERM);
+
+               /* Does the mounter have privilege over the pid namespace? */
+               if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
                        return ERR_PTR(-EPERM);
        }
 
index afd83273e6cea8112e1570c8835bfa829303b2f8..4a2da3a4b1b11effd60dc54a80ceca5ca0d2a9d6 100644 (file)
@@ -112,7 +112,8 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
        struct super_block *sb;
        int error;
 
-       if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs)
+       if (!(flags & MS_KERNMOUNT) && !capable(CAP_SYS_ADMIN) &&
+           !fs_fully_visible(fs_type))
                return ERR_PTR(-EPERM);
 
        info = kzalloc(sizeof(*info), GFP_KERNEL);
index 981874773e85adfe81c2d24d59daa79aea157114..3050c620f062f92e601a45d146409c8df5294038 100644 (file)
@@ -1897,6 +1897,7 @@ extern int vfs_ustat(dev_t, struct kstatfs *);
 extern int freeze_super(struct super_block *super);
 extern int thaw_super(struct super_block *super);
 extern bool our_mnt(struct vfsmount *mnt);
+extern bool fs_fully_visible(struct file_system_type *);
 
 extern int current_umask(void);
 
index b6b215f13b453091ff2be0fae11b37a5a51c0c84..4ce009324933ebc2b048ebd5f834b0c1bf44e52d 100644 (file)
@@ -26,8 +26,6 @@ struct user_namespace {
        kuid_t                  owner;
        kgid_t                  group;
        unsigned int            proc_inum;
-       bool                    may_mount_sysfs;
-       bool                    may_mount_proc;
 };
 
 extern struct user_namespace init_user_ns;
@@ -84,6 +82,4 @@ static inline void put_user_ns(struct user_namespace *ns)
 
 #endif
 
-void update_mnt_policy(struct user_namespace *userns);
-
 #endif /* _LINUX_USER_H */
index 69b4c3d48cdee20fc94be4d2869e2e330ba0a7b2..5bbb91988e69278f2cd012896db29688c9234476 100644 (file)
@@ -51,8 +51,6 @@ struct user_namespace init_user_ns = {
        .owner = GLOBAL_ROOT_UID,
        .group = GLOBAL_ROOT_GID,
        .proc_inum = PROC_USER_INIT_INO,
-       .may_mount_sysfs = true,
-       .may_mount_proc = true,
 };
 EXPORT_SYMBOL_GPL(init_user_ns);
 
index d8c30db06c5b75456098fe2f1a26785e9a9110ee..d58ad1e7a79469772c7a0e07611cbe3120b3afa6 100644 (file)
@@ -97,8 +97,6 @@ int create_user_ns(struct cred *new)
 
        set_cred_user_ns(new, ns);
 
-       update_mnt_policy(ns);
-
        return 0;
 }