inotify: Convert to using per-namespace limits
author Nikolay Borisov <n.borisov.lkml@gmail.com>
Wed, 14 Dec 2016 13:56:33 +0000 (15:56 +0200)
committer Eric W. Biederman <ebiederm@xmission.com>
Mon, 23 Jan 2017 23:03:07 +0000 (12:03 +1300)
This patch converts inotify to use the newly introduced
per-userns sysctl infrastructure.

Currently the inotify instances/watches are being accounted in the
user_struct structure. This means that in setups where multiple
users in unprivileged containers map to the same underlying
real user (i.e. pointing to the same user_struct) the inotify limits
are going to be shared as well, allowing one user (or application) to exhaust
all the others' limits.

Fix this by switching the inotify sysctls to using the
per-namespace/per-user limits. This will allow the server admin to
set sensible global limits, which can further be tuned inside every
individual user namespace. Additionally, in order to preserve the
sysctl ABI, make the existing inotify instances/watches sysctls
modify the values of the initial user namespace.

Signed-off-by: Nikolay Borisov <n.borisov.lkml@gmail.com>
Acked-by: Jan Kara <jack@suse.cz>
Acked-by: Serge Hallyn <serge@hallyn.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
fs/notify/inotify/inotify.h
fs/notify/inotify/inotify_fsnotify.c
fs/notify/inotify/inotify_user.c
include/linux/fsnotify_backend.h
include/linux/sched.h
include/linux/user_namespace.h
kernel/ucount.c

index a6f5907a3feef4e878e6275efd0dc0a4d7fe083b..7c461fd49c4ccf1f81d87d1eddcacd16c3c3ab86 100644 (file)
@@ -30,3 +30,20 @@ extern int inotify_handle_event(struct fsnotify_group *group,
                                const unsigned char *file_name, u32 cookie);
 
 extern const struct fsnotify_ops inotify_fsnotify_ops;
+
+#ifdef CONFIG_INOTIFY_USER /* inotify wrappers around inc/dec_ucount() */
+static inline void dec_inotify_instances(struct ucounts *ucounts)
+{ /* Instance count down by one; stands in for the old inotify_devs dec. */
+       dec_ucount(ucounts, UCOUNT_INOTIFY_INSTANCES);
+}
+
+static inline struct ucounts *inc_inotify_watches(struct ucounts *ucounts)
+{ /* Charge a watch to @ucounts' ns/uid; inotify_new_watch() maps NULL to -ENOSPC. */
+       return inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_INOTIFY_WATCHES);
+}
+
+static inline void dec_inotify_watches(struct ucounts *ucounts)
+{ /* Watch count down by one; stands in for the old inotify_watches dec. */
+       dec_ucount(ucounts, UCOUNT_INOTIFY_WATCHES);
+}
+#endif /* CONFIG_INOTIFY_USER */
index 19e7ec109a75c91ff71a0fb2a3c7bdad69e883ba..f36c29398de371784feaa230b3b030554dbd2dc2 100644 (file)
@@ -165,10 +165,8 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
        /* ideally the idr is empty and we won't hit the BUG in the callback */
        idr_for_each(&group->inotify_data.idr, idr_callback, group);
        idr_destroy(&group->inotify_data.idr);
-       if (group->inotify_data.user) {
-               atomic_dec(&group->inotify_data.user->inotify_devs);
-               free_uid(group->inotify_data.user);
-       }
+       if (group->inotify_data.ucounts)
+               dec_inotify_instances(group->inotify_data.ucounts);
 }
 
 static void inotify_free_event(struct fsnotify_event *fsn_event)
index 69d1ea3d292a8659f0306d93332bcd0a0e2f07d0..1cf41c623be1d10220fe08e1bf5dde87632dacd7 100644 (file)
 
 #include <asm/ioctls.h>
 
-/* these are configurable via /proc/sys/fs/inotify/ */
-static int inotify_max_user_instances __read_mostly;
+/* configurable via /proc/sys/fs/inotify/ */
 static int inotify_max_queued_events __read_mostly;
-static int inotify_max_user_watches __read_mostly;
 
 static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
 
@@ -60,7 +58,7 @@ static int zero;
 struct ctl_table inotify_table[] = {
        {
                .procname       = "max_user_instances",
-               .data           = &inotify_max_user_instances,
+               .data           = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
@@ -68,7 +66,7 @@ struct ctl_table inotify_table[] = {
        },
        {
                .procname       = "max_user_watches",
-               .data           = &inotify_max_user_watches,
+               .data           = &init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
@@ -500,7 +498,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
        /* remove this mark from the idr */
        inotify_remove_from_idr(group, i_mark);
 
-       atomic_dec(&group->inotify_data.user->inotify_watches);
+       dec_inotify_watches(group->inotify_data.ucounts);
 }
 
 /* ding dong the mark is dead */
@@ -584,14 +582,17 @@ static int inotify_new_watch(struct fsnotify_group *group,
        tmp_i_mark->fsn_mark.mask = mask;
        tmp_i_mark->wd = -1;
 
-       ret = -ENOSPC;
-       if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
-               goto out_err;
-
        ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark);
        if (ret)
                goto out_err;
 
+       /* increment the number of watches the user has */
+       if (!inc_inotify_watches(group->inotify_data.ucounts)) {
+               inotify_remove_from_idr(group, tmp_i_mark);
+               ret = -ENOSPC;
+               goto out_err;
+       }
+
        /* we are on the idr, now get on the inode */
        ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, group, inode,
                                       NULL, 0);
@@ -601,8 +602,6 @@ static int inotify_new_watch(struct fsnotify_group *group,
                goto out_err;
        }
 
-       /* increment the number of watches the user has */
-       atomic_inc(&group->inotify_data.user->inotify_watches);
 
        /* return the watch descriptor for this new mark */
        ret = tmp_i_mark->wd;
@@ -653,10 +652,11 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
 
        spin_lock_init(&group->inotify_data.idr_lock);
        idr_init(&group->inotify_data.idr);
-       group->inotify_data.user = get_current_user();
+       group->inotify_data.ucounts = inc_ucount(current_user_ns(),
+                                                current_euid(),
+                                                UCOUNT_INOTIFY_INSTANCES);
 
-       if (atomic_inc_return(&group->inotify_data.user->inotify_devs) >
-           inotify_max_user_instances) {
+       if (!group->inotify_data.ucounts) {
                fsnotify_destroy_group(group);
                return ERR_PTR(-EMFILE);
        }
@@ -819,8 +819,8 @@ static int __init inotify_user_setup(void)
        inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
 
        inotify_max_queued_events = 16384;
-       inotify_max_user_instances = 128;
-       inotify_max_user_watches = 8192;
+       init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;
+       init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = 8192;
 
        return 0;
 }
index 0cf34d6cc253853c459e60908aa06128b4a8cf45..c8f2738113f4fb08a3ebb622cf6539bdf0751a32 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 #include <linux/atomic.h>
+#include <linux/user_namespace.h>
 
 /*
  * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily
@@ -170,7 +171,7 @@ struct fsnotify_group {
                struct inotify_group_private_data {
                        spinlock_t      idr_lock;
                        struct idr      idr;
-                       struct user_struct      *user;
+                       struct ucounts *ucounts;
                } inotify_data;
 #endif
 #ifdef CONFIG_FANOTIFY
index 4d1905245c7aa50df56acf0f77c77f3347c28c04..d2334229167f48d8a9101570ce55de10506000af 100644 (file)
@@ -868,10 +868,6 @@ struct user_struct {
        atomic_t __count;       /* reference count */
        atomic_t processes;     /* How many processes does this user have? */
        atomic_t sigpending;    /* How many pending signals does this user have? */
-#ifdef CONFIG_INOTIFY_USER
-       atomic_t inotify_watches; /* How many inotify watches does this user have? */
-       atomic_t inotify_devs;  /* How many inotify devs does this user have opened? */
-#endif
 #ifdef CONFIG_FANOTIFY
        atomic_t fanotify_listeners;
 #endif
index eb209d4523f5a8e33d23b66ee658d3bddeb2d8b4..363e0e8082a9d73c4f53b19ced2a9a1e0ecf979b 100644 (file)
@@ -32,6 +32,10 @@ enum ucount_type {
        UCOUNT_NET_NAMESPACES,
        UCOUNT_MNT_NAMESPACES,
        UCOUNT_CGROUP_NAMESPACES,
+#ifdef CONFIG_INOTIFY_USER
+       UCOUNT_INOTIFY_INSTANCES,
+       UCOUNT_INOTIFY_WATCHES,
+#endif
        UCOUNT_COUNTS,
 };
 
index 4bbd38ec37886d3d104e3d37dc80d101ab3767ac..68716403b2611264b6803097ed64a6ab7ab842ff 100644 (file)
@@ -57,7 +57,7 @@ static struct ctl_table_root set_root = {
 
 static int zero = 0;
 static int int_max = INT_MAX;
-#define UCOUNT_ENTRY(name)                             \
+#define UCOUNT_ENTRY(name)                             \
        {                                               \
                .procname       = name,                 \
                .maxlen         = sizeof(int),          \
@@ -74,6 +74,10 @@ static struct ctl_table user_table[] = {
        UCOUNT_ENTRY("max_net_namespaces"),
        UCOUNT_ENTRY("max_mnt_namespaces"),
        UCOUNT_ENTRY("max_cgroup_namespaces"),
+#ifdef CONFIG_INOTIFY_USER
+       UCOUNT_ENTRY("max_inotify_instances"),
+       UCOUNT_ENTRY("max_inotify_watches"),
+#endif
        { }
 };
 #endif /* CONFIG_SYSCTL */