From 7c60c48f58a78195acc1f71c9a9d01958c02ab89 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 21 Jan 2012 13:34:05 -0800 Subject: [PATCH] sysctl: Improve the sysctl sanity checks - Stop validating subdirectories now that we only register leaf tables - Cleanup and improve the duplicate filename check. * Run the duplicate filename check under the sysctl_lock to guarantee we never add duplicate names. * Reduce the duplicate filename check to nearly O(M*N) where M is the number of entries in tthe table we are registering and N is the number of entries in the directory before we got there. - Move the duplicate filename check into it's own function and call it directtly from __register_sysctl_table - Kill the config option as the sanity checks are now cheap enough the config option is unnecessary. The original reason for the config option was because we had a huge table used to verify the proc filename to binary sysctl mapping. That table has now evolved into the binary_sysctl translation layer and is no longer part of the sysctl_check code. - Tighten up the permission checks. Guarnateeing that files only have read or write permissions. - Removed redudant check for parents having a procname as now everything has a procname. - Generalize the backtrace logic so that we print a backtrace from any failure of __register_sysctl_table that was not caused by a memmory allocation failure. The backtrace allows us to track down who erroneously registered a sysctl table. Bechmark before (CONFIG_SYSCTL_CHECK=y): make-dummies 0 999 -> 12s rmmod dummy -> 0.08s Bechmark before (CONFIG_SYSCTL_CHECK=n): make-dummies 0 999 -> 0.7s rmmod dummy -> 0.06s make-dummies 0 99999 -> 1m13s rmmod dummy -> 0.38s Benchmark after: make-dummies 0 999 -> 0.65s rmmod dummy -> 0.055s make-dummies 0 9999 -> 1m10s rmmod dummy -> 0.39s The sysctl sanity checks now impose no measurable cost. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 222 ++++++++++++++++-------------------------- lib/Kconfig.debug | 8 -- 2 files changed, 86 insertions(+), 144 deletions(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 6bab2ae9e395..a492ff60e071 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -726,160 +726,106 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) } } -#ifdef CONFIG_SYSCTL_SYSCALL_CHECK -static int sysctl_depth(struct ctl_table *table) +static int sysctl_check_table_dups(const char *path, struct ctl_table *old, + struct ctl_table *table) { - struct ctl_table *tmp; - int depth; - - depth = 0; - for (tmp = table; tmp->parent; tmp = tmp->parent) - depth++; + struct ctl_table *entry, *test; + int error = 0; - return depth; + for (entry = old; entry->procname; entry++) { + for (test = table; test->procname; test++) { + if (strcmp(entry->procname, test->procname) == 0) { + printk(KERN_ERR "sysctl duplicate entry: %s/%s\n", + path, test->procname); + error = -EEXIST; + } + } + } + return error; } -static struct ctl_table *sysctl_parent(struct ctl_table *table, int n) +static int sysctl_check_dups(struct nsproxy *namespaces, + struct ctl_table_header *header, + const char *path, struct ctl_table *table) { - int i; + struct ctl_table_root *root; + struct ctl_table_set *set; + struct ctl_table_header *dir_head, *head; + struct ctl_table *dir_table; + int error = 0; - for (i = 0; table && i < n; i++) - table = table->parent; + /* No dups if we are the only member of our directory */ + if (header->attached_by != table) + return 0; - return table; -} + dir_head = header->parent; + dir_table = header->attached_to; + error = sysctl_check_table_dups(path, dir_table, table); -static void sysctl_print_path(struct ctl_table *table) -{ - struct ctl_table *tmp; - int depth, i; - depth = sysctl_depth(table); - if (table->procname) { - for (i = depth; i >= 0; i--) { - tmp = sysctl_parent(table, i); - printk("/%s", tmp->procname?tmp->procname:""); - } - } - printk(" "); -} + root = &sysctl_table_root; + do { + set = lookup_header_set(root, namespaces); -static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces, - struct ctl_table *table) -{ - struct ctl_table_header *head; - struct ctl_table *ref, *test; - int depth, cur_depth; - - depth = sysctl_depth(table); - - for (head = __sysctl_head_next(namespaces, NULL); head; - head = __sysctl_head_next(namespaces, head)) { - cur_depth = depth; - ref = head->ctl_table; -repeat: - test = sysctl_parent(table, cur_depth); - for (; ref->procname; ref++) { - int match = 0; - if (cur_depth && !ref->child) + list_for_each_entry(head, &set->list, ctl_entry) { + if (head->unregistering) continue; - - if (test->procname && ref->procname && - (strcmp(test->procname, ref->procname) == 0)) - match++; - - if (match) { - if (cur_depth != 0) { - cur_depth--; - ref = ref->child; - goto repeat; - } - goto out; - } + if (head->attached_to != dir_table) + continue; + error = sysctl_check_table_dups(path, head->attached_by, + table); } - } - ref = NULL; -out: - sysctl_head_finish(head); - return ref; + root = list_entry(root->root_list.next, + struct ctl_table_root, root_list); + } while (root != &sysctl_table_root); + return error; } -static void set_fail(const char **fail, struct ctl_table *table, const char *str) +static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) { - if (*fail) { - printk(KERN_ERR "sysctl table check failed: "); - sysctl_print_path(table); - printk(" %s\n", *fail); - dump_stack(); - } - *fail = str; -} + struct va_format vaf; + va_list args; -static void sysctl_check_leaf(struct nsproxy *namespaces, - struct ctl_table *table, const char **fail) -{ - struct ctl_table *ref; + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n", + path, table->procname, &vaf); - ref = sysctl_check_lookup(namespaces, table); - if (ref && (ref != table)) - set_fail(fail, table, "Sysctl already exists"); + va_end(args); + return -EINVAL; } -static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) +static int sysctl_check_table(const char *path, struct ctl_table *table) { - int error = 0; + int err = 0; for (; table->procname; table++) { - const char *fail = NULL; - - if (table->parent) { - if (!table->parent->procname) - set_fail(&fail, table, "Parent without procname"); - } - if (table->child) { - if (table->data) - set_fail(&fail, table, "Directory with data?"); - if (table->maxlen) - set_fail(&fail, table, "Directory with maxlen?"); - if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode) - set_fail(&fail, table, "Writable sysctl directory"); - if (table->proc_handler) - set_fail(&fail, table, "Directory with proc_handler"); - if (table->extra1) - set_fail(&fail, table, "Directory with extra1"); - if (table->extra2) - set_fail(&fail, table, "Directory with extra2"); - } else { - if ((table->proc_handler == proc_dostring) || - (table->proc_handler == proc_dointvec) || - (table->proc_handler == proc_dointvec_minmax) || - (table->proc_handler == proc_dointvec_jiffies) || - (table->proc_handler == proc_dointvec_userhz_jiffies) || - (table->proc_handler == proc_dointvec_ms_jiffies) || - (table->proc_handler == proc_doulongvec_minmax) || - (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { - if (!table->data) - set_fail(&fail, table, "No data"); - if (!table->maxlen) - set_fail(&fail, table, "No maxlen"); - } -#ifdef CONFIG_PROC_SYSCTL - if (!table->proc_handler) - set_fail(&fail, table, "No proc_handler"); -#endif - sysctl_check_leaf(namespaces, table, &fail); - } - if (table->mode > 0777) - set_fail(&fail, table, "bogus .mode"); - if (fail) { - set_fail(&fail, table, NULL); - error = -EINVAL; - } if (table->child) - error |= sysctl_check_table(namespaces, table->child); + err = sysctl_err(path, table, "Not a file"); + + if ((table->proc_handler == proc_dostring) || + (table->proc_handler == proc_dointvec) || + (table->proc_handler == proc_dointvec_minmax) || + (table->proc_handler == proc_dointvec_jiffies) || + (table->proc_handler == proc_dointvec_userhz_jiffies) || + (table->proc_handler == proc_dointvec_ms_jiffies) || + (table->proc_handler == proc_doulongvec_minmax) || + (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { + if (!table->data) + err = sysctl_err(path, table, "No data"); + if (!table->maxlen) + err = sysctl_err(path, table, "No maxlen"); + } + if (!table->proc_handler) + err = sysctl_err(path, table, "No proc_handler"); + + if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode) + err = sysctl_err(path, table, "bogus .mode 0%o", + table->mode); } - return error; + return err; } -#endif /* CONFIG_SYSCTL_SYSCALL_CHECK */ /** * __register_sysctl_table - register a leaf sysctl table @@ -1003,12 +949,8 @@ struct ctl_table_header *__register_sysctl_table( header->root = root; sysctl_set_parent(NULL, header->ctl_table); header->count = 1; -#ifdef CONFIG_SYSCTL_SYSCALL_CHECK - if (sysctl_check_table(namespaces, header->ctl_table)) { - kfree(header); - return NULL; - } -#endif + if (sysctl_check_table(path, table)) + goto fail; spin_lock(&sysctl_lock); header->set = lookup_header_set(root, namespaces); header->attached_by = header->ctl_table; @@ -1029,11 +971,19 @@ struct ctl_table_header *__register_sysctl_table( struct ctl_table_root, root_list); set = lookup_header_set(root, namespaces); } + if (sysctl_check_dups(namespaces, header, path, table)) + goto fail_locked; header->parent->count++; list_add_tail(&header->ctl_entry, &header->set->list); spin_unlock(&sysctl_lock); return header; +fail_locked: + spin_unlock(&sysctl_lock); +fail: + kfree(header); + dump_stack(); + return NULL; } static char *append_path(const char *path, char *pos, const char *name) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8745ac7d1f75..943a6182cdf2 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1113,14 +1113,6 @@ config LATENCYTOP Enable this option if you want to use the LatencyTOP tool to find out which userspace is blocking on what kernel operations. -config SYSCTL_SYSCALL_CHECK - bool "Sysctl checks" - depends on SYSCTL - ---help--- - sys_sysctl uses binary paths that have been found challenging - to properly maintain and use. This enables checks that help - you to keep things correct. - source mm/Kconfig.debug source kernel/trace/Kconfig -- 2.20.1