[TCP]: Restrict congestion control choices.
authorStephen Hemminger <shemminger@osdl.org>
Fri, 10 Nov 2006 00:35:15 +0000 (16:35 -0800)
committerDavid S. Miller <davem@sunset.davemloft.net>
Sun, 3 Dec 2006 05:21:49 +0000 (21:21 -0800)
Allow normal users to choose only among a restricted set of congestion
control algorithms.  The default set is reno plus whatever has been
configured as the default, but the administrator can change the policy
at any time.

For example, to allow any choice:
    cp /proc/sys/net/ipv4/tcp_available_congestion_control \
       /proc/sys/net/ipv4/tcp_allowed_congestion_control

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Documentation/networking/ip-sysctl.txt
include/linux/sysctl.h
include/net/tcp.h
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp_cong.c

index db4280856588527ce8324ee115facfd022147436..bbcc8deda172f3acafff9e5fb96b4539cc936266 100644 (file)
@@ -351,6 +351,12 @@ tcp_frto - BOOLEAN
        where packet loss is typically due to random radio interference
        rather than intermediate router congestion.
 
+tcp_allowed_congestion_control - STRING
+       Show/set the congestion control choices available to non-privileged
+       processes. The list is a subset of those listed in
+       tcp_available_congestion_control.
+       Default is "reno" plus the configured default (tcp_congestion_control).
+
 tcp_available_congestion_control - STRING
        Shows the available congestion control choices that are registered.
        More congestion control algorithms may be available as modules,
index 28a48279654de84e8ca56f8c61bb60c2557fd2c6..0725441621d090fcdb1ca9e67c80647956740a26 100644 (file)
@@ -427,6 +427,7 @@ enum
        NET_CIPSOV4_RBM_OPTFMT=120,
        NET_CIPSOV4_RBM_STRICTVALID=121,
        NET_TCP_AVAIL_CONG_CONTROL=122,
+       NET_TCP_ALLOWED_CONG_CONTROL=123,
 };
 
 enum {
index 6af4baf5b7692a6942adbf90510a65c70b43c5c9..e1a5d29d0a1fccfa524bec6f4df1e195bbc30c93 100644 (file)
@@ -625,6 +625,7 @@ enum tcp_ca_event {
 
 struct tcp_congestion_ops {
        struct list_head        list;
+       int     non_restricted;
 
        /* initialize private data (optional) */
        void (*init)(struct sock *sk);
@@ -663,6 +664,8 @@ extern void tcp_cleanup_congestion_control(struct sock *sk);
 extern int tcp_set_default_congestion_control(const char *name);
 extern void tcp_get_default_congestion_control(char *name);
 extern void tcp_get_available_congestion_control(char *buf, size_t len);
+extern void tcp_get_allowed_congestion_control(char *buf, size_t len);
+extern int tcp_set_allowed_congestion_control(char *allowed);
 extern int tcp_set_congestion_control(struct sock *sk, const char *name);
 extern void tcp_slow_start(struct tcp_sock *tp);
 
index 2e770f45d829002edcfadfe7f05eaa53a0cf94dc..dfcf47f10f88b94318a33fa36768d012606afba1 100644 (file)
@@ -146,6 +146,50 @@ static int proc_tcp_available_congestion_control(ctl_table *ctl,
        return ret;
 }
 
+/*
+ * /proc handler for tcp_allowed_congestion_control.
+ *
+ * Fills a temporary buffer with the current allowed list, hands it to
+ * proc_dostring() for the actual read or write, and on a successful write
+ * applies the new list with tcp_set_allowed_congestion_control().
+ *
+ * Returns -ENOMEM if the temporary buffer cannot be allocated, otherwise
+ * the result of proc_dostring() or of applying the new list.
+ */
+static int proc_allowed_congestion_control(ctl_table *ctl,
+                                          int write, struct file * filp,
+                                          void __user *buffer, size_t *lenp,
+                                          loff_t *ppos)
+{
+       ctl_table scratch = { .maxlen = TCP_CA_BUF_MAX };
+       int err;
+
+       scratch.data = kmalloc(scratch.maxlen, GFP_USER);
+       if (scratch.data == NULL)
+               return -ENOMEM;
+
+       /* Seed the buffer so a read reports the current setting. */
+       tcp_get_allowed_congestion_control(scratch.data, scratch.maxlen);
+
+       err = proc_dostring(&scratch, write, filp, buffer, lenp, ppos);
+       if (err == 0 && write)
+               err = tcp_set_allowed_congestion_control(scratch.data);
+
+       kfree(scratch.data);
+       return err;
+}
+
+/*
+ * sysctl(2) strategy handler for tcp_allowed_congestion_control.
+ *
+ * Loads the current allowed list into a temporary buffer, lets
+ * sysctl_string() copy the old value out and/or the new value in, and,
+ * when a new value was supplied, applies it with
+ * tcp_set_allowed_congestion_control().
+ *
+ * Returns -ENOMEM if the temporary buffer cannot be allocated, otherwise
+ * the result of sysctl_string() or of applying the new list.
+ */
+static int strategy_allowed_congestion_control(ctl_table *table, int __user *name,
+                                              int nlen, void __user *oldval,
+                                              size_t __user *oldlenp,
+                                              void __user *newval, size_t newlen,
+                                              void **context)
+{
+       ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
+       int ret;
+
+       tbl.data = kmalloc(tbl.maxlen, GFP_USER);
+       if (!tbl.data)
+               return -ENOMEM;
+
+       /*
+        * Report the allowed list (not the full available list) as the old
+        * value, so this interface agrees with the /proc handler.
+        */
+       tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
+       ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen,
+                           context);
+       if (ret == 0 && newval && newlen)
+               ret = tcp_set_allowed_congestion_control(tbl.data);
+       kfree(tbl.data);
+
+       return ret;
+}
+
 ctl_table ipv4_table[] = {
         {
                .ctl_name       = NET_IPV4_TCP_TIMESTAMPS,
@@ -755,6 +799,14 @@ ctl_table ipv4_table[] = {
                .mode           = 0444,
                .proc_handler   = &proc_tcp_available_congestion_control,
        },
+       {
+               .ctl_name       = NET_TCP_ALLOWED_CONG_CONTROL,
+               .procname       = "tcp_allowed_congestion_control",
+               .maxlen         = TCP_CA_BUF_MAX,
+               .mode           = 0644,
+               .proc_handler   = &proc_allowed_congestion_control,
+               .strategy       = &strategy_allowed_congestion_control,
+       },
        { .ctl_name = 0 }
 };
 
index d846d7b95e1f6cafc893349b098fcfffa3a4104c..343d6197c92e95f0660cfc6853bda4e58435bc7a 100644 (file)
@@ -123,6 +123,7 @@ int tcp_set_default_congestion_control(const char *name)
 #endif
 
        if (ca) {
+               ca->non_restricted = 1; /* default is always allowed */
                list_move(&ca->list, &tcp_cong_list);
                ret = 0;
        }
@@ -168,6 +169,64 @@ void tcp_get_default_congestion_control(char *name)
        rcu_read_unlock();
 }
 
+/*
+ * Build a space-separated list of the non-restricted congestion control
+ * algorithms in buf, which holds maxlen bytes.  Output that does not fit
+ * is truncated.
+ */
+void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
+{
+       struct tcp_congestion_ops *ca;
+       size_t offs = 0;
+
+       *buf = '\0';
+       rcu_read_lock();
+       list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
+               if (!ca->non_restricted)
+                       continue;
+               offs += snprintf(buf + offs, maxlen - offs,
+                                "%s%s",
+                                offs == 0 ? "" : " ", ca->name);
+
+               /*
+                * snprintf() returns the length it wanted to write, so offs
+                * can pass maxlen on truncation; stop before "maxlen - offs"
+                * wraps around.
+                */
+               if (offs >= maxlen)
+                       break;
+       }
+       rcu_read_unlock();
+}
+
+/*
+ * Change list of non-restricted congestion control.
+ *
+ * val is a space-separated list of algorithm names; it is split in place.
+ * Every name is looked up on a private copy first, so a bad entry leaves
+ * the current settings untouched.
+ *
+ * Returns 0 on success, -ENOMEM if the working copy cannot be allocated,
+ * or -ENOENT if tcp_ca_find() does not find one of the names.
+ */
+int tcp_set_allowed_congestion_control(char *val)
+{
+       struct tcp_congestion_ops *ca;
+       char *saved_clone, *clone, *name;
+       int ret = 0;
+
+       /* strsep() advances clone, so keep the original pointer for kfree() */
+       saved_clone = clone = kstrdup(val, GFP_USER);
+       if (!clone)
+               return -ENOMEM;
+
+       spin_lock(&tcp_cong_list_lock);
+       /* pass 1 check for bad entries */
+       while ((name = strsep(&clone, " ")) && *name) {
+               ca = tcp_ca_find(name);
+               if (!ca) {
+                       ret = -ENOENT;
+                       goto out;
+               }
+       }
+
+       /* pass 2 clear */
+       list_for_each_entry_rcu(ca, &tcp_cong_list, list)
+               ca->non_restricted = 0;
+
+       /* pass 3 mark as allowed */
+       while ((name = strsep(&val, " ")) && *name) {
+               ca = tcp_ca_find(name);
+               WARN_ON(!ca);
+               if (ca)
+                       ca->non_restricted = 1;
+       }
+out:
+       spin_unlock(&tcp_cong_list_lock);
+       /* release the working copy on both the success and error paths */
+       kfree(saved_clone);
+
+       return ret;
+}
+
+
 /* Change congestion control for socket */
 int tcp_set_congestion_control(struct sock *sk, const char *name)
 {
@@ -183,6 +242,9 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
        if (!ca)
                err = -ENOENT;
 
+       else if (!(ca->non_restricted || capable(CAP_NET_ADMIN)))
+               err = -EPERM;
+
        else if (!try_module_get(ca->owner))
                err = -EBUSY;
 
@@ -284,6 +346,7 @@ EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
 
 struct tcp_congestion_ops tcp_reno = {
        .name           = "reno",
+       .non_restricted = 1,
        .owner          = THIS_MODULE,
        .ssthresh       = tcp_reno_ssthresh,
        .cong_avoid     = tcp_reno_cong_avoid,