[INET]: local port range robustness
author Stephen Hemminger <shemminger@linux-foundation.org>
Thu, 11 Oct 2007 00:30:46 +0000 (17:30 -0700)
committer David S. Miller <davem@davemloft.net>
Thu, 11 Oct 2007 00:30:46 +0000 (17:30 -0700)
Expansion of original idea from Denis V. Lunev <den@openvz.org>

Add robustness and locking to the local_port_range sysctl.
1. Enforce that low < high when setting.
2. Use seqlock to ensure atomic update.

The locking might seem like overkill, but there are
cases where a sysadmin might want to change the value in the
middle of a DoS attack.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/infiniband/core/cma.c
include/net/ip.h
net/ipv4/inet_connection_sock.c
net/ipv4/inet_hashtables.c
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp_ipv4.c
net/ipv4/udp.c
net/ipv6/inet6_hashtables.c
net/sctp/socket.c
security/selinux/hooks.c

index 9ffb9987450a8ef2ddf294ab32dd300b9621c2cf..2e641b255db48b197ab51bcb49e2dcde94e1aae3 100644 (file)
@@ -1866,13 +1866,14 @@ err1:
 static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
 {
        struct rdma_bind_list *bind_list;
-       int port, ret;
+       int port, ret, low, high;
 
        bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
        if (!bind_list)
                return -ENOMEM;
 
 retry:
+       /* FIXME: add proper port randomization like inet_csk_get_port */
        do {
                ret = idr_get_new_above(ps, bind_list, next_port, &port);
        } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
@@ -1880,18 +1881,19 @@ retry:
        if (ret)
                goto err1;
 
-       if (port > sysctl_local_port_range[1]) {
-               if (next_port != sysctl_local_port_range[0]) {
+       inet_get_local_port_range(&low, &high);
+       if (port > high) {
+               if (next_port != low) {
                        idr_remove(ps, port);
-                       next_port = sysctl_local_port_range[0];
+                       next_port = low;
                        goto retry;
                }
                ret = -EADDRNOTAVAIL;
                goto err2;
        }
 
-       if (port == sysctl_local_port_range[1])
-               next_port = sysctl_local_port_range[0];
+       if (port == high)
+               next_port = low;
        else
                next_port = port + 1;
 
@@ -2769,12 +2771,12 @@ static void cma_remove_one(struct ib_device *device)
 
 static int cma_init(void)
 {
-       int ret;
+       int ret, low, high;
 
        get_random_bytes(&next_port, sizeof next_port);
-       next_port = ((unsigned int) next_port %
-                   (sysctl_local_port_range[1] - sysctl_local_port_range[0])) +
-                   sysctl_local_port_range[0];
+       inet_get_local_port_range(&low, &high);
+       next_port = ((unsigned int) next_port % (high - low)) + low;
+
        cma_wq = create_singlethread_workqueue("rdma_cm");
        if (!cma_wq)
                return -ENOMEM;
index abf2820a1125d73af2a83b8dab7a6e66509c88e5..3af3ed9d320bbdeb1e996dfceb7b8cd10cdcfc21 100644 (file)
@@ -171,7 +171,8 @@ extern unsigned long snmp_fold_field(void *mib[], int offt);
 extern int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign);
 extern void snmp_mib_free(void *ptr[2]);
 
-extern int sysctl_local_port_range[2];
+extern void inet_get_local_port_range(int *low, int *high);
+
 extern int sysctl_ip_default_ttl;
 extern int sysctl_ip_nonlocal_bind;
 
index fbe7714f21d08593beecb3945f2f85848b506be8..3cef12835c4b2bfa86b50a74a788488cd5ac9ea1 100644 (file)
@@ -33,6 +33,19 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
  * This array holds the first and last local port number.
  */
 int sysctl_local_port_range[2] = { 32768, 61000 };
+DEFINE_SEQLOCK(sysctl_port_range_lock);
+
+/*
+ * inet_get_local_port_range - read the current local port range.
+ * @low:  receives sysctl_local_port_range[0]
+ * @high: receives sysctl_local_port_range[1]
+ *
+ * Both values are copied inside a sysctl_port_range_lock read section, and
+ * the copy is repeated for as long as read_seqretry() asks for a retry, so
+ * the caller gets the two bounds from the same update rather than a mix of
+ * an old and a new range.
+ */
+void inet_get_local_port_range(int *low, int *high)
+{
+       unsigned seq;
+       do {
+               seq = read_seqbegin(&sysctl_port_range_lock);
+
+               *low = sysctl_local_port_range[0];
+               *high = sysctl_local_port_range[1];
+       } while (read_seqretry(&sysctl_port_range_lock, seq));
+}
+EXPORT_SYMBOL(inet_get_local_port_range);
 
 int inet_csk_bind_conflict(const struct sock *sk,
                           const struct inet_bind_bucket *tb)
@@ -77,10 +90,11 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
 
        local_bh_disable();
        if (!snum) {
-               int low = sysctl_local_port_range[0];
-               int high = sysctl_local_port_range[1];
-               int remaining = (high - low) + 1;
-               int rover = net_random() % (high - low) + low;
+               int remaining, rover, low, high;
+
+               inet_get_local_port_range(&low, &high);
+               remaining = high - low;
+               rover = net_random() % remaining + low;
 
                do {
                        head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
index fb662621c54ecf65a5812726859553b8e03a42d8..fac6398e436709c714b95992c9045e740454d004 100644 (file)
@@ -279,19 +279,18 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row,
        int ret;
 
        if (!snum) {
-               int low = sysctl_local_port_range[0];
-               int high = sysctl_local_port_range[1];
-               int range = high - low;
-               int i;
-               int port;
+               int i, remaining, low, high, port;
                static u32 hint;
                u32 offset = hint + inet_sk_port_offset(sk);
                struct hlist_node *node;
                struct inet_timewait_sock *tw = NULL;
 
+               inet_get_local_port_range(&low, &high);
+               remaining = high - low;
+
                local_bh_disable();
-               for (i = 1; i <= range; i++) {
-                       port = low + (i + offset) % range;
+               for (i = 1; i <= remaining; i++) {
+                       port = low + (i + offset) % remaining;
                        head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
                        spin_lock(&head->lock);
 
index 53ef0f4bbdaa507137d0a99b7a120fe43bddf1bb..eb286abcf5dc7f600e84c1cb43de53e90962fe50 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/sysctl.h>
 #include <linux/igmp.h>
 #include <linux/inetdevice.h>
+#include <linux/seqlock.h>
 #include <net/snmp.h>
 #include <net/icmp.h>
 #include <net/ip.h>
@@ -89,6 +90,74 @@ static int ipv4_sysctl_forward_strategy(ctl_table *table,
        return 1;
 }
 
+extern seqlock_t sysctl_port_range_lock;
+extern int sysctl_local_port_range[2];
+
+/*
+ * Update system visible IP port range.
+ *
+ * Stores range[0] and range[1] into sysctl_local_port_range while holding
+ * sysctl_port_range_lock for writing, so both bounds change as one unit
+ * for readers that go through the seqlock.
+ * No validation is done here; both callers check range[1] > range[0]
+ * before calling.
+ *
+ * NOTE(review): this uses the plain write_seqlock(); confirm that no
+ * reader can run from softirq context on the CPU that holds the lock.
+ */
+static void set_local_port_range(int range[2])
+{
+       write_seqlock(&sysctl_port_range_lock);
+       sysctl_local_port_range[0] = range[0];
+       sysctl_local_port_range[1] = range[1];
+       write_sequnlock(&sysctl_port_range_lock);
+}
+
+/*
+ * Validate changes from /proc interface.
+ *
+ * proc_dointvec_minmax() is pointed at a private copy of the range through
+ * a temporary ctl_table, so the live values are only replaced (under the
+ * seqlock, via set_local_port_range()) after a successful write whose
+ * result satisfies low < high.
+ *
+ * Returns the proc_dointvec_minmax() result, or -EINVAL when a write
+ * leaves range[1] <= range[0].
+ */
+static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp,
+                                void __user *buffer,
+                                size_t *lenp, loff_t *ppos)
+{
+       int ret;
+       int range[2];
+       ctl_table tmp = {
+               .data = &range,
+               .maxlen = sizeof(range),
+               .mode = table->mode,
+               .extra1 = &ip_local_port_range_min,
+               .extra2 = &ip_local_port_range_max,
+       };
+
+       /*
+        * Seed the copy through the seqlock reader so that a concurrent
+        * update cannot hand us low from one range and high from another.
+        */
+       inet_get_local_port_range(range, range + 1);
+
+       ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos);
+
+       if (write && ret == 0) {
+               if (range[1] <= range[0])
+                       ret = -EINVAL;
+               else
+                       set_local_port_range(range);
+       }
+
+       return ret;
+}
+
+/*
+ * Validate changes from sysctl interface.
+ *
+ * sysctl_intvec() is pointed at a private copy of the range through a
+ * temporary ctl_table.  When it succeeds and a new value was supplied
+ * (newval and newlen both non-zero), the copy is committed under the
+ * seqlock via set_local_port_range(), but only if low < high.
+ *
+ * Returns the sysctl_intvec() result, or -EINVAL when the supplied
+ * range has range[1] <= range[0].
+ */
+static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name,
+                                        int nlen, void __user *oldval,
+                                        size_t __user *oldlenp,
+                                       void __user *newval, size_t newlen)
+{
+       int ret;
+       int range[2];
+       ctl_table tmp = {
+               .data = &range,
+               .maxlen = sizeof(range),
+               .mode = table->mode,
+               .extra1 = &ip_local_port_range_min,
+               .extra2 = &ip_local_port_range_max,
+       };
+
+       /*
+        * Seed the copy through the seqlock reader so that a concurrent
+        * update cannot hand us low from one range and high from another.
+        */
+       inet_get_local_port_range(range, range + 1);
+
+       ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen);
+       if (ret == 0 && newval && newlen) {
+               if (range[1] <= range[0])
+                       ret = -EINVAL;
+               else
+                       set_local_port_range(range);
+       }
+       return ret;
+}
+
+
 static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * filp,
                                       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -427,10 +496,8 @@ ctl_table ipv4_table[] = {
                .data           = &sysctl_local_port_range,
                .maxlen         = sizeof(sysctl_local_port_range),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec_minmax,
-               .strategy       = &sysctl_intvec,
-               .extra1         = ip_local_port_range_min,
-               .extra2         = ip_local_port_range_max
+               .proc_handler   = &ipv4_local_port_range,
+               .strategy       = &ipv4_sysctl_local_port_range,
        },
        {
                .ctl_name       = NET_IPV4_ICMP_ECHO_IGNORE_ALL,
index 8855e640e95845d260f4e572d1ca6d35d4d706aa..38cf73a5673144ed40cab33d07501a972c5299be 100644 (file)
@@ -2470,6 +2470,5 @@ EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
 EXPORT_SYMBOL(tcp_proc_register);
 EXPORT_SYMBOL(tcp_proc_unregister);
 #endif
-EXPORT_SYMBOL(sysctl_local_port_range);
 EXPORT_SYMBOL(sysctl_tcp_low_latency);
 
index ef4d901ee9ad62f8e6a0118d6cb394db125175ad..cb9fc58efb2f1da00ad4789370be1d1ac07f1fcd 100644 (file)
@@ -147,11 +147,11 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
        write_lock_bh(&udp_hash_lock);
 
        if (!snum) {
-               int i;
-               int low = sysctl_local_port_range[0];
-               int high = sysctl_local_port_range[1];
+               int i, low, high;
                unsigned rover, best, best_size_so_far;
 
+               inet_get_local_port_range(&low, &high);
+
                best_size_so_far = UINT_MAX;
                best = rover = net_random() % (high - low) + low;
 
index ae6b0e7eb4885b1722fe6e2a89a5d167b0cdf5ab..1c2c2765543505d7e61dd021fa2fcf2b9482757f 100644 (file)
@@ -254,18 +254,18 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row,
        int ret;
 
        if (snum == 0) {
-               const int low = sysctl_local_port_range[0];
-               const int high = sysctl_local_port_range[1];
-               const int range = high - low;
-               int i, port;
+               int i, port, low, high, remaining;
                static u32 hint;
                const u32 offset = hint + inet6_sk_port_offset(sk);
                struct hlist_node *node;
                struct inet_timewait_sock *tw = NULL;
 
+               inet_get_local_port_range(&low, &high);
+               remaining = high - low;
+
                local_bh_disable();
-               for (i = 1; i <= range; i++) {
-                       port = low + (i + offset) % range;
+               for (i = 1; i <= remaining; i++) {
+                       port = low + (i + offset) % remaining;
                        head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
                        spin_lock(&head->lock);
 
index 7cd58ef84eda9ee129680c76971e36a2843a4c43..9c6a4b5f6264639724c52717d0e84c75129ed747 100644 (file)
@@ -5315,11 +5315,12 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
 
        if (snum == 0) {
                /* Search for an available port. */
-               unsigned int low = sysctl_local_port_range[0];
-               unsigned int high = sysctl_local_port_range[1];
-               unsigned int remaining = (high - low) + 1;
-               unsigned int rover = net_random() % remaining + low;
-               int index;
+               int low, high, remaining, index;
+               unsigned int rover;
+
+               inet_get_local_port_range(&low, &high);
+               remaining = (high - low) + 1;
+               rover = net_random() % remaining + low;
 
                do {
                        rover++;
index 0753b20e23fed48c80f4864b67fbe476de8daced..3c3fff33d1ce9644ef2fdac0d00af18ca4742df4 100644 (file)
@@ -47,7 +47,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <linux/tty.h>
 #include <net/icmp.h>
-#include <net/ip.h>            /* for sysctl_local_port_range[] */
+#include <net/ip.h>            /* for inet_get_local_port_range() */
 #include <net/tcp.h>           /* struct or_callable used in sock_rcv_skb */
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
@@ -3232,8 +3232,6 @@ static int selinux_socket_post_create(struct socket *sock, int family,
 /* Range of port numbers used to automatically bind.
    Need to determine whether we should perform a name_bind
    permission check between the socket and the port number. */
-#define ip_local_port_range_0 sysctl_local_port_range[0]
-#define ip_local_port_range_1 sysctl_local_port_range[1]
 
 static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen)
 {
@@ -3276,20 +3274,27 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
                        addrp = (char *)&addr6->sin6_addr.s6_addr;
                }
 
-               if (snum&&(snum < max(PROT_SOCK,ip_local_port_range_0) ||
-                          snum > ip_local_port_range_1)) {
-                       err = security_port_sid(sk->sk_family, sk->sk_type,
-                                               sk->sk_protocol, snum, &sid);
-                       if (err)
-                               goto out;
-                       AVC_AUDIT_DATA_INIT(&ad,NET);
-                       ad.u.net.sport = htons(snum);
-                       ad.u.net.family = family;
-                       err = avc_has_perm(isec->sid, sid,
-                                          isec->sclass,
-                                          SOCKET__NAME_BIND, &ad);
-                       if (err)
-                               goto out;
+               if (snum) {
+                       int low, high;
+
+                       inet_get_local_port_range(&low, &high);
+
+                       if (snum < max(PROT_SOCK, low) || snum > high) {
+                               err = security_port_sid(sk->sk_family,
+                                                       sk->sk_type,
+                                                       sk->sk_protocol, snum,
+                                                       &sid);
+                               if (err)
+                                       goto out;
+                               AVC_AUDIT_DATA_INIT(&ad,NET);
+                               ad.u.net.sport = htons(snum);
+                               ad.u.net.family = family;
+                               err = avc_has_perm(isec->sid, sid,
+                                                  isec->sclass,
+                                                  SOCKET__NAME_BIND, &ad);
+                               if (err)
+                                       goto out;
+                       }
                }
                
                switch(isec->sclass) {