tcp: bool conversions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8bb6adeb62c0eb7dcba14841a1f08375cfde6304..63ddaee7209f164b1f93baedd48639dcd1ad9c9a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -363,6 +363,71 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
        return period;
 }
 
+/* Address-family independent initialization for a tcp_sock.
+ *
+ * NOTE: A lot of things are set to zero explicitly by the call to
+ *       sk_alloc(), so they need not be done here.
+ */
+void tcp_init_sock(struct sock *sk)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       struct tcp_sock *tp = tcp_sk(sk);
+
+       skb_queue_head_init(&tp->out_of_order_queue);
+       tcp_init_xmit_timers(sk);
+       tcp_prequeue_init(tp);
+
+       icsk->icsk_rto = TCP_TIMEOUT_INIT;
+       tp->mdev = TCP_TIMEOUT_INIT;
+
+       /* So many TCP implementations out there (incorrectly) count the
+        * initial SYN frame in their delayed-ACK and congestion control
+        * algorithms that we must have the following bandaid to talk
+        * efficiently to them.  -DaveM
+        */
+       tp->snd_cwnd = TCP_INIT_CWND;
+
+       /* See draft-stevens-tcpca-spec-01 for discussion of the
+        * initialization of these values.
+        */
+       tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
+       tp->snd_cwnd_clamp = ~0;
+       tp->mss_cache = TCP_MSS_DEFAULT;
+
+       tp->reordering = sysctl_tcp_reordering;
+       tcp_enable_early_retrans(tp);
+       icsk->icsk_ca_ops = &tcp_init_congestion_ops;
+
+       sk->sk_state = TCP_CLOSE;
+
+       sk->sk_write_space = sk_stream_write_space;
+       sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+
+       icsk->icsk_sync_mss = tcp_sync_mss;
+
+       /* TCP Cookie Transactions */
+       if (sysctl_tcp_cookie_size > 0) {
+               /* Default, cookies without s_data_payload. */
+               tp->cookie_values =
+                       kzalloc(sizeof(*tp->cookie_values),
+                               sk->sk_allocation);
+               if (tp->cookie_values != NULL)
+                       kref_init(&tp->cookie_values->kref);
+       }
+       /* Presumed zeroed, in order of appearance:
+        *      cookie_in_always, cookie_out_never,
+        *      s_data_constant, s_data_in, s_data_out
+        */
+       sk->sk_sndbuf = sysctl_tcp_wmem[1];
+       sk->sk_rcvbuf = sysctl_tcp_rmem[1];
+
+       local_bh_disable();
+       sock_update_memcg(sk);
+       sk_sockets_allocated_inc(sk);
+       local_bh_enable();
+}
+EXPORT_SYMBOL(tcp_init_sock);
+
 /*
  *     Wait for a TCP event.
  *
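
tcp_init_sock() gathers the address-family independent part of socket setup in one place. The per-family init functions are then expected to shrink to little more than wiring up their own callbacks; the IPv4 caller lives in net/ipv4/tcp_ipv4.c, outside this hunk, and would end up roughly as:

    static int tcp_v4_init_sock(struct sock *sk)
    {
            struct inet_connection_sock *icsk = inet_csk(sk);

            tcp_init_sock(sk);              /* all of the generic setup above */

            icsk->icsk_af_ops = &ipv4_specific;

    #ifdef CONFIG_TCP_MD5SIG
            tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
    #endif

            return 0;
    }
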
@@ -528,7 +593,7 @@ static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
        tp->pushed_seq = tp->write_seq;
 }
 
-static inline int forced_push(const struct tcp_sock *tp)
+static inline bool forced_push(const struct tcp_sock *tp)
 {
        return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
 }
@@ -784,9 +849,10 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
        while (psize > 0) {
                struct sk_buff *skb = tcp_write_queue_tail(sk);
                struct page *page = pages[poffset / PAGE_SIZE];
-               int copy, i, can_coalesce;
+               int copy, i;
                int offset = poffset % PAGE_SIZE;
                int size = min_t(size_t, psize, PAGE_SIZE - offset);
+               bool can_coalesce;
 
                if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
 new_segment:
@@ -919,7 +985,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb;
        int iovlen, flags, err, copied;
-       int mss_now, size_goal;
+       int mss_now = 0, size_goal;
        bool sg;
        long timeo;
 
@@ -933,6 +999,19 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_err;
 
+       if (unlikely(tp->repair)) {
+               if (tp->repair_queue == TCP_RECV_QUEUE) {
+                       copied = tcp_send_rcvq(sk, msg, size);
+                       goto out;
+               }
+
+               err = -EINVAL;
+               if (tp->repair_queue == TCP_NO_QUEUE)
+                       goto out_err;
+
+               /* 'common' sending to sendq */
+       }
+
        /* This should be in poll */
        clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 
@@ -1003,7 +1082,7 @@ new_segment:
                                if (err)
                                        goto do_fault;
                        } else {
-                               int merge = 0;
+                               bool merge = false;
                                int i = skb_shinfo(skb)->nr_frags;
                                struct page *page = sk->sk_sndmsg_page;
                                int off;
@@ -1017,7 +1096,7 @@ new_segment:
                                    off != PAGE_SIZE) {
                                        /* We can extend the last page
                                         * fragment. */
-                                       merge = 1;
+                                       merge = true;
                                } else if (i == MAX_SKB_FRAGS || !sg) {
                                        /* Need to add new fragment and cannot
                                         * do this because interface is non-SG,
@@ -1089,7 +1168,7 @@ new_segment:
                        if ((seglen -= copy) == 0 && iovlen == 0)
                                goto out;
 
-                       if (skb->len < max || (flags & MSG_OOB))
+                       if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
                                continue;
 
                        if (forced_push(tp)) {
@@ -1102,7 +1181,7 @@ new_segment:
 wait_for_sndbuf:
                        set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
-                       if (copied)
+                       if (copied && likely(!tp->repair))
                                tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
 
                        if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
@@ -1113,7 +1192,7 @@ wait_for_memory:
        }
 
 out:
-       if (copied)
+       if (copied && likely(!tp->repair))
                tcp_push(sk, flags, mss_now, tp->nonagle);
        release_sock(sk);
        return copied;
@@ -1187,6 +1266,24 @@ static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags)
        return -EAGAIN;
 }
 
+static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
+{
+       struct sk_buff *skb;
+       int copied = 0, err = 0;
+
+       /* XXX -- need to support SO_PEEK_OFF */
+
+       skb_queue_walk(&sk->sk_write_queue, skb) {
+               err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, skb->len);
+               if (err)
+                       break;
+
+               copied += skb->len;
+       }
+
+       return err ?: copied;
+}
+
 /* Clean up the receive buffer for full frames taken by the user,
  * then send an ACK if necessary.  COPIED is the number of bytes
  * tcp_recvmsg has given to the user so far, it speeds up the
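
tcp_peek_sndq() gives a checkpointing tool read access to data that has been written but not yet acknowledged: with the socket in repair mode and the send queue selected (via the TCP_REPAIR_QUEUE option added later in this patch), a MSG_PEEK recvmsg() walks sk_write_queue and copies every queued skb without consuming anything (SO_PEEK_OFF support is still a TODO, per the XXX). A hypothetical dump-side use could be:

    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <netinet/tcp.h>

    /* Peek the whole not-yet-acked send queue of a socket that is already
     * in repair mode; buf must be large enough for the entire queue. */
    static ssize_t dump_send_queue(int sk, void *buf, size_t len)
    {
            int q = TCP_SEND_QUEUE;

            if (setsockopt(sk, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q)))
                    return -1;

            /* A plain read without MSG_PEEK returns -EPERM in repair mode. */
            return recv(sk, buf, len, MSG_PEEK);
    }
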
@@ -1196,7 +1293,7 @@ static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags)
 void tcp_cleanup_rbuf(struct sock *sk, int copied)
 {
        struct tcp_sock *tp = tcp_sk(sk);
-       int time_to_ack = 0;
+       bool time_to_ack = false;
 
        struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
 
@@ -1222,7 +1319,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
                      ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
                       !icsk->icsk_ack.pingpong)) &&
                      !atomic_read(&sk->sk_rmem_alloc)))
-                       time_to_ack = 1;
+                       time_to_ack = true;
        }
 
        /* We send an ACK if we can now advertise a non-zero window
@@ -1244,7 +1341,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
                         * "Lots" means "at least twice" here.
                         */
                        if (new_window && new_window >= 2 * rcv_window_now)
-                               time_to_ack = 1;
+                               time_to_ack = true;
                }
        }
        if (time_to_ack)
@@ -1376,11 +1473,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
                                break;
                }
                if (tcp_hdr(skb)->fin) {
-                       sk_eat_skb(sk, skb, 0);
+                       sk_eat_skb(sk, skb, false);
                        ++seq;
                        break;
                }
-               sk_eat_skb(sk, skb, 0);
+               sk_eat_skb(sk, skb, false);
                if (!desc->count)
                        break;
                tp->copied_seq = seq;
@@ -1416,7 +1513,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
        int target;             /* Read at least this many bytes */
        long timeo;
        struct task_struct *user_recv = NULL;
-       int copied_early = 0;
+       bool copied_early = false;
        struct sk_buff *skb;
        u32 urg_hole = 0;
 
@@ -1432,6 +1529,21 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
        if (flags & MSG_OOB)
                goto recv_urg;
 
+       if (unlikely(tp->repair)) {
+               err = -EPERM;
+               if (!(flags & MSG_PEEK))
+                       goto out;
+
+               if (tp->repair_queue == TCP_SEND_QUEUE)
+                       goto recv_sndq;
+
+               err = -EINVAL;
+               if (tp->repair_queue == TCP_NO_QUEUE)
+                       goto out;
+
+               /* 'common' recv queue MSG_PEEK-ing */
+       }
+
        seq = &tp->copied_seq;
        if (flags & MSG_PEEK) {
                peek_seq = tp->copied_seq;
@@ -1633,9 +1745,9 @@ do_prequeue:
                }
                if ((flags & MSG_PEEK) &&
                    (peek_seq - copied - urg_hole != tp->copied_seq)) {
-                       if (net_ratelimit())
-                               printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n",
-                                      current->comm, task_pid_nr(current));
+                       net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n",
+                                           current->comm,
+                                           task_pid_nr(current));
                        peek_seq = tp->copied_seq;
                }
                continue;
@@ -1689,7 +1801,7 @@ do_prequeue:
                                dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
 
                                if ((offset + used) == skb->len)
-                                       copied_early = 1;
+                                       copied_early = true;
 
                        } else
 #endif
@@ -1723,7 +1835,7 @@ skip_copy:
                        goto found_fin_ok;
                if (!(flags & MSG_PEEK)) {
                        sk_eat_skb(sk, skb, copied_early);
-                       copied_early = 0;
+                       copied_early = false;
                }
                continue;
 
@@ -1732,7 +1844,7 @@ skip_copy:
                ++*seq;
                if (!(flags & MSG_PEEK)) {
                        sk_eat_skb(sk, skb, copied_early);
-                       copied_early = 0;
+                       copied_early = false;
                }
                break;
        } while (len > 0);
@@ -1783,6 +1895,10 @@ out:
 recv_urg:
        err = tcp_recv_urg(sk, msg, len, flags);
        goto out;
+
+recv_sndq:
+       err = tcp_peek_sndq(sk, msg, len);
+       goto out;
 }
 EXPORT_SYMBOL(tcp_recvmsg);
 
@@ -1886,10 +2002,10 @@ bool tcp_check_oom(struct sock *sk, int shift)
        too_many_orphans = tcp_too_many_orphans(sk, shift);
        out_of_socket_memory = tcp_out_of_memory(sk);
 
-       if (too_many_orphans && net_ratelimit())
-               pr_info("too many orphaned sockets\n");
-       if (out_of_socket_memory && net_ratelimit())
-               pr_info("out of memory -- consider tuning tcp_mem\n");
+       if (too_many_orphans)
+               net_info_ratelimited("too many orphaned sockets\n");
+       if (out_of_socket_memory)
+               net_info_ratelimited("out of memory -- consider tuning tcp_mem\n");
        return too_many_orphans || out_of_socket_memory;
 }
 
@@ -1935,7 +2051,9 @@ void tcp_close(struct sock *sk, long timeout)
         * advertise a zero window, then kill -9 the FTP client, wheee...
         * Note: timeout is always zero in such a case.
         */
-       if (data_was_unread) {
+       if (unlikely(tcp_sk(sk)->repair)) {
+               sk->sk_prot->disconnect(sk, 0);
+       } else if (data_was_unread) {
                /* Unread data was tossed, zap the connection. */
                NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
                tcp_set_state(sk, TCP_CLOSE);
@@ -2053,7 +2171,7 @@ EXPORT_SYMBOL(tcp_close);
 
 /* These states need RST on ABORT according to RFC793 */
 
-static inline int tcp_need_reset(int state)
+static inline bool tcp_need_reset(int state)
 {
        return (1 << state) &
               (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
@@ -2074,6 +2192,8 @@ int tcp_disconnect(struct sock *sk, int flags)
        /* ABORT function of RFC793 */
        if (old_state == TCP_LISTEN) {
                inet_csk_listen_stop(sk);
+       } else if (unlikely(tp->repair)) {
+               sk->sk_err = ECONNABORTED;
        } else if (tcp_need_reset(old_state) ||
                   (tp->snd_nxt != tp->write_seq &&
                    (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
@@ -2125,6 +2245,54 @@ int tcp_disconnect(struct sock *sk, int flags)
 }
 EXPORT_SYMBOL(tcp_disconnect);
 
+static inline bool tcp_can_repair_sock(const struct sock *sk)
+{
+       return capable(CAP_NET_ADMIN) &&
+               ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED));
+}
+
+static int tcp_repair_options_est(struct tcp_sock *tp,
+               struct tcp_repair_opt __user *optbuf, unsigned int len)
+{
+       struct tcp_repair_opt opt;
+
+       while (len >= sizeof(opt)) {
+               if (copy_from_user(&opt, optbuf, sizeof(opt)))
+                       return -EFAULT;
+
+               optbuf++;
+               len -= sizeof(opt);
+
+               switch (opt.opt_code) {
+               case TCPOPT_MSS:
+                       tp->rx_opt.mss_clamp = opt.opt_val;
+                       break;
+               case TCPOPT_WINDOW:
+                       if (opt.opt_val > 14)
+                               return -EFBIG;
+
+                       tp->rx_opt.snd_wscale = opt.opt_val;
+                       break;
+               case TCPOPT_SACK_PERM:
+                       if (opt.opt_val != 0)
+                               return -EINVAL;
+
+                       tp->rx_opt.sack_ok |= TCP_SACK_SEEN;
+                       if (sysctl_tcp_fack)
+                               tcp_enable_fack(tp);
+                       break;
+               case TCPOPT_TIMESTAMP:
+                       if (opt.opt_val != 0)
+                               return -EINVAL;
+
+                       tp->rx_opt.tstamp_ok = 1;
+                       break;
+               }
+       }
+
+       return 0;
+}
+
 /*
  *     Socket option code for TCP.
  */
@@ -2295,6 +2463,55 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                        err = -EINVAL;
                else
                        tp->thin_dupack = val;
+                       if (tp->thin_dupack)
+                               tcp_disable_early_retrans(tp);
+               break;
+
+       case TCP_REPAIR:
+               if (!tcp_can_repair_sock(sk))
+                       err = -EPERM;
+               else if (val == 1) {
+                       tp->repair = 1;
+                       sk->sk_reuse = SK_FORCE_REUSE;
+                       tp->repair_queue = TCP_NO_QUEUE;
+               } else if (val == 0) {
+                       tp->repair = 0;
+                       sk->sk_reuse = SK_NO_REUSE;
+                       tcp_send_window_probe(sk);
+               } else
+                       err = -EINVAL;
+
+               break;
+
+       case TCP_REPAIR_QUEUE:
+               if (!tp->repair)
+                       err = -EPERM;
+               else if (val < TCP_QUEUES_NR)
+                       tp->repair_queue = val;
+               else
+                       err = -EINVAL;
+               break;
+
+       case TCP_QUEUE_SEQ:
+               if (sk->sk_state != TCP_CLOSE)
+                       err = -EPERM;
+               else if (tp->repair_queue == TCP_SEND_QUEUE)
+                       tp->write_seq = val;
+               else if (tp->repair_queue == TCP_RECV_QUEUE)
+                       tp->rcv_nxt = val;
+               else
+                       err = -EINVAL;
+               break;
+
+       case TCP_REPAIR_OPTIONS:
+               if (!tp->repair)
+                       err = -EINVAL;
+               else if (sk->sk_state == TCP_ESTABLISHED)
+                       err = tcp_repair_options_est(tp,
+                                       (struct tcp_repair_opt __user *)optval,
+                                       optlen);
+               else
+                       err = -EPERM;
                break;
 
        case TCP_CORK:
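
Together these four options give a checkpoint/restore tool (CRIU-style) the restore side of the interface: TCP_REPAIR flips the socket into repair mode (CAP_NET_ADMIN only, and only while CLOSE or ESTABLISHED), TCP_REPAIR_QUEUE selects which queue subsequent calls act on, TCP_QUEUE_SEQ seeds write_seq/rcv_nxt while the socket is still closed, and TCP_REPAIR_OPTIONS replays the negotiated options once it is established. A hypothetical restore sequence, assuming the uapi constants and struct tcp_repair_opt added by this series, could look like:

    #include <stdint.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <netinet/tcp.h>

    static int set_queue_seq(int sk, int queue, uint32_t seq)
    {
            if (setsockopt(sk, IPPROTO_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)))
                    return -1;
            /* TCP_QUEUE_SEQ is only accepted while the socket is TCP_CLOSE. */
            return setsockopt(sk, IPPROTO_TCP, TCP_QUEUE_SEQ, &seq, sizeof(seq));
    }

    static int restore_established(int sk, const struct sockaddr_in *self,
                                   const struct sockaddr_in *peer,
                                   uint32_t snd_seq, uint32_t rcv_seq,
                                   const struct tcp_repair_opt *opts, socklen_t optlen)
    {
            int on = 1, off = 0;

            /* Enter repair mode: needs CAP_NET_ADMIN; also forces address reuse. */
            if (setsockopt(sk, IPPROTO_TCP, TCP_REPAIR, &on, sizeof(on)))
                    return -1;

            if (set_queue_seq(sk, TCP_SEND_QUEUE, snd_seq) ||
                set_queue_seq(sk, TCP_RECV_QUEUE, rcv_seq))
                    return -1;

            /* Repair-mode connect() is expected to skip the handshake;
             * that part of the series is outside this file. */
            if (bind(sk, (const struct sockaddr *)self, sizeof(*self)) ||
                connect(sk, (const struct sockaddr *)peer, sizeof(*peer)))
                    return -1;

            /* Replay MSS, window scale, SACK and timestamp settings;
             * handled by tcp_repair_options_est() above. */
            if (setsockopt(sk, IPPROTO_TCP, TCP_REPAIR_OPTIONS, opts, optlen))
                    return -1;

            /* Leaving repair mode triggers tcp_send_window_probe(). */
            return setsockopt(sk, IPPROTO_TCP, TCP_REPAIR, &off, sizeof(off));
    }
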
@@ -2530,6 +2747,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
                val = tp->mss_cache;
                if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
                        val = tp->rx_opt.user_mss;
+               if (tp->repair)
+                       val = tp->rx_opt.mss_clamp;
                break;
        case TCP_NODELAY:
                val = !!(tp->nonagle&TCP_NAGLE_OFF);
@@ -2632,6 +2851,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
                val = tp->thin_dupack;
                break;
 
+       case TCP_REPAIR:
+               val = tp->repair;
+               break;
+
+       case TCP_REPAIR_QUEUE:
+               if (tp->repair)
+                       val = tp->repair_queue;
+               else
+                       return -EINVAL;
+               break;
+
+       case TCP_QUEUE_SEQ:
+               if (tp->repair_queue == TCP_SEND_QUEUE)
+                       val = tp->write_seq;
+               else if (tp->repair_queue == TCP_RECV_QUEUE)
+                       val = tp->rcv_nxt;
+               else
+                       return -EINVAL;
+               break;
+
        case TCP_USER_TIMEOUT:
                val = jiffies_to_msecs(icsk->icsk_user_timeout);
                break;
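
The getsockopt() side mirrors this for the dump phase: TCP_REPAIR reports whether repair mode is on, TCP_REPAIR_QUEUE returns the currently selected queue (repair mode only), TCP_QUEUE_SEQ hands back write_seq or rcv_nxt, and the TCP_MAXSEG hunk above makes it report rx_opt.mss_clamp while repairing. A small dump-side helper, using the same headers as the restore sketch earlier, might be:

    /* Hypothetical dump-side helper: read the sequence counters needed to
     * restore the connection later (socket already switched into repair mode). */
    static int dump_queue_seq(int sk, int queue, uint32_t *seq)
    {
            socklen_t len = sizeof(*seq);

            if (setsockopt(sk, IPPROTO_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)))
                    return -1;

            /* TCP_SEND_QUEUE -> tp->write_seq, TCP_RECV_QUEUE -> tp->rcv_nxt */
            return getsockopt(sk, IPPROTO_TCP, TCP_QUEUE_SEQ, seq, &len);
    }
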
@@ -2675,7 +2914,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
 {
        struct sk_buff *segs = ERR_PTR(-EINVAL);
        struct tcphdr *th;
-       unsigned thlen;
+       unsigned int thlen;
        unsigned int seq;
        __be32 delta;
        unsigned int oldlen;
@@ -2933,13 +3172,13 @@ out_free:
 struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk)
 {
        struct tcp_md5sig_pool __percpu *pool;
-       int alloc = 0;
+       bool alloc = false;
 
 retry:
        spin_lock_bh(&tcp_md5sig_pool_lock);
        pool = tcp_md5sig_pool;
        if (tcp_md5sig_users++ == 0) {
-               alloc = 1;
+               alloc = true;
                spin_unlock_bh(&tcp_md5sig_pool_lock);
        } else if (!pool) {
                tcp_md5sig_users--;
@@ -3033,9 +3272,9 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
        struct scatterlist sg;
        const struct tcphdr *tp = tcp_hdr(skb);
        struct hash_desc *desc = &hp->md5_desc;
-       unsigned i;
-       const unsigned head_data_len = skb_headlen(skb) > header_len ?
-                                      skb_headlen(skb) - header_len : 0;
+       unsigned int i;
+       const unsigned int head_data_len = skb_headlen(skb) > header_len ?
+                                          skb_headlen(skb) - header_len : 0;
        const struct skb_shared_info *shi = skb_shinfo(skb);
        struct sk_buff *frag_iter;
 
@@ -3243,7 +3482,7 @@ void __init tcp_init(void)
 {
        struct sk_buff *skb = NULL;
        unsigned long limit;
-       int max_share, cnt;
+       int max_rshare, max_wshare, cnt;
        unsigned int i;
        unsigned long jiffy = jiffies;
 
@@ -3303,15 +3542,16 @@ void __init tcp_init(void)
        tcp_init_mem(&init_net);
        /* Set per-socket limits to no more than 1/128 the pressure threshold */
        limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
-       max_share = min(4UL*1024*1024, limit);
+       max_wshare = min(4UL*1024*1024, limit);
+       max_rshare = min(6UL*1024*1024, limit);
 
        sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
        sysctl_tcp_wmem[1] = 16*1024;
-       sysctl_tcp_wmem[2] = max(64*1024, max_share);
+       sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
 
        sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
        sysctl_tcp_rmem[1] = 87380;
-       sysctl_tcp_rmem[2] = max(87380, max_share);
+       sysctl_tcp_rmem[2] = max(87380, max_rshare);
 
        pr_info("Hash tables configured (established %u bind %u)\n",
                tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
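
The single max_share cap is split so the receive side gets more headroom than the send side (6 MB vs 4 MB), both still bounded by 1/128 of the free buffer pages. A rough worked example, assuming 4 KiB pages and about 1 GiB of lowmem:

    nr_free_buffer_pages() = 262144 pages                            (~1 GiB)
    limit      = 262144 << (PAGE_SHIFT - 7) = 262144 << 5 = 8 MiB    (1/128 of 1 GiB)
    max_wshare = min(4 MiB, 8 MiB) = 4 MiB  ->  tcp_wmem[2] = max(64 KiB, 4 MiB) = 4 MiB
    max_rshare = min(6 MiB, 8 MiB) = 6 MiB  ->  tcp_rmem[2] = max(87380, 6 MiB)  = 6 MiB

On a smaller machine where limit drops below 4 MiB, both caps collapse to the same memory-derived value again.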