tcp: initialize rcv_mss to TCP_MIN_MSS instead of 0
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ab450c099aa49a3d4b68ca531e7f5426a8eabaf1..b80b399f237724fc71f0d1191c67e0658aeb7620 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
 
 int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 
+int sysctl_tcp_min_tso_segs __read_mostly = 2;
+
 struct percpu_counter tcp_orphan_count;
 EXPORT_SYMBOL_GPL(tcp_orphan_count);
 
@@ -721,6 +723,12 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
                                ret = -EAGAIN;
                                break;
                        }
+                       /* if __tcp_splice_read() got nothing while we have
+                        * an skb in receive queue, we do not want to loop.
+                        * This might happen with URG data.
+                        */
+                       if (!skb_queue_empty(&sk->sk_receive_queue))
+                               break;
                        sk_wait_data(sk, &timeo);
                        if (signal_pending(current)) {
                                ret = sock_intr_errno(timeo);
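The scenario: __tcp_splice_read() can return 0 even though an skb sits in the
receive queue (e.g. it carries the URG byte), so sleeping in sk_wait_data()
and retrying would spin without making progress. A userspace caricature of
the fixed control flow, not kernel code; the names and values are invented
for illustration:

    #include <stdbool.h>
    #include <stdio.h>

    static int queue_len = 1;        /* one skb sits in the receive queue */
    static bool spliceable = false;  /* ...but it is URG data we cannot splice */

    /* Stand-in for __tcp_splice_read(): consumes data only when it can. */
    static int do_splice(void)
    {
            if (queue_len > 0 && spliceable) {
                    queue_len--;
                    return 1;
            }
            return 0;
    }

    int main(void)
    {
            int total = 0, passes = 0;

            for (;;) {
                    int n = do_splice();

                    passes++;
                    if (n > 0) {
                            total += n;
                            continue;
                    }
                    /* The fix: nothing was spliced but the queue is not
                     * empty, so waiting for more data would loop forever.
                     */
                    if (queue_len > 0)
                            break;
                    break;  /* sk_wait_data() would sleep here; keep demo finite */
            }
            printf("gave up after %d pass(es), %d unit(s) spliced\n",
                   passes, total);
            return 0;
    }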
@@ -786,14 +794,24 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
        xmit_size_goal = mss_now;
 
        if (large_allowed && sk_can_gso(sk)) {
-               xmit_size_goal = ((sk->sk_gso_max_size - 1) -
-                                 inet_csk(sk)->icsk_af_ops->net_header_len -
-                                 inet_csk(sk)->icsk_ext_hdr_len -
-                                 tp->tcp_header_len);
+               u32 gso_size, hlen;
+
+               /* Maybe we should/could use sk->sk_prot->max_header here ? */
+               hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
+                      inet_csk(sk)->icsk_ext_hdr_len +
+                      tp->tcp_header_len;
 
-               /* TSQ : try to have two TSO segments in flight */
-               xmit_size_goal = min_t(u32, xmit_size_goal,
-                                      sysctl_tcp_limit_output_bytes >> 1);
+               /* Goal is to send at least one packet per ms,
+                * not one big TSO packet every 100 ms.
+                * This preserves ACK clocking and is consistent
+                * with tcp_tso_should_defer() heuristic.
+                */
+               gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC);
+               gso_size = max_t(u32, gso_size,
+                                sysctl_tcp_min_tso_segs * mss_now);
+
+               xmit_size_goal = min_t(u32, gso_size,
+                                      sk->sk_gso_max_size - 1 - hlen);
 
                xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
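A worked example of the new rule. The constants below are illustrative
assumptions (IPv4 with no extension headers, TCP timestamps enabled), not
values read from a live socket; upstream pairs this logic with an
sk_pacing_rate estimate of roughly twice the current delivery rate, which is
why the rate is divided by 2 * MSEC_PER_SEC to get one packet per ms:

    #include <stdio.h>

    #define MSEC_PER_SEC 1000u

    int main(void)
    {
            unsigned int mss_now = 1448;        /* MSS with TCP timestamps */
            unsigned int min_tso_segs = 2;      /* sysctl_tcp_min_tso_segs default */
            unsigned int gso_max_size = 65536;  /* sk->sk_gso_max_size */
            unsigned int hlen = 20 + 0 + 32;    /* IPv4 + no ext hdrs + TCP w/ TS */
            unsigned long long rates[] = { 1000000ULL,       /*   8 Mbit/s */
                                           12500000ULL,      /* 100 Mbit/s */
                                           1250000000ULL };  /*  10 Gbit/s */

            for (int i = 0; i < 3; i++) {
                    unsigned int gso_size, goal;

                    gso_size = rates[i] / (2 * MSEC_PER_SEC);
                    if (gso_size < min_tso_segs * mss_now)
                            gso_size = min_tso_segs * mss_now;

                    goal = gso_size;
                    if (goal > gso_max_size - 1 - hlen)
                            goal = gso_max_size - 1 - hlen;

                    printf("pacing %10llu B/s -> size goal %5u bytes (%2u segs)\n",
                           rates[i], goal, goal / mss_now);
            }
            return 0;
    }

With these numbers the two-segment floor holds up to roughly 46 Mbit/s of
pacing rate, and the goal saturates at the GSO cap above about 1 Gbit/s.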
 
@@ -989,7 +1007,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp)
        }
 }
 
-static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
+static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
+                               int *copied, size_t size)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        int err, flags;
@@ -1004,11 +1023,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
        if (unlikely(tp->fastopen_req == NULL))
                return -ENOBUFS;
        tp->fastopen_req->data = msg;
+       tp->fastopen_req->size = size;
 
        flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
        err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
                                    msg->msg_namelen, flags);
-       *size = tp->fastopen_req->copied;
+       *copied = tp->fastopen_req->copied;
        tcp_free_fastopen_req(tp);
        return err;
 }
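For reference, the path above is reached from userspace via sendto() with
MSG_FASTOPEN, which performs the connect() internally and tries to carry the
payload in the SYN. A minimal client sketch; the address and port are
placeholders and the peer must have Fast Open enabled:

    #include <arpa/inet.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    #ifndef MSG_FASTOPEN
    #define MSG_FASTOPEN 0x20000000
    #endif

    int main(void)
    {
            struct sockaddr_in addr;
            const char req[] = "GET / HTTP/1.0\r\n\r\n";
            int fd = socket(AF_INET, SOCK_STREAM, 0);
            ssize_t sent;

            if (fd < 0) {
                    perror("socket");
                    return 1;
            }
            memset(&addr, 0, sizeof(addr));
            addr.sin_family = AF_INET;
            addr.sin_port = htons(80);                     /* placeholder */
            addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); /* placeholder */

            /* Connects and queues the payload in one call; the 'size'
             * argument added above caps what may ride in the SYN. */
            sent = sendto(fd, req, sizeof(req) - 1, MSG_FASTOPEN,
                          (struct sockaddr *)&addr, sizeof(addr));
            if (sent < 0)
                    perror("sendto(MSG_FASTOPEN)");
            else
                    printf("queued %zd byte(s) with the SYN\n", sent);
            close(fd);
            return 0;
    }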
@@ -1028,7 +1048,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
        flags = msg->msg_flags;
        if (flags & MSG_FASTOPEN) {
-               err = tcp_sendmsg_fastopen(sk, msg, &copied_syn);
+               err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
                if (err == -EINPROGRESS && copied_syn > 0)
                        goto out;
                else if (err)
@@ -1051,7 +1071,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
        if (unlikely(tp->repair)) {
                if (tp->repair_queue == TCP_RECV_QUEUE) {
                        copied = tcp_send_rcvq(sk, msg, size);
-                       goto out;
+                       goto out_nopush;
                }
 
                err = -EINVAL;
@@ -1117,6 +1137,13 @@ new_segment:
                                if (!skb)
                                        goto wait_for_memory;
 
+                               /*
+                                * All packets are restored as if they have
+                                * already been sent.
+                                */
+                               if (tp->repair)
+                                       TCP_SKB_CB(skb)->when = tcp_time_stamp;
+
                                /*
                                 * Check whether we can use HW checksum.
                                 */
@@ -1217,6 +1244,7 @@ wait_for_memory:
 out:
        if (copied)
                tcp_push(sk, flags, mss_now, tp->nonagle);
+out_nopush:
        release_sock(sk);
        return copied + copied_syn;
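The out_nopush exit matters for TCP repair mode, where a checkpoint/restore
tool writes previously received bytes back into the receive queue and nothing
must be pushed to the wire. A sketch of how that code path is selected from
userspace; it is deliberately incomplete (a real restore, e.g. CRIU, first
rebuilds addresses, sequence numbers and options, and needs CAP_NET_ADMIN),
so the send() on this bare socket fails and only shows the API shape:

    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <stdio.h>
    #include <sys/socket.h>
    #include <unistd.h>

    #ifndef TCP_REPAIR
    #define TCP_REPAIR       19
    #define TCP_REPAIR_QUEUE 20
    #endif
    #ifndef TCP_RECV_QUEUE
    #define TCP_RECV_QUEUE   1
    #endif

    int main(void)
    {
            int fd = socket(AF_INET, SOCK_STREAM, 0);
            int on = 1, q = TCP_RECV_QUEUE;
            const char data[] = "bytes being restored, not sent";

            /* Requires CAP_NET_ADMIN. */
            if (setsockopt(fd, IPPROTO_TCP, TCP_REPAIR, &on, sizeof(on)) < 0) {
                    perror("TCP_REPAIR");
                    return 1;
            }
            setsockopt(fd, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q));

            /* With repair_queue == TCP_RECV_QUEUE, tcp_sendmsg() calls
             * tcp_send_rcvq() and now leaves via out_nopush:, so no
             * segment is transmitted for data we merely restored. */
            if (send(fd, data, sizeof(data) - 1, 0) < 0)
                    perror("send");
            close(fd);
            return 0;
    }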
 
@@ -2285,9 +2313,15 @@ int tcp_disconnect(struct sock *sk, int flags)
        tcp_set_ca_state(sk, TCP_CA_Open);
        tcp_clear_retrans(tp);
        inet_csk_delack_init(sk);
+       /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
+        * issue in __tcp_select_window()
+        */
+       icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
        tcp_init_send_head(sk);
        memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
        __sk_dst_reset(sk);
+       dst_release(sk->sk_rx_dst);
+       sk->sk_rx_dst = NULL;
 
        WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
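The division this guards against lives in __tcp_select_window(), which rounds
the advertised window down to a multiple of the peer-MSS estimate. A toy
reduction of the failure and the fix; the function below is a simplification,
not the kernel code:

    #include <stdio.h>

    #define TCP_MIN_MSS 88U  /* same floor the kernel uses */

    /* Stand-in for the failing spot in __tcp_select_window(): the
     * peer-MSS estimate is a divisor when rounding the advertised
     * window down to a multiple of the MSS. */
    static unsigned int select_window(unsigned int free_space,
                                      unsigned int rcv_mss)
    {
            return (free_space / rcv_mss) * rcv_mss; /* traps if rcv_mss == 0 */
    }

    int main(void)
    {
            unsigned int rcv_mss = 0;  /* state left behind by the old
                                        * tcp_disconnect() */

            /* The fix: re-seed the estimate instead of leaving 0 behind. */
            if (rcv_mss == 0)
                    rcv_mss = TCP_MIN_MSS;

            printf("window = %u\n", select_window(65535, rcv_mss));
            return 0;
    }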
 
@@ -2440,10 +2474,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
        case TCP_THIN_DUPACK:
                if (val < 0 || val > 1)
                        err = -EINVAL;
-               else
+               else {
                        tp->thin_dupack = val;
                        if (tp->thin_dupack)
                                tcp_disable_early_retrans(tp);
+               }
                break;
 
        case TCP_REPAIR:
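The indentation in the old code suggested both statements were guarded by the
else, but without braces only the assignment was. A userspace reduction of
the pattern, with invented values, showing the second statement firing even
though the input was rejected:

    #include <stdio.h>

    int main(void)
    {
            int thin_dupack = 1;   /* set by an earlier, valid setsockopt() */
            int val = 2, err = 0;  /* new value is out of range */

            if (val < 0 || val > 1)
                    err = -22;              /* -EINVAL */
            else
                    thin_dupack = val;
            if (thin_dupack)                /* runs regardless of err */
                    printf("early retransmit disabled\n");

            printf("err=%d thin_dupack=%d\n", err, thin_dupack);
            return 0;
    }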
@@ -2879,6 +2914,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
        netdev_features_t features)
 {
        struct sk_buff *segs = ERR_PTR(-EINVAL);
+       unsigned int sum_truesize = 0;
        struct tcphdr *th;
        unsigned int thlen;
        unsigned int seq;
@@ -2962,13 +2998,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
                if (copy_destructor) {
                        skb->destructor = gso_skb->destructor;
                        skb->sk = gso_skb->sk;
-                       /* {tcp|sock}_wfree() use exact truesize accounting :
-                        * sum(skb->truesize) MUST be exactly be gso_skb->truesize
-                        * So we account mss bytes of 'true size' for each segment.
-                        * The last segment will contain the remaining.
-                        */
-                       skb->truesize = mss;
-                       gso_skb->truesize -= mss;
+                       sum_truesize += skb->truesize;
                }
                skb = skb->next;
                th = tcp_hdr(skb);
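The accounting change: instead of giving every segment a fake truesize of mss
and draining the parent as it goes, each segment keeps its real truesize and
the socket's write-memory charge is corrected once at the end. A small
numeric illustration with made-up sizes:

    #include <stdio.h>

    int main(void)
    {
            /* Made-up sizes: a GSO parent charged 20000 bytes splits into
             * ten segments whose real truesizes sum to something else. */
            unsigned int gso_truesize = 20000;
            unsigned int seg_truesize[] = { 2304, 2304, 2304, 2304, 2304,
                                            2304, 2304, 2304, 2304, 1280 };
            unsigned int sum = 0;
            long wmem_alloc = 20000;  /* what the socket was charged */

            for (unsigned int i = 0;
                 i < sizeof(seg_truesize) / sizeof(seg_truesize[0]); i++)
                    sum += seg_truesize[i];

            /* One correction at the end, mirroring atomic_add(sum_truesize -
             * gso_skb->truesize, &skb->sk->sk_wmem_alloc). */
            wmem_alloc += (long)sum - (long)gso_truesize;

            printf("segments charge %u, socket now accounts %ld\n",
                   sum, wmem_alloc);
            return 0;
    }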
@@ -2985,7 +3015,9 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
        if (copy_destructor) {
                swap(gso_skb->sk, skb->sk);
                swap(gso_skb->destructor, skb->destructor);
-               swap(gso_skb->truesize, skb->truesize);
+               sum_truesize += skb->truesize;
+               atomic_add(sum_truesize - gso_skb->truesize,
+                          &skb->sk->sk_wmem_alloc);
        }
 
        delta = htonl(oldlen + (skb->tail - skb->transport_header) +