slow start after idle might reduce cwnd, but we perform this
after first packet was cooked and sent.
With TSO/GSO, it means that we might send a full TSO packet
even if cwnd should have been reduced to IW10.
Moving the SSAI check in skb_entail() makes sense, because
we slightly reduce number of times this check is done,
especially for large send() and TCP Small queue callbacks from
softirq context.
As Neal pointed out, we also need to perform the check
if/when receive window opens.
Tested:
Following packetdrill test demonstrates the problem
// Test of slow start after idle
`sysctl -q net.ipv4.tcp_slow_start_after_idle=1`
0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 6>
+.100 < . 1:1(0) ack 1 win 511
+0 accept(3, ..., ...) = 4
+0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+0 write(4, ..., 26000) = 26000
+0 > . 1:5001(5000) ack 1
+0 > . 5001:10001(5000) ack 1
+0 %{ assert tcpi_snd_cwnd == 10 }%
+.100 < . 1:1(0) ack 10001 win 511
+0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
+0 > . 10001:20001(10000) ack 1
+0 > P. 20001:26001(6000) ack 1
+.100 < . 1:1(0) ack 26001 win 511
+0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+4 write(4, ..., 20000) = 20000
// If slow start after idle works properly, we should send 5 MSS here (cwnd/2)
+0 > . 26001:31001(5000) ack 1
+0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+0 > . 31001:36001(5000) ack 1
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
}
u32 tcp_default_init_rwnd(u32 mss);
+void tcp_cwnd_restart(struct sock *sk, s32 delta);
+
+static inline void tcp_slow_start_after_idle_check(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ s32 delta;
+
+ if (!sysctl_tcp_slow_start_after_idle || tp->packets_out)
+ return;
+ delta = tcp_time_stamp - tp->lsndtime;
+ if (delta > inet_csk(sk)->icsk_rto)
+ tcp_cwnd_restart(sk, delta);
+}
/* Determine a window scaling and initial window to offer. */
void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd,
sk_mem_charge(sk, skb->truesize);
if (tp->nonagle & TCP_NAGLE_PUSH)
tp->nonagle &= ~TCP_NAGLE_PUSH;
+
+ tcp_slow_start_after_idle_check(sk);
}
static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
tp->pred_flags = 0;
tcp_fast_path_check(sk);
+ if (tcp_send_head(sk))
+ tcp_slow_start_after_idle_check(sk);
+
if (nwin > tp->max_window) {
tp->max_window = nwin;
tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
}
/* RFC2861. Reset CWND after idle period longer RTO to "restart window".
- * This is the first part of cwnd validation mechanism. */
-static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst)
+ * This is the first part of cwnd validation mechanism.
+ */
+void tcp_cwnd_restart(struct sock *sk, s32 delta)
{
struct tcp_sock *tp = tcp_sk(sk);
- s32 delta = tcp_time_stamp - tp->lsndtime;
- u32 restart_cwnd = tcp_init_cwnd(tp, dst);
+ u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
u32 cwnd = tp->snd_cwnd;
tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
struct inet_connection_sock *icsk = inet_csk(sk);
const u32 now = tcp_time_stamp;
- if (sysctl_tcp_slow_start_after_idle &&
- (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto))
- tcp_cwnd_restart(sk, __sk_dst_get(sk));
-
tp->lsndtime = now;
/* If it is a reply for ato after last received