Allows TCP to send "duplicate" SACKs.
tcp_early_retrans - INTEGER
- Enable Early Retransmit (ER), per RFC 5827. ER lowers the threshold
- for triggering fast retransmit when the amount of outstanding data is
- small and when no previously unsent data can be transmitted (such
- that limited transmit could be used). Also controls the use of
- Tail loss probe (TLP) that converts RTOs occurring due to tail
- losses into fast recovery (draft-dukkipati-tcpm-tcp-loss-probe-01).
+ Tail loss probe (TLP) converts RTOs occurring due to tail
+ losses into fast recovery (draft-ietf-tcpm-rack). Note that
+ TLP requires RACK to function properly (see tcp_recovery below)
Possible values:
- 0 disables ER
- 1 enables ER
- 2 enables ER but delays fast recovery and fast retransmit
- by a fourth of RTT. This mitigates connection falsely
- recovers when network has a small degree of reordering
- (less than 3 packets).
- 3 enables delayed ER and TLP.
- 4 enables TLP only.
+ 0 disables TLP
+ 3 or 4 enables TLP
Default: 3
tcp_ecn - INTEGER
repair : 1,
frto : 1;/* F-RTO (RFC5682) activated in CA_Loss */
u8 repair_queue;
- u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */
- syn_data:1, /* SYN includes data */
+ u8 syn_data:1, /* SYN includes data */
syn_fastopen:1, /* SYN includes Fast Open option */
syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
const struct sk_buff *next_skb);
/* tcp_input.c */
-void tcp_resume_early_retransmit(struct sock *sk);
void tcp_rearm_rto(struct sock *sk);
void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
void tcp_reset(struct sock *sk);
tp->rx_opt.sack_ok |= TCP_FACK_ENABLED;
}
-/* TCP early-retransmit (ER) is similar to but more conservative than
- * the thin-dupack feature. Enable ER only if thin-dupack is disabled.
- */
-static inline void tcp_enable_early_retrans(struct tcp_sock *tp)
-{
- struct net *net = sock_net((struct sock *)tp);
-
- tp->do_early_retrans = sysctl_tcp_early_retrans &&
- sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack &&
- !(sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) &&
- net->ipv4.sysctl_tcp_reordering == 3;
-}
-
-static inline void tcp_disable_early_retrans(struct tcp_sock *tp)
-{
- tp->do_early_retrans = 0;
-}
-
static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
{
return tp->sacked_out + tp->lost_out;
}
if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
- icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
r->idiag_timer = 1;
tp->mss_cache = TCP_MSS_DEFAULT;
tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
- tcp_enable_early_retrans(tp);
tcp_assign_congestion_control(sk);
tp->tsoffset = 0;
err = -EINVAL;
else {
tp->thin_dupack = val;
- if (tp->thin_dupack)
- tcp_disable_early_retrans(tp);
}
break;
tcp_disable_fack(tp);
}
- if (metric > 0)
- tcp_disable_early_retrans(tp);
tp->rack.reord = 1;
}
return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
}
-static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- unsigned long delay;
-
- /* Delay early retransmit and entering fast recovery for
- * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples
- * available, or RTO is scheduled to fire first.
- */
- if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
- (flag & FLAG_ECE) || !tp->srtt_us)
- return false;
-
- delay = max(usecs_to_jiffies(tp->srtt_us >> 5),
- msecs_to_jiffies(2));
-
- if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
- return false;
-
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay,
- TCP_RTO_MAX);
- return true;
-}
-
/* Linux NewReno/SACK/FACK/ECN state machine.
* --------------------------------------
*
tcp_is_sack(tp) && !tcp_send_head(sk))
return true;
- /* Trick#6: TCP early retransmit, per RFC5827. To avoid spurious
- * retransmissions due to small network reorderings, we implement
- * Mitigation A.3 in the RFC and delay the retransmission for a short
- * interval if appropriate.
- */
- if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
- (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) &&
- !tcp_may_send_now(sk))
- return !tcp_pause_early_retransmit(sk, flag);
-
return false;
}
} else {
u32 rto = inet_csk(sk)->icsk_rto;
/* Offset the time elapsed after installing regular RTO */
- if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
- icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+ if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
struct sk_buff *skb = tcp_write_queue_head(sk);
const u32 rto_time_stamp =
}
}
-/* This function is called when the delayed ER timer fires. TCP enters
- * fast recovery and performs fast-retransmit.
- */
-void tcp_resume_early_retransmit(struct sock *sk)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- tcp_rearm_rto(sk);
-
- /* Stop if ER is disabled after the delayed ER timer is scheduled */
- if (!tp->do_early_retrans)
- return;
-
- tcp_enter_recovery(sk, false);
- tcp_update_scoreboard(sk, 1);
- tcp_xmit_retransmit_queue(sk);
-}
-
/* If we get here, the whole TSO packet has not been acked. */
static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
{
skb_mstamp_get(&sack_state.ack_time);
- if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
- icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
+ if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
tcp_rearm_rto(sk);
if (after(ack, prior_snd_una)) {
int state;
if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
- icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
if (val && tp->reordering != val) {
tcp_disable_fack(tp);
- tcp_disable_early_retrans(tp);
tp->reordering = val;
}
newtp->sacked_out = 0;
newtp->fackets_out = 0;
newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
- tcp_enable_early_retrans(newtp);
newtp->tlp_high_seq = 0;
newtp->lsndtime = treq->snt_synack.stamp_jiffies;
newsk->sk_txhash = treq->txhash;
tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
tp->packets_out += tcp_skb_pcount(skb);
- if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
- icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+ if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
tcp_rearm_rto(sk);
- }
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
tcp_skb_pcount(skb));
u32 timeout, tlp_time_stamp, rto_time_stamp;
u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
- if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
- return false;
/* No consecutive loss probes. */
if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
tcp_rearm_rto(sk);
/* Schedule a loss probe in 2*RTT for SACK capable connections
* in Open state, that are either limited by cwnd or application.
*/
- if (sysctl_tcp_early_retrans < 3 || !tp->packets_out ||
- !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
+ if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) ||
+ !tp->packets_out || !tcp_is_sack(tp) ||
+ icsk->icsk_ca_state != TCP_CA_Open)
return false;
if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
case ICSK_TIME_REO_TIMEOUT:
tcp_rack_reo_timeout(sk);
break;
- case ICSK_TIME_EARLY_RETRANS:
- tcp_resume_early_retransmit(sk);
- break;
case ICSK_TIME_LOSS_PROBE:
tcp_send_loss_probe(sk);
break;
srcp = ntohs(inet->inet_sport);
if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
- icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;