extern int sysctl_tcp_thin_linear_timeouts;
extern int sysctl_tcp_thin_dupack;
extern int sysctl_tcp_early_retrans;
+extern int sysctl_tcp_recovery;
+#define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */
+
extern int sysctl_tcp_limit_output_bytes;
extern int sysctl_tcp_challenge_ack_limit;
extern int sysctl_tcp_min_tso_segs;
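Note on the new knob: sysctl_tcp_recovery is a bitmap, with bit 0 (TCP_RACK_LOSS_DETECTION) turning RACK loss detection on. A minimal userspace-style sketch of how such a bitmap flag is typically consumed follows; the variable and helper below are illustrative only, not kernel code.

/* Illustrative sketch (hypothetical names): consuming a bitmap sysctl such
 * as net.ipv4.tcp_recovery by testing individual feature bits.
 */
#include <stdbool.h>

#define TCP_RACK_LOSS_DETECTION 0x1     /* bit 0: use RACK to detect losses */

static int sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;  /* default: on */

static bool rack_loss_detection_enabled(void)
{
        /* Test the single feature bit, leaving room for future flags. */
        return sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
}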
tp->do_early_retrans = sysctl_tcp_early_retrans &&
sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack &&
+ !(sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) &&
net->ipv4.sysctl_tcp_reordering == 3;
}
void tcp_init(void);
/* tcp_recovery.c */
-
-/* Flags to enable various loss recovery features. See below */
-extern int sysctl_tcp_recovery;
-
-/* Use TCP RACK to detect (some) tail and retransmit losses */
-#define TCP_RACK_LOST_RETRANS 0x1
-
extern void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now);
extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
const struct skb_mstamp *xmit_time,
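The two exported helpers above form the RACK API: tcp_rack_advance() is fed every newly (s)acked packet, and tcp_rack_mark_lost() later walks the queue against the recorded state. A sketch of the "advance" side is below; the struct and function names are hypothetical stand-ins, not the kernel's, and real code uses wraparound-safe sequence comparison.

/* Sketch (hypothetical names): the core of "RACK advance".  Whenever a
 * packet is newly (S)ACKed, remember the transmit time and end sequence of
 * the most recently *sent* delivered packet; loss detection later compares
 * the remaining packets against this record.
 */
#include <stdbool.h>
#include <stdint.h>

struct rack_state {
        uint64_t mstamp_us;     /* xmit time of the latest-sent delivered pkt */
        uint32_t end_seq;       /* its end sequence, breaks exact-time ties   */
        bool     advanced;      /* set when the record moved; consumed by the *
                                 * loss-detection pass to avoid rescanning    */
};

static void rack_advance(struct rack_state *rack,
                         uint64_t xmit_time_us, uint32_t end_seq)
{
        /* Real code must compare sequences in a wrap-safe way. */
        if (xmit_time_us > rack->mstamp_us ||
            (xmit_time_us == rack->mstamp_us && end_seq > rack->end_seq)) {
                rack->mstamp_us = xmit_time_us;
                rack->end_seq = end_seq;
                rack->advanced = true;
        }
}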
* F.e. after RTO, when all the queue is considered as lost,
* lost_out = packets_out and in_flight = retrans_out.
*
- * Essentially, we have now two algorithms counting
+ * Essentially, we now have a few algorithms detecting
* lost packets.
*
- * FACK: It is the simplest heuristics. As soon as we decided
+ * If the receiver supports SACK:
+ *
+ * RFC6675/3517: It is the conventional algorithm. A packet is
+ * considered lost if the number of higher sequence packets
+ * SACKed is greater than or equal to the DUPACK threshold
+ * (reordering). This is implemented in tcp_mark_head_lost and
+ * tcp_update_scoreboard.
+ *
+ * RACK (draft-ietf-tcpm-rack-01): It is a newer algorithm
+ * (2017-) that checks timing instead of counting DUPACKs.
+ * Essentially a packet is considered lost if it's not S/ACKed
+ * after RTT + reordering_window, where both metrics are
+ * dynamically measured and adjusted. This is implemented in
+ * tcp_rack_mark_lost.
+ *
+ * FACK: It is the simplest heuristic. As soon as we decided
* that something is lost, we decide that _all_ not SACKed
* packets until the most forward SACK are lost. I.e.
* lost_out = fackets_out - sacked_out and left_out = fackets_out.
* takes place. We use FACK by default until reordering
* is suspected on the path to this destination.
*
- * NewReno: when Recovery is entered, we assume that one segment
+ * If the receiver does not support SACK:
+ *
+ * NewReno (RFC6582): in Recovery we assume that one segment
* is lost (classic Reno). While we are in Recovery and
* a partial ACK arrives, we assume that one more packet
 * is lost (NewReno). These heuristics are the same in NewReno
* and SACK.
*
- * Imagine, that's all! Forget about all this shamanism about CWND inflation
- * deflation etc. CWND is real congestion window, never inflated, changes
- * only according to classic VJ rules.
- *
* Really tricky (and requiring careful tuning) part of algorithm
* is hidden in functions tcp_time_to_recover() and tcp_xmit_retransmit_queue().
* The first determines the moment _when_ we should reduce CWND and,
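To make the contrast drawn in the comment above concrete, here is a small sketch (not the kernel code) of the two SACK-era loss rules over a simplified per-packet record. All names, structures, and parameters below are invented for illustration.

/* Illustrative only: RFC 6675-style dupthresh counting vs. RACK-style
 * time-based detection, over a hypothetical per-packet record.
 */
#include <stdbool.h>
#include <stdint.h>

struct pkt {
        uint32_t end_seq;
        uint64_t xmit_time_us;
        bool     sacked;
};

/* RFC 6675/3517 style: lost if at least "dupthresh" (i.e. reordering)
 * higher-sequence packets have already been SACKed.
 */
static bool lost_by_dupthresh(const struct pkt *p,
                              unsigned int higher_sacked, unsigned int dupthresh)
{
        return !p->sacked && higher_sacked >= dupthresh;
}

/* RACK style: lost if a packet sent *later* has already been delivered and
 * this one is still unacked after RTT + reordering window since it was sent.
 */
static bool lost_by_rack(const struct pkt *p, uint64_t latest_delivered_xmit_us,
                         uint64_t now_us, uint64_t rtt_us, uint64_t reo_wnd_us)
{
        return !p->sacked &&
               latest_delivered_xmit_us > p->xmit_time_us &&
               now_us - p->xmit_time_us > rtt_us + reo_wnd_us;
}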
struct tcp_sock *tp = tcp_sk(sk);
/* Use RACK to detect loss */
- if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS) {
+ if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
u32 prior_retrans = tp->retrans_out;
tcp_rack_mark_lost(sk, ack_time);
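The hunk is cut off here, but the prior_retrans snapshot presumably lets the caller tell whether tcp_rack_mark_lost() newly declared any retransmissions lost, by comparing retrans_out before and after the call. A generic sketch of that before/after pattern follows; the types and names are hypothetical and the actual follow-up in the kernel code is not shown above.

/* Sketch (hypothetical names): snapshot a counter around a call that may
 * re-mark packets, then react only if the counter changed.
 */
#include <stdbool.h>
#include <stdint.h>

struct conn {
        uint32_t retrans_out;   /* retransmitted-and-still-in-flight count */
};

static void rack_mark_lost(struct conn *c)
{
        (void)c;    /* placeholder: may decrement retrans_out for lost rexmits */
}

static bool detect_lost_retransmissions(struct conn *c)
{
        uint32_t prior_retrans = c->retrans_out;

        rack_mark_lost(c);
        /* If the counter dropped, at least one retransmission was declared
         * lost and will need to be sent again.
         */
        return c->retrans_out < prior_retrans;
}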
#include <linux/tcp.h>
#include <net/tcp.h>
-int sysctl_tcp_recovery __read_mostly = TCP_RACK_LOST_RETRANS;
+int sysctl_tcp_recovery __read_mostly = TCP_RACK_LOSS_DETECTION;
static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
{
(t1->v64 == t2->v64 && after(seq1, seq2));
}
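The expression above is the tail of a "sent after" predicate: transmit timestamps are compared first, and equal timestamps fall back to sequence order. A standalone sketch of such a predicate is below, with hypothetical names; the kernel's after() macro is the wrap-safe sequence comparison the real code relies on.

/* Sketch of a "sent after" predicate: order by transmit timestamp and use
 * the end sequence to break exact-timestamp ties.
 */
#include <stdbool.h>
#include <stdint.h>

/* Wrap-safe "seq1 comes after seq2" on the 32-bit sequence space. */
static bool seq_after(uint32_t seq1, uint32_t seq2)
{
        return (int32_t)(seq1 - seq2) > 0;
}

static bool sent_after(uint64_t t1_us, uint64_t t2_us,
                       uint32_t seq1, uint32_t seq2)
{
        return t1_us > t2_us || (t1_us == t2_us && seq_after(seq1, seq2));
}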
-/* Marks a packet lost, if some packet sent later has been (s)acked.
+/* RACK loss detection (IETF draft draft-ietf-tcpm-rack-01):
+ *
+ * Marks a packet lost, if some packet sent later has been (s)acked.
* The underlying idea is similar to the traditional dupthresh and FACK
* but they look at different metrics:
*
* is being more resilient to reordering by simply allowing some
* "settling delay", instead of tweaking the dupthresh.
*
- * The current version is only used after recovery starts but can be
- * easily extended to detect the first loss.
+ * When tcp_rack_detect_loss() detects some packets are lost and we
+ * are not already in the CA_Recovery state, either tcp_rack_reo_timeout()
+ * or tcp_time_to_recover()'s "Trick#1: the loss is proven" code path will
+ * make us enter the CA_Recovery state.
*/
static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now,
u32 *reo_timeout)
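The comment above describes the per-packet decision that tcp_rack_detect_loss() makes: packets sent before the most recently delivered one are marked lost once RTT + reordering window has elapsed since their own transmission; otherwise the earliest remaining deadline is reported so a timer can re-check. A simplified sketch of that walk follows; the structures and names are hypothetical, not the kernel function.

/* Sketch (hypothetical names) of the per-packet RACK decision. */
#include <stdbool.h>
#include <stdint.h>

struct sent_pkt {
        uint64_t xmit_time_us;
        bool     delivered;     /* (S)ACKed */
        bool     lost;
};

static uint64_t rack_detect_loss(struct sent_pkt *pkts, int n,
                                 uint64_t latest_delivered_xmit_us,
                                 uint64_t now_us, uint64_t rtt_us,
                                 uint64_t reo_wnd_us)
{
        uint64_t timeout_us = 0;    /* 0: no pending re-check needed */

        for (int i = 0; i < n; i++) {
                struct sent_pkt *p = &pkts[i];
                uint64_t deadline;

                if (p->delivered || p->lost)
                        continue;
                /* Only packets sent before the latest delivered one qualify. */
                if (p->xmit_time_us >= latest_delivered_xmit_us)
                        continue;

                deadline = p->xmit_time_us + rtt_us + reo_wnd_us;
                if (now_us >= deadline)
                        p->lost = true;                 /* settled: mark lost */
                else if (!timeout_us || deadline - now_us < timeout_us)
                        timeout_us = deadline - now_us; /* earliest re-check  */
        }
        return timeout_us;
}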
* to queuing or delayed ACKs.
*/
reo_wnd = 1000;
- if (tp->rack.reord && tcp_min_rtt(tp) != ~0U)
+ if ((tp->rack.reord || !tp->lost_out) && tcp_min_rtt(tp) != ~0U)
reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd);
tcp_for_write_queue(skb, sk) {
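The hunk above chooses the reordering window: a 1000-microsecond floor to absorb jitter from queuing and delayed ACKs, widened to a quarter of the minimum RTT once reordering has been observed or (with this patch) while no packet is currently marked lost. A sketch of that computation, with hypothetical names and assuming microsecond units throughout:

/* Sketch of the reordering-window choice (hypothetical names, usec units). */
#include <stdbool.h>
#include <stdint.h>

#define RACK_REO_WND_FLOOR_US 1000u     /* 1 ms floor against ACK jitter */

static uint32_t rack_reo_wnd_us(uint32_t min_rtt_us, bool reord_seen,
                                uint32_t lost_out)
{
        uint32_t reo_wnd = RACK_REO_WND_FLOOR_US;

        /* Widen to min_RTT/4 when reordering was seen, or when nothing is
         * marked lost yet, provided a min RTT sample exists.
         */
        if ((reord_seen || !lost_out) && min_rtt_us != UINT32_MAX) {
                uint32_t quarter_rtt = min_rtt_us >> 2;

                if (quarter_rtt > reo_wnd)
                        reo_wnd = quarter_rtt;
        }
        return reo_wnd;
}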
struct tcp_sock *tp = tcp_sk(sk);
u32 timeout;
- if (inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery || !tp->rack.advanced)
+ if (!tp->rack.advanced)
return;
/* Reset the advanced flag to avoid unnecessary queue scanning */
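The entry check plus the "reset the advanced flag" comment describe a simple dirty-flag pattern: the per-ACK hook only walks the write queue when the RACK record actually moved since the last scan, and it clears the flag before scanning. A tiny sketch of that consume-the-flag pattern, with hypothetical names:

/* Sketch (hypothetical names) of the dirty-flag gate used above. */
#include <stdbool.h>

struct rack {
        bool advanced;          /* set by the advance step when the record moved */
};

static void rack_detect_loss(struct rack *r)
{
        (void)r;                /* placeholder for the queue walk */
}

static void rack_mark_lost(struct rack *r)
{
        if (!r->advanced)
                return;         /* nothing newly delivered: skip the queue walk */

        /* Consume the flag before scanning so the next ACK that does not
         * advance the record will not rescan the queue.
         */
        r->advanced = false;
        rack_detect_loss(r);
}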