tcp: tcp_vegas cong avoid fix
authorDoug Leith <doug.leith@nuim.ie>
Tue, 9 Dec 2008 08:13:04 +0000 (00:13 -0800)
committerDavid S. Miller <davem@davemloft.net>
Tue, 9 Dec 2008 08:13:04 +0000 (00:13 -0800)
This patch addresses a book-keeping issue in tcp_vegas.c.  At present
tcp_vegas does separate book-keeping of cwnd based on packet sequence
numbers.  A mismatch can develop between this book-keeping and
tp->snd_cwnd due, for example, to delayed acks acking multiple
packets.  When vegas transitions to reno operation (e.g. following
loss), then this mismatch leads to incorrect behaviour (akin to a cwnd
backoff).  This seems mostly to affect operation at low cwnds where
delayed acking can lead to a significant fraction of cwnd being
covered by a single ack, leading to the book-keeping mismatch.  This
patch modifies the congestion avoidance update to avoid the need for
separate book-keeping while leaving vegas congestion avoidance
functionally unchanged.  A secondary advantage of this modification is
that the use of fixed-point (via V_PARAM_SHIFT) and 64 bit arithmetic
is no longer necessary, simplifying the code.

Some example test measurements with the patched code (confirming no functional
change in the congestion avoidance algorithm) can be seen at:

http://www.hamilton.ie/doug/vegaspatch/

Signed-off-by: Doug Leith <doug.leith@nuim.ie>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/ipv4/tcp_vegas.c

index 7cd22262de3af3fa0f2917eefaea885c068855ac..a453aac91bd3b740ef44363ef10e5be5390bf6dc 100644 (file)
 
 #include "tcp_vegas.h"
 
-/* Default values of the Vegas variables, in fixed-point representation
- * with V_PARAM_SHIFT bits to the right of the binary point.
- */
-#define V_PARAM_SHIFT 1
-static int alpha = 2<<V_PARAM_SHIFT;
-static int beta  = 4<<V_PARAM_SHIFT;
-static int gamma = 1<<V_PARAM_SHIFT;
+static int alpha = 2;
+static int beta  = 4;
+static int gamma = 1;
 
 module_param(alpha, int, 0644);
-MODULE_PARM_DESC(alpha, "lower bound of packets in network (scale by 2)");
+MODULE_PARM_DESC(alpha, "lower bound of packets in network");
 module_param(beta, int, 0644);
-MODULE_PARM_DESC(beta, "upper bound of packets in network (scale by 2)");
+MODULE_PARM_DESC(beta, "upper bound of packets in network");
 module_param(gamma, int, 0644);
 MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)");
 
@@ -172,49 +168,13 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
                return;
        }
 
-       /* The key players are v_beg_snd_una and v_beg_snd_nxt.
-        *
-        * These are so named because they represent the approximate values
-        * of snd_una and snd_nxt at the beginning of the current RTT. More
-        * precisely, they represent the amount of data sent during the RTT.
-        * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt,
-        * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding
-        * bytes of data have been ACKed during the course of the RTT, giving
-        * an "actual" rate of:
-        *
-        *     (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration)
-        *
-        * Unfortunately, v_beg_snd_una is not exactly equal to snd_una,
-        * because delayed ACKs can cover more than one segment, so they
-        * don't line up nicely with the boundaries of RTTs.
-        *
-        * Another unfortunate fact of life is that delayed ACKs delay the
-        * advance of the left edge of our send window, so that the number
-        * of bytes we send in an RTT is often less than our cwnd will allow.
-        * So we keep track of our cwnd separately, in v_beg_snd_cwnd.
-        */
-
        if (after(ack, vegas->beg_snd_nxt)) {
                /* Do the Vegas once-per-RTT cwnd adjustment. */
-               u32 old_wnd, old_snd_cwnd;
-
-
-               /* Here old_wnd is essentially the window of data that was
-                * sent during the previous RTT, and has all
-                * been acknowledged in the course of the RTT that ended
-                * with the ACK we just received. Likewise, old_snd_cwnd
-                * is the cwnd during the previous RTT.
-                */
-               old_wnd = (vegas->beg_snd_nxt - vegas->beg_snd_una) /
-                       tp->mss_cache;
-               old_snd_cwnd = vegas->beg_snd_cwnd;
 
                /* Save the extent of the current window so we can use this
                 * at the end of the next RTT.
                 */
-               vegas->beg_snd_una  = vegas->beg_snd_nxt;
                vegas->beg_snd_nxt  = tp->snd_nxt;
-               vegas->beg_snd_cwnd = tp->snd_cwnd;
 
                /* We do the Vegas calculations only if we got enough RTT
                 * samples that we can be reasonably sure that we got
@@ -252,22 +212,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
                         *
                         * This is:
                         *     (actual rate in segments) * baseRTT
-                        * We keep it as a fixed point number with
-                        * V_PARAM_SHIFT bits to the right of the binary point.
                         */
-                       target_cwnd = ((u64)old_wnd * vegas->baseRTT);
-                       target_cwnd <<= V_PARAM_SHIFT;
-                       do_div(target_cwnd, rtt);
+                       target_cwnd = tp->snd_cwnd * vegas->baseRTT / rtt;
 
                        /* Calculate the difference between the window we had,
                         * and the window we would like to have. This quantity
                         * is the "Diff" from the Arizona Vegas papers.
-                        *
-                        * Again, this is a fixed point number with
-                        * V_PARAM_SHIFT bits to the right of the binary
-                        * point.
                         */
-                       diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd;
+                       diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT;
 
                        if (diff > gamma && tp->snd_ssthresh > 2 ) {
                                /* Going too fast. Time to slow down
@@ -282,16 +234,13 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
                                 * truncation robs us of full link
                                 * utilization.
                                 */
-                               tp->snd_cwnd = min(tp->snd_cwnd,
-                                                  ((u32)target_cwnd >>
-                                                   V_PARAM_SHIFT)+1);
+                               tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1);
 
                        } else if (tp->snd_cwnd <= tp->snd_ssthresh) {
                                /* Slow start.  */
                                tcp_slow_start(tp);
                        } else {
                                /* Congestion avoidance. */
-                               u32 next_snd_cwnd;
 
                                /* Figure out where we would like cwnd
                                 * to be.
@@ -300,26 +249,17 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
                                        /* The old window was too fast, so
                                         * we slow down.
                                         */
-                                       next_snd_cwnd = old_snd_cwnd - 1;
+                                       tp->snd_cwnd--;
                                } else if (diff < alpha) {
                                        /* We don't have enough extra packets
                                         * in the network, so speed up.
                                         */
-                                       next_snd_cwnd = old_snd_cwnd + 1;
+                                       tp->snd_cwnd++;
                                } else {
                                        /* Sending just as fast as we
                                         * should be.
                                         */
-                                       next_snd_cwnd = old_snd_cwnd;
                                }
-
-                               /* Adjust cwnd upward or downward, toward the
-                                * desired value.
-                                */
-                               if (next_snd_cwnd > tp->snd_cwnd)
-                                       tp->snd_cwnd++;
-                               else if (next_snd_cwnd < tp->snd_cwnd)
-                                       tp->snd_cwnd--;
                        }
 
                        if (tp->snd_cwnd < 2)