ipv4: Create probe timer for tcp PMTU as per RFC4821
author Fan Du <fan.du@intel.com>
Fri, 6 Mar 2015 03:18:24 +0000 (11:18 +0800)
committer David S. Miller <davem@davemloft.net>
Fri, 6 Mar 2015 19:57:42 +0000 (14:57 -0500)
As per RFC4821 7.3.  Selecting Probe Size, a probe timer should
be armed once probing has converged. Once this timer expires,
probe again to take advantage of any path PMTU change. The
recommended probing interval is 10 minutes per RFC1981. The probing
interval can be tuned via sysctl_tcp_probe_interval.

Eric Dumazet suggested implementing a pseudo timer based on the 32-bit
jiffies tcp_time_stamp instead of using a classic timer for such a
rare event.

Signed-off-by: Fan Du <fan.du@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/inet_connection_sock.h
include/net/netns/ipv4.h
include/net/tcp.h
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_output.c
net/ipv4/tcp_timer.c

index 5976bdecf58b05b26980c76ae140d7c016ca939b..b9a6b0a94cc6b52a70158dd2bc7eb847baa6bed0 100644 (file)
@@ -126,6 +126,8 @@ struct inet_connection_sock {
 
                /* Information on the current probe. */
                int               probe_size;
+
+               u32               probe_timestamp;
        } icsk_mtup;
        u32                       icsk_ca_priv[16];
        u32                       icsk_user_timeout;
index e051d399fa170f70602fdacccd5ae6d29b901c80..8f3a1a1a5a94e5626e8b13f5bcc1fb89b590d54a 100644 (file)
@@ -88,6 +88,7 @@ struct netns_ipv4 {
        int sysctl_tcp_mtu_probing;
        int sysctl_tcp_base_mss;
        int sysctl_tcp_probe_threshold;
+       u32 sysctl_tcp_probe_interval;
 
        struct ping_group_range ping_group_range;
 
index 1ad82e334e2760c779ca2cbe37c565f3aed3522e..2e11e38205c226b3a0644e1b4752570414918e50 100644 (file)
@@ -67,6 +67,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 /* The least MTU to use for probing */
 #define TCP_BASE_MSS           1024
 
+/* probing interval, default to 10 minutes as per RFC4821 */
+#define TCP_PROBE_INTERVAL     600
+
 /* Specify interval when tcp mtu probing will stop */
 #define TCP_PROBE_THRESHOLD    8
 
index d3c09c12ee815eb008cd7ee40449d3e2aa9dc4e2..fdf899163d4412af8bc1df82de74d410de2f7c15 100644 (file)
@@ -890,6 +890,13 @@ static struct ctl_table ipv4_net_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
+       {
+               .procname       = "tcp_probe_interval",
+               .data           = &init_net.ipv4.sysctl_tcp_probe_interval,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
        { }
 };
 
index 35790d977a2b951de3de9070aadca5bf99632fd0..f0c6fc32bfa836cee854a4abb7ca733c8bbc6a65 100644 (file)
@@ -2461,6 +2461,7 @@ static int __net_init tcp_sk_init(struct net *net)
        net->ipv4.sysctl_tcp_ecn = 2;
        net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
        net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
+       net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
        return 0;
 
 fail:
index ed024cbb097f6b526609eb3b20f01097901bfe37..5a73ad5afaf7206537a5603950a6826e5dbe01ea 100644 (file)
@@ -1354,6 +1354,8 @@ void tcp_mtup_init(struct sock *sk)
                               icsk->icsk_af_ops->net_header_len;
        icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
        icsk->icsk_mtup.probe_size = 0;
+       if (icsk->icsk_mtup.enabled)
+               icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
 }
 EXPORT_SYMBOL(tcp_mtup_init);
 
@@ -1828,6 +1830,31 @@ send_now:
        return false;
 }
 
+static inline void tcp_mtu_check_reprobe(struct sock *sk)
+{ /* Reset the MTU search range once the configured probe interval has elapsed. */
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct net *net = sock_net(sk);
+       u32 interval; /* per-netns sysctl value; multiplied by HZ before the comparison */
+       s32 delta; /* tcp_time_stamp ticks since probe_timestamp was last written */
+
+       interval = net->ipv4.sysctl_tcp_probe_interval;
+       delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp;
+       if (unlikely(delta >= interval * HZ)) { /* NOTE(review): interval * HZ may wrap in u32 - confirm sysctl bounds */
+               int mss = tcp_current_mss(sk);
+
+               /* Interval elapsed: clear probe_size and rebuild the search range */
+               icsk->icsk_mtup.probe_size = 0;
+               icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp +
+                       sizeof(struct tcphdr) +
+                       icsk->icsk_af_ops->net_header_len;
+               icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
+
+               /* Restart the interval measurement from the current time stamp */
+               icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
+       }
+}
+
 /* Create a new MTU probe if we are ready.
  * MTU probe is regularly attempting to increase the path MTU by
  * deliberately sending larger packets.  This discovers routing
@@ -1870,9 +1897,16 @@ static int tcp_mtu_probe(struct sock *sk)
                                    icsk->icsk_mtup.search_low) >> 1);
        size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
        interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low;
+       /* When misfortune happens, we are reprobing actively,
+        * and then reprobe timer has expired. We stick with current
+        * probing process by not resetting search range to its original.
+        */
        if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
-           interval < max(1, net->ipv4.sysctl_tcp_probe_threshold)) {
-               /* TODO: set timer for probe_converge_event */
+               interval < net->ipv4.sysctl_tcp_probe_threshold) {
+               /* Check whether enough time has elapsed for
+                * another round of probing.
+                */
+               tcp_mtu_check_reprobe(sk);
                return -1;
        }
 
index 0732b787904ed32003bb776c744ed56457e0cb37..15505936511d4b21a2f34786e9481eabcd900a7c 100644 (file)
@@ -107,6 +107,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
        if (net->ipv4.sysctl_tcp_mtu_probing) {
                if (!icsk->icsk_mtup.enabled) {
                        icsk->icsk_mtup.enabled = 1;
+                       icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
                        tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
                } else {
                        struct net *net = sock_net(sk);