ipv4: Namespecify TCP PMTU mechanism
author Fan Du <fan.du@intel.com>
Tue, 10 Feb 2015 01:53:16 +0000 (09:53 +0800)
committer David S. Miller <davem@davemloft.net>
Tue, 10 Feb 2015 02:45:00 +0000 (18:45 -0800)
Packetization Layer Path MTU Discovery works separately alongside
Path MTU Discovery at the IP level. Different net namespaces have
different requirements on which one to choose, e.g., a virtualized
container instance would require TCP PMTU to probe a usable
effective MTU for the underlying tunnel, while the host would
employ classical ICMP-based PMTU to function.

Hence make the TCP PMTU mechanism per net namespace to decouple
the two functionalities. Furthermore, the probe base MSS should also
be configurable separately for each namespace.

Signed-off-by: Fan Du <fan.du@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/netns/ipv4.h
include/net/tcp.h
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_output.c
net/ipv4/tcp_timer.c

index e0bdcb1473269d0d18e67b77fb773137bbbac087..dbe225478adb08a31cc4e6ee798a6edf15a5d282 100644 (file)
@@ -82,6 +82,8 @@ struct netns_ipv4 {
 
        int sysctl_fwmark_reflect;
        int sysctl_tcp_fwmark_accept;
+       int sysctl_tcp_mtu_probing;
+       int sysctl_tcp_base_mss;
 
        struct ping_group_range ping_group_range;
 
index da4196fb78dbf39c12a421332bc0405948fc26c9..8d6b983d509959dcc270b73605bbbcb5835d5885 100644 (file)
@@ -262,8 +262,6 @@ extern int sysctl_tcp_low_latency;
 extern int sysctl_tcp_nometrics_save;
 extern int sysctl_tcp_moderate_rcvbuf;
 extern int sysctl_tcp_tso_win_divisor;
-extern int sysctl_tcp_mtu_probing;
-extern int sysctl_tcp_base_mss;
 extern int sysctl_tcp_workaround_signed_windows;
 extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_thin_linear_timeouts;
index 82601a68cf905cd871905a522cd919c670817fc8..d151539da8e6948571bfdfbc105c838b3b561d71 100644 (file)
@@ -603,20 +603,6 @@ static struct ctl_table ipv4_table[] = {
                .maxlen         = TCP_CA_NAME_MAX,
                .proc_handler   = proc_tcp_congestion_control,
        },
-       {
-               .procname       = "tcp_mtu_probing",
-               .data           = &sysctl_tcp_mtu_probing,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-       {
-               .procname       = "tcp_base_mss",
-               .data           = &sysctl_tcp_base_mss,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
        {
                .procname       = "tcp_workaround_signed_windows",
                .data           = &sysctl_tcp_workaround_signed_windows,
@@ -883,6 +869,20 @@ static struct ctl_table ipv4_net_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
+       {
+               .procname       = "tcp_mtu_probing",
+               .data           = &init_net.ipv4.sysctl_tcp_mtu_probing,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
+               .procname       = "tcp_base_mss",
+               .data           = &init_net.ipv4.sysctl_tcp_base_mss,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
        { }
 };
 
index 67bc95fb5d9e236c4ad58eb09c674e4cad01ab23..5a2dfed4783b6ed0185dccded960972b4d6e13b0 100644 (file)
@@ -2459,6 +2459,7 @@ static int __net_init tcp_sk_init(struct net *net)
                *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
        }
        net->ipv4.sysctl_tcp_ecn = 2;
+       net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
        return 0;
 
 fail:
index 4fcc9a7688499ed13cca430f2d2542c0f92136ee..a2a796c5536b032264e2a71f596f673e8307f25c 100644 (file)
@@ -59,9 +59,6 @@ int sysctl_tcp_limit_output_bytes __read_mostly = 131072;
  */
 int sysctl_tcp_tso_win_divisor __read_mostly = 3;
 
-int sysctl_tcp_mtu_probing __read_mostly = 0;
-int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
-
 /* By default, RFC2861 behavior.  */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
@@ -1350,11 +1347,12 @@ void tcp_mtup_init(struct sock *sk)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
+       struct net *net = sock_net(sk);
 
-       icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1;
+       icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
        icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
                               icsk->icsk_af_ops->net_header_len;
-       icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
+       icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
        icsk->icsk_mtup.probe_size = 0;
 }
 EXPORT_SYMBOL(tcp_mtup_init);
index 1829c7fbc77e4ded47744a60e0b55c11c635e82d..0732b787904ed32003bb776c744ed56457e0cb37 100644 (file)
@@ -101,17 +101,20 @@ static int tcp_orphan_retries(struct sock *sk, int alive)
 
 static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
 {
+       struct net *net = sock_net(sk);
+
        /* Black hole detection */
-       if (sysctl_tcp_mtu_probing) {
+       if (net->ipv4.sysctl_tcp_mtu_probing) {
                if (!icsk->icsk_mtup.enabled) {
                        icsk->icsk_mtup.enabled = 1;
                        tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
                } else {
+                       struct net *net = sock_net(sk);
                        struct tcp_sock *tp = tcp_sk(sk);
                        int mss;
 
                        mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
-                       mss = min(sysctl_tcp_base_mss, mss);
+                       mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
                        mss = max(mss, 68 - tp->tcp_header_len);
                        icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
                        tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);