tcp: fix a potential deadlock in tcp_get_info()
author Eric Dumazet <edumazet@google.com>
Fri, 22 May 2015 04:51:19 +0000 (21:51 -0700)
committer Danny Wood <danwood76@gmail.com>
Thu, 15 Jul 2021 09:45:37 +0000 (10:45 +0100)
Taking the socket spinlock in tcp_get_info() can deadlock:
inet_diag_dump_icsk() holds &hashinfo->ehash_locks[i] when it calls
tcp_get_info(), while packet processing can take the same two locks
in the reverse order.

We could avoid this locking for TCP_LISTEN states, but lockdep would
certainly get confused, as all TCP sockets share the same lockdep classes.

[  523.722504] ======================================================
[  523.728706] [ INFO: possible circular locking dependency detected ]
[  523.734990] 4.1.0-dbg-DEV #1676 Not tainted
[  523.739202] -------------------------------------------------------
[  523.745474] ss/18032 is trying to acquire lock:
[  523.750002]  (slock-AF_INET){+.-...}, at: [<ffffffff81669d44>] tcp_get_info+0x2c4/0x360
[  523.758129]
[  523.758129] but task is already holding lock:
[  523.763968]  (&(&hashinfo->ehash_locks[i])->rlock){+.-...}, at: [<ffffffff816bcb75>] inet_diag_dump_icsk+0x1d5/0x6c0
[  523.774661]
[  523.774661] which lock already depends on the new lock.
[  523.774661]
[  523.782850]
[  523.782850] the existing dependency chain (in reverse order) is:
[  523.790326]
-> #1 (&(&hashinfo->ehash_locks[i])->rlock){+.-...}:
[  523.796599]        [<ffffffff811126bb>] lock_acquire+0xbb/0x270
[  523.802565]        [<ffffffff816f5868>] _raw_spin_lock+0x38/0x50
[  523.808628]        [<ffffffff81665af8>] __inet_hash_nolisten+0x78/0x110
[  523.815273]        [<ffffffff816819db>] tcp_v4_syn_recv_sock+0x24b/0x350
[  523.822067]        [<ffffffff81684d41>] tcp_check_req+0x3c1/0x500
[  523.828199]        [<ffffffff81682d09>] tcp_v4_do_rcv+0x239/0x3d0
[  523.834331]        [<ffffffff816842fe>] tcp_v4_rcv+0xa8e/0xc10
[  523.840202]        [<ffffffff81658fa3>] ip_local_deliver_finish+0x133/0x3e0
[  523.847214]        [<ffffffff81659a9a>] ip_local_deliver+0xaa/0xc0
[  523.853440]        [<ffffffff816593b8>] ip_rcv_finish+0x168/0x5c0
[  523.859624]        [<ffffffff81659db7>] ip_rcv+0x307/0x420
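
The splat above reduces to a classic AB-BA inversion. Purely for
illustration (not part of the patch), the userspace sketch below
reproduces the two lock orders with pthread spinlocks standing in for
slock-AF_INET and ehash_locks[i]; all names here are stand-ins:

	/* Illustration only: the two lock orders from the splat,
	 * built with pthread spinlocks. Compile with -pthread. */
	#include <pthread.h>

	static pthread_spinlock_t slock;      /* stands in for slock-AF_INET  */
	static pthread_spinlock_t ehash_lock; /* stands in for ehash_locks[i] */

	/* inet_diag_dump_icsk() path: ehash lock first, then the socket
	 * spinlock (which the old tcp_get_info() took). */
	static void *diag_path(void *arg)
	{
		pthread_spin_lock(&ehash_lock);
		pthread_spin_lock(&slock);
		pthread_spin_unlock(&slock);
		pthread_spin_unlock(&ehash_lock);
		return NULL;
	}

	/* rx path: socket spinlock held, then __inet_hash_nolisten()
	 * takes the ehash lock: the reverse order. */
	static void *rx_path(void *arg)
	{
		pthread_spin_lock(&slock);
		pthread_spin_lock(&ehash_lock);
		pthread_spin_unlock(&ehash_lock);
		pthread_spin_unlock(&slock);
		return NULL;
	}

	int main(void)
	{
		pthread_t a, b;

		pthread_spin_init(&slock, PTHREAD_PROCESS_PRIVATE);
		pthread_spin_init(&ehash_lock, PTHREAD_PROCESS_PRIVATE);
		/* With unlucky timing each thread ends up holding one
		 * lock while spinning forever on the other. */
		pthread_create(&a, NULL, diag_path, NULL);
		pthread_create(&b, NULL, rx_path, NULL);
		pthread_join(a, NULL);
		pthread_join(b, NULL);
		return 0;
	}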

Let's use the u64_stats_sync infrastructure instead. As a bonus, 64bit
arches get optimized, as these operations are no-ops for them.
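
For reference, this is the u64_stats_sync pattern the patch adopts,
as a minimal kernel-C sketch (struct foo, foo_add() and foo_read() are
hypothetical; the u64_stats_* calls are the ones used in the diff
below). On 32bit SMP the writer bumps a seqcount and the reader
retries until it sees a stable snapshot; on 64bit both sides compile
to plain loads and stores:

	#include <linux/u64_stats_sync.h>

	struct foo {				/* hypothetical counter holder */
		u64 bytes;			/* 64bit counter to protect */
		struct u64_stats_sync syncp;	/* init once with u64_stats_init() */
	};

	/* Writer side (cf tcp_snd_una_update()): cheap, never blocks. */
	static void foo_add(struct foo *f, u64 delta)
	{
		u64_stats_update_begin(&f->syncp);
		f->bytes += delta;
		u64_stats_update_end(&f->syncp);
	}

	/* Reader side (cf tcp_get_info()): loop until the snapshot is
	 * consistent, without taking any of the writer's locks. */
	static u64 foo_read(const struct foo *f)
	{
		unsigned int start;
		u64 val;

		do {
			start = u64_stats_fetch_begin_irq(&f->syncp);
			val = f->bytes;
		} while (u64_stats_fetch_retry_irq(&f->syncp, start));
		return val;
	}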

Fixes: 0df48c26d841 ("tcp: add tcpi_bytes_acked to tcp_info")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Change-Id: Ic33ba5a9c4ca5dfd3d2224b7c0ed9fbe9eccd0ca

include/linux/tcp.h
net/ipv4/tcp.c
net/ipv4/tcp_input.c

index 32edfbe4b7e13ea0cbd8c0adb1243c6ca5aa3a15..cf4d327661f49cb37211866166c73aeb31d72af2 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -166,6 +166,8 @@ struct tcp_sock {
                                 * sum(delta(snd_una)), or how many bytes
                                 * were acked.
                                 */
+       struct u64_stats_sync syncp; /* protects 64bit vars (cf tcp_get_info()) */
+
        u32     snd_una;        /* First byte we want an ack for        */
        u32     snd_sml;        /* Last byte of the most recently transmitted small packet */
        u32     rcv_tstamp;     /* timestamp of last received ACK (for keepalives) */
index 219d4dfe8d8c263e183cf0b013f389db02c91c4e..b6ffa1e2c3ea8edff03a68407e8ab2d6e0a2533c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -401,6 +401,7 @@ void tcp_init_sock(struct sock *sk)
        tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
        tp->snd_cwnd_clamp = ~0;
        tp->mss_cache = TCP_MSS_DEFAULT;
+       u64_stats_init(&tp->syncp);
 
        tp->reordering = sysctl_tcp_reordering;
        tcp_enable_early_retrans(tp);
@@ -2711,6 +2712,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
        const struct tcp_sock *tp = tcp_sk(sk);
        const struct inet_connection_sock *icsk = inet_csk(sk);
        u32 now = tcp_time_stamp;
+       unsigned int start;
 
        memset(info, 0, sizeof(*info));
 
@@ -2776,12 +2778,13 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
        info->tcpi_max_pacing_rate = sk->sk_max_pacing_rate != ~0U ?
                                        sk->sk_max_pacing_rate : ~0ULL;
 
-       spin_lock_bh(&sk->sk_lock.slock);
-       info->tcpi_bytes_acked = tp->bytes_acked;
-       info->tcpi_bytes_received = tp->bytes_received;
+       do {
+               start = u64_stats_fetch_begin_irq(&tp->syncp);
+               info->tcpi_bytes_acked = tp->bytes_acked;
+               info->tcpi_bytes_received = tp->bytes_received;
+       } while (u64_stats_fetch_retry_irq(&tp->syncp, start));
        info->tcpi_segs_out = tp->segs_out;
        info->tcpi_segs_in = tp->segs_in;
-       spin_unlock_bh(&sk->sk_lock.slock);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
 
index ac7d718c1ae7a6fa60dcdcfcc72c082fac7c090b..e47fcec5dea1b3da0d269cb0a4b2cd69647fed9d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3253,7 +3253,9 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
 {
        u32 delta = ack - tp->snd_una;
 
+       u64_stats_update_begin(&tp->syncp);
        tp->bytes_acked += delta;
+       u64_stats_update_end(&tp->syncp);
        tp->snd_una = ack;
 }
 
@@ -3262,7 +3264,9 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
 {
        u32 delta = seq - tp->rcv_nxt;
 
+       u64_stats_update_begin(&tp->syncp);
        tp->bytes_received += delta;
+       u64_stats_update_end(&tp->syncp);
        tp->rcv_nxt = seq;
 }