rds: cancel send/recv work before queuing connection shutdown
author: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Sun, 16 Jul 2017 23:43:46 +0000 (16:43 -0700)
committer: David S. Miller <davem@davemloft.net>
Mon, 17 Jul 2017 02:07:35 +0000 (19:07 -0700)
We could end up executing rds_conn_shutdown before the rds_recv_worker
thread. In that case rds_conn_shutdown -> rds_tcp_conn_shutdown can do a
sock_release and set sock->sk to NULL, which may interleave in bad
ways with rds_recv_worker; e.g., it could result in:

"BUG: unable to handle kernel NULL pointer dereference at 0000000000000078"
    [ffff881769f6fd70] release_sock at ffffffff815f337b
    [ffff881769f6fd90] rds_tcp_recv at ffffffffa043c888 [rds_tcp]
    [ffff881769f6fdb0] rds_recv_worker at ffffffffa04a4810 [rds]
    [ffff881769f6fde0] process_one_work at ffffffff810a14c1
    [ffff881769f6fe40] worker_thread at ffffffff810a1940
    [ffff881769f6fec0] kthread at ffffffff810a6b1e

Also, do not enqueue any new shutdown workq items when the connection is
shutting down (this may happen for rds-tcp in softirq mode, if a FIN
or CLOSE is received while the module is in the middle of an unload).

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/rds/connection.c
net/rds/rds.h
net/rds/tcp.c
net/rds/tcp_connect.c
net/rds/tcp_send.c
net/rds/threads.c

index 50a3789ac23e4630e096fc4539e9fd8e99551a0e..005bca68aa943e97adefe71ed6ca6155f766928a 100644 (file)
@@ -374,13 +374,13 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
        if (!cp->cp_transport_data)
                return;
 
-       rds_conn_path_drop(cp);
-       flush_work(&cp->cp_down_w);
-
        /* make sure lingering queued work won't try to ref the conn */
        cancel_delayed_work_sync(&cp->cp_send_w);
        cancel_delayed_work_sync(&cp->cp_recv_w);
 
+       rds_conn_path_drop(cp, true);
+       flush_work(&cp->cp_down_w);
+
        /* tear down queued messages */
        list_for_each_entry_safe(rm, rtmp,
                                 &cp->cp_send_queue,
@@ -664,9 +664,13 @@ void rds_conn_exit(void)
 /*
  * Force a disconnect
  */
-void rds_conn_path_drop(struct rds_conn_path *cp)
+void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy)
 {
        atomic_set(&cp->cp_state, RDS_CONN_ERROR);
+
+       if (!destroy && cp->cp_conn->c_destroy_in_prog)
+               return;
+
        queue_work(rds_wq, &cp->cp_down_w);
 }
 EXPORT_SYMBOL_GPL(rds_conn_path_drop);
@@ -674,7 +678,7 @@ EXPORT_SYMBOL_GPL(rds_conn_path_drop);
 void rds_conn_drop(struct rds_connection *conn)
 {
        WARN_ON(conn->c_trans->t_mp_capable);
-       rds_conn_path_drop(&conn->c_path[0]);
+       rds_conn_path_drop(&conn->c_path[0], false);
 }
 EXPORT_SYMBOL_GPL(rds_conn_drop);
 
@@ -706,5 +710,5 @@ __rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...)
        vprintk(fmt, ap);
        va_end(ap);
 
-       rds_conn_path_drop(cp);
+       rds_conn_path_drop(cp, false);
 }
index 516bcc89b46fdfd630dad78ad432a753e977f633..3382695bf46c8893d6c6a432b58df593d9937853 100644 (file)
@@ -700,7 +700,7 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net,
 void rds_conn_shutdown(struct rds_conn_path *cpath);
 void rds_conn_destroy(struct rds_connection *conn);
 void rds_conn_drop(struct rds_connection *conn);
-void rds_conn_path_drop(struct rds_conn_path *cpath);
+void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy);
 void rds_conn_connect_if_down(struct rds_connection *conn);
 void rds_conn_path_connect_if_down(struct rds_conn_path *cp);
 void rds_for_each_conn_info(struct socket *sock, unsigned int len,
index 431404dbdad1cebe5d80cdb25de8b60db75fa87b..6b7ee71f40c63e879dc46d7bed025aac3def2c1f 100644 (file)
@@ -592,7 +592,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
                        continue;
 
                /* reconnect with new parameters */
-               rds_conn_path_drop(tc->t_cpath);
+               rds_conn_path_drop(tc->t_cpath, false);
        }
        spin_unlock_irq(&rds_tcp_conn_lock);
 }
index cbe08a1fa4c71b93b0f81cd4a5782cd777c24b49..46f74dad0e164438594d9f195d906c9921bb8b23 100644 (file)
@@ -69,14 +69,14 @@ void rds_tcp_state_change(struct sock *sk)
                if (!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr) &&
                    rds_conn_path_transition(cp, RDS_CONN_CONNECTING,
                                             RDS_CONN_ERROR)) {
-                       rds_conn_path_drop(cp);
+                       rds_conn_path_drop(cp, false);
                } else {
                        rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
                }
                break;
        case TCP_CLOSE_WAIT:
        case TCP_CLOSE:
-               rds_conn_path_drop(cp);
+               rds_conn_path_drop(cp, false);
        default:
                break;
        }
index 0d8616aa5bad00201cbda687fd836cfc59512116..dc860d1bb6088929591bb670ef96079de7c1819c 100644 (file)
@@ -157,7 +157,7 @@ out:
                                        "returned %d, "
                                        "disconnecting and reconnecting\n",
                                        &conn->c_faddr, cp->cp_index, ret);
-                               rds_conn_path_drop(cp);
+                               rds_conn_path_drop(cp, false);
                        }
                }
        }
index 2852bc1d37d452eba9745039bd389055446d78f0..f121daa402c81df6ea111da3d5bee6472629025f 100644 (file)
@@ -78,7 +78,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
                                "current state is %d\n",
                                __func__,
                                atomic_read(&cp->cp_state));
-               rds_conn_path_drop(cp);
+               rds_conn_path_drop(cp, false);
                return;
        }