net/ceph: Only clear SOCK_NOSPACE when there is sufficient space in the socket buffer
author: Jim Schutt <jaschut@sandia.gov>
Wed, 29 Feb 2012 15:30:58 +0000 (08:30 -0700)
committer: Alex Elder <elder@dreamhost.com>
Thu, 22 Mar 2012 15:47:45 +0000 (10:47 -0500)
The Ceph messenger would sometimes queue multiple work items to write
data to a socket when the socket buffer was full.

Fix this problem by making ceph_write_space() use SOCK_NOSPACE in the
same way that net/core/stream.c:sk_stream_write_space() does, i.e.,
clearing it only when sufficient space is available in the socket buffer.

Signed-off-by: Jim Schutt <jaschut@sandia.gov>
Reviewed-by: Alex Elder <elder@dreamhost.com>
net/ceph/messenger.c

index ad5b70801f37788edbb71b6d95cbe7d25ab94abd..d11f91b05452fc9b95d7d5beebb5061ea294d7f0 100644 (file)
@@ -143,16 +143,22 @@ static void ceph_write_space(struct sock *sk)
        struct ceph_connection *con =
                (struct ceph_connection *)sk->sk_user_data;
 
-       /* only queue to workqueue if there is data we want to write. */
+       /* only queue to workqueue if there is data we want to write,
+        * and there is sufficient space in the socket buffer to accept
+        * more data.  clear SOCK_NOSPACE so that ceph_write_space()
+        * doesn't get called again until try_write() fills the socket
+        * buffer. See net/ipv4/tcp_input.c:tcp_check_space()
+        * and net/core/stream.c:sk_stream_write_space().
+        */
        if (test_bit(WRITE_PENDING, &con->state)) {
-               dout("ceph_write_space %p queueing write work\n", con);
-               queue_con(con);
+               if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
+                       dout("ceph_write_space %p queueing write work\n", con);
+                       clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+                       queue_con(con);
+               }
        } else {
                dout("ceph_write_space %p nothing to write\n", con);
        }
-
-       /* since we have our own write_space, clear the SOCK_NOSPACE flag */
-       clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 }
 
 /* socket's state has changed */