ceph: fix connection fault STANDBY check
author Sage Weil <sage@newdream.net>
Thu, 25 Feb 2010 20:40:45 +0000 (12:40 -0800)
committer Sage Weil <sage@newdream.net>
Thu, 25 Feb 2010 20:40:45 +0000 (12:40 -0800)
Move any out_sent messages to out_queue _before_ checking if
out_queue is empty and going to STANDBY, or else we may drop
something that was never acked.

And clean up the code a bit (less goto).

Signed-off-by: Sage Weil <sage@newdream.net>
fs/ceph/messenger.c

index 9ea7b763c8dce3346ec530b1f726c1c5d1b63e74..0ddc2c75f6b407f4ddd7a4384308f301df211668 100644 (file)
@@ -1853,32 +1853,27 @@ static void ceph_fault(struct ceph_connection *con)
                con->in_msg = NULL;
        }
 
+       /* Requeue anything that hasn't been acked */
+       list_splice_init(&con->out_sent, &con->out_queue);
 
        /* If there are no messages in the queue, place the connection
         * in a STANDBY state (i.e., don't try to reconnect just yet). */
        if (list_empty(&con->out_queue) && !con->out_keepalive_pending) {
                dout("fault setting STANDBY\n");
                set_bit(STANDBY, &con->state);
-               mutex_unlock(&con->mutex);
-               goto out;
+       } else {
+               /* retry after a delay. */
+               if (con->delay == 0)
+                       con->delay = BASE_DELAY_INTERVAL;
+               else if (con->delay < MAX_DELAY_INTERVAL)
+                       con->delay *= 2;
+               dout("fault queueing %p delay %lu\n", con, con->delay);
+               con->ops->get(con);
+               if (queue_delayed_work(ceph_msgr_wq, &con->work,
+                                      round_jiffies_relative(con->delay)) == 0)
+                       con->ops->put(con);
        }
 
-       /* Requeue anything that hasn't been acked, and retry after a
-        * delay. */
-       list_splice_init(&con->out_sent, &con->out_queue);
-
-       if (con->delay == 0)
-               con->delay = BASE_DELAY_INTERVAL;
-       else if (con->delay < MAX_DELAY_INTERVAL)
-               con->delay *= 2;
-
-       /* explicitly schedule work to try to reconnect again later. */
-       dout("fault queueing %p delay %lu\n", con, con->delay);
-       con->ops->get(con);
-       if (queue_delayed_work(ceph_msgr_wq, &con->work,
-                              round_jiffies_relative(con->delay)) == 0)
-               con->ops->put(con);
-
 out_unlock:
        mutex_unlock(&con->mutex);
 out: