drbd: Fix a connection drop issue after enabling allow-two-primaries
author Philipp Reisner <philipp.reisner@linbit.com>
Wed, 23 Oct 2013 08:59:16 +0000 (10:59 +0200)
committer Jens Axboe <axboe@kernel.dk>
Fri, 8 Nov 2013 16:10:28 +0000 (09:10 -0700)
Since drbd-8.4.0 it is possible to change the allow-two-primaries
network option while the connection is established.

The sequence code, used to partially order packets from the
data socket with packets from the meta-data socket, still assumed
that the allow-two-primaries option is constant while the
connection is established.

I.e.
On a node that has the RESOLVE_CONFLICTS bit set, after enabling
allow-two-primaries, when receiving the next data packet it timed out
while waiting for the necessary packets on the data socket to arrive
(wait_for_and_update_peer_seq() function).

Fixed that by always tracking the sequence number, but only waiting
for it if allow-two-primaries is set.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/block/drbd/drbd_receiver.c

index cc29cd3bf78b6a758f85255f56f6cdeca1b2cc4b..12c59eb3b1274ef7f7c9fe0ac5f854eea10f722a 100644 (file)
@@ -1890,29 +1890,11 @@ static u32 seq_max(u32 a, u32 b)
        return seq_greater(a, b) ? a : b;
 }
 
-static bool need_peer_seq(struct drbd_conf *mdev)
-{
-       struct drbd_tconn *tconn = mdev->tconn;
-       int tp;
-
-       /*
-        * We only need to keep track of the last packet_seq number of our peer
-        * if we are in dual-primary mode and we have the resolve-conflicts flag set; see
-        * handle_write_conflicts().
-        */
-
-       rcu_read_lock();
-       tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
-       rcu_read_unlock();
-
-       return tp && test_bit(RESOLVE_CONFLICTS, &tconn->flags);
-}
-
 static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
 {
        unsigned int newest_peer_seq;
 
-       if (need_peer_seq(mdev)) {
+       if (test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)) {
                spin_lock(&mdev->peer_seq_lock);
                newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
                mdev->peer_seq = newest_peer_seq;
@@ -1972,22 +1954,31 @@ static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_s
 {
        DEFINE_WAIT(wait);
        long timeout;
-       int ret;
+       int ret = 0, tp;
 
-       if (!need_peer_seq(mdev))
+       if (!test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags))
                return 0;
 
        spin_lock(&mdev->peer_seq_lock);
        for (;;) {
                if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
                        mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
-                       ret = 0;
                        break;
                }
+
                if (signal_pending(current)) {
                        ret = -ERESTARTSYS;
                        break;
                }
+
+               rcu_read_lock();
+               tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
+               rcu_read_unlock();
+
+               if (!tp)
+                       break;
+
+               /* Only need to wait if two_primaries is enabled */
                prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
                spin_unlock(&mdev->peer_seq_lock);
                rcu_read_lock();
@@ -2228,8 +2219,10 @@ static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi)
                        }
                        goto out_interrupted;
                }
-       } else
+       } else {
+               update_peer_seq(mdev, peer_seq);
                spin_lock_irq(&mdev->tconn->req_lock);
+       }
        list_add(&peer_req->w.list, &mdev->active_ee);
        spin_unlock_irq(&mdev->tconn->req_lock);