drbd: send intermediate state change results to the peer
author: Lars Ellenberg <lars.ellenberg@linbit.com>
Thu, 27 Oct 2011 14:52:30 +0000 (16:52 +0200)
committer: Philipp Reisner <philipp.reisner@linbit.com>
Wed, 9 May 2012 13:15:56 +0000 (15:15 +0200)
DRBD state changes schedule after_state_ch() actions on a worker thread,
which decides, based on the old and new states of that change, whether to
send an informational state update packet (P_STATE) to the peer.
If it decides to drbd_send_state(), however, it would always send the
_current_ state, which, if a second state change happens before the
after_state_ch() of the first has run, may "fast-forward" the peer's view
of this node.  In most cases that is harmless, but sometimes this can
confuse DRBD, for example into not actually starting a necessary resync
if you do a very tight detach/attach loop on a Connected Secondary.

Fix this by always sending the "new" state of the respective state
transition which scheduled this after_state_ch() work.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_nl.c
drivers/block/drbd/drbd_receiver.c

index c7976a77dfba54051394ab2847a2e9adce5e0311..31dee20f3411e6228737b23c13776b3d8ec47ff4 100644 (file)
@@ -1229,8 +1229,8 @@ extern int drbd_send_uuids(struct drbd_conf *mdev);
 extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev);
 extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev);
 extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags);
-extern int _drbd_send_state(struct drbd_conf *mdev);
-extern int drbd_send_state(struct drbd_conf *mdev);
+extern int drbd_send_state(struct drbd_conf *mdev, union drbd_state s);
+extern int drbd_send_current_state(struct drbd_conf *mdev);
 extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
                        enum drbd_packets cmd, struct p_header80 *h,
                        size_t size, unsigned msg_flags);
index 64318d4ca9ece8ba0210c6f2485c6584057d3848..3a5b4dec529fe8f0dc946032210404101d5b9e03 100644 (file)
@@ -1487,7 +1487,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
        /* Do not change the order of the if above and the two below... */
        if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) {      /* attach on the peer */
                drbd_send_uuids(mdev);
-               drbd_send_state(mdev);
+               drbd_send_state(mdev, ns);
        }
        /* No point in queuing send_bitmap if we don't have a connection
         * anymore, so check also the _current_ state, not only the new state
@@ -1552,14 +1552,14 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
            os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
                drbd_send_sizes(mdev, 0, 0);  /* to start sync... */
                drbd_send_uuids(mdev);
-               drbd_send_state(mdev);
+               drbd_send_state(mdev, ns);
        }
 
        /* We want to pause/continue resync, tell peer. */
        if (ns.conn >= C_CONNECTED &&
             ((os.aftr_isp != ns.aftr_isp) ||
              (os.user_isp != ns.user_isp)))
-               drbd_send_state(mdev);
+               drbd_send_state(mdev, ns);
 
        /* In case one of the isp bits got set, suspend other devices. */
        if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
@@ -1569,10 +1569,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
        /* Make sure the peer gets informed about eventual state
           changes (ISP bits) while we were in WFReportParams. */
        if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
-               drbd_send_state(mdev);
+               drbd_send_state(mdev, ns);
 
        if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
-               drbd_send_state(mdev);
+               drbd_send_state(mdev, ns);
 
        /* We are in the progress to start a full sync... */
        if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
@@ -1612,7 +1612,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
                                "ASSERT FAILED: disk is %s during detach\n",
                                drbd_disk_str(mdev->state.disk));
 
-               if (drbd_send_state(mdev))
+               if (drbd_send_state(mdev, ns))
                        dev_info(DEV, "Notified peer that I am detaching my disk\n");
 
                drbd_rs_cancel_all(mdev);
@@ -1642,7 +1642,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
                 mdev->rs_failed = 0;
                 atomic_set(&mdev->rs_pending_cnt, 0);
 
-               if (drbd_send_state(mdev))
+               if (drbd_send_state(mdev, ns))
                        dev_info(DEV, "Notified peer that I'm now diskless.\n");
                /* corresponding get_ldev in __drbd_set_state
                 * this may finally trigger drbd_ldev_destroy. */
@@ -1651,7 +1651,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 
        /* Notify peer that I had a local IO error, and did not detached.. */
        if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT)
-               drbd_send_state(mdev);
+               drbd_send_state(mdev, ns);
 
        /* Disks got bigger while they were detached */
        if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
@@ -1669,7 +1669,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
        /* sync target done with resync.  Explicitly notify peer, even though
         * it should (at least for non-empty resyncs) already know itself. */
        if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
-               drbd_send_state(mdev);
+               drbd_send_state(mdev, ns);
 
        /* This triggers bitmap writeout of potentially still unwritten pages
         * if the resync finished cleanly, or aborted because of peer disk
@@ -2191,10 +2191,10 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
 }
 
 /**
- * drbd_send_state() - Sends the drbd state to the peer
+ * drbd_send_current_state() - Sends the drbd state to the peer
  * @mdev:      DRBD device.
  */
-int drbd_send_state(struct drbd_conf *mdev)
+int drbd_send_current_state(struct drbd_conf *mdev)
 {
        struct socket *sock;
        struct p_state p;
@@ -2220,6 +2220,37 @@ int drbd_send_state(struct drbd_conf *mdev)
        return ok;
 }
 
+/**
+ * drbd_send_state() - After a state change, sends the new state to the peer
+ * @mdev:      DRBD device.
+ * @state:     the state to send, not necessarily the current state.
+ *
+ * Each state change queues an "after_state_ch" work, which will eventually
+ * send the resulting new state to the peer. If more state changes happen
+ * between queuing and processing of the after_state_ch work, we still
+ * want to send each intermediary state in the order it occurred.
+ */
+int drbd_send_state(struct drbd_conf *mdev, union drbd_state state)
+{
+       struct socket *sock;
+       struct p_state p;
+       int ok = 0;
+
+       mutex_lock(&mdev->data.mutex);
+
+       p.state = cpu_to_be32(state.i);
+       sock = mdev->data.socket;
+
+       if (likely(sock != NULL)) {
+               ok = _drbd_send_cmd(mdev, sock, P_STATE,
+                                   (struct p_header80 *)&p, sizeof(p), 0);
+       }
+
+       mutex_unlock(&mdev->data.mutex);
+
+       return ok;
+}
+
 int drbd_send_state_req(struct drbd_conf *mdev,
        union drbd_state mask, union drbd_state val)
 {
index 00a82ab7ab988196765c627ea6b0e1b82ceacfe8..1bbbad302ae7e8df715e061b958c36918b640c9b 100644 (file)
@@ -432,7 +432,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
                /* if this was forced, we should consider sync */
                if (forced)
                        drbd_send_uuids(mdev);
-               drbd_send_state(mdev);
+               drbd_send_current_state(mdev);
        }
 
        drbd_md_sync(mdev);
index 1a48e02b83bc73709efd460eaefd251eee0aa626..f0d86cb300cf8703ac8939295ef1c0c0b48fea8d 100644 (file)
@@ -899,7 +899,7 @@ retry:
        drbd_send_sync_param(mdev, &mdev->sync_conf);
        drbd_send_sizes(mdev, 0, 0);
        drbd_send_uuids(mdev);
-       drbd_send_state(mdev);
+       drbd_send_current_state(mdev);
        clear_bit(USE_DEGR_WFC_T, &mdev->flags);
        clear_bit(RESIZE_PENDING, &mdev->flags);
 
@@ -3294,7 +3294,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
                        /* Nowadays only used when forcing a node into primary role and
                           setting its disk to UpToDate with that */
                        drbd_send_uuids(mdev);
-                       drbd_send_state(mdev);
+                       drbd_send_current_state(mdev);
                }
        }