drbd: Fix disconnect to keep the peer disk state if connection breaks during operation
author Philipp Reisner <philipp.reisner@linbit.com>
Wed, 27 Mar 2013 13:08:40 +0000 (14:08 +0100)
committer Jens Axboe <axboe@kernel.dk>
Thu, 28 Mar 2013 16:10:25 +0000 (10:10 -0600)
The issue was that if the connection broke while we were doing the
graceful state change to C_DISCONNECTING (C_TEARDOWN), the state
engine returned a success code (SS_CW_NO_NEED).

As a result, we failed to call the fence-peer script in that
case.

Fixed that by introducing a new error code (SS_OUTDATE_WO_CONN).
This one should never reach back into user space.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/block/drbd/drbd_nl.c
drivers/block/drbd/drbd_state.c
drivers/block/drbd/drbd_strings.c
include/linux/drbd.h

index 56bafdcd943eb6bf1226af7f82c4b2ab09aa8254..39e9a91a8f310d4655f1c6f2e467a10e1140ac26 100644 (file)
@@ -2198,8 +2198,11 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool for
                return SS_SUCCESS;
        case SS_PRIMARY_NOP:
                /* Our state checking code wants to see the peer outdated. */
-               rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
-                                               pdsk, D_OUTDATED), CS_VERBOSE);
+               rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0);
+
+               if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */
+                       rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_VERBOSE);
+
                break;
        case SS_CW_FAILED_BY_PEER:
                /* The peer probably wants to see us outdated. */
index 22e259f3437036cee9fa372a974327ec5a52c61b..90c5be2b1d309bf041927891f2f8be24fc491bc1 100644 (file)
@@ -642,6 +642,10 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns, struct drbd_t
            && os.conn < C_WF_REPORT_PARAMS)
                rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */
 
+       if (ns.conn == C_DISCONNECTING && ns.pdsk == D_OUTDATED &&
+           os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)
+               rv = SS_OUTDATE_WO_CONN;
+
        return rv;
 }
 
@@ -1748,13 +1752,9 @@ _conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state
        if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags))
                return SS_CW_FAILED_BY_PEER;
 
-       rv = tconn->cstate != C_WF_REPORT_PARAMS ? SS_CW_NO_NEED : SS_UNKNOWN_ERROR;
-
-       if (rv == SS_UNKNOWN_ERROR)
-               rv = conn_is_valid_transition(tconn, mask, val, 0);
-
-       if (rv == SS_SUCCESS)
-               rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
+       rv = conn_is_valid_transition(tconn, mask, val, 0);
+       if (rv == SS_SUCCESS && tconn->cstate == C_WF_REPORT_PARAMS)
+               rv = SS_UNKNOWN_ERROR; /* continue waiting */
 
        return rv;
 }
index 9a664bd27404b87336247d452e736650f7476deb..58e08ff2b2cebb2d2836b47af7a92c37ed17226c 100644 (file)
@@ -89,6 +89,7 @@ static const char *drbd_state_sw_errors[] = {
        [-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated",
        [-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change",
        [-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted",
+       [-SS_OUTDATE_WO_CONN] = "Need a connection for a graceful disconnect/outdate peer",
        [-SS_O_VOL_PEER_PRI] = "Other vol primary on peer not allowed by config",
 };
 
index 0c5a18ec322ce2b1ef963d8364d868adc3a82f98..316330705fd7216d1e3633ce9f8c704d0125b170 100644 (file)
@@ -319,7 +319,8 @@ enum drbd_state_rv {
        SS_IN_TRANSIENT_STATE = -18,  /* Retry after the next state change */
        SS_CONCURRENT_ST_CHG = -19,   /* Concurrent cluster side state change! */
        SS_O_VOL_PEER_PRI = -20,
-       SS_AFTER_LAST_ERROR = -21,    /* Keep this at bottom */
+       SS_OUTDATE_WO_CONN = -21,
+       SS_AFTER_LAST_ERROR = -22,    /* Keep this at bottom */
 };
 
 /* from drbd_strings.c */