drbd: differentiate between normal and forced detach
authorLars Ellenberg <lars.ellenberg@linbit.com>
Thu, 14 Jun 2012 12:21:32 +0000 (14:21 +0200)
committerPhilipp Reisner <philipp.reisner@linbit.com>
Tue, 24 Jul 2012 12:06:18 +0000 (14:06 +0200)
Aborting local requests (not waiting for completion from the lower level
disk) is dangerous: if the master bio has been completed to upper
layers, data pages may be re-used for other things already.
If local IO is still pending and later completes,
this may cause crashes or corrupt unrelated data.

Only abort local IO if explicitly requested.
Intended use case is a lower level device that turned into a tarpit,
not completing io requests, not even doing error completion.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
drivers/block/drbd/drbd_actlog.c
drivers/block/drbd/drbd_bitmap.c
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_nl.c
drivers/block/drbd/drbd_req.c
drivers/block/drbd/drbd_worker.c

index e54e31b02b88eb6e927072745f345871a8d96203..6ace11e9e5a1532129ba10f46853ae3b8bf2f8e6 100644 (file)
@@ -411,7 +411,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
                + mdev->ldev->md.al_offset + mdev->al_tr_pos;
 
        if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE))
-               drbd_chk_io_error(mdev, 1, true);
+               drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
 
        if (++mdev->al_tr_pos >
            div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
index fcb956bb4b4c525875f961e7c2398b20c35723f4..ba91b408abad75ce7da0ff173595ce23ee51f635 100644 (file)
@@ -1096,7 +1096,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
 
        if (ctx->error) {
                dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
-               drbd_chk_io_error(mdev, 1, true);
+               drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
                err = -EIO; /* ctx->error ? */
        }
 
@@ -1212,7 +1212,7 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc
        wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);
 
        if (ctx->error)
-               drbd_chk_io_error(mdev, 1, true);
+               drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
                /* that should force detach, so the in memory bitmap will be
                 * gone in a moment as well. */
 
index 356a6e5b4415bb7941612e80d7b686ec817aba06..79c69ebb0653eee884f91216026c16b975d960ed 100644 (file)
@@ -832,6 +832,7 @@ enum {
        BITMAP_IO_QUEUED,       /* Started bitmap IO */
        GO_DISKLESS,            /* Disk is being detached, on io-error or admin request. */
        WAS_IO_ERROR,           /* Local disk failed returned IO error */
+       FORCE_DETACH,           /* Force-detach from local disk, aborting any pending local IO */
        RESYNC_AFTER_NEG,       /* Resync after online grow after the attach&negotiate finished. */
        NET_CONGESTED,          /* The data socket is congested */
 
@@ -1838,12 +1839,20 @@ static inline int drbd_request_state(struct drbd_conf *mdev,
        return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
 }
 
+enum drbd_force_detach_flags {
+       DRBD_IO_ERROR,
+       DRBD_META_IO_ERROR,
+       DRBD_FORCE_DETACH,
+};
+
 #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__)
-static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where)
+static inline void __drbd_chk_io_error_(struct drbd_conf *mdev,
+               enum drbd_force_detach_flags forcedetach,
+               const char *where)
 {
        switch (mdev->ldev->dc.on_io_error) {
        case EP_PASS_ON:
-               if (!forcedetach) {
+               if (forcedetach == DRBD_IO_ERROR) {
                        if (__ratelimit(&drbd_ratelimit_state))
                                dev_err(DEV, "Local IO failed in %s.\n", where);
                        if (mdev->state.disk > D_INCONSISTENT)
@@ -1854,6 +1863,8 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach,
        case EP_DETACH:
        case EP_CALL_HELPER:
                set_bit(WAS_IO_ERROR, &mdev->flags);
+               if (forcedetach == DRBD_FORCE_DETACH)
+                       set_bit(FORCE_DETACH, &mdev->flags);
                if (mdev->state.disk > D_FAILED) {
                        _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
                        dev_err(DEV,
@@ -1873,7 +1884,7 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach,
  */
 #define drbd_chk_io_error(m,e,f) drbd_chk_io_error_(m,e,f, __func__)
 static inline void drbd_chk_io_error_(struct drbd_conf *mdev,
-       int error, int forcedetach, const char *where)
+       int error, enum drbd_force_detach_flags forcedetach, const char *where)
 {
        if (error) {
                unsigned long flags;
index 920ede2829d6c5e467e177ac43a3e97e9f550aac..5bebe8d8ace31d3c613b529e80913ad4be4f6400 100644 (file)
@@ -1630,9 +1630,21 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
                        eh = mdev->ldev->dc.on_io_error;
                        was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
 
-                       /* Immediately allow completion of all application IO, that waits
-                          for completion from the local disk. */
-                       tl_abort_disk_io(mdev);
+                       /* Immediately allow completion of all application IO,
+                        * that waits for completion from the local disk,
+                        * if this was a force-detach due to disk_timeout
+                        * or administrator request (drbdsetup detach --force).
+                        * Do NOT abort otherwise.
+                        * Aborting local requests may cause serious problems,
+                        * if requests are completed to upper layers already,
+                        * and then later the already submitted local bio completes.
+                        * This can cause DMA into former bio pages that meanwhile
+                        * have been re-used for other things.
+                        * So aborting local requests may cause crashes,
+                        * or even worse, silent data corruption.
+                        */
+                       if (test_and_clear_bit(FORCE_DETACH, &mdev->flags))
+                               tl_abort_disk_io(mdev);
 
                        /* current state still has to be D_FAILED,
                         * there is only one way out: to D_DISKLESS,
@@ -3870,7 +3882,7 @@ void drbd_md_sync(struct drbd_conf *mdev)
        if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
                /* this was a try anyways ... */
                dev_err(DEV, "meta data update failed!\n");
-               drbd_chk_io_error(mdev, 1, true);
+               drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
        }
 
        /* Update mdev->ldev->md.la_size_sect,
index 6d4de6a72e8069018b2b4d050b7ef2aa8b259259..40a1c4f07190004488a2f2b4e172a4a933f7c11d 100644 (file)
@@ -950,6 +950,9 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
         * to realize a "hot spare" feature (not that I'd recommend that) */
        wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
 
+       /* make sure there is no leftover from previous force-detach attempts */
+       clear_bit(FORCE_DETACH, &mdev->flags);
+
        /* allocation not in the IO path, cqueue thread context */
        nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
        if (!nbc) {
@@ -1345,6 +1348,7 @@ static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
        }
 
        if (dt.detach_force) {
+               set_bit(FORCE_DETACH, &mdev->flags);
                drbd_force_state(mdev, NS(disk, D_FAILED));
                reply->ret_code = SS_SUCCESS;
                goto out;
index 8e93a6ac9bb65e3497f53c92723a67429ffdf946..1f4b2dbb7d4afc93ea2261b58c786fedab41e1a2 100644 (file)
@@ -455,7 +455,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
                req->rq_state |= RQ_LOCAL_COMPLETED;
                req->rq_state &= ~RQ_LOCAL_PENDING;
 
-               __drbd_chk_io_error(mdev, false);
+               __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
                _req_may_be_done_not_susp(req, m);
                break;
 
@@ -477,7 +477,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
                        break;
                }
 
-               __drbd_chk_io_error(mdev, false);
+               __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
 
        goto_queue_for_net_read:
 
@@ -1275,7 +1275,7 @@ void request_timer_fn(unsigned long data)
                 time_after(now, req->start_time + dt) &&
                !time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) {
                dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
-               __drbd_chk_io_error(mdev, 1);
+               __drbd_chk_io_error(mdev, DRBD_FORCE_DETACH);
        }
        nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et;
        spin_unlock_irq(&mdev->req_lock);
index 620c70ff223118e6f259a200512203f5010e2cda..a35393f2fd1bc4cf9a771b32a3cdef45e52ee016 100644 (file)
@@ -111,7 +111,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local)
        if (list_empty(&mdev->read_ee))
                wake_up(&mdev->ee_wait);
        if (test_bit(__EE_WAS_ERROR, &e->flags))
-               __drbd_chk_io_error(mdev, false);
+               __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
        spin_unlock_irqrestore(&mdev->req_lock, flags);
 
        drbd_queue_work(&mdev->data.work, &e->w);
@@ -154,7 +154,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo
                : list_empty(&mdev->active_ee);
 
        if (test_bit(__EE_WAS_ERROR, &e->flags))
-               __drbd_chk_io_error(mdev, false);
+               __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
        spin_unlock_irqrestore(&mdev->req_lock, flags);
 
        if (is_syncer_req)