once no more io in flight, start bitmap io */
BITMAP_IO_QUEUED, /* Started bitmap IO */
GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */
- WAS_IO_ERROR, /* Local disk failed returned IO error */
+ WAS_IO_ERROR, /* Local disk failed, returned IO error */
+ WAS_READ_ERROR, /* Local disk READ failed (set in addition to WAS_IO_ERROR) */
FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */
RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */
NET_CONGESTED, /* The data socket is congested */
}
enum drbd_force_detach_flags {
- DRBD_IO_ERROR,
+ DRBD_READ_ERROR,
+ DRBD_WRITE_ERROR,
DRBD_META_IO_ERROR,
DRBD_FORCE_DETACH,
};
#define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__)
static inline void __drbd_chk_io_error_(struct drbd_conf *mdev,
- enum drbd_force_detach_flags forcedetach,
+ enum drbd_force_detach_flags df,
const char *where)
{
switch (mdev->ldev->dc.on_io_error) {
case EP_PASS_ON:
- if (forcedetach == DRBD_IO_ERROR) {
+ if (df == DRBD_READ_ERROR || df == DRBD_WRITE_ERROR) {
if (__ratelimit(&drbd_ratelimit_state))
dev_err(DEV, "Local IO failed in %s.\n", where);
if (mdev->state.disk > D_INCONSISTENT)
_drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL);
break;
}
- /* NOTE fall through to detach case if forcedetach set */
+ /* NOTE fall through for DRBD_META_IO_ERROR or DRBD_FORCE_DETACH */
case EP_DETACH:
case EP_CALL_HELPER:
+ /* Remember whether we saw a READ or WRITE error.
+ *
+ * Recovery of the affected area for WRITE failure is covered
+ * by the activity log.
+ * READ errors may fall outside that area though. Certain READ
+ * errors can be "healed" by writing good data to the affected
+ * blocks, which triggers block re-allocation in lower layers.
+ *
+ * If we cannot write the bitmap after a READ error,
+ * we may need to trigger a full sync (see w_go_diskless()).
+ *
+ * Force-detach is not really an IO error, but rather a
+ * desperate measure to try to deal with a completely
+ * unresponsive lower level IO stack.
+ * Still, it should be treated as a WRITE error.
+ *
+ * A meta IO error is always treated as a WRITE error:
+ * we read meta data only once during attach,
+ * which will fail in case of errors.
+ */
drbd_set_flag(mdev, WAS_IO_ERROR);
- if (forcedetach == DRBD_FORCE_DETACH)
+ if (df == DRBD_READ_ERROR)
+ drbd_set_flag(mdev, WAS_READ_ERROR);
+ if (df == DRBD_FORCE_DETACH)
drbd_set_flag(mdev, FORCE_DETACH);
if (mdev->state.disk > D_FAILED) {
_drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
/* first half of local IO error, failure to attach,
* or administrative detach */
if (os.disk != D_FAILED && ns.disk == D_FAILED) {
- enum drbd_io_error_p eh = EP_PASS_ON;
- int was_io_error = 0;
/* corresponding get_ldev was in __drbd_set_state, to serialize
* our cleanup here with the transition to D_DISKLESS.
- * But is is still not save to dreference ldev here, since
- * we might come from an failed Attach before ldev was set. */
+ * But it is still not safe to dereference ldev here: we may end
+ * up here from a failed attach, before ldev was even set. */
if (mdev->ldev) {
- eh = mdev->ldev->dc.on_io_error;
- was_io_error = drbd_test_and_clear_flag(mdev, WAS_IO_ERROR);
-
- if (was_io_error && eh == EP_CALL_HELPER)
+ enum drbd_io_error_p eh = mdev->ldev->dc.on_io_error;
+
+ /* In some setups, this handler triggers a suicide,
+ * basically mapping IO error to node failure, to
+ * reduce the number of different failure scenarios.
+ *
+ * This handler intentionally runs before we abort IO,
+ * notify the peer, or try to update our meta data. */
+ if (eh == EP_CALL_HELPER && drbd_test_flag(mdev, WAS_IO_ERROR))
drbd_khelper(mdev, "local-io-error");
/* Immediately allow completion of all application IO,
* So aborting local requests may cause crashes,
* or even worse, silent data corruption.
*/
- if (drbd_test_and_clear_flag(mdev, FORCE_DETACH))
+ if (drbd_test_flag(mdev, FORCE_DETACH))
tl_abort_disk_io(mdev);
/* current state still has to be D_FAILED,
* inc/dec it frequently. Once we are D_DISKLESS, no one will touch
* the protected members anymore, though, so once put_ldev reaches zero
* again, it will be safe to free them. */
+
+ /* Try to write changed bitmap pages; read errors may have just
+ * set some bits outside the area covered by the activity log.
+ *
+ * If we have an IO error during the bitmap writeout,
+ * we will want a full sync next time, just in case.
+ * (Do we want a specific meta data flag for this?)
+ *
+ * If that does not make it to stable storage either,
+ * we cannot do anything about that anymore. */
+ if (mdev->bitmap) {
+ if (drbd_bitmap_io_from_worker(mdev, drbd_bm_write,
+ "detach", BM_LOCKED_MASK)) {
+ if (drbd_test_flag(mdev, WAS_READ_ERROR)) {
+ drbd_md_set_flag(mdev, MDF_FULL_SYNC);
+ drbd_md_sync(mdev);
+ }
+ }
+ }
+
drbd_force_state(mdev, NS(disk, D_DISKLESS));
return 1;
}
/* make sure there is no leftover from previous force-detach attempts */
drbd_clear_flag(mdev, FORCE_DETACH);
+ drbd_clear_flag(mdev, WAS_IO_ERROR);
+ drbd_clear_flag(mdev, WAS_READ_ERROR);
/* and no leftover from previously aborted resync or verify, either */
mdev->rs_total = 0;
req->rq_state |= RQ_LOCAL_COMPLETED;
req->rq_state &= ~RQ_LOCAL_PENDING;
- __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
+ __drbd_chk_io_error(mdev, DRBD_WRITE_ERROR);
_req_may_be_done_not_susp(req, m);
break;
break;
}
- __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
+ __drbd_chk_io_error(mdev, DRBD_READ_ERROR);
goto_queue_for_net_read:
if (list_empty(&mdev->read_ee))
wake_up(&mdev->ee_wait);
if (test_bit(__EE_WAS_ERROR, &e->flags))
- __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
+ __drbd_chk_io_error(mdev, DRBD_READ_ERROR);
spin_unlock_irqrestore(&mdev->req_lock, flags);
drbd_queue_work(&mdev->data.work, &e->w);
: list_empty(&mdev->active_ee);
if (test_bit(__EE_WAS_ERROR, &e->flags))
- __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
+ __drbd_chk_io_error(mdev, DRBD_WRITE_ERROR);
spin_unlock_irqrestore(&mdev->req_lock, flags);
if (is_syncer_req)