From 07be15b12c41dbc5ceae130be2e0b655f7611691 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 May 2012 11:53:08 +0200 Subject: [PATCH] drbd: fix resend/resubmit of frozen IO DRBD can freeze IO, due to fencing policy (fencing resource-and-stonith), or because we lost access to data (on-no-data-accessible suspend-io). Resuming from there (re-connect, or re-attach, or explicit admin intervention) should "just work". Unfortunately, if the re-attach/re-connect did not happen within the timeout, since the commit drbd: Implemented real timeout checking for request processing time if so configured, the request_timer_fn() would timeout and detach/disconnect virtually immediately. This change tracks the most recent attach and connect, and does not timeout within after attach/connect. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 ++ drivers/block/drbd/drbd_req.c | 51 +++++++++++++++++++++++---------- drivers/block/drbd/drbd_state.c | 16 ++++++++++- 3 files changed, 53 insertions(+), 16 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4fad3f565aba..976e78cadd3e 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -859,6 +859,7 @@ struct drbd_tconn { /* is a resource from the config file */ unsigned int epochs; enum write_ordering_e write_ordering; + unsigned long last_reconnect_jif; struct drbd_thread receiver; struct drbd_thread worker; struct drbd_thread asender; @@ -881,6 +882,7 @@ struct drbd_conf { struct block_device *this_bdev; struct gendisk *vdisk; + unsigned long last_reattach_jif; struct drbd_work resync_work, unplug_work, go_diskless, diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index c76402c3f64c..44a7d6ba4e4f 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1171,12 +1171,14 @@ void request_timer_fn(unsigned long data) struct list_head *le; struct net_conf *nc; unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ + unsigned long now; rcu_read_lock(); nc = rcu_dereference(tconn->net_conf); - ent = nc ? nc->timeout * HZ/10 * nc->ko_count : 0; + if (nc && mdev->state.conn >= C_WF_REPORT_PARAMS) + ent = nc->timeout * HZ/10 * nc->ko_count; - if (get_ldev(mdev)) { + if (get_ldev(mdev)) { /* implicit state.disk >= D_INCONSISTENT */ dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10; put_ldev(mdev); } @@ -1184,32 +1186,51 @@ void request_timer_fn(unsigned long data) et = min_not_zero(dt, ent); - if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED)) + if (!et) return; /* Recurring timer stopped */ + now = jiffies; + spin_lock_irq(&tconn->req_lock); le = &tconn->oldest_tle->requests; if (list_empty(le)) { spin_unlock_irq(&tconn->req_lock); - mod_timer(&mdev->request_timer, jiffies + et); + mod_timer(&mdev->request_timer, now + et); return; } le = le->prev; req = list_entry(le, struct drbd_request, tl_requests); - if (ent && req->rq_state & RQ_NET_PENDING) { - if (time_is_before_eq_jiffies(req->start_time + ent)) { - dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); - _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); - } + + /* The request is considered timed out, if + * - we have some effective timeout from the configuration, + * with above state restrictions applied, + * - the oldest request is waiting for a response from the network + * resp. the local disk, + * - the oldest request is in fact older than the effective timeout, + * - the connection was established (resp. disk was attached) + * for longer than the timeout already. + * Note that for 32bit jiffies and very stable connections/disks, + * we may have a wrap around, which is catched by + * !time_in_range(now, last_..._jif, last_..._jif + timeout). + * + * Side effect: once per 32bit wrap-around interval, which means every + * ~198 days with 250 HZ, we have a window where the timeout would need + * to expire twice (worst case) to become effective. Good enough. + */ + if (ent && req->rq_state & RQ_NET_PENDING && + time_after(now, req->start_time + ent) && + !time_in_range(now, tconn->last_reconnect_jif, tconn->last_reconnect_jif + ent)) { + dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); + _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); } - if (dt && req->rq_state & RQ_LOCAL_PENDING && req->w.mdev == mdev) { - if (time_is_before_eq_jiffies(req->start_time + dt)) { - dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); - __drbd_chk_io_error(mdev, 1); - } + if (dt && req->rq_state & RQ_LOCAL_PENDING && req->w.mdev == mdev && + time_after(now, req->start_time + dt) && + !time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) { + dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); + __drbd_chk_io_error(mdev, 1); } - nt = (time_is_before_eq_jiffies(req->start_time + et) ? jiffies : req->start_time) + et; + nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et; spin_unlock_irq(&tconn->req_lock); mod_timer(&mdev->request_timer, nt); } diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index c4554b17e48d..2673049df34c 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1075,6 +1075,13 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) drbd_resume_al(mdev); + /* remember last attach time so request_timer_fn() won't + * kill newly established sessions while we are still trying to thaw + * previously frozen IO */ + if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) && + ns.disk > D_NEGOTIATING) + mdev->last_reattach_jif = jiffies; + ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC); if (ascw) { ascw->os = os; @@ -1609,8 +1616,15 @@ conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state enum drbd_state_rv rv; int vnr, number_of_volumes = 0; - if (mask.conn == C_MASK) + if (mask.conn == C_MASK) { + /* remember last connect time so request_timer_fn() won't + * kill newly established sessions while we are still trying to thaw + * previously frozen IO */ + if (tconn->cstate != C_WF_REPORT_PARAMS && val.conn == C_WF_REPORT_PARAMS) + tconn->last_reconnect_jif = jiffies; + tconn->cstate = val.conn; + } rcu_read_lock(); idr_for_each_entry(&tconn->volumes, mdev, vnr) { -- 2.20.1