drbd: If we detect late that IO got frozen, retry after we thawed.
author Philipp Reisner <philipp.reisner@linbit.com>
Mon, 10 May 2010 14:42:23 +0000 (16:42 +0200)
committer Philipp Reisner <philipp.reisner@linbit.com>
Tue, 18 May 2010 00:03:32 +0000 (02:03 +0200)
If we detect late (= after grabbing mdev->req_lock) that IO got frozen, we
return 1 to generic_make_request(), which will simply retry making a
request for that bio.

In the subsequent call of generic_make_request() into drbd_make_request_26(),
we sleep in inc_ap_bio() until IO is thawed.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_req.c

index 4b97f30bb7c6253684c2b71dda0019654ea8189f..c194348a46ed0833f58ffb3b0d40ab9cfd8be361 100644 (file)
@@ -2223,7 +2223,7 @@ static inline int __inc_ap_bio_cond(struct drbd_conf *mdev)
 /* I'd like to use wait_event_lock_irq,
  * but I'm not sure when it got introduced,
  * and not sure when it has 3 or 4 arguments */
-static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two)
+static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
 {
        /* compare with after_state_ch,
         * os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S */
@@ -2245,7 +2245,7 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two)
                finish_wait(&mdev->misc_wait, &wait);
                spin_lock_irq(&mdev->req_lock);
        }
-       atomic_add(one_or_two, &mdev->ap_bio_cnt);
+       atomic_add(count, &mdev->ap_bio_cnt);
        spin_unlock_irq(&mdev->req_lock);
 }
 
index 343e0e6dd532c57035dee819d9b7832983eae821..3397f11d0ba915fd29ca9cd6f65717d2f6a83f10 100644 (file)
@@ -722,6 +722,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
        struct drbd_request *req;
        int local, remote;
        int err = -EIO;
+       int ret = 0;
 
        /* allocate outside of all locks; */
        req = drbd_req_new(mdev, bio);
@@ -784,7 +785,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
                            (mdev->state.pdsk == D_INCONSISTENT &&
                             mdev->state.conn >= C_CONNECTED));
 
-       if (!(local || remote)) {
+       if (!(local || remote) && !mdev->state.susp) {
                dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
                goto fail_free_complete;
        }
@@ -810,6 +811,16 @@ allocate_barrier:
        /* GOOD, everything prepared, grab the spin_lock */
        spin_lock_irq(&mdev->req_lock);
 
+       if (mdev->state.susp) {
+               /* If we got suspended, use the retry mechanism of
+                  generic_make_request() to restart processing of this
+                  bio. In the next call to drbd_make_request_26
+                  we sleep in inc_ap_bio() */
+               ret = 1;
+               spin_unlock_irq(&mdev->req_lock);
+               goto fail_free_complete;
+       }
+
        if (remote) {
                remote = (mdev->state.pdsk == D_UP_TO_DATE ||
                            (mdev->state.pdsk == D_INCONSISTENT &&
@@ -947,12 +958,14 @@ fail_and_free_req:
                req->private_bio = NULL;
                put_ldev(mdev);
        }
-       bio_endio(bio, err);
+       if (!ret)
+               bio_endio(bio, err);
+
        drbd_req_free(req);
        dec_ap_bio(mdev);
        kfree(b);
 
-       return 0;
+       return ret;
 }
 
 /* helper function for drbd_make_request
@@ -1065,15 +1078,21 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio)
 
                /* we need to get a "reference count" (ap_bio_cnt)
                 * to avoid races with the disconnect/reconnect/suspend code.
-                * In case we need to split the bio here, we need to get two references
+                * In case we need to split the bio here, we need to get three references
                 * atomically, otherwise we might deadlock when trying to submit the
                 * second one! */
-               inc_ap_bio(mdev, 2);
+               inc_ap_bio(mdev, 3);
 
                D_ASSERT(e_enr == s_enr + 1);
 
-               drbd_make_request_common(mdev, &bp->bio1);
-               drbd_make_request_common(mdev, &bp->bio2);
+               while (drbd_make_request_common(mdev, &bp->bio1))
+                       inc_ap_bio(mdev, 1);
+
+               while (drbd_make_request_common(mdev, &bp->bio2))
+                       inc_ap_bio(mdev, 1);
+
+               dec_ap_bio(mdev);
+
                bio_pair_release(bp);
        }
        return 0;