drbd: Finish requests that completed while IO was frozen
authorPhilipp Reisner <philipp.reisner@linbit.com>
Tue, 31 Jul 2012 09:22:58 +0000 (11:22 +0200)
committerPhilipp Reisner <philipp.reisner@linbit.com>
Thu, 16 Aug 2012 15:14:45 +0000 (17:14 +0200)
Requests of an acked epoch are stored on the barrier_acked_requests list. In
case the private bio of such a request completes while IO on the drbd device
is suspended [req_mod(completed_ok)] then the request stays there.

When thawing IO because the fence_peer handler returned, then we use
tl_clear() to apply the connection_lost_while_pending event to all requests
on the transfer-log and the barrier_acked_requests list.

Up to now the connection_lost_while_pending event was not applied
on requests on the barrier_acked_requests list. Fixed that.

I.e. now the connection_lost_while_pending and resend events are
applied to requests on the barrier_acked_requests list. For that
it is necessary that the resend event finishes (local only)
READS correctly.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_req.c

index 2e0e7fc1dbbaf50e24d8a446a4e20aeb2239f5bb..136db95b212d3f23706e5cfa1fe0d6024f856961 100644 (file)
@@ -79,6 +79,7 @@ static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused);
 static void md_sync_timer_fn(unsigned long data);
 static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused);
 static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused);
+static void _tl_clear(struct drbd_conf *mdev);
 
 MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
              "Lars Ellenberg <lars@linbit.com>");
@@ -432,19 +433,10 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
 
        /* Actions operating on the disk state, also want to work on
           requests that got barrier acked. */
-       switch (what) {
-       case fail_frozen_disk_io:
-       case restart_frozen_disk_io:
-               list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
-                       req = list_entry(le, struct drbd_request, tl_requests);
-                       _req_mod(req, what);
-               }
 
-       case connection_lost_while_pending:
-       case resend:
-               break;
-       default:
-               dev_err(DEV, "what = %d in _tl_restart()\n", what);
+       list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
+               req = list_entry(le, struct drbd_request, tl_requests);
+               _req_mod(req, what);
        }
 }
 
@@ -458,12 +450,17 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
  * receiver thread and the worker thread.
  */
 void tl_clear(struct drbd_conf *mdev)
+{
+       spin_lock_irq(&mdev->req_lock);
+       _tl_clear(mdev);
+       spin_unlock_irq(&mdev->req_lock);
+}
+
+static void _tl_clear(struct drbd_conf *mdev)
 {
        struct list_head *le, *tle;
        struct drbd_request *r;
 
-       spin_lock_irq(&mdev->req_lock);
-
        _tl_restart(mdev, connection_lost_while_pending);
 
        /* we expect this list to be empty. */
@@ -482,7 +479,6 @@ void tl_clear(struct drbd_conf *mdev)
 
        memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *));
 
-       spin_unlock_irq(&mdev->req_lock);
 }
 
 void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
@@ -1476,12 +1472,12 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
        if (ns.susp_fen) {
                /* case1: The outdate peer handler is successful: */
                if (os.pdsk > D_OUTDATED  && ns.pdsk <= D_OUTDATED) {
-                       tl_clear(mdev);
                        if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
                                drbd_uuid_new_current(mdev);
                                clear_bit(NEW_CUR_UUID, &mdev->flags);
                        }
                        spin_lock_irq(&mdev->req_lock);
+                       _tl_clear(mdev);
                        _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL);
                        spin_unlock_irq(&mdev->req_lock);
                }
index 0bb1e41f136f9fcb8e1d4c662d16bff4a1096de3..01b2ac641c7babe119f98dac7596ff8a690402a8 100644 (file)
@@ -695,6 +695,12 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
                break;
 
        case resend:
+               /* Simply complete (local only) READs. */
+               if (!(req->rq_state & RQ_WRITE) && !req->w.cb) {
+                       _req_may_be_done(req, m);
+                       break;
+               }
+
                /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
                   before the connection loss (B&C only); only P_BARRIER_ACK was missing.
                   Trowing them out of the TL here by pretending we got a BARRIER_ACK