drbd: flush drbd work queue before invalidate/invalidate remote
authorLars Ellenberg <lars.ellenberg@linbit.com>
Tue, 19 Jun 2012 08:27:58 +0000 (10:27 +0200)
committerPhilipp Reisner <philipp.reisner@linbit.com>
Tue, 24 Jul 2012 12:15:58 +0000 (14:15 +0200)
If you do back to back wait-sync/invalidate on a Primary in a tight loop,
during application IO load, you could trigger a race:
  kernel: block drbd6: FIXME going to queue 'set_n_write from StartingSync'
but 'write from resync_finished' still pending?

Fix this by changing the order of the drbd_queue_work() and
the wake_up() in dec_ap_pending(), and adding the additional
drbd_flush_workqueue() before requesting the full sync.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_nl.c

index 5136510ec8be139c9acd198d19a5e190a2ea0039..2704af2ccf61526f8a60b5c3b27d0d98503a42df 100644 (file)
@@ -2421,15 +2421,17 @@ static inline void dec_ap_bio(struct drbd_conf *mdev)
        int ap_bio = atomic_dec_return(&mdev->ap_bio_cnt);
 
        D_ASSERT(ap_bio >= 0);
+
+       if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) {
+               if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
+                       drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
+       }
+
        /* this currently does wake_up for every dec_ap_bio!
         * maybe rather introduce some type of hysteresis?
         * e.g. (ap_bio == mxb/2 || ap_bio == 0) ? */
        if (ap_bio < mxb)
                wake_up(&mdev->misc_wait);
-       if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) {
-               if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
-                       drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
-       }
 }
 
 static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
index a68d9bfb731cd4c6d90cd9ed3002eb6c8694f9c6..c47df7cf7f80961e17d6dba4a5863ec83687ce23 100644 (file)
@@ -1977,9 +1977,11 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
        int retcode;
 
        /* If there is still bitmap IO pending, probably because of a previous
-        * resync just being finished, wait for it before requesting a new resync. */
+        * resync just being finished, wait for it before requesting a new resync.
+        * Also wait for it's after_state_ch(). */
        drbd_suspend_io(mdev);
        wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+       drbd_flush_workqueue(mdev);
 
        retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
 
@@ -2018,9 +2020,11 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re
        int retcode;
 
        /* If there is still bitmap IO pending, probably because of a previous
-        * resync just being finished, wait for it before requesting a new resync. */
+        * resync just being finished, wait for it before requesting a new resync.
+        * Also wait for it's after_state_ch(). */
        drbd_suspend_io(mdev);
        wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+       drbd_flush_workqueue(mdev);
 
        retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);