drbd: make suspend_io() / resume_io() must be thread and recursion safe
authorPhilipp Reisner <philipp.reisner@linbit.com>
Thu, 28 Feb 2013 09:30:19 +0000 (10:30 +0100)
committerJens Axboe <axboe@fb.com>
Wed, 25 Nov 2015 16:22:03 +0000 (09:22 -0700)
Avoid to prematurely resume application IO: don't set/clear a single
bit, but inc/dec an atomic counter.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_nl.c
drivers/block/drbd/drbd_state.c

index a26265375e1ea26beee14bc2952ca0808fd4d74f..df3d89d5777ab8198299f4ab310e01284765f934 100644 (file)
@@ -500,7 +500,6 @@ enum {
 
        MD_NO_FUA,              /* Users wants us to not use FUA/FLUSH on meta data dev */
 
-       SUSPEND_IO,             /* suspend application io */
        BITMAP_IO,              /* suspend application io;
                                   once no more io in flight, start bitmap io */
        BITMAP_IO_QUEUED,       /* Started bitmap IO */
@@ -880,6 +879,7 @@ struct drbd_device {
        atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
        atomic_t unacked_cnt;    /* Need to send replies for */
        atomic_t local_cnt;      /* Waiting for local completion */
+       atomic_t suspend_cnt;
 
        /* Interval tree of pending local requests */
        struct rb_root read_requests;
@@ -2263,7 +2263,7 @@ static inline bool may_inc_ap_bio(struct drbd_device *device)
 
        if (drbd_suspended(device))
                return false;
-       if (test_bit(SUSPEND_IO, &device->flags))
+       if (atomic_read(&device->suspend_cnt))
                return false;
 
        /* to avoid potential deadlock or bitmap corruption,
index 613778994b2394bd4c9739bfbf2bb07817ff770c..c7cd3df8107e684258676da67e28a65a538fe711 100644 (file)
@@ -865,9 +865,11 @@ char *ppsize(char *buf, unsigned long long size)
  * and can be long lived.
  * This changes an device->flag, is triggered by drbd internals,
  * and should be short-lived. */
+/* It needs to be a counter, since multiple threads might
+   independently suspend and resume IO. */
 void drbd_suspend_io(struct drbd_device *device)
 {
-       set_bit(SUSPEND_IO, &device->flags);
+       atomic_inc(&device->suspend_cnt);
        if (drbd_suspended(device))
                return;
        wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
@@ -875,8 +877,8 @@ void drbd_suspend_io(struct drbd_device *device)
 
 void drbd_resume_io(struct drbd_device *device)
 {
-       clear_bit(SUSPEND_IO, &device->flags);
-       wake_up(&device->misc_wait);
+       if (atomic_dec_and_test(&device->suspend_cnt))
+               wake_up(&device->misc_wait);
 }
 
 /**
index f022e99f98559b74eef7978da3a0ae6cadbac495..5a7ef7873b675b73023001478be61e746b3d1194 100644 (file)
@@ -1484,7 +1484,7 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device,
        D_ASSERT(device, current == first_peer_device(device)->connection->worker.task);
 
        /* open coded non-blocking drbd_suspend_io(device); */
-       set_bit(SUSPEND_IO, &device->flags);
+       atomic_inc(&device->suspend_cnt);
 
        drbd_bm_lock(device, why, flags);
        rv = io_fn(device);