md: always hold reconfig_mutex when calling mddev_suspend()
author NeilBrown <neilb@suse.com>
Thu, 19 Oct 2017 01:17:16 +0000 (12:17 +1100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 8 Jul 2018 13:30:49 +0000 (15:30 +0200)
commit 4d5324f760aacaefeb721b172aa14bf66045c332 upstream.

Most often mddev_suspend() is called with
reconfig_mutex held.  Make this a requirement in
preparation for a subsequent patch.  Also require
reconfig_mutex to be held for mddev_resume(),
partly for symmetry and partly to guarantee
no races with incr/decr of mddev->suspended.

Taking the mutex in r5c_disable_writeback_async() is
a little tricky as this is called from a work queue
via log->disable_writeback_work, and flush_work()
is called on that while holding ->reconfig_mutex.
If the work item hasn't run before flush_work()
is called, the work function will not be able to
get the mutex.

So we use mddev_trylock() inside the wait_event() call, and have that
abort when conf->log is set to NULL, which happens before
flush_work() is called.
We wait on mddev->sb_wait and ensure this is woken
when any of the conditions change.  This requires
waking mddev->sb_wait in mddev_unlock().  This is only
likely to trigger extra wake_ups of threads that needn't
be woken when metadata is being written, and that
doesn't happen often enough for the cost to be
noticeable.

Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/md/dm-raid.c
drivers/md/md.c
drivers/md/raid5-cache.c

index 33834db7c0a0cc93af39fcfc91204e13e49a2f5c..38a2ac24428e5b5671cb30c20a4f91d998f06597 100644 (file)
@@ -3637,8 +3637,11 @@ static void raid_postsuspend(struct dm_target *ti)
 {
        struct raid_set *rs = ti->private;
 
-       if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
+       if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
+               mddev_lock_nointr(&rs->md);
                mddev_suspend(&rs->md);
+               mddev_unlock(&rs->md);
+       }
 
        rs->md.ro = 1;
 }
@@ -3898,8 +3901,11 @@ static void raid_resume(struct dm_target *ti)
        if (!(rs->ctr_flags & RESUME_STAY_FROZEN_FLAGS))
                clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 
-       if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
+       if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
+               mddev_lock_nointr(mddev);
                mddev_resume(mddev);
+               mddev_unlock(mddev);
+       }
 }
 
 static struct target_type raid_target = {
index 7143c8b9284bdd6c08f00e2354b6ce9fe99536f9..757f12d49540204cbd536a572f5cbff3af175b93 100644 (file)
@@ -344,6 +344,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 void mddev_suspend(struct mddev *mddev)
 {
        WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
+       lockdep_assert_held(&mddev->reconfig_mutex);
        if (mddev->suspended++)
                return;
        synchronize_rcu();
@@ -357,6 +358,7 @@ EXPORT_SYMBOL_GPL(mddev_suspend);
 
 void mddev_resume(struct mddev *mddev)
 {
+       lockdep_assert_held(&mddev->reconfig_mutex);
        if (--mddev->suspended)
                return;
        wake_up(&mddev->sb_wait);
@@ -663,6 +665,7 @@ void mddev_unlock(struct mddev *mddev)
         */
        spin_lock(&pers_lock);
        md_wakeup_thread(mddev->thread);
+       wake_up(&mddev->sb_wait);
        spin_unlock(&pers_lock);
 }
 EXPORT_SYMBOL_GPL(mddev_unlock);
index 9a340728b846943f742cb01e6db1e2a1aabb3f95..79d812717406ebe0beced0f4e10d2187622cfdb1 100644 (file)
@@ -693,6 +693,8 @@ static void r5c_disable_writeback_async(struct work_struct *work)
        struct r5l_log *log = container_of(work, struct r5l_log,
                                           disable_writeback_work);
        struct mddev *mddev = log->rdev->mddev;
+       struct r5conf *conf = mddev->private;
+       int locked = 0;
 
        if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
                return;
@@ -701,11 +703,15 @@ static void r5c_disable_writeback_async(struct work_struct *work)
 
        /* wait superblock change before suspend */
        wait_event(mddev->sb_wait,
-                  !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
-
-       mddev_suspend(mddev);
-       log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
-       mddev_resume(mddev);
+                  conf->log == NULL ||
+                  (!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) &&
+                   (locked = mddev_trylock(mddev))));
+       if (locked) {
+               mddev_suspend(mddev);
+               log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
+               mddev_resume(mddev);
+               mddev_unlock(mddev);
+       }
 }
 
 static void r5l_submit_current_io(struct r5l_log *log)
@@ -3161,6 +3167,8 @@ void r5l_exit_log(struct r5conf *conf)
        conf->log = NULL;
        synchronize_rcu();
 
+       /* Ensure disable_writeback_work wakes up and exits */
+       wake_up(&conf->mddev->sb_wait);
        flush_work(&log->disable_writeback_work);
        md_unregister_thread(&log->reclaim_thread);
        mempool_destroy(log->meta_pool);