md: close a race with setting mddev->in_sync
author NeilBrown <neilb@suse.com>
Wed, 15 Mar 2017 03:05:14 +0000 (14:05 +1100)
committer Shaohua Li <shli@fb.com>
Thu, 23 Mar 2017 02:18:30 +0000 (19:18 -0700)
If ->in_sync is being set just as md_write_start() is being called,
it is possible that set_in_sync() won't see the elevated
->writes_pending, and md_write_start() won't see the set ->in_sync.

To close this race, re-test ->writes_pending after setting ->in_sync,
and add memory barriers to ensure the increment of ->writes_pending
will be seen by the time of this second test, or the new ->in_sync
will be seen by md_write_start().

Take the mddev->lock spinlock in array_state_show() to ensure this
temporary instability is never visible from userspace.

Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
drivers/md/md.c

index 5951dea75abac7e36a9da875a74a6b34272a3ba7..dee6bbfb29b8ffc7e12615106c81143dd6c53380 100644 (file)
@@ -2258,6 +2258,10 @@ static bool set_in_sync(struct mddev *mddev)
        if (atomic_read(&mddev->writes_pending) == 0) {
                if (mddev->in_sync == 0) {
                        mddev->in_sync = 1;
+                       smp_mb();
+                       if (atomic_read(&mddev->writes_pending))
+                               /* lost a race with md_write_start() */
+                               mddev->in_sync = 0;
                        set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
                        sysfs_notify_dirent_safe(mddev->sysfs_state);
                }
@@ -4011,6 +4015,7 @@ array_state_show(struct mddev *mddev, char *page)
                        st = read_auto;
                        break;
                case 0:
+                       spin_lock(&mddev->lock);
                        if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
                                st = write_pending;
                        else if (mddev->in_sync)
@@ -4019,6 +4024,7 @@ array_state_show(struct mddev *mddev, char *page)
                                st = active_idle;
                        else
                                st = active;
+                       spin_unlock(&mddev->lock);
                }
        else {
                if (list_empty(&mddev->disks) &&
@@ -7885,6 +7891,7 @@ void md_write_start(struct mddev *mddev, struct bio *bi)
                did_change = 1;
        }
        atomic_inc(&mddev->writes_pending);
+       smp_mb(); /* Match smp_mb in set_in_sync() */
        if (mddev->safemode == 1)
                mddev->safemode = 0;
        if (mddev->in_sync) {