md-cluster: remove a disk asynchronously from cluster environment
author Guoqing Jiang <gqjiang@suse.com>
Sun, 20 Dec 2015 23:50:59 +0000 (10:50 +1100)
committer NeilBrown <neilb@suse.com>
Wed, 6 Jan 2016 00:38:36 +0000 (11:38 +1100)
For cluster raid, if a disk cannot be reached from one node, the
other nodes receive a REMOVE message for that disk.

On a receiving node, we can't call md_kick_rdev_from_array to remove
the disk from the array synchronously, since the disk might still be
busy on that node. So set a ClusterRemove flag on the disk instead, and
let the md thread perform the removal later.

Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: NeilBrown <neilb@suse.com>
drivers/md/md-cluster.c
drivers/md/md.c
drivers/md/md.h

index e57bbfed16380d9bf4b159501b552782c46e4a9c..3fd7301fd7afce062c6d4450acd0835076cd8386 100644 (file)
@@ -440,8 +440,11 @@ static void process_remove_disk(struct mddev *mddev, struct cluster_msg *msg)
        struct md_rdev *rdev = md_find_rdev_nr_rcu(mddev,
                                                   le32_to_cpu(msg->raid_slot));
 
-       if (rdev)
-               md_kick_rdev_from_array(rdev);
+       if (rdev) {
+               set_bit(ClusterRemove, &rdev->flags);
+               set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+               md_wakeup_thread(mddev->thread);
+       }
        else
                pr_warn("%s: %d Could not find disk(%d) to REMOVE\n",
                        __func__, __LINE__, le32_to_cpu(msg->raid_slot));
index 61aacab424cf1860dbb5651f5a4fc6c9f7188027..198e29dffb9852e2c41aca0a5fdd4e7c1312804e 100644 (file)
@@ -8318,6 +8318,18 @@ void md_check_recovery(struct mddev *mddev)
                        goto unlock;
                }
 
+               if (mddev_is_clustered(mddev)) {
+                       struct md_rdev *rdev;
+                       /* kick the device if another node issued a
+                        * remove disk.
+                        */
+                       rdev_for_each(rdev, mddev) {
+                               if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
+                                               rdev->raid_disk < 0)
+                                       md_kick_rdev_from_array(rdev);
+                       }
+               }
+
                if (!mddev->external) {
                        int did_change = 0;
                        spin_lock(&mddev->lock);
index ca0b643fe3c18070b417a66daedb571457b440ca..f7b17aef837d723c1bb8798f00e2c1458c096e99 100644 (file)
@@ -183,6 +183,7 @@ enum flag_bits {
                                 * Usually, this device should be faster
                                 * than other devices in the array
                                 */
+       ClusterRemove,
 };
 
 #define BB_LEN_MASK    (0x00000000000001FFULL)