* to our bitmap
*/
int bitmap_copy_from_slot(struct mddev *mddev, int slot,
- sector_t *low, sector_t *high)
+ sector_t *low, sector_t *high, bool clear_bits)
{
int rv = 0, i, j;
sector_t block, lo = 0, hi = 0;
}
}
- bitmap_update_sb(bitmap);
- /* Setting this for the ev_page should be enough.
- * And we do not require both write_all and PAGE_DIRT either
- */
- for (i = 0; i < bitmap->storage.file_pages; i++)
- set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
- bitmap_write_all(bitmap);
- bitmap_unplug(bitmap);
+ if (clear_bits) {
+ bitmap_update_sb(bitmap);
+ /* Setting this for the ev_page should be enough.
+ * And we do not require both write_all and PAGE_DIRT either
+ */
+ for (i = 0; i < bitmap->storage.file_pages; i++)
+ set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
+ bitmap_write_all(bitmap);
+ bitmap_unplug(bitmap);
+ }
*low = lo;
*high = hi;
err:
RESYNCING,
NEWDISK,
REMOVE,
+ RE_ADD,
};
struct cluster_msg {
str, ret);
goto clear_bit;
}
- ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi);
+ ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi, true);
if (ret) {
pr_err("md-cluster: Could not copy data from bitmap %d\n", slot);
goto dlm_unlock;
pr_warn("%s: %d Could not find disk(%d) to REMOVE\n", __func__, __LINE__, msg->raid_slot);
}
+static void process_readd_disk(struct mddev *mddev, struct cluster_msg *msg)
+{
+ struct md_rdev *rdev = md_find_rdev_nr_rcu(mddev, msg->raid_slot);
+
+ if (rdev && test_bit(Faulty, &rdev->flags))
+ clear_bit(Faulty, &rdev->flags);
+ else
+ pr_warn("%s: %d Could not find disk(%d) which is faulty", __func__, __LINE__, msg->raid_slot);
+}
+
static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
{
switch (msg->type) {
__func__, __LINE__, msg->slot);
process_remove_disk(mddev, msg);
break;
+ case RE_ADD:
+ pr_info("%s: %d Received RE_ADD from %d\n",
+ __func__, __LINE__, msg->slot);
+ process_readd_disk(mddev, msg);
+ break;
default:
pr_warn("%s:%d Received unknown message from %d\n",
__func__, __LINE__, msg->slot);
return __sendmsg(cinfo, &cmsg);
}
+static int gather_bitmaps(struct md_rdev *rdev)
+{
+ int sn, err;
+ sector_t lo, hi;
+ struct cluster_msg cmsg;
+ struct mddev *mddev = rdev->mddev;
+ struct md_cluster_info *cinfo = mddev->cluster_info;
+
+ cmsg.type = RE_ADD;
+ cmsg.raid_slot = rdev->desc_nr;
+ err = sendmsg(cinfo, &cmsg);
+ if (err)
+ goto out;
+
+ for (sn = 0; sn < mddev->bitmap_info.nodes; sn++) {
+ if (sn == (cinfo->slot_number - 1))
+ continue;
+ err = bitmap_copy_from_slot(mddev, sn, &lo, &hi, false);
+ if (err) {
+ pr_warn("md-cluster: Could not gather bitmaps from slot %d", sn);
+ goto out;
+ }
+ if ((hi > 0) && (lo < mddev->recovery_cp))
+ mddev->recovery_cp = lo;
+ }
+out:
+ return err;
+}
+
static struct md_cluster_operations cluster_ops = {
.join = join,
.leave = leave,
.add_new_disk_finish = add_new_disk_finish,
.new_disk_ack = new_disk_ack,
.remove_disk = remove_disk,
+ .gather_bitmaps = gather_bitmaps,
};
static int __init cluster_init(void)
}
} else if (cmd_match(buf, "re-add")) {
if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1)) {
- clear_bit(Faulty, &rdev->flags);
- err = add_bound_rdev(rdev);
+ /* clear_bit is performed _after_ all the devices
+ * have their local Faulty bit cleared. If any writes
+ * happen in the meantime in the local node, they
+ * will land in the local bitmap, which will be synced
+ * by this node eventually
+ */
+ if (!mddev_is_clustered(rdev->mddev) ||
+ (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
+ clear_bit(Faulty, &rdev->flags);
+ err = add_bound_rdev(rdev);
+ }
} else
err = -EBUSY;
}