[PATCH] md: Support suspending of IO to regions of an md array
author NeilBrown <neilb@suse.de>
Mon, 27 Mar 2006 09:18:14 +0000 (01:18 -0800)
committer Linus Torvalds <torvalds@g5.osdl.org>
Mon, 27 Mar 2006 16:45:02 +0000 (08:45 -0800)
Suspending IO to a region of the array allows user-space to access the data
in that region safely.  This is needed for raid5 reshape, as user-space needs
to take a backup of the first few stripes before allowing the reshape to
commence.

It will also be useful in cluster-aware raid1 configurations so that all
cluster members can leave a section of the array untouched while a
resync/recovery happens.

The 'start' and 'end' of the suspended range are written to two sysfs
attributes, suspend_lo and suspend_hi.  Note that only one range can be
suspended at a time.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
drivers/md/md.c
drivers/md/raid5.c
include/linux/raid/md_k.h

index a79dd33d343dbf6963df0788cc228751dd72efe5..92fd0104fa0432b01d9c4de125a41a8dfc5269d8 100644 (file)
@@ -2365,6 +2365,63 @@ sync_completed_show(mddev_t *mddev, char *page)
 static struct md_sysfs_entry
 md_sync_completed = __ATTR_RO(sync_completed);
 
+static ssize_t /* sysfs 'show' handler for the suspend_lo attribute */
+suspend_lo_show(mddev_t *mddev, char *page)
+{ /* writes mddev->suspend_lo into page as "<decimal>\n"; returns sprintf's character count */
+       return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
+}
+
+static ssize_t /* sysfs 'store' handler: set the low end of the suspended range */
+suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
+{ /* returns len on success, -EINVAL for any rejected write */
+       char *e;
+       unsigned long long new = simple_strtoull(buf, &e, 10);
+
+       if (mddev->pers == NULL || mddev->pers->quiesce == NULL) /* no personality attached, or it cannot quiesce */
+               return -EINVAL;
+       if (buf == e || (*e && *e != '\n')) /* require a number, optionally followed by '\n' */
+               return -EINVAL;
+       if (new >= mddev->suspend_hi || /* accepted: range becomes empty ... */
+           (new > mddev->suspend_lo && new < mddev->suspend_hi)) { /* ... or lo moves up inside it */
+               mddev->suspend_lo = new;
+               mddev->pers->quiesce(mddev, 2); /* state 2: "resume for a suspend" in raid5_quiesce */
+               return len;
+       } else
+               return -EINVAL;
+}
+static struct md_sysfs_entry md_suspend_lo = /* sysfs attribute 'suspend_lo': readable by all, writable by owner */
+__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
+
+
+static ssize_t /* sysfs 'show' handler for the suspend_hi attribute */
+suspend_hi_show(mddev_t *mddev, char *page)
+{ /* writes mddev->suspend_hi into page as "<decimal>\n"; returns sprintf's character count */
+       return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
+}
+
+static ssize_t /* sysfs 'store' handler: set the high end of the suspended range */
+suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
+{ /* returns len on success, -EINVAL for any rejected write */
+       char *e;
+       unsigned long long new = simple_strtoull(buf, &e, 10);
+
+       if (mddev->pers == NULL || mddev->pers->quiesce == NULL) /* no personality attached, or it cannot quiesce */
+               return -EINVAL;
+       if (buf == e || (*e && *e != '\n')) /* require a number, optionally followed by '\n' */
+               return -EINVAL;
+       if ((new <= mddev->suspend_lo && mddev->suspend_lo >= mddev->suspend_hi) || /* accepted: range is empty and stays empty ... */
+           (new > mddev->suspend_lo && new > mddev->suspend_hi)) { /* ... or hi moves up above both lo and the old hi */
+               mddev->suspend_hi = new;
+               mddev->pers->quiesce(mddev, 1); /* quiesce, then immediately resume ... */
+               mddev->pers->quiesce(mddev, 0); /* ... presumably to drain IO already in flight - TODO confirm */
+               return len;
+       } else
+               return -EINVAL;
+}
+static struct md_sysfs_entry md_suspend_hi = /* sysfs attribute 'suspend_hi': readable by all, writable by owner */
+__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
+
+
 static struct attribute *md_default_attrs[] = {
        &md_level.attr,
        &md_raid_disks.attr,
@@ -2382,6 +2439,8 @@ static struct attribute *md_redundancy_attrs[] = {
        &md_sync_max.attr,
        &md_sync_speed.attr,
        &md_sync_completed.attr,
+       &md_suspend_lo.attr,
+       &md_suspend_hi.attr,
        NULL,
 };
 static struct attribute_group md_redundancy_group = {
index 355dafb98aac69fcf2008bc65be6d287202fba16..bb16ac231a402a97ebabf6f1bbfaeb38fbf94838 100644 (file)
@@ -1805,6 +1805,15 @@ static int make_request(request_queue_t *q, struct bio * bi)
                                        goto retry;
                                }
                        }
+                       /* FIXME what if we get a false positive because these
+                        * are being updated.
+                        */
+                       if (logical_sector >= mddev->suspend_lo &&
+                           logical_sector < mddev->suspend_hi) {
+                               release_stripe(sh);
+                               schedule();
+                               goto retry;
+                       }
 
                        if (test_bit(STRIPE_EXPANDING, &sh->state) ||
                            !add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {
@@ -2725,6 +2734,10 @@ static void raid5_quiesce(mddev_t *mddev, int state)
        raid5_conf_t *conf = mddev_to_conf(mddev);
 
        switch(state) {
+       case 2: /* resume for a suspend */
+               wake_up(&conf->wait_for_overlap);
+               break;
+
        case 1: /* stop all writes */
                spin_lock_irq(&conf->device_lock);
                conf->quiesce = 1;
@@ -2738,6 +2751,7 @@ static void raid5_quiesce(mddev_t *mddev, int state)
                spin_lock_irq(&conf->device_lock);
                conf->quiesce = 0;
                wake_up(&conf->wait_for_stripe);
+               wake_up(&conf->wait_for_overlap);
                spin_unlock_irq(&conf->device_lock);
                break;
        }
index 002ee631fabb8d6332be8243f90eeca2510dabd9..c0d3097846a77b363d03b096724a53f32871f807 100644 (file)
@@ -151,6 +151,10 @@ struct mddev_s
        sector_t                        resync_mismatches; /* count of sectors where
                                                            * parity/replica mismatch found
                                                            */
+
+       /* allow user-space to request suspension of IO to regions of the array */
+       sector_t                        suspend_lo;
+       sector_t                        suspend_hi;
        /* if zero, use the system-wide default */
        int                             sync_speed_min;
        int                             sync_speed_max;