dm raid1: hold all write bios when leg fails
authorMikulas Patocka <mpatocka@redhat.com>
Thu, 10 Dec 2009 23:52:06 +0000 (23:52 +0000)
committerAlasdair G Kergon <agk@redhat.com>
Thu, 10 Dec 2009 23:52:06 +0000 (23:52 +0000)
Hold all write bios when leg fails and errors are handled

When using a userspace daemon such as dmeventd to handle errors, we must
delay completing  bios until it has done its job.
This patch prevents the following race:
  - primary leg fails
  - write "1" fail, the write is held, secondary leg is set default
  - write "2" goes straight to the secondary leg

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Takahiro Yasui <tyasui@redhat.com>
Tested-by: Takahiro Yasui <tyasui@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
drivers/md/dm-raid1.c

index e363335e8d8147d6ffb16fbc2c9f32f3e87b36d2..f8d7b3aa46c67c1b1228714b959804381b83407c 100644 (file)
@@ -69,6 +69,7 @@ struct mirror_set {
        region_t nr_regions;
        int in_sync;
        int log_failure;
+       int leg_failure;
        atomic_t suspend;
 
        atomic_t default_mirror;        /* Default mirror */
@@ -211,6 +212,8 @@ static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
        struct mirror_set *ms = m->ms;
        struct mirror *new;
 
+       ms->leg_failure = 1;
+
        /*
         * error_count is used for nothing more than a
         * simple way to tell if a device has encountered
@@ -734,8 +737,12 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
                dm_rh_delay(ms->rh, bio);
 
        while ((bio = bio_list_pop(&nosync))) {
-               map_bio(get_default_mirror(ms), bio);
-               generic_make_request(bio);
+               if (unlikely(ms->leg_failure) && errors_handled(ms))
+                       hold_bio(ms, bio);
+               else {
+                       map_bio(get_default_mirror(ms), bio);
+                       generic_make_request(bio);
+               }
        }
 }
 
@@ -848,6 +855,7 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
        ms->nr_regions = dm_sector_div_up(ti->len, region_size);
        ms->in_sync = 0;
        ms->log_failure = 0;
+       ms->leg_failure = 0;
        atomic_set(&ms->suspend, 0);
        atomic_set(&ms->default_mirror, DEFAULT_MIRROR);