md/raid56: Don't perform reads to support writes until stripe is ready.
authorNeilBrown <neilb@suse.de>
Wed, 28 May 2014 03:39:22 +0000 (13:39 +1000)
committerNeilBrown <neilb@suse.de>
Thu, 29 May 2014 06:59:46 +0000 (16:59 +1000)
If it is found that we need to pre-read some blocks before a write
can succeed, we normally set STRIPE_DELAYED and don't actually perform
the read until STRIPE_PREREAD_ACTIVE subsequently gets set.

However for a degraded RAID6 we currently perform the reads as soon
as we see that a write is pending.  This significantly hurts
throughput.

So:
 - when handle_stripe_dirtying find a block that it wants on a device
   that is failed, set STRIPE_DELAY, instead of doing nothing, and
 - when fetch_block detects that a read might be required to satisfy a
   write, only perform the read if STRIPE_PREREAD_ACTIVE is set,
   and if we would actually need to read something to complete the write.

This also helps RAID5, though less often as RAID5 supports a
read-modify-write cycle.  For RAID5 the read is performed too early
only if the write is not a full 4K aligned write (i.e. no an
R5_OVERWRITE).

Also clean up a couple of horrible bits of formatting.

Reported-by: Patrik HornĂ­k <patrik@dsl.sk>
Signed-off-by: NeilBrown <neilb@suse.de>
drivers/md/raid5.c

index ad1b9bea446ebdbaea0083cb976ed4207c618cc1..c1e8607d83401a6236f62e70bab1bd68223ecfc6 100644 (file)
@@ -292,9 +292,12 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
        BUG_ON(atomic_read(&conf->active_stripes)==0);
        if (test_bit(STRIPE_HANDLE, &sh->state)) {
                if (test_bit(STRIPE_DELAYED, &sh->state) &&
-                   !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+                   !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
                        list_add_tail(&sh->lru, &conf->delayed_list);
-               else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
+                       if (atomic_read(&conf->preread_active_stripes)
+                           < IO_THRESHOLD)
+                               md_wakeup_thread(conf->mddev->thread);
+               } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
                           sh->bm_seq - conf->seq_write > 0)
                        list_add_tail(&sh->lru, &conf->bitmap_list);
                else {
@@ -2886,8 +2889,11 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
             (s->failed >= 1 && fdev[0]->toread) ||
             (s->failed >= 2 && fdev[1]->toread) ||
             (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite &&
+             (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) &&
              !test_bit(R5_OVERWRITE, &fdev[0]->flags)) ||
-            (sh->raid_conf->level == 6 && s->failed && s->to_write))) {
+            (sh->raid_conf->level == 6 && s->failed && s->to_write &&
+             s->to_write < sh->raid_conf->raid_disks - 2 &&
+             (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))))) {
                /* we would like to get this block, possibly by computing it,
                 * otherwise read it if the backing disk is insync
                 */
@@ -3086,7 +3092,8 @@ static void handle_stripe_dirtying(struct r5conf *conf,
                    !test_bit(R5_LOCKED, &dev->flags) &&
                    !(test_bit(R5_UPTODATE, &dev->flags) ||
                    test_bit(R5_Wantcompute, &dev->flags))) {
-                       if (test_bit(R5_Insync, &dev->flags)) rcw++;
+                       if (test_bit(R5_Insync, &dev->flags))
+                               rcw++;
                        else
                                rcw += 2*disks;
                }
@@ -3107,10 +3114,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
                            !(test_bit(R5_UPTODATE, &dev->flags) ||
                            test_bit(R5_Wantcompute, &dev->flags)) &&
                            test_bit(R5_Insync, &dev->flags)) {
-                               if (
-                                 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-                                       pr_debug("Read_old block "
-                                                "%d for r-m-w\n", i);
+                               if (test_bit(STRIPE_PREREAD_ACTIVE,
+                                            &sh->state)) {
+                                       pr_debug("Read_old block %d for r-m-w\n",
+                                                i);
                                        set_bit(R5_LOCKED, &dev->flags);
                                        set_bit(R5_Wantread, &dev->flags);
                                        s->locked++;
@@ -3133,10 +3140,9 @@ static void handle_stripe_dirtying(struct r5conf *conf,
                            !(test_bit(R5_UPTODATE, &dev->flags) ||
                              test_bit(R5_Wantcompute, &dev->flags))) {
                                rcw++;
-                               if (!test_bit(R5_Insync, &dev->flags))
-                                       continue; /* it's a failed drive */
-                               if (
-                                 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+                               if (test_bit(R5_Insync, &dev->flags) &&
+                                   test_bit(STRIPE_PREREAD_ACTIVE,
+                                            &sh->state)) {
                                        pr_debug("Read_old block "
                                                "%d for Reconstruct\n", i);
                                        set_bit(R5_LOCKED, &dev->flags);