Merge tag 'v3.10.55' into update

[GitHub/mt8127/android_kernel_alcatel_ttab.git] / drivers / md / raid10.c
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c

index 59d4daa5f4c7a32c245ef954f24650fe75084117..a1ea2a75391240548b5283c74dd8314745d9649d 100644 (file)
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -490,7 +490,17 @@ static void raid10_end_write_request(struct bio *bio, int error)
                 sector_t first_bad;
                 int bad_sectors;
  
-               set_bit(R10BIO_Uptodate, &r10_bio->state);
+               /*
+                * Do not set R10BIO_Uptodate if the current device is
+                * rebuilding or Faulty. This is because we cannot use
+                * such device for properly reading the data back (we could
+                * potentially use it, if the current write would have fallen
+                * before rdev->recovery_offset, but for simplicity we don't
+                * check this here).
+                */
+               if (test_bit(In_sync, &rdev->flags) &&
+                   !test_bit(Faulty, &rdev->flags))
+                       set_bit(R10BIO_Uptodate, &r10_bio->state);
  
                 /* Maybe we can clear some bad blocks. */
                 if (is_badblock(rdev,
@@ -1055,17 +1065,17 @@ static void allow_barrier(struct r10conf *conf)
         wake_up(&conf->wait_barrier);
  }
  
-static void freeze_array(struct r10conf *conf)
+static void freeze_array(struct r10conf *conf, int extra)
  {
         /* stop syncio and normal IO and wait for everything to
          * go quiet.
          * We increment barrier and nr_waiting, and then
-        * wait until nr_pending match nr_queued+1
+        * wait until nr_pending matches nr_queued+extra
          * This is called in the context of one normal IO request
          * that has failed. Thus any sync request that might be pending
          * will be blocked by nr_pending, and we need to wait for
          * pending IO requests to complete or be queued for re-try.
-        * Thus the number queued (nr_queued) plus this request (1)
+        * Thus the number queued (nr_queued) plus this request (extra)
          * must match the number of pending IOs (nr_pending) before
          * we continue.
          */
@@ -1073,7 +1083,7 @@ static void freeze_array(struct r10conf *conf)
         conf->barrier++;
         conf->nr_waiting++;
         wait_event_lock_irq_cmd(conf->wait_barrier,
-                               conf->nr_pending == conf->nr_queued+1,
+                               conf->nr_pending == conf->nr_queued+extra,
                                 conf->resync_lock,
                                 flush_pending_writes(conf));
  
@@ -1311,7 +1321,7 @@ read_again:
                         /* Could not read all from this device, so we will
                          * need another r10_bio.
                          */
-                       sectors_handled = (r10_bio->sectors + max_sectors
+                       sectors_handled = (r10_bio->sector + max_sectors
                                            - bio->bi_sector);
                         r10_bio->sectors = max_sectors;
                         spin_lock_irq(&conf->device_lock);
@@ -1319,7 +1329,7 @@ read_again:
                                 bio->bi_phys_segments = 2;
                         else
                                 bio->bi_phys_segments++;
-                       spin_unlock(&conf->device_lock);
+                       spin_unlock_irq(&conf->device_lock);
                         /* Cannot call generic_make_request directly
                          * as that will be queued in __generic_make_request
                          * and subsequent mempool_alloc might block
@@ -1671,11 +1681,11 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
                 spin_lock_irqsave(&conf->device_lock, flags);
                 mddev->degraded++;
                 spin_unlock_irqrestore(&conf->device_lock, flags);
-               /*
-                * if recovery is running, make sure it aborts.
-                */
-               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
         }
+       /*
+        * If recovery is running, make sure it aborts.
+        */
+       set_bit(MD_RECOVERY_INTR, &mddev->recovery);
         set_bit(Blocked, &rdev->flags);
         set_bit(Faulty, &rdev->flags);
         set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -1752,6 +1762,7 @@ static int raid10_spare_active(struct mddev *mddev)
                         }
                         sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
                 } else if (tmp->rdev
+                          && tmp->rdev->recovery_offset == MaxSector
                            && !test_bit(Faulty, &tmp->rdev->flags)
                            && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
                         count++;
@@ -1837,8 +1848,8 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                  * we wait for all outstanding requests to complete.
                  */
                 synchronize_sched();
-               raise_barrier(conf, 0);
-               lower_barrier(conf);
+               freeze_array(conf, 0);
+               unfreeze_array(conf);
                 clear_bit(Unmerged, &rdev->flags);
         }
         md_integrity_add_rdev(rdev, mddev);
@@ -2065,11 +2076,17 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
                          * both 'first' and 'i', so we just compare them.
                          * All vec entries are PAGE_SIZE;
                          */
-                       for (j = 0; j < vcnt; j++)
+                       int sectors = r10_bio->sectors;
+                       for (j = 0; j < vcnt; j++) {
+                               int len = PAGE_SIZE;
+                               if (sectors < (len / 512))
+                                       len = sectors * 512;
                                 if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
                                            page_address(tbio->bi_io_vec[j].bv_page),
-                                          fbio->bi_io_vec[j].bv_len))
+                                          len))
                                         break;
+                               sectors -= len/512;
+                       }
                         if (j == vcnt)
                                 continue;
                         atomic64_add(r10_bio->sectors, &mddev->resync_mismatches);
@@ -2252,12 +2269,18 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
         d = r10_bio->devs[1].devnum;
         wbio = r10_bio->devs[1].bio;
         wbio2 = r10_bio->devs[1].repl_bio;
+       /* Need to test wbio2->bi_end_io before we call
+        * generic_make_request as if the former is NULL,
+        * the latter is free to free wbio2.
+        */
+       if (wbio2 && !wbio2->bi_end_io)
+               wbio2 = NULL;
         if (wbio->bi_end_io) {
                 atomic_inc(&conf->mirrors[d].rdev->nr_pending);
                 md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
                 generic_make_request(wbio);
         }
-       if (wbio2 && wbio2->bi_end_io) {
+       if (wbio2) {
                 atomic_inc(&conf->mirrors[d].replacement->nr_pending);
                 md_sync_acct(conf->mirrors[d].replacement->bdev,
                              bio_sectors(wbio2));
@@ -2612,7 +2635,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
         r10_bio->devs[slot].bio = NULL;
  
         if (mddev->ro == 0) {
-               freeze_array(conf);
+               freeze_array(conf, 1);
                 fix_read_error(conf, mddev, r10_bio);
                 unfreeze_array(conf);
         } else
@@ -2899,14 +2922,13 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
          */
         if (mddev->bitmap == NULL &&
             mddev->recovery_cp == MaxSector &&
+           mddev->reshape_position == MaxSector &&
+           !test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
             !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
+           !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
             conf->fullsync == 0) {
                 *skipped = 1;
-               max_sector = mddev->dev_sectors;
-               if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
-                   test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
-                       max_sector = mddev->resync_max_sectors;
-               return max_sector - sector_nr;
+               return mddev->dev_sectors - sector_nr;
         }
  
   skipped:
@@ -2926,6 +2948,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                  */
                 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
                         end_reshape(conf);
+                       close_sync(conf);
                         return 0;
                 }
  
@@ -3176,10 +3199,6 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                         if (j == conf->copies) {
                                 /* Cannot recover, so abort the recovery or
                                  * record a bad block */
-                               put_buf(r10_bio);
-                               if (rb2)
-                                       atomic_dec(&rb2->remaining);
-                               r10_bio = rb2;
                                 if (any_working) {
                                         /* problem is that there are bad blocks
                                          * on other device(s)
@@ -3211,6 +3230,10 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                                         mirror->recovery_disabled
                                                 = mddev->recovery_disabled;
                                 }
+                               put_buf(r10_bio);
+                               if (rb2)
+                                       atomic_dec(&rb2->remaining);
+                               r10_bio = rb2;
                                 break;
                         }
                 }
@@ -3376,6 +3399,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
  
                 if (bio->bi_end_io == end_sync_read) {
                         md_sync_acct(bio->bi_bdev, nr_sectors);
+                       set_bit(BIO_UPTODATE, &bio->bi_flags);
                         generic_make_request(bio);
                 }
         }
@@ -3522,7 +3546,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
  
         /* FIXME calc properly */
         conf->mirrors = kzalloc(sizeof(struct raid10_info)*(mddev->raid_disks +
-                                                           max(0,mddev->delta_disks)),
+                                                           max(0,-mddev->delta_disks)),
                                 GFP_KERNEL);
         if (!conf->mirrors)
                 goto out;
@@ -3609,8 +3633,7 @@ static int run(struct mddev *mddev)
         if (mddev->queue) {
                 blk_queue_max_discard_sectors(mddev->queue,
                                               mddev->chunk_sectors);
-               blk_queue_max_write_same_sectors(mddev->queue,
-                                                mddev->chunk_sectors);
+               blk_queue_max_write_same_sectors(mddev->queue, 0);
                 blk_queue_io_min(mddev->queue, chunk_size);
                 if (conf->geo.raid_disks % conf->geo.near_copies)
                         blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
@@ -3682,7 +3705,7 @@ static int run(struct mddev *mddev)
                     conf->geo.far_offset == 0)
                         goto out_free_conf;
                 if (conf->prev.far_copies != 1 &&
-                   conf->geo.far_offset == 0)
+                   conf->prev.far_offset == 0)
                         goto out_free_conf;
         }
  
@@ -4376,7 +4399,7 @@ read_more:
         read_bio->bi_private = r10_bio;
         read_bio->bi_end_io = end_sync_read;
         read_bio->bi_rw = READ;
-       read_bio->bi_flags &= ~(BIO_POOL_MASK - 1);
+       read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
         read_bio->bi_flags |= 1 << BIO_UPTODATE;
         read_bio->bi_vcnt = 0;
         read_bio->bi_size = 0;