Merge tag 'v3.10.55' into update
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / drivers / md / raid10.c
index 59d4daa5f4c7a32c245ef954f24650fe75084117..a1ea2a75391240548b5283c74dd8314745d9649d 100644 (file)
@@ -490,7 +490,17 @@ static void raid10_end_write_request(struct bio *bio, int error)
                sector_t first_bad;
                int bad_sectors;
 
-               set_bit(R10BIO_Uptodate, &r10_bio->state);
+               /*
+                * Do not set R10BIO_Uptodate if the current device is
+                * rebuilding or Faulty. This is because we cannot use
+                * such device for properly reading the data back (we could
+                * potentially use it, if the current write would have fallen
+                * before rdev->recovery_offset, but for simplicity we don't
+                * check this here).
+                */
+               if (test_bit(In_sync, &rdev->flags) &&
+                   !test_bit(Faulty, &rdev->flags))
+                       set_bit(R10BIO_Uptodate, &r10_bio->state);
 
                /* Maybe we can clear some bad blocks. */
                if (is_badblock(rdev,
@@ -1055,17 +1065,17 @@ static void allow_barrier(struct r10conf *conf)
        wake_up(&conf->wait_barrier);
 }
 
-static void freeze_array(struct r10conf *conf)
+static void freeze_array(struct r10conf *conf, int extra)
 {
        /* stop syncio and normal IO and wait for everything to
         * go quiet.
         * We increment barrier and nr_waiting, and then
-        * wait until nr_pending match nr_queued+1
+        * wait until nr_pending matches nr_queued+extra
         * This is called in the context of one normal IO request
         * that has failed. Thus any sync request that might be pending
         * will be blocked by nr_pending, and we need to wait for
         * pending IO requests to complete or be queued for re-try.
-        * Thus the number queued (nr_queued) plus this request (1)
+        * Thus the number queued (nr_queued) plus this request (extra)
         * must match the number of pending IOs (nr_pending) before
         * we continue.
         */
@@ -1073,7 +1083,7 @@ static void freeze_array(struct r10conf *conf)
        conf->barrier++;
        conf->nr_waiting++;
        wait_event_lock_irq_cmd(conf->wait_barrier,
-                               conf->nr_pending == conf->nr_queued+1,
+                               conf->nr_pending == conf->nr_queued+extra,
                                conf->resync_lock,
                                flush_pending_writes(conf));
 
@@ -1311,7 +1321,7 @@ read_again:
                        /* Could not read all from this device, so we will
                         * need another r10_bio.
                         */
-                       sectors_handled = (r10_bio->sectors + max_sectors
+                       sectors_handled = (r10_bio->sector + max_sectors
                                           - bio->bi_sector);
                        r10_bio->sectors = max_sectors;
                        spin_lock_irq(&conf->device_lock);
@@ -1319,7 +1329,7 @@ read_again:
                                bio->bi_phys_segments = 2;
                        else
                                bio->bi_phys_segments++;
-                       spin_unlock(&conf->device_lock);
+                       spin_unlock_irq(&conf->device_lock);
                        /* Cannot call generic_make_request directly
                         * as that will be queued in __generic_make_request
                         * and subsequent mempool_alloc might block
@@ -1671,11 +1681,11 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
                spin_lock_irqsave(&conf->device_lock, flags);
                mddev->degraded++;
                spin_unlock_irqrestore(&conf->device_lock, flags);
-               /*
-                * if recovery is running, make sure it aborts.
-                */
-               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
        }
+       /*
+        * If recovery is running, make sure it aborts.
+        */
+       set_bit(MD_RECOVERY_INTR, &mddev->recovery);
        set_bit(Blocked, &rdev->flags);
        set_bit(Faulty, &rdev->flags);
        set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -1752,6 +1762,7 @@ static int raid10_spare_active(struct mddev *mddev)
                        }
                        sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
                } else if (tmp->rdev
+                          && tmp->rdev->recovery_offset == MaxSector
                           && !test_bit(Faulty, &tmp->rdev->flags)
                           && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
                        count++;
@@ -1837,8 +1848,8 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                 * we wait for all outstanding requests to complete.
                 */
                synchronize_sched();
-               raise_barrier(conf, 0);
-               lower_barrier(conf);
+               freeze_array(conf, 0);
+               unfreeze_array(conf);
                clear_bit(Unmerged, &rdev->flags);
        }
        md_integrity_add_rdev(rdev, mddev);
@@ -2065,11 +2076,17 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
                         * both 'first' and 'i', so we just compare them.
                         * All vec entries are PAGE_SIZE;
                         */
-                       for (j = 0; j < vcnt; j++)
+                       int sectors = r10_bio->sectors;
+                       for (j = 0; j < vcnt; j++) {
+                               int len = PAGE_SIZE;
+                               if (sectors < (len / 512))
+                                       len = sectors * 512;
                                if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
                                           page_address(tbio->bi_io_vec[j].bv_page),
-                                          fbio->bi_io_vec[j].bv_len))
+                                          len))
                                        break;
+                               sectors -= len/512;
+                       }
                        if (j == vcnt)
                                continue;
                        atomic64_add(r10_bio->sectors, &mddev->resync_mismatches);
@@ -2252,12 +2269,18 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
        d = r10_bio->devs[1].devnum;
        wbio = r10_bio->devs[1].bio;
        wbio2 = r10_bio->devs[1].repl_bio;
+       /* Need to test wbio2->bi_end_io before we call
+        * generic_make_request as if the former is NULL,
+        * the latter is free to free wbio2.
+        */
+       if (wbio2 && !wbio2->bi_end_io)
+               wbio2 = NULL;
        if (wbio->bi_end_io) {
                atomic_inc(&conf->mirrors[d].rdev->nr_pending);
                md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
                generic_make_request(wbio);
        }
-       if (wbio2 && wbio2->bi_end_io) {
+       if (wbio2) {
                atomic_inc(&conf->mirrors[d].replacement->nr_pending);
                md_sync_acct(conf->mirrors[d].replacement->bdev,
                             bio_sectors(wbio2));
@@ -2612,7 +2635,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
        r10_bio->devs[slot].bio = NULL;
 
        if (mddev->ro == 0) {
-               freeze_array(conf);
+               freeze_array(conf, 1);
                fix_read_error(conf, mddev, r10_bio);
                unfreeze_array(conf);
        } else
@@ -2899,14 +2922,13 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
         */
        if (mddev->bitmap == NULL &&
            mddev->recovery_cp == MaxSector &&
+           mddev->reshape_position == MaxSector &&
+           !test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
            !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
+           !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
            conf->fullsync == 0) {
                *skipped = 1;
-               max_sector = mddev->dev_sectors;
-               if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
-                   test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
-                       max_sector = mddev->resync_max_sectors;
-               return max_sector - sector_nr;
+               return mddev->dev_sectors - sector_nr;
        }
 
  skipped:
@@ -2926,6 +2948,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                 */
                if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
                        end_reshape(conf);
+                       close_sync(conf);
                        return 0;
                }
 
@@ -3176,10 +3199,6 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                        if (j == conf->copies) {
                                /* Cannot recover, so abort the recovery or
                                 * record a bad block */
-                               put_buf(r10_bio);
-                               if (rb2)
-                                       atomic_dec(&rb2->remaining);
-                               r10_bio = rb2;
                                if (any_working) {
                                        /* problem is that there are bad blocks
                                         * on other device(s)
@@ -3211,6 +3230,10 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                                        mirror->recovery_disabled
                                                = mddev->recovery_disabled;
                                }
+                               put_buf(r10_bio);
+                               if (rb2)
+                                       atomic_dec(&rb2->remaining);
+                               r10_bio = rb2;
                                break;
                        }
                }
@@ -3376,6 +3399,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 
                if (bio->bi_end_io == end_sync_read) {
                        md_sync_acct(bio->bi_bdev, nr_sectors);
+                       set_bit(BIO_UPTODATE, &bio->bi_flags);
                        generic_make_request(bio);
                }
        }
@@ -3522,7 +3546,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
 
        /* FIXME calc properly */
        conf->mirrors = kzalloc(sizeof(struct raid10_info)*(mddev->raid_disks +
-                                                           max(0,mddev->delta_disks)),
+                                                           max(0,-mddev->delta_disks)),
                                GFP_KERNEL);
        if (!conf->mirrors)
                goto out;
@@ -3609,8 +3633,7 @@ static int run(struct mddev *mddev)
        if (mddev->queue) {
                blk_queue_max_discard_sectors(mddev->queue,
                                              mddev->chunk_sectors);
-               blk_queue_max_write_same_sectors(mddev->queue,
-                                                mddev->chunk_sectors);
+               blk_queue_max_write_same_sectors(mddev->queue, 0);
                blk_queue_io_min(mddev->queue, chunk_size);
                if (conf->geo.raid_disks % conf->geo.near_copies)
                        blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
@@ -3682,7 +3705,7 @@ static int run(struct mddev *mddev)
                    conf->geo.far_offset == 0)
                        goto out_free_conf;
                if (conf->prev.far_copies != 1 &&
-                   conf->geo.far_offset == 0)
+                   conf->prev.far_offset == 0)
                        goto out_free_conf;
        }
 
@@ -4376,7 +4399,7 @@ read_more:
        read_bio->bi_private = r10_bio;
        read_bio->bi_end_io = end_sync_read;
        read_bio->bi_rw = READ;
-       read_bio->bi_flags &= ~(BIO_POOL_MASK - 1);
+       read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
        read_bio->bi_flags |= 1 << BIO_UPTODATE;
        read_bio->bi_vcnt = 0;
        read_bio->bi_size = 0;