md/r5cache: r5cache recovery: part 2
author     Song Liu <songliubraving@fb.com>
           Thu, 17 Nov 2016 23:24:44 +0000 (15:24 -0800)
committer  Shaohua Li <shli@fb.com>
           Fri, 18 Nov 2016 21:28:28 +0000 (13:28 -0800)
1. In the previous patch, we:
      - added new data to r5l_recovery_ctx
      - added new functions to recover the write-back cache
   The new functions were not used in that patch, so it did not change the
   behavior of recovery.

2. In this patch, we:
      - modify the main recovery procedure r5l_recovery_log() to call the
        new functions (see the sketch after this list)
      - remove the old functions

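For orientation, the control flow that r5l_recovery_log() now implements is
roughly: scan the log, rewrite data-only stripes back to the journal if any
were found, then terminate the log with an empty meta block and point the
superblock at it. The standalone sketch below only illustrates that ordering;
recovery_ctx, flush_log(), rewrite_data_only_stripes(),
write_empty_meta_block() and write_super() are simplified, hypothetical
stand-ins for the kernel symbols, not the actual implementation.

/*
 * Minimal standalone sketch of the new recovery flow. All names here are
 * simplified stand-ins for the r5l/r5c helpers, not kernel code.
 */
#include <stdio.h>

struct recovery_ctx {
        unsigned long long pos;         /* log position of the new tail */
        unsigned long long seq;         /* sequence number for the new tail */
        int data_only_stripes;          /* stripes with data but no parity */
        int data_parity_stripes;        /* stripes with data and parity */
};

/* Stand-in for r5c_recovery_flush_log(): scan the log, classify stripes. */
static int flush_log(struct recovery_ctx *ctx)
{
        ctx->data_only_stripes = 0;
        ctx->data_parity_stripes = 0;
        return 0;
}

/* Stand-in for r5c_recovery_rewrite_data_only_stripes(). */
static int rewrite_data_only_stripes(struct recovery_ctx *ctx)
{
        (void)ctx;
        return 0;
}

/* Stand-ins for r5l_log_write_empty_meta_block() and r5l_write_super(). */
static void write_empty_meta_block(unsigned long long pos,
                                   unsigned long long seq)
{
        printf("empty meta block at %llu, seq %llu\n", pos, seq);
}

static void write_super(unsigned long long pos)
{
        printf("superblock checkpoint -> %llu\n", pos);
}

static int recovery_log(void)
{
        struct recovery_ctx ctx = { .pos = 0, .seq = 0 };

        /* 1. Scan the whole log; data-parity stripes are replayed to the
         *    RAID disks during the scan, data-only stripes are remembered. */
        if (flush_log(&ctx))
                return -1;

        /* 2. Data-only stripes are rewritten to the journal so their parity
         *    can be computed later through normal stripe handling. */
        if (ctx.data_only_stripes > 0 && rewrite_data_only_stripes(&ctx))
                return -1;

        /* 3. Terminate the log: write an empty meta block at ctx.pos and
         *    point the superblock at it, so the next recovery (and new
         *    writes) start from there. */
        write_empty_meta_block(ctx.pos, ctx.seq);
        write_super(ctx.pos);
        return 0;
}

int main(void)
{
        return recovery_log() ? 1 : 0;
}

Note that, as in the diff below, the empty meta block and superblock update
now happen unconditionally after recovery, so the next mount always finds a
freshly written, valid log tail regardless of whether the shutdown was clean.
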
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
drivers/md/raid5-cache.c
drivers/md/raid5.c

index 3aa7eccde6206ace5c8cb430adc4465f99feabaa..6b9957029dc8242d159fea7793dfa1d09eb94533 100644
@@ -1393,156 +1393,6 @@ static int r5l_recovery_read_meta_block(struct r5l_log *log,
        return 0;
 }
 
-static int r5l_recovery_flush_one_stripe(struct r5l_log *log,
-                                        struct r5l_recovery_ctx *ctx,
-                                        sector_t stripe_sect,
-                                        int *offset)
-{
-       struct r5conf *conf = log->rdev->mddev->private;
-       struct stripe_head *sh;
-       struct r5l_payload_data_parity *payload;
-       int disk_index;
-
-       sh = raid5_get_active_stripe(conf, stripe_sect, 0, 0, 0);
-       while (1) {
-               sector_t log_offset = r5l_ring_add(log, ctx->pos,
-                               ctx->meta_total_blocks);
-               payload = page_address(ctx->meta_page) + *offset;
-
-               if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_DATA) {
-                       raid5_compute_sector(conf,
-                                            le64_to_cpu(payload->location), 0,
-                                            &disk_index, sh);
-
-                       sync_page_io(log->rdev, log_offset, PAGE_SIZE,
-                                    sh->dev[disk_index].page, REQ_OP_READ, 0,
-                                    false);
-                       sh->dev[disk_index].log_checksum =
-                               le32_to_cpu(payload->checksum[0]);
-                       set_bit(R5_Wantwrite, &sh->dev[disk_index].flags);
-               } else {
-                       disk_index = sh->pd_idx;
-                       sync_page_io(log->rdev, log_offset, PAGE_SIZE,
-                                    sh->dev[disk_index].page, REQ_OP_READ, 0,
-                                    false);
-                       sh->dev[disk_index].log_checksum =
-                               le32_to_cpu(payload->checksum[0]);
-                       set_bit(R5_Wantwrite, &sh->dev[disk_index].flags);
-
-                       if (sh->qd_idx >= 0) {
-                               disk_index = sh->qd_idx;
-                               sync_page_io(log->rdev,
-                                            r5l_ring_add(log, log_offset, BLOCK_SECTORS),
-                                            PAGE_SIZE, sh->dev[disk_index].page,
-                                            REQ_OP_READ, 0, false);
-                               sh->dev[disk_index].log_checksum =
-                                       le32_to_cpu(payload->checksum[1]);
-                               set_bit(R5_Wantwrite,
-                                       &sh->dev[disk_index].flags);
-                       }
-               }
-
-               ctx->meta_total_blocks += le32_to_cpu(payload->size);
-               *offset += sizeof(struct r5l_payload_data_parity) +
-                       sizeof(__le32) *
-                       (le32_to_cpu(payload->size) >> (PAGE_SHIFT - 9));
-               if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_PARITY)
-                       break;
-       }
-
-       for (disk_index = 0; disk_index < sh->disks; disk_index++) {
-               void *addr;
-               u32 checksum;
-
-               if (!test_bit(R5_Wantwrite, &sh->dev[disk_index].flags))
-                       continue;
-               addr = kmap_atomic(sh->dev[disk_index].page);
-               checksum = crc32c_le(log->uuid_checksum, addr, PAGE_SIZE);
-               kunmap_atomic(addr);
-               if (checksum != sh->dev[disk_index].log_checksum)
-                       goto error;
-       }
-
-       for (disk_index = 0; disk_index < sh->disks; disk_index++) {
-               struct md_rdev *rdev, *rrdev;
-
-               if (!test_and_clear_bit(R5_Wantwrite,
-                                       &sh->dev[disk_index].flags))
-                       continue;
-
-               /* in case device is broken */
-               rcu_read_lock();
-               rdev = rcu_dereference(conf->disks[disk_index].rdev);
-               if (rdev) {
-                       atomic_inc(&rdev->nr_pending);
-                       rcu_read_unlock();
-                       sync_page_io(rdev, stripe_sect, PAGE_SIZE,
-                                    sh->dev[disk_index].page, REQ_OP_WRITE, 0,
-                                    false);
-                       rdev_dec_pending(rdev, rdev->mddev);
-                       rcu_read_lock();
-               }
-               rrdev = rcu_dereference(conf->disks[disk_index].replacement);
-               if (rrdev) {
-                       atomic_inc(&rrdev->nr_pending);
-                       rcu_read_unlock();
-                       sync_page_io(rrdev, stripe_sect, PAGE_SIZE,
-                                    sh->dev[disk_index].page, REQ_OP_WRITE, 0,
-                                    false);
-                       rdev_dec_pending(rrdev, rrdev->mddev);
-                       rcu_read_lock();
-               }
-               rcu_read_unlock();
-       }
-       raid5_release_stripe(sh);
-       return 0;
-
-error:
-       for (disk_index = 0; disk_index < sh->disks; disk_index++)
-               sh->dev[disk_index].flags = 0;
-       raid5_release_stripe(sh);
-       return -EINVAL;
-}
-
-static int r5l_recovery_flush_one_meta(struct r5l_log *log,
-                                      struct r5l_recovery_ctx *ctx)
-{
-       struct r5conf *conf = log->rdev->mddev->private;
-       struct r5l_payload_data_parity *payload;
-       struct r5l_meta_block *mb;
-       int offset;
-       sector_t stripe_sector;
-
-       mb = page_address(ctx->meta_page);
-       offset = sizeof(struct r5l_meta_block);
-
-       while (offset < le32_to_cpu(mb->meta_size)) {
-               int dd;
-
-               payload = (void *)mb + offset;
-               stripe_sector = raid5_compute_sector(conf,
-                                                    le64_to_cpu(payload->location), 0, &dd, NULL);
-               if (r5l_recovery_flush_one_stripe(log, ctx, stripe_sector,
-                                                 &offset))
-                       return -EINVAL;
-       }
-       return 0;
-}
-
-/* copy data/parity from log to raid disks */
-static void r5l_recovery_flush_log(struct r5l_log *log,
-                                  struct r5l_recovery_ctx *ctx)
-{
-       while (1) {
-               if (r5l_recovery_read_meta_block(log, ctx))
-                       return;
-               if (r5l_recovery_flush_one_meta(log, ctx))
-                       return;
-               ctx->seq++;
-               ctx->pos = r5l_ring_add(log, ctx->pos, ctx->meta_total_blocks);
-       }
-}
-
 static void
 r5l_recovery_create_empty_meta_block(struct r5l_log *log,
                                     struct page *page,
@@ -2166,7 +2016,9 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 
 static int r5l_recovery_log(struct r5l_log *log)
 {
+       struct mddev *mddev = log->rdev->mddev;
        struct r5l_recovery_ctx ctx;
+       int ret;
 
        ctx.pos = log->last_checkpoint;
        ctx.seq = log->last_cp_seq;
@@ -2178,47 +2030,33 @@ static int r5l_recovery_log(struct r5l_log *log)
        if (!ctx.meta_page)
                return -ENOMEM;
 
-       r5l_recovery_flush_log(log, &ctx);
+       ret = r5c_recovery_flush_log(log, &ctx);
        __free_page(ctx.meta_page);
 
-       /*
-        * we did a recovery. Now ctx.pos points to an invalid meta block. New
-        * log will start here. but we can't let superblock point to last valid
-        * meta block. The log might looks like:
-        * | meta 1| meta 2| meta 3|
-        * meta 1 is valid, meta 2 is invalid. meta 3 could be valid. If
-        * superblock points to meta 1, we write a new valid meta 2n.  if crash
-        * happens again, new recovery will start from meta 1. Since meta 2n is
-        * valid now, recovery will think meta 3 is valid, which is wrong.
-        * The solution is we create a new meta in meta2 with its seq == meta
-        * 1's seq + 10 and let superblock points to meta2. The same recovery will
-        * not think meta 3 is a valid meta, because its seq doesn't match
-        */
-       if (ctx.seq > log->last_cp_seq) {
-               int ret;
-
-               ret = r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq + 10);
-               if (ret)
-                       return ret;
-               log->seq = ctx.seq + 11;
-               log->log_start = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
-               r5l_write_super(log, ctx.pos);
-               log->last_checkpoint = ctx.pos;
-               log->next_checkpoint = ctx.pos;
-       } else {
-               log->log_start = ctx.pos;
-               log->seq = ctx.seq;
-       }
+       if (ret)
+               return ret;
 
-       /*
-        * This is to suppress "function defined but not used" warning.
-        * It will be removed when the two functions are used (next patch).
-        */
-       if (!log) {
-               r5c_recovery_flush_log(log, &ctx);
-               r5c_recovery_rewrite_data_only_stripes(log, &ctx);
+       if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0))
+               pr_debug("md/raid:%s: starting from clean shutdown\n",
+                        mdname(mddev));
+       else {
+               pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n",
+                        mdname(mddev), ctx.data_only_stripes,
+                        ctx.data_parity_stripes);
+
+               if (ctx.data_only_stripes > 0)
+                       if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) {
+                               pr_err("md/raid:%s: failed to rewrite stripes to journal\n",
+                                      mdname(mddev));
+                               return -EIO;
+                       }
        }
 
+       log->log_start = ctx.pos;
+       log->next_checkpoint = ctx.pos;
+       log->seq = ctx.seq;
+       r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq);
+       r5l_write_super(log, ctx.pos);
        return 0;
 }
 
index 17cf98e933072b1c0c213df22bb87a2d7b944555..aa4968c04055b380d4aa1e9e9c73d75994b23b61 100644
@@ -7100,7 +7100,8 @@ static int raid5_run(struct mddev *mddev)
 
                pr_debug("md/raid:%s: using device %s as journal\n",
                         mdname(mddev), bdevname(journal_dev->bdev, b));
-               r5l_init_log(conf, journal_dev);
+               if (r5l_init_log(conf, journal_dev))
+                       goto abort;
        }
 
        return 0;