static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
{
- struct md_rdev *rdev = NULL;
+ struct md_rdev *rdev;
struct block_device *bdev;
struct mddev *mddev = bitmap->mddev;
struct bitmap_storage *store = &bitmap->storage;
+restart:
+ rdev = NULL;
while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
int size = PAGE_SIZE;
loff_t offset = mddev->bitmap_info.offset;
page);
}
- if (wait)
- md_super_wait(mddev);
+ if (wait && md_super_wait(mddev) < 0)
+ goto restart;
return 0;
bad_alignment:
wait_event(bitmap->write_wait,
atomic_read(&bitmap->pending_writes)==0);
else
+ /* Note that we ignore the return value. The writes
+ * might have failed, but that would just mean that
+ * some bits which should be cleared haven't been,
+ * which is safe. The relevant bitmap blocks will
+ * probably get written again, but there is no great
+ * loss if they aren't.
+ */
md_super_wait(bitmap->mddev);
}
if (bio->bi_error) {
pr_err("md: super_written gets error=%d\n", bio->bi_error);
md_error(mddev, rdev);
- }
+ if (!test_bit(Faulty, &rdev->flags)
+ && (bio->bi_opf & MD_FAILFAST)) {
+ set_bit(MD_NEED_REWRITE, &mddev->flags);
+ set_bit(LastDev, &rdev->flags);
+ }
+ } else
+ clear_bit(LastDev, &rdev->flags);
if (atomic_dec_and_test(&mddev->pending_writes))
wake_up(&mddev->sb_wait);
* if zero is reached.
* If an error occurred, call md_error
*/
- struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
+ struct bio *bio;
+ int ff = 0;
+
+ if (test_bit(Faulty, &rdev->flags))
+ return;
+
+ bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
atomic_inc(&rdev->nr_pending);
bio_add_page(bio, page, size, 0);
bio->bi_private = rdev;
bio->bi_end_io = super_written;
- bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH_FUA);
+
+ if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
+ test_bit(FailFast, &rdev->flags) &&
+ !test_bit(LastDev, &rdev->flags))
+ ff = MD_FAILFAST;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH_FUA | ff);
atomic_inc(&mddev->pending_writes);
submit_bio(bio);
}
-void md_super_wait(struct mddev *mddev)
+int md_super_wait(struct mddev *mddev)
{
/* Wait for all superblock writes that were scheduled to complete,
 * i.e. until mddev->pending_writes drops to zero.
 *
 * Returns -EAGAIN if MD_NEED_REWRITE was set (the bit is cleared
 * here), telling the caller to issue the metadata write again;
 * returns 0 otherwise.
 */
wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
+ if (test_and_clear_bit(MD_NEED_REWRITE, &mddev->flags))
+ return -EAGAIN;
+ return 0;
}
int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
if (IS_ENABLED(CONFIG_LBDAF) && (u64)num_sectors >= (2ULL << 32) &&
rdev->mddev->level >= 1)
num_sectors = (sector_t)(2ULL << 32) - 2;
- md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
+ do {
+ md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
rdev->sb_page);
- md_super_wait(rdev->mddev);
+ } while (md_super_wait(rdev->mddev) < 0);
return num_sectors;
}
sb->data_size = cpu_to_le64(num_sectors);
sb->super_offset = rdev->sb_start;
sb->sb_csum = calc_sb_1_csum(sb);
- md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
- rdev->sb_page);
- md_super_wait(rdev->mddev);
+ do {
+ md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
+ rdev->sb_page);
+ } while (md_super_wait(rdev->mddev) < 0);
return num_sectors;
}
if (mddev->queue)
blk_add_trace_msg(mddev->queue, "md md_update_sb");
+rewrite:
bitmap_update_sb(mddev->bitmap);
rdev_for_each(rdev, mddev) {
char b[BDEVNAME_SIZE];
/* only need to write one superblock... */
break;
}
- md_super_wait(mddev);
+ if (md_super_wait(mddev) < 0)
+ goto rewrite;
/* if there was a failure, MD_CHANGE_DEVS was set, and we re-write super */
if (mddev_is_clustered(mddev) && ret == 0)
#define MaxSector (~(sector_t)0)
+/*
+ * These flags should really be called "NO_RETRY" rather than
+ * "FAILFAST" because they don't make any promise about time lapse,
+ * only about the number of retries, which will be zero.
+ * REQ_FAILFAST_DRIVER is not included because
+ * Commit: 4a27446f3e39 ("[SCSI] modify scsi to handle new fail fast flags.")
+ * seems to suggest that the errors it avoids retrying should usually
+ * be retried.
+ */
+#define MD_FAILFAST (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT)
/*
* MD's 'extended' device
*/
* It is expected that no bad block log
* is present.
*/
+ LastDev, /* Seems to be the last working dev as
+ * it didn't fail, so don't use FailFast
+ * any more for metadata
+ */
};
static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
* already took resync lock, need to
* release the lock */
+ MD_FAILFAST_SUPPORTED, /* Using MD_FAILFAST on metadata writes is
+ * supported as calls to md_error() will
+ * never cause the array to become failed.
+ */
+ MD_NEED_REWRITE, /* metadata write needs to be repeated */
};
#define MD_UPDATE_SB_FLAGS (BIT(MD_CHANGE_DEVS) | \
BIT(MD_CHANGE_CLEAN) | \
extern void md_flush_request(struct mddev *mddev, struct bio *bio);
extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
sector_t sector, int size, struct page *page);
-extern void md_super_wait(struct mddev *mddev);
+extern int md_super_wait(struct mddev *mddev);
extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
struct page *page, int op, int op_flags,
bool metadata_op);