dm zoned: properly handle backing device failure

author Dmitry Fomichev <dmitry.fomichev@wdc.com>

Sat, 10 Aug 2019 21:43:11 +0000 (14:43 -0700)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Thu, 29 Aug 2019 06:26:44 +0000 (08:26 +0200)
author Dmitry Fomichev <dmitry.fomichev@wdc.com>
Sat, 10 Aug 2019 21:43:11 +0000 (14:43 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 29 Aug 2019 06:26:44 +0000 (08:26 +0200)
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c

index 0e50e8bab81c03bf437471b5e700d0790a3dc534..ccf17eb6adaa29ca3655260cf10242fb558ed79d 100644 (file)
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -401,15 +401,18 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd,
         sector_t block = zmd->sb[zmd->mblk_primary].block + mblk_no;
         struct bio *bio;
  
+       if (dmz_bdev_is_dying(zmd->dev))
+               return ERR_PTR(-EIO);
+
         /* Get a new block and a BIO to read it */
         mblk = dmz_alloc_mblock(zmd, mblk_no);
         if (!mblk)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
  
         bio = bio_alloc(GFP_NOIO, 1);
         if (!bio) {
                 dmz_free_mblock(zmd, mblk);
-               return NULL;
+               return ERR_PTR(-ENOMEM);
         }
  
         spin_lock(&zmd->mblk_lock);
@@ -540,8 +543,8 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd,
         if (!mblk) {
                 /* Cache miss: read the block from disk */
                 mblk = dmz_get_mblock_slow(zmd, mblk_no);
-               if (!mblk)
-                       return ERR_PTR(-ENOMEM);
+               if (IS_ERR(mblk))
+                       return mblk;
         }
  
         /* Wait for on-going read I/O and check for error */
@@ -569,16 +572,19 @@ static void dmz_dirty_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk)
  /*
   * Issue a metadata block write BIO.
   */
-static void dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
-                            unsigned int set)
+static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
+                           unsigned int set)
  {
         sector_t block = zmd->sb[set].block + mblk->no;
         struct bio *bio;
  
+       if (dmz_bdev_is_dying(zmd->dev))
+               return -EIO;
+
         bio = bio_alloc(GFP_NOIO, 1);
         if (!bio) {
                 set_bit(DMZ_META_ERROR, &mblk->state);
-               return;
+               return -ENOMEM;
         }
  
         set_bit(DMZ_META_WRITING, &mblk->state);
@@ -590,6 +596,8 @@ static void dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
         bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META | REQ_PRIO);
         bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
         submit_bio(bio);
+
+       return 0;
  }
  
  /*
@@ -601,6 +609,9 @@ static int dmz_rdwr_block(struct dmz_metadata *zmd, int op, sector_t block,
         struct bio *bio;
         int ret;
  
+       if (dmz_bdev_is_dying(zmd->dev))
+               return -EIO;
+
         bio = bio_alloc(GFP_NOIO, 1);
         if (!bio)
                 return -ENOMEM;
@@ -658,22 +669,29 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd,
  {
         struct dmz_mblock *mblk;
         struct blk_plug plug;
-       int ret = 0;
+       int ret = 0, nr_mblks_submitted = 0;
  
         /* Issue writes */
         blk_start_plug(&plug);
-       list_for_each_entry(mblk, write_list, link)
-               dmz_write_mblock(zmd, mblk, set);
+       list_for_each_entry(mblk, write_list, link) {
+               ret = dmz_write_mblock(zmd, mblk, set);
+               if (ret)
+                       break;
+               nr_mblks_submitted++;
+       }
         blk_finish_plug(&plug);
  
         /* Wait for completion */
         list_for_each_entry(mblk, write_list, link) {
+               if (!nr_mblks_submitted)
+                       break;
                 wait_on_bit_io(&mblk->state, DMZ_META_WRITING,
                                TASK_UNINTERRUPTIBLE);
                 if (test_bit(DMZ_META_ERROR, &mblk->state)) {
                         clear_bit(DMZ_META_ERROR, &mblk->state);
                         ret = -EIO;
                 }
+               nr_mblks_submitted--;
         }
  
         /* Flush drive cache (this will also sync data) */
@@ -735,6 +753,11 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
          */
         dmz_lock_flush(zmd);
  
+       if (dmz_bdev_is_dying(zmd->dev)) {
+               ret = -EIO;
+               goto out;
+       }
+
         /* Get dirty blocks */
         spin_lock(&zmd->mblk_lock);
         list_splice_init(&zmd->mblk_dirty_list, &write_list);
@@ -1623,6 +1646,10 @@ again:
                 /* Alloate a random zone */
                 dzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
                 if (!dzone) {
+                       if (dmz_bdev_is_dying(zmd->dev)) {
+                               dzone = ERR_PTR(-EIO);
+                               goto out;
+                       }
                         dmz_wait_for_free_zones(zmd);
                         goto again;
                 }
@@ -1720,6 +1747,10 @@ again:
         /* Alloate a random zone */
         bzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
         if (!bzone) {
+               if (dmz_bdev_is_dying(zmd->dev)) {
+                       bzone = ERR_PTR(-EIO);
+                       goto out;
+               }
                 dmz_wait_for_free_zones(zmd);
                 goto again;
         }
diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c

index 81fe500d06a53051204a5bc98b0d0db2f3b8502a..a9f84a9984765a5284108953492c432a840ce287 100644 (file)
--- a/drivers/md/dm-zoned-reclaim.c
+++ b/drivers/md/dm-zoned-reclaim.c
@@ -37,7 +37,7 @@ enum {
  /*
   * Number of seconds of target BIO inactivity to consider the target idle.
   */
-#define DMZ_IDLE_PERIOD                (10UL * HZ)
+#define DMZ_IDLE_PERIOD                        (10UL * HZ)
  
  /*
   * Percentage of unmapped (free) random zones below which reclaim starts
@@ -134,6 +134,9 @@ static int dmz_reclaim_copy(struct dmz_reclaim *zrc,
                 set_bit(DM_KCOPYD_WRITE_SEQ, &flags);
  
         while (block < end_block) {
+               if (dev->flags & DMZ_BDEV_DYING)
+                       return -EIO;
+
                 /* Get a valid region from the source zone */
                 ret = dmz_first_valid_block(zmd, src_zone, &block);
                 if (ret <= 0)
@@ -453,6 +456,9 @@ static void dmz_reclaim_work(struct work_struct *work)
         unsigned int p_unmap_rnd;
         int ret;
  
+       if (dmz_bdev_is_dying(zrc->dev))
+               return;
+
         if (!dmz_should_reclaim(zrc)) {
                 mod_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD);
                 return;
@@ -482,8 +488,16 @@ static void dmz_reclaim_work(struct work_struct *work)
                       p_unmap_rnd, nr_unmap_rnd, nr_rnd);
  
         ret = dmz_do_reclaim(zrc);
-       if (ret)
+       if (ret) {
                 dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret);
+               if (ret == -EIO)
+                       /*
+                        * LLD might be performing some error handling sequence
+                        * at the underlying device. To not interfere, do not
+                        * attempt to schedule the next reclaim run immediately.
+                        */
+                       return;
+       }
  
         dmz_schedule_reclaim(zrc);
  }
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c

index 03dff3c8f6c0f46193beb14240b61544ec780baa..1e004d975e78645bf6a41d0108adc8c43676a748 100644 (file)
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -133,6 +133,8 @@ static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone,
  
         atomic_inc(&bioctx->ref);
         generic_make_request(clone);
+       if (clone->bi_status == BLK_STS_IOERR)
+               return -EIO;
  
         if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone))
                 zone->wp_block += nr_blocks;
@@ -277,8 +279,8 @@ static int dmz_handle_buffered_write(struct dmz_target *dmz,
  
         /* Get the buffer zone. One will be allocated if needed */
         bzone = dmz_get_chunk_buffer(zmd, zone);
-       if (!bzone)
-               return -ENOSPC;
+       if (IS_ERR(bzone))
+               return PTR_ERR(bzone);
  
         if (dmz_is_readonly(bzone))
                 return -EROFS;
@@ -389,6 +391,11 @@ static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw,
  
         dmz_lock_metadata(zmd);
  
+       if (dmz->dev->flags & DMZ_BDEV_DYING) {
+               ret = -EIO;
+               goto out;
+       }
+
         /*
          * Get the data zone mapping the chunk. There may be no
          * mapping for read and discard. If a mapping is obtained,
@@ -493,6 +500,8 @@ static void dmz_flush_work(struct work_struct *work)
  
         /* Flush dirty metadata blocks */
         ret = dmz_flush_metadata(dmz->metadata);
+       if (ret)
+               dmz_dev_debug(dmz->dev, "Metadata flush failed, rc=%d\n", ret);
  
         /* Process queued flush requests */
         while (1) {
@@ -556,6 +565,32 @@ out:
         return ret;
  }
  
+/*
+ * Return true if the backing device is dying: its queue is dying or a media
+ * change event was reported. Once set, DMZ_BDEV_DYING is never cleared here,
+ * so later calls (target, reclaim, metadata) fail fast without re-probing.
+ */
+bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev)
+{
+       struct gendisk *disk;
+
+       if (!(dmz_dev->flags & DMZ_BDEV_DYING)) { /* probe only until flagged */
+               disk = dmz_dev->bdev->bd_disk;
+               if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) {
+                       dmz_dev_warn(dmz_dev, "Backing device queue dying");
+                       dmz_dev->flags |= DMZ_BDEV_DYING;
+               } else if (disk->fops->check_events) { /* only if hook is set */
+                       if (disk->fops->check_events(disk, 0) &
+                                       DISK_EVENT_MEDIA_CHANGE) {
+                               dmz_dev_warn(dmz_dev, "Backing device offline");
+                               dmz_dev->flags |= DMZ_BDEV_DYING;
+                       }
+               }
+       }
+
+       return dmz_dev->flags & DMZ_BDEV_DYING; /* nonzero -> true */
+}
+
  /*
   * Process a new BIO.
   */
@@ -569,6 +604,9 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
         sector_t chunk_sector;
         int ret;
  
+       if (dmz_bdev_is_dying(dmz->dev))
+               return DM_MAPIO_KILL;
+
         dmz_dev_debug(dev, "BIO op %d sector %llu + %u => chunk %llu, block %llu, %u blocks",
                       bio_op(bio), (unsigned long long)sector, nr_sectors,
                       (unsigned long long)dmz_bio_chunk(dmz->dev, bio),
@@ -865,6 +903,9 @@ static int dmz_prepare_ioctl(struct dm_target *ti,
  {
         struct dmz_target *dmz = ti->private;
  
+       if (dmz_bdev_is_dying(dmz->dev))
+               return -ENODEV;
+
         *bdev = dmz->dev->bdev;
  
         return 0;
diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h

index ed8de49c9a08263e8ca25ff2979abb2958b4cde0..93a64529f21902968a705c6ea88a292ad6161d24 100644 (file)
--- a/drivers/md/dm-zoned.h
+++ b/drivers/md/dm-zoned.h
@@ -56,6 +56,8 @@ struct dmz_dev {
  
         unsigned int            nr_zones;
  
+       unsigned int            flags;
+
         sector_t                zone_nr_sectors;
         unsigned int            zone_nr_sectors_shift;
  
@@ -67,6 +69,9 @@ struct dmz_dev {
                                  (dev)->zone_nr_sectors_shift)
  #define dmz_chunk_block(dev, b)        ((b) & ((dev)->zone_nr_blocks - 1))
  
+/* Device flags. */
+#define DMZ_BDEV_DYING         (1 << 0)
+
  /*
   * Zone descriptor.
   */
@@ -245,4 +250,9 @@ void dmz_resume_reclaim(struct dmz_reclaim *zrc);
  void dmz_reclaim_bio_acc(struct dmz_reclaim *zrc);
  void dmz_schedule_reclaim(struct dmz_reclaim *zrc);
  
+/*
+ * Functions defined in dm-zoned-target.c
+ */
+bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev);
+
  #endif /* DM_ZONED_H */
author	Dmitry Fomichev <dmitry.fomichev@wdc.com>
	Sat, 10 Aug 2019 21:43:11 +0000 (14:43 -0700)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 29 Aug 2019 06:26:44 +0000 (08:26 +0200)
drivers/md/dm-zoned-metadata.c		patch \| blob \| blame \| history
drivers/md/dm-zoned-reclaim.c		patch \| blob \| blame \| history
drivers/md/dm-zoned-target.c		patch \| blob \| blame \| history
drivers/md/dm-zoned.h		patch \| blob \| blame \| history