block: Allow devices to indicate whether discarded blocks are zeroed
author Martin K. Petersen <martin.petersen@oracle.com>
Thu, 3 Dec 2009 08:24:48 +0000 (09:24 +0100)
committer Jens Axboe <jens.axboe@oracle.com>
Thu, 3 Dec 2009 08:24:48 +0000 (09:24 +0100)
The discard ioctl is used by mkfs utilities to clear a block device
prior to putting metadata down.  However, not all devices return zeroed
blocks after a discard.  Some drives return stale data, potentially
containing old superblocks.  It is therefore important to know whether
discarded blocks are properly zeroed.

Both ATA and SCSI drives have configuration bits that indicate whether
zeroes are returned after a discard operation.  Implement a block level
interface that allows this information to be bubbled up the stack and
queried via a new block device ioctl.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
block/blk-settings.c
block/blk-sysfs.c
block/compat_ioctl.c
block/ioctl.c
include/linux/blkdev.h
include/linux/fs.h

index 1ebc1fdb91445dadabe758c3c5dd9e7b73be9830..dd1f1e0e196f932cfe6abab7f74f84602557d05d 100644 (file)
@@ -101,6 +101,7 @@ void blk_set_default_limits(struct queue_limits *lim)
        lim->discard_granularity = 0;
        lim->discard_alignment = 0;
        lim->discard_misaligned = 0;
+       lim->discard_zeroes_data = -1;
        lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
        lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
        lim->alignment_offset = 0;
@@ -544,6 +545,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 
        t->io_min = max(t->io_min, b->io_min);
        t->no_cluster |= b->no_cluster;
+       t->discard_zeroes_data &= b->discard_zeroes_data;
 
        /* Bottom device offset aligned? */
        if (offset &&
index 3147145edc15f72fad3aa4cb83ba03c1920039e2..8606c9543fdda0368c6bdde8b4af2d00584e674d 100644 (file)
@@ -136,6 +136,11 @@ static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
        return queue_var_show(q->limits.max_discard_sectors << 9, page);
 }
 
+static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
+{
+       return queue_var_show(queue_discard_zeroes_data(q), page);
+}
+
 static ssize_t
 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
 {
@@ -313,6 +318,11 @@ static struct queue_sysfs_entry queue_discard_max_entry = {
        .show = queue_discard_max_show,
 };
 
+static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
+       .attr = {.name = "discard_zeroes_data", .mode = S_IRUGO },
+       .show = queue_discard_zeroes_data_show,
+};
+
 static struct queue_sysfs_entry queue_nonrot_entry = {
        .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
        .show = queue_nonrot_show,
@@ -350,6 +360,7 @@ static struct attribute *default_attrs[] = {
        &queue_io_opt_entry.attr,
        &queue_discard_granularity_entry.attr,
        &queue_discard_max_entry.attr,
+       &queue_discard_zeroes_data_entry.attr,
        &queue_nonrot_entry.attr,
        &queue_nomerges_entry.attr,
        &queue_rq_affinity_entry.attr,
index 9bd086c1a4d591b59eea0e684debac6e6b8e52d9..4eb8e9ea4af561cd9d141a44709cb94138a25594 100644 (file)
@@ -747,6 +747,8 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
                return compat_put_uint(arg, bdev_io_opt(bdev));
        case BLKALIGNOFF:
                return compat_put_int(arg, bdev_alignment_offset(bdev));
+       case BLKDISCARDZEROES:
+               return compat_put_uint(arg, bdev_discard_zeroes_data(bdev));
        case BLKFLSBUF:
        case BLKROSET:
        case BLKDISCARD:
index 1f4d1de12b0957c9e8a2897f1aa89d5b96f170a5..be48ea51faee2e3a73fd5d5fcc1c7071772db061 100644 (file)
@@ -280,6 +280,8 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
                return put_uint(arg, bdev_io_opt(bdev));
        case BLKALIGNOFF:
                return put_int(arg, bdev_alignment_offset(bdev));
+       case BLKDISCARDZEROES:
+               return put_uint(arg, bdev_discard_zeroes_data(bdev));
        case BLKSECTGET:
                return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev)));
        case BLKRASET:
index e727f6c44c449a8341a4add780b36fe32cabc1a2..784a919aa0d0e05bf978f56f97a2f6eb4e6451e1 100644 (file)
@@ -322,6 +322,7 @@ struct queue_limits {
        unsigned char           misaligned;
        unsigned char           discard_misaligned;
        unsigned char           no_cluster;
+       signed char             discard_zeroes_data;
 };
 
 struct request_queue
@@ -1150,6 +1151,19 @@ static inline int queue_sector_discard_alignment(struct request_queue *q,
                & (q->limits.discard_granularity - 1);
 }
 
+static inline unsigned int queue_discard_zeroes_data(struct request_queue *q)
+{
+       if (q->limits.discard_zeroes_data == 1)
+               return 1;
+
+       return 0;
+}
+
+static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev)
+{
+       return queue_discard_zeroes_data(bdev_get_queue(bdev));
+}
+
 static inline int queue_dma_alignment(struct request_queue *q)
 {
        return q ? q->dma_alignment : 511;
index 79cea805173682e2d750b0b1722b9f86fa6cbfe6..891f7d642e5c6789cb05853137a8caa58a21c2c6 100644 (file)
@@ -304,6 +304,7 @@ struct inodes_stat_t {
 #define BLKIOOPT _IO(0x12,121)
 #define BLKALIGNOFF _IO(0x12,122)
 #define BLKPBSZGET _IO(0x12,123)
+#define BLKDISCARDZEROES _IO(0x12,124)
 
 #define BMAP_IOCTL 1           /* obsolete - kept for compatibility */
 #define FIBMAP    _IO(0x00,1)  /* bmap access */