dm integrity: optimize writing dm-bufio buffers that are partially changed
author Mikulas Patocka <mpatocka@redhat.com>
Sun, 30 Apr 2017 21:31:22 +0000 (17:31 -0400)
committer Mike Snitzer <snitzer@redhat.com>
Mon, 28 Aug 2017 15:47:17 +0000 (11:47 -0400)
Rather than write the entire dm-bufio buffer when only a subset is
changed, improve dm-bufio (and dm-integrity) by only writing the subset
of the buffer that changed.

Update dm-integrity to make use of dm-bufio's new
dm_bufio_mark_partial_buffer_dirty() interface.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
drivers/md/dm-bufio.c
drivers/md/dm-bufio.h
drivers/md/dm-integrity.c

index 44f4a8ac95bd5a3a0f7742291c4827a27e27b3b5..94e050b395df8108aca955c1e71462cc499b139b 100644 (file)
 #define DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT (PAGE_SIZE >> 1)
 #define DM_BUFIO_BLOCK_SIZE_GFP_LIMIT  (PAGE_SIZE << (MAX_ORDER - 1))
 
+/*
+ * Align buffer writes to this boundary.
+ * Tests show that SSDs have the highest IOPS when using 4k writes.
+ */
+#define DM_BUFIO_WRITE_ALIGN           4096
+
 /*
  * dm_buffer->list_mode
  */
@@ -149,6 +155,10 @@ struct dm_buffer {
        blk_status_t write_error;
        unsigned long state;
        unsigned long last_accessed;
+       unsigned dirty_start;
+       unsigned dirty_end;
+       unsigned write_start;
+       unsigned write_end;
        struct dm_bufio_client *c;
        struct list_head write_list;
        struct bio bio;
@@ -560,7 +570,7 @@ static void dmio_complete(unsigned long error, void *context)
 }
 
 static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
-                    unsigned n_sectors, bio_end_io_t *end_io)
+                    unsigned n_sectors, unsigned offset, bio_end_io_t *end_io)
 {
        int r;
        struct dm_io_request io_req = {
@@ -578,10 +588,10 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
 
        if (b->data_mode != DATA_MODE_VMALLOC) {
                io_req.mem.type = DM_IO_KMEM;
-               io_req.mem.ptr.addr = b->data;
+               io_req.mem.ptr.addr = (char *)b->data + offset;
        } else {
                io_req.mem.type = DM_IO_VMA;
-               io_req.mem.ptr.vma = b->data;
+               io_req.mem.ptr.vma = (char *)b->data + offset;
        }
 
        b->bio.bi_end_io = end_io;
@@ -609,10 +619,10 @@ static void inline_endio(struct bio *bio)
 }
 
 static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector,
-                          unsigned n_sectors, bio_end_io_t *end_io)
+                          unsigned n_sectors, unsigned offset, bio_end_io_t *end_io)
 {
        char *ptr;
-       int len;
+       unsigned len;
 
        bio_init(&b->bio, b->bio_vec, DM_BUFIO_INLINE_VECS);
        b->bio.bi_iter.bi_sector = sector;
@@ -625,29 +635,20 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector,
        b->bio.bi_private = end_io;
        bio_set_op_attrs(&b->bio, rw, 0);
 
-       /*
-        * We assume that if len >= PAGE_SIZE ptr is page-aligned.
-        * If len < PAGE_SIZE the buffer doesn't cross page boundary.
-        */
-       ptr = b->data;
+       ptr = (char *)b->data + offset;
        len = n_sectors << SECTOR_SHIFT;
 
-       if (len >= PAGE_SIZE)
-               BUG_ON((unsigned long)ptr & (PAGE_SIZE - 1));
-       else
-               BUG_ON((unsigned long)ptr & (len - 1));
-
        do {
-               if (!bio_add_page(&b->bio, virt_to_page(ptr),
-                                 len < PAGE_SIZE ? len : PAGE_SIZE,
+               unsigned this_step = min((unsigned)(PAGE_SIZE - offset_in_page(ptr)), len);
+               if (!bio_add_page(&b->bio, virt_to_page(ptr), this_step,
                                  offset_in_page(ptr))) {
                        BUG_ON(b->c->block_size <= PAGE_SIZE);
-                       use_dmio(b, rw, sector, n_sectors, end_io);
+                       use_dmio(b, rw, sector, n_sectors, offset, end_io);
                        return;
                }
 
-               len -= PAGE_SIZE;
-               ptr += PAGE_SIZE;
+               len -= this_step;
+               ptr += this_step;
        } while (len > 0);
 
        submit_bio(&b->bio);
@@ -657,18 +658,33 @@ static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io)
 {
        unsigned n_sectors;
        sector_t sector;
-
-       if (rw == WRITE && b->c->write_callback)
-               b->c->write_callback(b);
+       unsigned offset, end;
 
        sector = (b->block << b->c->sectors_per_block_bits) + b->c->start;
-       n_sectors = 1 << b->c->sectors_per_block_bits;
+
+       if (rw != WRITE) {
+               n_sectors = 1 << b->c->sectors_per_block_bits;
+               offset = 0;
+       } else {
+               if (b->c->write_callback)
+                       b->c->write_callback(b);
+               offset = b->write_start;
+               end = b->write_end;
+               offset &= -DM_BUFIO_WRITE_ALIGN;
+               end += DM_BUFIO_WRITE_ALIGN - 1;
+               end &= -DM_BUFIO_WRITE_ALIGN;
+               if (unlikely(end > b->c->block_size))
+                       end = b->c->block_size;
+
+               sector += offset >> SECTOR_SHIFT;
+               n_sectors = (end - offset) >> SECTOR_SHIFT;
+       }
 
        if (n_sectors <= ((DM_BUFIO_INLINE_VECS * PAGE_SIZE) >> SECTOR_SHIFT) &&
            b->data_mode != DATA_MODE_VMALLOC)
-               use_inline_bio(b, rw, sector, n_sectors, end_io);
+               use_inline_bio(b, rw, sector, n_sectors, offset, end_io);
        else
-               use_dmio(b, rw, sector, n_sectors, end_io);
+               use_dmio(b, rw, sector, n_sectors, offset, end_io);
 }
 
 /*----------------------------------------------------------------
@@ -720,6 +736,9 @@ static void __write_dirty_buffer(struct dm_buffer *b,
        clear_bit(B_DIRTY, &b->state);
        wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
 
+       b->write_start = b->dirty_start;
+       b->write_end = b->dirty_end;
+
        if (!write_list)
                submit_io(b, WRITE, write_endio);
        else
@@ -1221,19 +1240,37 @@ void dm_bufio_release(struct dm_buffer *b)
 }
 EXPORT_SYMBOL_GPL(dm_bufio_release);
 
-void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
+void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b,
+                                       unsigned start, unsigned end)
 {
        struct dm_bufio_client *c = b->c;
 
+       BUG_ON(start >= end);
+       BUG_ON(end > b->c->block_size);
+
        dm_bufio_lock(c);
 
        BUG_ON(test_bit(B_READING, &b->state));
 
-       if (!test_and_set_bit(B_DIRTY, &b->state))
+       if (!test_and_set_bit(B_DIRTY, &b->state)) {
+               b->dirty_start = start;
+               b->dirty_end = end;
                __relink_lru(b, LIST_DIRTY);
+       } else {
+               if (start < b->dirty_start)
+                       b->dirty_start = start;
+               if (end > b->dirty_end)
+                       b->dirty_end = end;
+       }
 
        dm_bufio_unlock(c);
 }
+EXPORT_SYMBOL_GPL(dm_bufio_mark_partial_buffer_dirty);
+
+void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
+{
+       dm_bufio_mark_partial_buffer_dirty(b, 0, b->c->block_size);
+}
 EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty);
 
 void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c)
@@ -1398,6 +1435,8 @@ retry:
                wait_on_bit_io(&b->state, B_WRITING,
                               TASK_UNINTERRUPTIBLE);
                set_bit(B_DIRTY, &b->state);
+               b->dirty_start = 0;
+               b->dirty_end = c->block_size;
                __unlink_buffer(b);
                __link_buffer(b, new_block, LIST_DIRTY);
        } else {
index b6d8f53ec15b440c025c6d37c64b88025abd0772..be732d3f86119a0698f6ce32e6c4951997898e9f 100644 (file)
@@ -93,6 +93,15 @@ void dm_bufio_release(struct dm_buffer *b);
  */
 void dm_bufio_mark_buffer_dirty(struct dm_buffer *b);
 
+/*
+ * Mark a part of the buffer dirty.
+ *
+ * The specified part of the buffer is scheduled to be written. dm-bufio may
+ * write the specified part of the buffer or it may write a larger superset.
+ */
+void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b,
+                                       unsigned start, unsigned end);
+
 /*
  * Initiate writing of dirty buffers, without waiting for completion.
  */
index 3acce09bba35c54b1afe4e8af962766bfd90eb73..689f89d8eeef94f8169fa39323004915b70db251 100644 (file)
@@ -1040,7 +1040,7 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
                        memcpy(tag, dp, to_copy);
                } else if (op == TAG_WRITE) {
                        memcpy(dp, tag, to_copy);
-                       dm_bufio_mark_buffer_dirty(b);
+                       dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy);
                } else  {
                        /* e.g.: op == TAG_CMP */
                        if (unlikely(memcmp(dp, tag, to_copy))) {