fs: Provide function to unmap metadata for a range of blocks
author		Jan Kara <jack@suse.cz>		Fri, 4 Nov 2016 17:08:11 +0000 (18:08 +0100)
committer	Jens Axboe <axboe@fb.com>	Fri, 4 Nov 2016 20:34:47 +0000 (14:34 -0600)
Provide a function equivalent to unmap_underlying_metadata() for a range
of blocks. We somewhat optimize the function: it uses pagevec lookups
instead of looking up buffer heads one by one, and it pins buffer heads
with the page lock instead of the mapping's private_lock, which improves
scalability.
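
As a sketch of the intended call-site conversion (the caller below is
hypothetical; 'sb', 'first_block' and 'count' are illustrative names, not
part of this patch): a filesystem that has just allocated 'count'
contiguous blocks for file data previously invalidated stale buffer-cache
aliases one block at a time, and can now use a single ranged call:

	sector_t blk;

	/* Before: one buffer-head lookup per block. */
	for (blk = 0; blk < count; blk++)
		unmap_underlying_metadata(sb->s_bdev, first_block + blk);

	/* After: one pagevec-based pass over the whole range. */
	clean_bdev_aliases(sb->s_bdev, first_block, count);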

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
fs/buffer.c
include/linux/buffer_head.h

diff --git a/fs/buffer.c b/fs/buffer.c
index af5776da814af7d678ef98a90d9f2d9e488bb64b..f8beca55240a2ef048a4a6c4021082588ded3079 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -43,6 +43,7 @@
 #include <linux/bitops.h>
 #include <linux/mpage.h>
 #include <linux/bit_spinlock.h>
+#include <linux/pagevec.h>
 #include <trace/events/block.h>
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
@@ -1636,6 +1637,90 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
 }
 EXPORT_SYMBOL(unmap_underlying_metadata);
 
+/**
+ * clean_bdev_aliases: clean a range of buffers in a block device
+ * @bdev: Block device to clean buffers in
+ * @block: Start of a range of blocks to clean
+ * @len: Number of blocks to clean
+ *
+ * We are taking a range of blocks for data and we don't want writeback of any
+ * buffer-cache aliases from the moment this function returns until something
+ * explicitly marks the buffer dirty (hopefully that will not happen until we
+ * free that block ;-) We don't even need to mark the buffers not-uptodate -
+ * nobody can expect anything from a newly allocated buffer anyway. We used to
+ * use unmap_buffer() for such invalidation, but that was wrong: we definitely
+ * don't want to mark the alias unmapped, for example - that would confuse
+ * anyone who might pick it up with bread() afterwards...
+ *
+ * Also note that bforget() doesn't lock the buffer, so there can be writeout
+ * I/O going on against recently-freed buffers. We don't wait on that I/O in
+ * bforget() - it's more efficient to wait on the I/O only if we really need
+ * to. That happens here.
+ */
+void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
+{
+       struct inode *bd_inode = bdev->bd_inode;
+       struct address_space *bd_mapping = bd_inode->i_mapping;
+       struct pagevec pvec;
+       pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
+       pgoff_t end;
+       int i;
+       struct buffer_head *bh;
+       struct buffer_head *head;
+
+       end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
+       pagevec_init(&pvec, 0);
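+       /*
+        * Ask for at most a pagevec's worth of pages per lookup, and never
+        * look past the page that covers the last block of the range.
+        */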
+       while (index <= end && pagevec_lookup(&pvec, bd_mapping, index,
+                       min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+               for (i = 0; i < pagevec_count(&pvec); i++) {
+                       struct page *page = pvec.pages[i];
+
+                       index = page->index;
+                       if (index > end)
+                               break;
+                       if (!page_has_buffers(page))
+                               continue;
+                       /*
+                        * We use the page lock instead of bd_mapping->private_lock
+                        * to pin buffers here since we can afford to sleep and
+                        * it scales better than a global spinlock.
+                        */
+                       lock_page(page);
+                       /* Recheck when the page is locked which pins bhs */
+                       if (!page_has_buffers(page))
+                               goto unlock_page;
+                       head = page_buffers(page);
+                       bh = head;
+                       do {
+                               if (!buffer_mapped(bh))
+                                       goto next;
+                               if (bh->b_blocknr >= block + len)
+                                       break;
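+                               /*
+                                * Clear any stale dirty bit and wait out
+                                * writeback already in flight against the
+                                * freed block before it can be reused.
+                                */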
+                               clear_buffer_dirty(bh);
+                               wait_on_buffer(bh);
+                               clear_buffer_req(bh);
+next:
+                               bh = bh->b_this_page;
+                       } while (bh != head);
+unlock_page:
+                       unlock_page(page);
+               }
+               pagevec_release(&pvec);
+               cond_resched();
+               index++;
+       }
+}
+EXPORT_SYMBOL(clean_bdev_aliases);
+
 /*
  * Size is a power-of-two in the range 512..PAGE_SIZE,
  * and the case we care about most is PAGE_SIZE.
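
To make the page-index arithmetic in clean_bdev_aliases() concrete, here is
a standalone userspace sketch of the same conversion, assuming 4 KiB pages
(PAGE_SHIFT == 12) and 1 KiB blocks (i_blkbits == 10); the geometry and the
numbers are illustrative only:

	#include <stdio.h>

	int main(void)
	{
		unsigned int page_shift = 12;	/* 4 KiB pages */
		unsigned int blkbits = 10;	/* 1 KiB blocks, 4 per page */
		unsigned long long block = 103, len = 8;

		/* Same shifts as in clean_bdev_aliases(). */
		unsigned long long index = block >> (page_shift - blkbits);
		unsigned long long end = (block + len - 1) >> (page_shift - blkbits);

		/* Blocks 103..110 live in page indices 25..27. */
		printf("pages %llu..%llu\n", index, end);
		return 0;
	}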

diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index ebbacd14d4504a192d7c3f7443012edac433485d..9c9c73ce7d4f8f438710e56175f467b91e337803 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -169,6 +169,8 @@ void invalidate_inode_buffers(struct inode *);
 int remove_inode_buffers(struct inode *inode);
 int sync_mapping_buffers(struct address_space *mapping);
 void unmap_underlying_metadata(struct block_device *bdev, sector_t block);
+void clean_bdev_aliases(struct block_device *bdev, sector_t block,
+                       sector_t len);
 
 void mark_buffer_async_write(struct buffer_head *bh);
 void __wait_on_buffer(struct buffer_head *);
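
For call sites that invalidate a single just-mapped buffer (the common
unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr) pattern), a thin
wrapper over the new interface could look like the sketch below; the helper
name is an assumption for illustration and is not added by this patch:

	/* Hypothetical single-buffer convenience wrapper. */
	static inline void clean_bdev_bh_alias(struct buffer_head *bh)
	{
		clean_bdev_aliases(bh->b_bdev, bh->b_blocknr, 1);
	}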