ext4: rip out DAX handling from direct IO path
author: Jan Kara <jack@suse.cz>
Sun, 20 Nov 2016 23:53:30 +0000 (18:53 -0500)
committer: Theodore Ts'o <tytso@mit.edu>
Sun, 20 Nov 2016 23:53:30 +0000 (18:53 -0500)
Reads and writes for DAX inodes should no longer end up in direct IO
code. Rip out the support and add a warning.

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
fs/ext4/ext4.h
fs/ext4/inode.c

index da82de65035074674a5dce90746e5bf426dff3a2..6673e88011ecf610d86554c8dec4706ce61f1919 100644 (file)
@@ -2452,8 +2452,6 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
 struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
 int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
                             struct buffer_head *bh_result, int create);
-int ext4_dax_get_block(struct inode *inode, sector_t iblock,
-                      struct buffer_head *bh_result, int create);
 int ext4_get_block(struct inode *inode, sector_t iblock,
                   struct buffer_head *bh_result, int create);
 int ext4_dio_get_block(struct inode *inode, sector_t iblock,
index 9de9a5a5d2a4f9e839deea6039141ca407825730..861f848159e850ad09dbf28ac4491cfaee631275 100644 (file)
@@ -3280,46 +3280,6 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
 }
 
 #ifdef CONFIG_FS_DAX
-/*
- * Get block function for DAX IO and mmap faults. It takes care of converting
- * unwritten extents to written ones and initializes new / converted blocks
- * to zeros.
- */
-int ext4_dax_get_block(struct inode *inode, sector_t iblock,
-                      struct buffer_head *bh_result, int create)
-{
-       int ret;
-
-       ext4_debug("inode %lu, create flag %d\n", inode->i_ino, create);
-       if (!create)
-               return _ext4_get_block(inode, iblock, bh_result, 0);
-
-       ret = ext4_get_block_trans(inode, iblock, bh_result,
-                                  EXT4_GET_BLOCKS_PRE_IO |
-                                  EXT4_GET_BLOCKS_CREATE_ZERO);
-       if (ret < 0)
-               return ret;
-
-       if (buffer_unwritten(bh_result)) {
-               /*
-                * We are protected by i_mmap_sem or i_mutex so we know block
-                * cannot go away from under us even though we dropped
-                * i_data_sem. Convert extent to written and write zeros there.
-                */
-               ret = ext4_get_block_trans(inode, iblock, bh_result,
-                                          EXT4_GET_BLOCKS_CONVERT |
-                                          EXT4_GET_BLOCKS_CREATE_ZERO);
-               if (ret < 0)
-                       return ret;
-       }
-       /*
-        * At least for now we have to clear BH_New so that DAX code
-        * doesn't attempt to zero blocks again in a racy way.
-        */
-       clear_buffer_new(bh_result);
-       return 0;
-}
-
 static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
                            unsigned flags, struct iomap *iomap)
 {
@@ -3473,14 +3433,6 @@ struct iomap_ops ext4_iomap_ops = {
        .iomap_end              = ext4_iomap_end,
 };
 
-#else
-/* Just define empty function, it will never get called. */
-int ext4_dax_get_block(struct inode *inode, sector_t iblock,
-                      struct buffer_head *bh_result, int create)
-{
-       BUG();
-       return 0;
-}
 #endif
 
 static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
@@ -3602,19 +3554,7 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
        iocb->private = NULL;
        if (overwrite)
                get_block_func = ext4_dio_get_block_overwrite;
-       else if (IS_DAX(inode)) {
-               /*
-                * We can avoid zeroing for aligned DAX writes beyond EOF. Other
-                * writes need zeroing either because they can race with page
-                * faults or because they use partial blocks.
-                */
-               if (round_down(offset, 1<<inode->i_blkbits) >= inode->i_size &&
-                   ext4_aligned_io(inode, offset, count))
-                       get_block_func = ext4_dio_get_block;
-               else
-                       get_block_func = ext4_dax_get_block;
-               dio_flags = DIO_LOCKING;
-       } else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
+       else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
                   round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) {
                get_block_func = ext4_dio_get_block;
                dio_flags = DIO_LOCKING | DIO_SKIP_HOLES;
@@ -3628,14 +3568,9 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
        BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
 #endif
-       if (IS_DAX(inode)) {
-               ret = dax_do_io(iocb, inode, iter, get_block_func,
-                               ext4_end_io_dio, dio_flags);
-       } else
-               ret = __blockdev_direct_IO(iocb, inode,
-                                          inode->i_sb->s_bdev, iter,
-                                          get_block_func,
-                                          ext4_end_io_dio, NULL, dio_flags);
+       ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
+                                  get_block_func, ext4_end_io_dio, NULL,
+                                  dio_flags);
 
        if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
                                                EXT4_STATE_DIO_UNWRITTEN)) {
@@ -3704,6 +3639,7 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
 {
        struct address_space *mapping = iocb->ki_filp->f_mapping;
        struct inode *inode = mapping->host;
+       size_t count = iov_iter_count(iter);
        ssize_t ret;
 
        /*
@@ -3712,19 +3648,12 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
         * we are protected against page writeback as well.
         */
        inode_lock_shared(inode);
-       if (IS_DAX(inode)) {
-               ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, NULL, 0);
-       } else {
-               size_t count = iov_iter_count(iter);
-
-               ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
-                                                  iocb->ki_pos + count);
-               if (ret)
-                       goto out_unlock;
-               ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
-                                          iter, ext4_dio_get_block,
-                                          NULL, NULL, 0);
-       }
+       ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
+                                          iocb->ki_pos + count);
+       if (ret)
+               goto out_unlock;
+       ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
+                                  iter, ext4_dio_get_block, NULL, NULL, 0);
 out_unlock:
        inode_unlock_shared(inode);
        return ret;
@@ -3753,6 +3682,10 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
        if (ext4_has_inline_data(inode))
                return 0;
 
+       /* DAX uses iomap path now */
+       if (WARN_ON_ONCE(IS_DAX(inode)))
+               return 0;
+
        trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
        if (iov_iter_rw(iter) == READ)
                ret = ext4_direct_IO_read(iocb, iter);