ext4: refactor punch hole code
authorTheodore Ts'o <tytso@mit.edu>
Wed, 3 Apr 2013 16:45:17 +0000 (12:45 -0400)
committerTheodore Ts'o <tytso@mit.edu>
Wed, 3 Apr 2013 16:45:17 +0000 (12:45 -0400)
Move common code in ext4_ind_punch_hole() and ext4_ext_punch_hole()
into ext4_punch_hole().  This saves over 150 lines of code.

This also fixes a potential bug when the punch_hole() code is racing
against indirect-to-extents or extents-to-indirect migation.  We are
currently using i_mutex to protect against changes to the inode flag;
specifically, the append-only, immutable, and extents inode flags.  So
we need to take i_mutex before deciding whether to use the
extents-specific or indirect-specific punch_hole code.

Also, there was a missing call to ext4_inode_block_unlocked_dio() in
the indirect punch codepath.  This was added in commit 02d262dffcf4c
to block DIO readers racing against the punch operation in the
codepath for extent-mapped inodes, but it was missing for
indirect-block mapped inodes.  One of the advantages of refactoring
the code is that it makes such oversights much less likely.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/indirect.c
fs/ext4/inode.c

index f91e11bd975374b4d191ed1c97baceac4f34a7ab..0649253804c4e46c4fe26fbebf4cb504390e6199 100644 (file)
@@ -2110,7 +2110,8 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
 extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
 extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk);
 extern void ext4_ind_truncate(struct inode *inode);
-extern int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length);
+extern int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
+                                ext4_lblk_t first, ext4_lblk_t stop);
 
 /* ioctl.c */
 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
@@ -2575,8 +2576,8 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
 extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                               struct ext4_map_blocks *map, int flags);
 extern void ext4_ext_truncate(struct inode *);
-extern int ext4_ext_punch_hole(struct file *file, loff_t offset,
-                               loff_t length);
+extern int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
+                                ext4_lblk_t end);
 extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
 extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
index 9c6d06dcef8bf1f997c0f05bfe443fcde993d72b..d58365e40df7fe1b981c5b699e6cfac34de27a1b 100644 (file)
@@ -2599,8 +2599,8 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path)
        return 1;
 }
 
-static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
-                                ext4_lblk_t end)
+int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
+                         ext4_lblk_t end)
 {
        struct super_block *sb = inode->i_sb;
        int depth = ext_depth(inode);
@@ -4623,187 +4623,6 @@ static int ext4_xattr_fiemap(struct inode *inode,
        return (error < 0 ? error : 0);
 }
 
-/*
- * ext4_ext_punch_hole
- *
- * Punches a hole of "length" bytes in a file starting
- * at byte "offset"
- *
- * @inode:  The inode of the file to punch a hole in
- * @offset: The starting byte offset of the hole
- * @length: The length of the hole
- *
- * Returns the number of blocks removed or negative on err
- */
-int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
-{
-       struct inode *inode = file_inode(file);
-       struct super_block *sb = inode->i_sb;
-       ext4_lblk_t first_block, stop_block;
-       struct address_space *mapping = inode->i_mapping;
-       handle_t *handle;
-       loff_t first_page, last_page, page_len;
-       loff_t first_page_offset, last_page_offset;
-       int credits, err = 0;
-
-       /*
-        * Write out all dirty pages to avoid race conditions
-        * Then release them.
-        */
-       if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
-               err = filemap_write_and_wait_range(mapping,
-                       offset, offset + length - 1);
-
-               if (err)
-                       return err;
-       }
-
-       mutex_lock(&inode->i_mutex);
-       /* It's not possible punch hole on append only file */
-       if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
-               err = -EPERM;
-               goto out_mutex;
-       }
-       if (IS_SWAPFILE(inode)) {
-               err = -ETXTBSY;
-               goto out_mutex;
-       }
-
-       /* No need to punch hole beyond i_size */
-       if (offset >= inode->i_size)
-               goto out_mutex;
-
-       /*
-        * If the hole extends beyond i_size, set the hole
-        * to end after the page that contains i_size
-        */
-       if (offset + length > inode->i_size) {
-               length = inode->i_size +
-                  PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
-                  offset;
-       }
-
-       first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-       last_page = (offset + length) >> PAGE_CACHE_SHIFT;
-
-       first_page_offset = first_page << PAGE_CACHE_SHIFT;
-       last_page_offset = last_page << PAGE_CACHE_SHIFT;
-
-       /* Now release the pages */
-       if (last_page_offset > first_page_offset) {
-               truncate_pagecache_range(inode, first_page_offset,
-                                        last_page_offset - 1);
-       }
-
-       /* Wait all existing dio workers, newcomers will block on i_mutex */
-       ext4_inode_block_unlocked_dio(inode);
-       err = ext4_flush_unwritten_io(inode);
-       if (err)
-               goto out_dio;
-       inode_dio_wait(inode);
-
-       credits = ext4_writepage_trans_blocks(inode);
-       handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
-       if (IS_ERR(handle)) {
-               err = PTR_ERR(handle);
-               goto out_dio;
-       }
-
-
-       /*
-        * Now we need to zero out the non-page-aligned data in the
-        * pages at the start and tail of the hole, and unmap the buffer
-        * heads for the block aligned regions of the page that were
-        * completely zeroed.
-        */
-       if (first_page > last_page) {
-               /*
-                * If the file space being truncated is contained within a page
-                * just zero out and unmap the middle of that page
-                */
-               err = ext4_discard_partial_page_buffers(handle,
-                       mapping, offset, length, 0);
-
-               if (err)
-                       goto out;
-       } else {
-               /*
-                * zero out and unmap the partial page that contains
-                * the start of the hole
-                */
-               page_len  = first_page_offset - offset;
-               if (page_len > 0) {
-                       err = ext4_discard_partial_page_buffers(handle, mapping,
-                                                  offset, page_len, 0);
-                       if (err)
-                               goto out;
-               }
-
-               /*
-                * zero out and unmap the partial page that contains
-                * the end of the hole
-                */
-               page_len = offset + length - last_page_offset;
-               if (page_len > 0) {
-                       err = ext4_discard_partial_page_buffers(handle, mapping,
-                                       last_page_offset, page_len, 0);
-                       if (err)
-                               goto out;
-               }
-       }
-
-       /*
-        * If i_size is contained in the last page, we need to
-        * unmap and zero the partial page after i_size
-        */
-       if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
-          inode->i_size % PAGE_CACHE_SIZE != 0) {
-
-               page_len = PAGE_CACHE_SIZE -
-                       (inode->i_size & (PAGE_CACHE_SIZE - 1));
-
-               if (page_len > 0) {
-                       err = ext4_discard_partial_page_buffers(handle,
-                         mapping, inode->i_size, page_len, 0);
-
-                       if (err)
-                               goto out;
-               }
-       }
-
-       first_block = (offset + sb->s_blocksize - 1) >>
-               EXT4_BLOCK_SIZE_BITS(sb);
-       stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
-
-       /* If there are no blocks to remove, return now */
-       if (first_block >= stop_block)
-               goto out;
-
-       down_write(&EXT4_I(inode)->i_data_sem);
-       ext4_discard_preallocations(inode);
-
-       err = ext4_es_remove_extent(inode, first_block,
-                                   stop_block - first_block);
-       err = ext4_ext_remove_space(inode, first_block, stop_block - 1);
-
-       ext4_discard_preallocations(inode);
-
-       if (IS_SYNC(inode))
-               ext4_handle_sync(handle);
-
-       up_write(&EXT4_I(inode)->i_data_sem);
-
-out:
-       inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
-       ext4_mark_inode_dirty(handle, inode);
-       ext4_journal_stop(handle);
-out_dio:
-       ext4_inode_resume_unlocked_dio(inode);
-out_mutex:
-       mutex_unlock(&inode->i_mutex);
-       return err;
-}
-
 int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                __u64 start, __u64 len)
 {
index c0f9e4699f0b129d5acccfc84c085e4f973b940e..d8846779f4eab6ccaca26af4baddcbac4dfc0188 100644 (file)
@@ -1434,8 +1434,8 @@ err:
        return ret;
 }
 
-static int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
-                                ext4_lblk_t first, ext4_lblk_t stop)
+int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
+                         ext4_lblk_t first, ext4_lblk_t stop)
 {
        int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
        int level, ret = 0;
@@ -1469,157 +1469,3 @@ err:
        return ret;
 }
 
-int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length)
-{
-       struct inode *inode = file_inode(file);
-       struct super_block *sb = inode->i_sb;
-       ext4_lblk_t first_block, stop_block;
-       struct address_space *mapping = inode->i_mapping;
-       handle_t *handle = NULL;
-       loff_t first_page, last_page, page_len;
-       loff_t first_page_offset, last_page_offset;
-       int err = 0;
-
-       /*
-        * Write out all dirty pages to avoid race conditions
-        * Then release them.
-        */
-       if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
-               err = filemap_write_and_wait_range(mapping,
-                       offset, offset + length - 1);
-               if (err)
-                       return err;
-       }
-
-       mutex_lock(&inode->i_mutex);
-       /* It's not possible punch hole on append only file */
-       if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
-               err = -EPERM;
-               goto out_mutex;
-       }
-       if (IS_SWAPFILE(inode)) {
-               err = -ETXTBSY;
-               goto out_mutex;
-       }
-
-       /* No need to punch hole beyond i_size */
-       if (offset >= inode->i_size)
-               goto out_mutex;
-
-       /*
-        * If the hole extents beyond i_size, set the hole
-        * to end after the page that contains i_size
-        */
-       if (offset + length > inode->i_size) {
-               length = inode->i_size +
-                   PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
-                   offset;
-       }
-
-       first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-       last_page = (offset + length) >> PAGE_CACHE_SHIFT;
-
-       first_page_offset = first_page << PAGE_CACHE_SHIFT;
-       last_page_offset = last_page << PAGE_CACHE_SHIFT;
-
-       /* Now release the pages */
-       if (last_page_offset > first_page_offset) {
-               truncate_pagecache_range(inode, first_page_offset,
-                                        last_page_offset - 1);
-       }
-
-       /* Wait all existing dio works, newcomers will block on i_mutex */
-       inode_dio_wait(inode);
-
-       handle = start_transaction(inode);
-       if (IS_ERR(handle))
-               goto out_mutex;
-
-       /*
-        * Now we need to zero out the non-page-aligned data in the
-        * pages at the start and tail of the hole, and unmap the buffer
-        * heads for the block aligned regions of the page that were
-        * completely zerod.
-        */
-       if (first_page > last_page) {
-               /*
-                * If the file space being truncated is contained within a page
-                * just zero out and unmap the middle of that page
-                */
-               err = ext4_discard_partial_page_buffers(handle,
-                       mapping, offset, length, 0);
-               if (err)
-                       goto out;
-       } else {
-               /*
-                * Zero out and unmap the paritial page that contains
-                * the start of the hole
-                */
-               page_len = first_page_offset - offset;
-               if (page_len > 0) {
-                       err = ext4_discard_partial_page_buffers(handle, mapping,
-                                                       offset, page_len, 0);
-                       if (err)
-                               goto out;
-               }
-
-               /*
-                * Zero out and unmap the partial page that contains
-                * the end of the hole
-                */
-               page_len = offset + length - last_page_offset;
-               if (page_len > 0) {
-                       err = ext4_discard_partial_page_buffers(handle, mapping,
-                                               last_page_offset, page_len, 0);
-                       if (err)
-                               goto out;
-               }
-       }
-
-       /*
-        * If i_size contained in the last page, we need to
-        * unmap and zero the paritial page after i_size
-        */
-       if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
-           inode->i_size % PAGE_CACHE_SIZE != 0) {
-               page_len = PAGE_CACHE_SIZE -
-                       (inode->i_size & (PAGE_CACHE_SIZE - 1));
-               if (page_len > 0) {
-                       err = ext4_discard_partial_page_buffers(handle,
-                               mapping, inode->i_size, page_len, 0);
-                       if (err)
-                               goto out;
-               }
-       }
-
-       first_block = (offset + sb->s_blocksize - 1) >>
-               EXT4_BLOCK_SIZE_BITS(sb);
-       stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
-
-       if (first_block >= stop_block)
-               goto out;
-
-       down_write(&EXT4_I(inode)->i_data_sem);
-       ext4_discard_preallocations(inode);
-
-       err = ext4_es_remove_extent(inode, first_block,
-                                   stop_block - first_block);
-       err = ext4_free_hole_blocks(handle, inode, first_block, stop_block);
-
-       ext4_discard_preallocations(inode);
-
-       if (IS_SYNC(inode))
-               ext4_handle_sync(handle);
-
-       up_write(&EXT4_I(inode)->i_data_sem);
-
-out:
-       inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
-       ext4_mark_inode_dirty(handle, inode);
-       ext4_journal_stop(handle);
-
-out_mutex:
-       mutex_unlock(&inode->i_mutex);
-
-       return err;
-}
index a4ffb470fbf33547b3c52b8440f0e3f5c4c19962..9bda50aa34e2f45ff4402fff2301d5e6b17a6c63 100644 (file)
@@ -3566,20 +3566,190 @@ int ext4_can_truncate(struct inode *inode)
 int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
 {
        struct inode *inode = file_inode(file);
+       struct super_block *sb = inode->i_sb;
+       ext4_lblk_t first_block, stop_block;
+       struct address_space *mapping = inode->i_mapping;
+       loff_t first_page, last_page, page_len;
+       loff_t first_page_offset, last_page_offset;
+       handle_t *handle;
+       unsigned int credits;
+       int ret = 0;
+
        if (!S_ISREG(inode->i_mode))
                return -EOPNOTSUPP;
 
-       if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-               return ext4_ind_punch_hole(file, offset, length);
-
-       if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
+       if (EXT4_SB(sb)->s_cluster_ratio > 1) {
                /* TODO: Add support for bigalloc file systems */
                return -EOPNOTSUPP;
        }
 
        trace_ext4_punch_hole(inode, offset, length);
 
-       return ext4_ext_punch_hole(file, offset, length);
+       /*
+        * Write out all dirty pages to avoid race conditions
+        * Then release them.
+        */
+       if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+               ret = filemap_write_and_wait_range(mapping, offset,
+                                                  offset + length - 1);
+               if (ret)
+                       return ret;
+       }
+
+       mutex_lock(&inode->i_mutex);
+       /* It's not possible punch hole on append only file */
+       if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
+               ret = -EPERM;
+               goto out_mutex;
+       }
+       if (IS_SWAPFILE(inode)) {
+               ret = -ETXTBSY;
+               goto out_mutex;
+       }
+
+       /* No need to punch hole beyond i_size */
+       if (offset >= inode->i_size)
+               goto out_mutex;
+
+       /*
+        * If the hole extends beyond i_size, set the hole
+        * to end after the page that contains i_size
+        */
+       if (offset + length > inode->i_size) {
+               length = inode->i_size +
+                  PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
+                  offset;
+       }
+
+       first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+       last_page = (offset + length) >> PAGE_CACHE_SHIFT;
+
+       first_page_offset = first_page << PAGE_CACHE_SHIFT;
+       last_page_offset = last_page << PAGE_CACHE_SHIFT;
+
+       /* Now release the pages */
+       if (last_page_offset > first_page_offset) {
+               truncate_pagecache_range(inode, first_page_offset,
+                                        last_page_offset - 1);
+       }
+
+       /* Wait all existing dio workers, newcomers will block on i_mutex */
+       ext4_inode_block_unlocked_dio(inode);
+       ret = ext4_flush_unwritten_io(inode);
+       if (ret)
+               goto out_dio;
+       inode_dio_wait(inode);
+
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+               credits = ext4_writepage_trans_blocks(inode);
+       else
+               credits = ext4_blocks_for_truncate(inode);
+       handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
+       if (IS_ERR(handle)) {
+               ret = PTR_ERR(handle);
+               ext4_std_error(sb, ret);
+               goto out_dio;
+       }
+
+       /*
+        * Now we need to zero out the non-page-aligned data in the
+        * pages at the start and tail of the hole, and unmap the
+        * buffer heads for the block aligned regions of the page that
+        * were completely zeroed.
+        */
+       if (first_page > last_page) {
+               /*
+                * If the file space being truncated is contained
+                * within a page just zero out and unmap the middle of
+                * that page
+                */
+               ret = ext4_discard_partial_page_buffers(handle,
+                       mapping, offset, length, 0);
+
+               if (ret)
+                       goto out_stop;
+       } else {
+               /*
+                * zero out and unmap the partial page that contains
+                * the start of the hole
+                */
+               page_len = first_page_offset - offset;
+               if (page_len > 0) {
+                       ret = ext4_discard_partial_page_buffers(handle, mapping,
+                                               offset, page_len, 0);
+                       if (ret)
+                               goto out_stop;
+               }
+
+               /*
+                * zero out and unmap the partial page that contains
+                * the end of the hole
+                */
+               page_len = offset + length - last_page_offset;
+               if (page_len > 0) {
+                       ret = ext4_discard_partial_page_buffers(handle, mapping,
+                                       last_page_offset, page_len, 0);
+                       if (ret)
+                               goto out_stop;
+               }
+       }
+
+       /*
+        * If i_size is contained in the last page, we need to
+        * unmap and zero the partial page after i_size
+        */
+       if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
+          inode->i_size % PAGE_CACHE_SIZE != 0) {
+               page_len = PAGE_CACHE_SIZE -
+                       (inode->i_size & (PAGE_CACHE_SIZE - 1));
+
+               if (page_len > 0) {
+                       ret = ext4_discard_partial_page_buffers(handle,
+                                       mapping, inode->i_size, page_len, 0);
+
+                       if (ret)
+                               goto out_stop;
+               }
+       }
+
+       first_block = (offset + sb->s_blocksize - 1) >>
+               EXT4_BLOCK_SIZE_BITS(sb);
+       stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
+
+       /* If there are no blocks to remove, return now */
+       if (first_block >= stop_block)
+               goto out_stop;
+
+       down_write(&EXT4_I(inode)->i_data_sem);
+       ext4_discard_preallocations(inode);
+
+       ret = ext4_es_remove_extent(inode, first_block,
+                                   stop_block - first_block);
+       if (ret) {
+               up_write(&EXT4_I(inode)->i_data_sem);
+               goto out_stop;
+       }
+
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+               ret = ext4_ext_remove_space(inode, first_block,
+                                           stop_block - 1);
+       else
+               ret = ext4_free_hole_blocks(handle, inode, first_block,
+                                           stop_block);
+
+       ext4_discard_preallocations(inode);
+       if (IS_SYNC(inode))
+               ext4_handle_sync(handle);
+       up_write(&EXT4_I(inode)->i_data_sem);
+       inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+       ext4_mark_inode_dirty(handle, inode);
+out_stop:
+       ext4_journal_stop(handle);
+out_dio:
+       ext4_inode_resume_unlocked_dio(inode);
+out_mutex:
+       mutex_unlock(&inode->i_mutex);
+       return ret;
 }
 
 /*