ext4: add punching hole support for non-extent-mapped files
authorZheng Liu <wenqing.lz@taobao.com>
Mon, 28 Jan 2013 14:21:37 +0000 (09:21 -0500)
committerTheodore Ts'o <tytso@mit.edu>
Mon, 28 Jan 2013 14:21:37 +0000 (09:21 -0500)
This patch adds hole-punching support for indirect-mapped (non-extent)
files.  It is almost the same as ext4_ext_punch_hole.  First, we
invalidate all pages within the hole, and then we try to deallocate all
blocks that lie within the hole.

A recursive function is used to handle deallocation of blocks.  This
function iterates over the entries in the inode's i_data array or in an
indirect block, and tries to free the block behind each one of them.

After applying this patch, xfstest #255 will not pass on files without
extents, because indirect-mapped files do not support unwritten extents.

Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/indirect.c
fs/ext4/inode.c

index 80246237f6d53c16d131a7a4879e6b6526c01dce..ca9294f6b73084770ecf966ed52b9696e9ac2c22 100644 (file)
@@ -2103,6 +2103,7 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
 extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
 extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk);
 extern void ext4_ind_truncate(struct inode *inode);
+extern int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length);
 
 /* ioctl.c */
 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
index 391e53a52e58f12e6a53cfdf94a2af8e74223305..566c8f3789e10401e54309fa9295670b78450e6f 100644 (file)
@@ -4400,13 +4400,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        struct ext4_map_blocks map;
        unsigned int credits, blkbits = inode->i_blkbits;
 
-       /*
-        * currently supporting (pre)allocate mode for extent-based
-        * files _only_
-        */
-       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
-               return -EOPNOTSUPP;
-
        /* Return error if mode is not supported */
        if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
                return -EOPNOTSUPP;
@@ -4418,6 +4411,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (ret)
                return ret;
 
+       /*
+        * currently supporting (pre)allocate mode for extent-based
+        * files _only_
+        */
+       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
+               return -EOPNOTSUPP;
+
        trace_ext4_fallocate_enter(inode, offset, len, mode);
        map.m_lblk = offset >> blkbits;
        /*
index 8d83d1e508e4fab8441c4bb09d68f4df503528a2..bdd20231e66c51cbe2eafc0624c7c58eff51fb79 100644 (file)
@@ -1518,3 +1518,243 @@ out_stop:
        trace_ext4_truncate_exit(inode);
 }
 
+/*
+ * free_hole_blocks - release the blocks of a hole that are reachable from
+ * one array of block pointers
+ * @handle:    journal handle handed on to ext4_free_data()
+ * @inode:     inode the hole is punched in
+ * @parent_bh: buffer that holds @i_data, or NULL when @i_data is not backed
+ *             by a buffer
+ * @i_data:    first of @max on-disk (little-endian) block pointers to scan
+ * @level:     0 when the entries point at data blocks, otherwise the number
+ *             of indirection levels below each entry
+ * @first:     first logical block of the hole, relative to entry 0
+ * @count:     number of logical blocks in the hole
+ * @max:       number of entries in @i_data
+ *
+ * Every entry covers inc = (block size / 4) ^ @level logical blocks.  Entries
+ * that are zero or that end at or before @first are skipped, and the scan
+ * stops at the first entry that starts at or after @first + @count.  For
+ * @level > 0 the indirect block is read and processed recursively; the
+ * indirect block itself is freed only when it contains nothing but zeroes
+ * afterwards.
+ *
+ * Returns 0 on success, -EIO when an indirect block cannot be read, or the
+ * error returned by a recursive call.
+ */
+static int free_hole_blocks(handle_t *handle, struct inode *inode,
+                           struct buffer_head *parent_bh, __le32 *i_data,
+                           int level, ext4_lblk_t first,
+                           ext4_lblk_t count, int max)
+{
+       struct buffer_head *bh = NULL;
+       int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+       int ret = 0;
+       int i, inc;
+       ext4_lblk_t offset;
+       __le32 blk;
+
+       inc = 1 << ((EXT4_BLOCK_SIZE_BITS(inode->i_sb) - 2) * level);
+       for (i = 0, offset = 0; i < max; i++, i_data++, offset += inc) {
+               if (offset >= count + first)
+                       break;
+               if (*i_data == 0 || (offset + inc) <= first)
+                       continue;
+               blk = *i_data;
+               if (level > 0) {
+                       ext4_lblk_t first2;
+                       ext4_lblk_t count2;
+
+                       /* blk is in on-disk byte order; convert before use */
+                       bh = sb_bread(inode->i_sb, le32_to_cpu(blk));
+                       if (!bh) {
+                               EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk),
+                                                      "Read failure");
+                               return -EIO;
+                       }
+                       /*
+                        * Translate the hole into the child's coordinates.
+                        * When the hole starts inside this entry its length
+                        * is unchanged; otherwise drop the part of the hole
+                        * that lies before this entry.  offset is below
+                        * first + count here, so count2 cannot underflow.
+                        */
+                       if (first > offset) {
+                               first2 = first - offset;
+                               count2 = count;
+                       } else {
+                               first2 = 0;
+                               count2 = count - (offset - first);
+                       }
+                       ret = free_hole_blocks(handle, inode, bh,
+                                              (__le32 *)bh->b_data, level - 1,
+                                              first2, count2,
+                                              inode->i_sb->s_blocksize >> 2);
+                       if (ret) {
+                               brelse(bh);
+                               goto err;
+                       }
+               }
+               /*
+                * Data blocks are always freed; an indirect block is freed
+                * only once every pointer in it has been cleared.
+                */
+               if (level == 0 ||
+                   (bh && all_zeroes((__le32 *)bh->b_data,
+                                     (__le32 *)bh->b_data + addr_per_block))) {
+                       ext4_free_data(handle, inode, parent_bh, &blk, &blk+1);
+                       *i_data = 0;
+               }
+               brelse(bh);
+               bh = NULL;
+       }
+
+err:
+       return ret;
+}
+
+/*
+ * ext4_free_hole_blocks - free the blocks in [first, stop) of an inode
+ * @handle:    journal handle handed on to free_hole_blocks()
+ * @inode:     inode the hole is punched in
+ * @first:     first logical block of the hole
+ * @stop:      logical block just past the end of the hole
+ *
+ * Walks the four regions of the inode's i_data array in order: the
+ * EXT4_NDIR_BLOCKS direct slots (level 0), then one slot each for levels
+ * 1, 2 and 3.  @first is kept relative to the start of the region currently
+ * examined, and the remaining length is reduced by whatever part of the
+ * hole the region covered.
+ *
+ * Returns 0 on success or the error returned by free_hole_blocks().
+ */
+static int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
+                                ext4_lblk_t first, ext4_lblk_t stop)
+{
+       int per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+       __le32 *slot = EXT4_I(inode)->i_data;
+       ext4_lblk_t span = EXT4_NDIR_BLOCKS;
+       ext4_lblk_t remaining = stop - first;
+       int nslots = EXT4_NDIR_BLOCKS;
+       int depth;
+       int rc;
+
+       for (depth = 0; depth < 4; depth++) {
+               if (first >= span) {
+                       /* Hole starts beyond this region; rebase and go on */
+                       first -= span;
+               } else {
+                       rc = free_hole_blocks(handle, inode, NULL, slot,
+                                             depth, first, remaining, nslots);
+                       if (rc)
+                               return rc;
+                       /* Done when the hole ends inside this region */
+                       if (remaining <= span - first)
+                               return 0;
+                       remaining -= span - first;
+                       first = 0;
+               }
+
+               slot += nslots;
+               if (depth == 0) {
+                       /* Each later region is one slot in i_data */
+                       nslots = 1;
+                       span = 1;
+               }
+               span *= per_block;
+       }
+
+       return 0;
+}
+
+/*
+ * ext4_ind_punch_hole - punch a hole in an indirect-mapped (non-extent) file
+ * @file:      file to punch the hole in
+ * @offset:    byte offset at which the hole starts
+ * @length:    length of the hole in bytes
+ *
+ * Writes back dirty pages in the range, drops the pages that lie completely
+ * inside the hole from the page cache, zeroes the partial pages at both
+ * edges and finally frees every block that lies completely inside the hole.
+ *
+ * Returns 0 on success (also when @offset is at or beyond i_size, in which
+ * case nothing is done), -EPERM for append-only or immutable inodes,
+ * -ETXTBSY for swap files, or the error reported by one of the helpers.
+ */
+int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length)
+{
+       struct inode *inode = file->f_path.dentry->d_inode;
+       struct super_block *sb = inode->i_sb;
+       ext4_lblk_t first_block, stop_block;
+       struct address_space *mapping = inode->i_mapping;
+       handle_t *handle = NULL;
+       loff_t first_page, last_page, page_len;
+       loff_t first_page_offset, last_page_offset;
+       int err = 0;
+
+       /*
+        * Write out all dirty pages to avoid race conditions
+        * Then release them.
+        */
+       if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+               err = filemap_write_and_wait_range(mapping,
+                       offset, offset + length - 1);
+               if (err)
+                       return err;
+       }
+
+       mutex_lock(&inode->i_mutex);
+       /* Punching a hole is not allowed on append-only or immutable files */
+       if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
+               err = -EPERM;
+               goto out_mutex;
+       }
+       if (IS_SWAPFILE(inode)) {
+               err = -ETXTBSY;
+               goto out_mutex;
+       }
+
+       /* No need to punch hole beyond i_size */
+       if (offset >= inode->i_size)
+               goto out_mutex;
+
+       /*
+        * If the hole extends beyond i_size, set the hole
+        * to end after the page that contains i_size
+        */
+       if (offset + length > inode->i_size) {
+               length = inode->i_size +
+                   PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
+                   offset;
+       }
+
+       /* First and one-past-last page lying completely inside the hole */
+       first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+       last_page = (offset + length) >> PAGE_CACHE_SHIFT;
+
+       first_page_offset = first_page << PAGE_CACHE_SHIFT;
+       last_page_offset = last_page << PAGE_CACHE_SHIFT;
+
+       /* Now release the pages */
+       if (last_page_offset > first_page_offset) {
+               truncate_pagecache_range(inode, first_page_offset,
+                                        last_page_offset - 1);
+       }
+
+       /* Wait all existing dio works, newcomers will block on i_mutex */
+       inode_dio_wait(inode);
+
+       handle = start_transaction(inode);
+       if (IS_ERR(handle)) {
+               /* Report the failure instead of returning success */
+               err = PTR_ERR(handle);
+               goto out_mutex;
+       }
+
+       /*
+        * Now we need to zero out the non-page-aligned data in the
+        * pages at the start and tail of the hole, and unmap the buffer
+        * heads for the block aligned regions of the page that were
+        * completely zeroed.
+        */
+       if (first_page > last_page) {
+               /*
+                * If the file space being truncated is contained within a page
+                * just zero out and unmap the middle of that page
+                */
+               err = ext4_discard_partial_page_buffers(handle,
+                       mapping, offset, length, 0);
+               if (err)
+                       goto out;
+       } else {
+               /*
+                * Zero out and unmap the partial page that contains
+                * the start of the hole
+                */
+               page_len = first_page_offset - offset;
+               if (page_len > 0) {
+                       err = ext4_discard_partial_page_buffers(handle, mapping,
+                                                       offset, page_len, 0);
+                       if (err)
+                               goto out;
+               }
+
+               /*
+                * Zero out and unmap the partial page that contains
+                * the end of the hole
+                */
+               page_len = offset + length - last_page_offset;
+               if (page_len > 0) {
+                       err = ext4_discard_partial_page_buffers(handle, mapping,
+                                               last_page_offset, page_len, 0);
+                       if (err)
+                               goto out;
+               }
+       }
+
+       /*
+        * If i_size is contained in the last page, we need to
+        * unmap and zero the partial page after i_size
+        */
+       if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
+           inode->i_size % PAGE_CACHE_SIZE != 0) {
+               page_len = PAGE_CACHE_SIZE -
+                       (inode->i_size & (PAGE_CACHE_SIZE - 1));
+               if (page_len > 0) {
+                       err = ext4_discard_partial_page_buffers(handle,
+                               mapping, inode->i_size, page_len, 0);
+                       if (err)
+                               goto out;
+               }
+       }
+
+       /* Only blocks lying completely inside the hole are freed */
+       first_block = (offset + sb->s_blocksize - 1) >>
+               EXT4_BLOCK_SIZE_BITS(sb);
+       stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
+
+       if (first_block >= stop_block)
+               goto out;
+
+       down_write(&EXT4_I(inode)->i_data_sem);
+       ext4_discard_preallocations(inode);
+
+       err = ext4_es_remove_extent(inode, first_block,
+                                   stop_block - first_block);
+       /*
+        * Do not free blocks if the extent status entries could not be
+        * removed, and do not let the next call overwrite that error.
+        */
+       if (!err)
+               err = ext4_free_hole_blocks(handle, inode, first_block,
+                                           stop_block);
+
+       ext4_discard_preallocations(inode);
+
+       if (IS_SYNC(inode))
+               ext4_handle_sync(handle);
+
+       up_write(&EXT4_I(inode)->i_data_sem);
+
+out:
+       inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+       ext4_mark_inode_dirty(handle, inode);
+       ext4_journal_stop(handle);
+
+out_mutex:
+       mutex_unlock(&inode->i_mutex);
+
+       return err;
+}
index 5abf89c100b92d7c701cc86294d0adba06159da4..80683bf0df1e3ce39d2bc929057e98c6ae856352 100644 (file)
@@ -3557,10 +3557,8 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
        if (!S_ISREG(inode->i_mode))
                return -EOPNOTSUPP;
 
-       if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
-               /* TODO: Add support for non extent hole punching */
-               return -EOPNOTSUPP;
-       }
+       if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+               return ext4_ind_punch_hole(file, offset, length);
 
        if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
                /* TODO: Add support for bigalloc file systems */