ext4: fix transaction issues for ext4_fallocate and ext_zero_range
authorDmitry Monakhov <dmonakhov@openvz.org>
Wed, 27 Aug 2014 22:40:00 +0000 (18:40 -0400)
committerTheodore Ts'o <tytso@mit.edu>
Wed, 27 Aug 2014 22:40:00 +0000 (18:40 -0400)
After commit f282ac19d86f we use different transactions for
preallocation and i_disksize update which result in complain from fsck
after power-failure.  spotted by generic/019. IMHO this is regression
because fs becomes inconsistent, even more 'e2fsck -p' will no longer
works (which drives admins go crazy) Same transaction requirement
applies ctime,mtime updates

testcase: xfstest generic/019

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
fs/ext4/extents.c

index 0e9de2328bd20d373024eef6aead582805b0c6c6..74292a71b384e2fd480357a8115c5e4936fa6f20 100644 (file)
@@ -4665,7 +4665,8 @@ retry:
 }
 
 static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
-                                 ext4_lblk_t len, int flags, int mode)
+                                 ext4_lblk_t len, loff_t new_size,
+                                 int flags, int mode)
 {
        struct inode *inode = file_inode(file);
        handle_t *handle;
@@ -4674,8 +4675,10 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
        int retries = 0;
        struct ext4_map_blocks map;
        unsigned int credits;
+       loff_t epos;
 
        map.m_lblk = offset;
+       map.m_len = len;
        /*
         * Don't normalize the request if it can fit in one extent so
         * that it doesn't get unnecessarily split into multiple
@@ -4690,9 +4693,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
        credits = ext4_chunk_trans_blocks(inode, len);
 
 retry:
-       while (ret >= 0 && ret < len) {
-               map.m_lblk = map.m_lblk + ret;
-               map.m_len = len = len - ret;
+       while (ret >= 0 && len) {
                handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
                                            credits);
                if (IS_ERR(handle)) {
@@ -4709,6 +4710,21 @@ retry:
                        ret2 = ext4_journal_stop(handle);
                        break;
                }
+               map.m_lblk += ret;
+               map.m_len = len = len - ret;
+               epos = (loff_t)map.m_lblk << inode->i_blkbits;
+               inode->i_ctime = ext4_current_time(inode);
+               if (new_size) {
+                       if (epos > new_size)
+                               epos = new_size;
+                       if (ext4_update_inode_size(inode, epos) & 0x1)
+                               inode->i_mtime = inode->i_ctime;
+               } else {
+                       if (epos > inode->i_size)
+                               ext4_set_inode_flag(inode,
+                                                   EXT4_INODE_EOFBLOCKS);
+               }
+               ext4_mark_inode_dirty(handle, inode);
                ret2 = ext4_journal_stop(handle);
                if (ret2)
                        break;
@@ -4732,7 +4748,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        int ret = 0;
        int flags;
        int credits;
-       int partial;
+       int partial_begin, partial_end;
        loff_t start, end;
        ext4_lblk_t lblk;
        struct address_space *mapping = inode->i_mapping;
@@ -4772,7 +4788,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 
        if (start < offset || end > offset + len)
                return -EINVAL;
-       partial = (offset + len) & ((1 << blkbits) - 1);
+       partial_begin = offset & ((1 << blkbits) - 1);
+       partial_end = (offset + len) & ((1 << blkbits) - 1);
 
        lblk = start >> blkbits;
        max_blocks = (end >> blkbits);
@@ -4806,7 +4823,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                 * If we have a partial block after EOF we have to allocate
                 * the entire block.
                 */
-               if (partial)
+               if (partial_end)
                        max_blocks += 1;
        }
 
@@ -4814,6 +4831,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 
                /* Now release the pages and zero block aligned part of pages*/
                truncate_pagecache_range(inode, start, end - 1);
+               inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 
                /* Wait all existing dio workers, newcomers will block on i_mutex */
                ext4_inode_block_unlocked_dio(inode);
@@ -4826,11 +4844,14 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                if (ret)
                        goto out_dio;
 
-               ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags,
-                                            mode);
+               ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
+                                            flags, mode);
                if (ret)
                        goto out_dio;
        }
+       if (!partial_begin && !partial_end)
+               goto out_dio;
+
        /*
         * In worst case we have to writeout two nonadjacent unwritten
         * blocks and update the inode
@@ -4856,7 +4877,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                if ((offset + len) > i_size_read(inode))
                        ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
        }
-
        ext4_mark_inode_dirty(handle, inode);
 
        /* Zero out partial block at the edges of the range */
@@ -4883,7 +4903,6 @@ out_mutex:
 long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 {
        struct inode *inode = file_inode(file);
-       handle_t *handle;
        loff_t new_size = 0;
        unsigned int max_blocks;
        int ret = 0;
@@ -4939,32 +4958,15 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                        goto out;
        }
 
-       ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode);
+       ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
+                                    flags, mode);
        if (ret)
                goto out;
 
-       handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
-       if (IS_ERR(handle))
-               goto out;
-
-       inode->i_ctime = ext4_current_time(inode);
-
-       if (new_size) {
-               if (ext4_update_inode_size(inode, new_size) & 0x1)
-                       inode->i_mtime = inode->i_ctime;
-       } else {
-               /*
-               * Mark that we allocate beyond EOF so the subsequent truncate
-               * can proceed even if the new size is the same as i_size.
-               */
-               if ((offset + len) > i_size_read(inode))
-                       ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
+       if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
+               ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
+                                               EXT4_I(inode)->i_sync_tid);
        }
-       ext4_mark_inode_dirty(handle, inode);
-       if (file->f_flags & O_SYNC)
-               ext4_handle_sync(handle);
-
-       ext4_journal_stop(handle);
 out:
        mutex_unlock(&inode->i_mutex);
        trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);