Btrfs: do not call file_update_time in aio_write
author Josef Bacik <jbacik@fusionio.com>
Fri, 9 Nov 2012 15:53:21 +0000 (10:53 -0500)
committer Chris Mason <chris.mason@fusionio.com>
Mon, 17 Dec 2012 01:46:27 +0000 (20:46 -0500)
Calling file_update_time() starts a transaction and dirties the inode every
time it is called, which is super expensive if you have a write-heavy
workload.  We will be updating the inode when the IO completes, and we
reserve the space for the inode update when we reserve space for the write,
so there is no chance of loss of information or ENOSPC issues.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
fs/btrfs/file.c
fs/btrfs/inode.c

index c56088ece50041487cc9c318f515da912196b2b4..20452c110d7d4e1ce96db6b6af2f4c797c2c1e63 100644 (file)
@@ -1464,6 +1464,24 @@ out:
        return written ? written : err;
 }
 
+static void update_time_for_write(struct inode *inode)
+{
+       struct timespec now;
+
+       if (IS_NOCMTIME(inode))
+               return;
+
+       now = current_fs_time(inode->i_sb);
+       if (!timespec_equal(&inode->i_mtime, &now))
+               inode->i_mtime = now;
+
+       if (!timespec_equal(&inode->i_ctime, &now))
+               inode->i_ctime = now;
+
+       if (IS_I_VERSION(inode))
+               inode_inc_iversion(inode);
+}
+
 static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                                    const struct iovec *iov,
                                    unsigned long nr_segs, loff_t pos)
@@ -1519,11 +1537,13 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                goto out;
        }
 
-       err = file_update_time(file);
-       if (err) {
-               mutex_unlock(&inode->i_mutex);
-               goto out;
-       }
+       /*
+        * We reserve space for updating the inode when we reserve space for the
+        * extent we are going to write, so we will enospc out there.  We don't
+        * need to start yet another transaction to update the inode as we will
+        * update the inode when we finish writing whatever data we write.
+        */
+       update_time_for_write(inode);
 
        start_pos = round_down(pos, root->sectorsize);
        if (start_pos > i_size_read(inode)) {
@@ -1563,8 +1583,13 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
         * this will either be one more than the running transaction
         * or the generation used for the next transaction if there isn't
         * one running right now.
+        *
+        * We also have to set last_sub_trans to the current log transid,
+        * otherwise subsequent syncs to a file that's been synced in this
+        * transaction will appear to have already occurred.
         */
        BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
+       BTRFS_I(inode)->last_sub_trans = root->log_transid;
        if (num_written > 0 || num_written == -EIOCBQUEUED) {
                err = generic_write_sync(file, pos, num_written);
                if (err < 0 && num_written > 0)
index 355a297e79885bdbafbbd0b9bb0bd4b6bf284c85..1673dbdf1f76fd654bd66879bc6e89ef26964f14 100644 (file)
@@ -1922,22 +1922,20 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 
        if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
                BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
-               ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
-               if (!ret) {
-                       if (nolock)
-                               trans = btrfs_join_transaction_nolock(root);
-                       else
-                               trans = btrfs_join_transaction(root);
-                       if (IS_ERR(trans)) {
-                               ret = PTR_ERR(trans);
-                               trans = NULL;
-                               goto out;
-                       }
-                       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-                       ret = btrfs_update_inode_fallback(trans, root, inode);
-                       if (ret) /* -ENOMEM or corruption */
-                               btrfs_abort_transaction(trans, root, ret);
+               btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+               if (nolock)
+                       trans = btrfs_join_transaction_nolock(root);
+               else
+                       trans = btrfs_join_transaction(root);
+               if (IS_ERR(trans)) {
+                       ret = PTR_ERR(trans);
+                       trans = NULL;
+                       goto out;
                }
+               trans->block_rsv = &root->fs_info->delalloc_block_rsv;
+               ret = btrfs_update_inode_fallback(trans, root, inode);
+               if (ret) /* -ENOMEM or corruption */
+                       btrfs_abort_transaction(trans, root, ret);
                goto out;
        }
 
@@ -1986,15 +1984,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
        add_pending_csums(trans, inode, ordered_extent->file_offset,
                          &ordered_extent->list);
 
-       ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
-       if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
-               ret = btrfs_update_inode_fallback(trans, root, inode);
-               if (ret) { /* -ENOMEM or corruption */
-                       btrfs_abort_transaction(trans, root, ret);
-                       goto out_unlock;
-               }
-       } else {
-               btrfs_set_inode_last_trans(trans, inode);
+       btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+       ret = btrfs_update_inode_fallback(trans, root, inode);
+       if (ret) { /* -ENOMEM or corruption */
+               btrfs_abort_transaction(trans, root, ret);
+               goto out_unlock;
        }
        ret = 0;
 out_unlock: