Btrfs: delay commits during fsync to allow more writers
author Josef Bacik <jwhiter@redhat.com>
Fri, 10 Aug 2007 20:22:09 +0000 (16:22 -0400)
committer David Woodhouse <dwmw2@hera.kernel.org>
Fri, 10 Aug 2007 20:22:09 +0000 (16:22 -0400)
Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/btrfs_inode.h
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h

index 6bce46be49ffb0dbc5a50630287a7f39d808bca8..f446f161f73645df52d577ca21f9472e078fc4b4 100644 (file)
@@ -25,6 +25,11 @@ struct btrfs_inode {
        struct btrfs_block_group_cache *block_group;
        struct btrfs_key location;
        struct inode vfs_inode;
+
+       /*
+        * transid of the trans_handle that last modified this inode
+        */
+       u64 last_trans;
 };
 static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
 {
index d3cd564b3b3f3af4504a1b3d1cc71475fd4256d5..c7f5161271d39e050c8d78deeece1f5b960fddb8 100644 (file)
@@ -301,6 +301,7 @@ struct btrfs_fs_info {
        struct radix_tree_root extent_map_radix;
        struct radix_tree_root extent_ins_radix;
        u64 generation;
+       u64 last_trans_committed;
        struct btrfs_transaction *running_transaction;
        struct btrfs_super_block *disk_super;
        struct btrfs_super_block super_copy;
index 192211274ce5cf692e18688430c65ad88eff5bdd..b2f79878d51a05fb5b934910dd72e56d622797f6 100644 (file)
@@ -435,6 +435,7 @@ struct btrfs_root *open_ctree(struct super_block *sb)
        INIT_LIST_HEAD(&fs_info->dead_roots);
        sb_set_blocksize(sb, 4096);
        fs_info->running_transaction = NULL;
+       fs_info->last_trans_committed = 0;
        fs_info->tree_root = tree_root;
        fs_info->extent_root = extent_root;
        fs_info->sb = sb;
index 00b118a2db6925ba0810a461bd484dfa7a855391..6933ab11a5cdde67e081cd4e42df3915bcb76e0d 100644 (file)
@@ -694,22 +694,36 @@ static int btrfs_sync_file(struct file *file,
 {
        struct inode *inode = dentry->d_inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       int ret;
+       int ret = 0;
        struct btrfs_trans_handle *trans;
 
        /*
-        * FIXME, use inode generation number to check if we can skip the
-        * commit
+        * check the transaction that last modified this inode
+        * and see if it's already been committed
         */
        mutex_lock(&root->fs_info->fs_mutex);
+       if (!BTRFS_I(inode)->last_trans)
+               goto out;
+       mutex_lock(&root->fs_info->trans_mutex);
+       if (BTRFS_I(inode)->last_trans <=
+           root->fs_info->last_trans_committed) {
+               BTRFS_I(inode)->last_trans = 0;
+               mutex_unlock(&root->fs_info->trans_mutex);
+               goto out;
+       }
+       mutex_unlock(&root->fs_info->trans_mutex);
+
+       /*
+        * ok we haven't committed the transaction yet, lets do a commit
+        */
        trans = btrfs_start_transaction(root, 1);
        if (!trans) {
                ret = -ENOMEM;
                goto out;
        }
        ret = btrfs_commit_transaction(trans, root);
-       mutex_unlock(&root->fs_info->fs_mutex);
 out:
+       mutex_unlock(&root->fs_info->fs_mutex);
        return ret > 0 ? EIO : ret;
 }
 
index 5c05ecbc5726e79e2716faddd8aa35d31534f6b2..398484179d82ea39e137baf20be1af3a9f06516f 100644 (file)
@@ -193,6 +193,7 @@ static int btrfs_update_inode(struct btrfs_trans_handle *trans,
 
        fill_inode_item(inode_item, inode);
        btrfs_mark_buffer_dirty(path->nodes[0]);
+       btrfs_set_inode_last_trans(trans, inode);
        ret = 0;
 failed:
        btrfs_release_path(root, path);
@@ -2234,6 +2235,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
        if (!ei)
                return NULL;
+       ei->last_trans = 0;
        return &ei->vfs_inode;
 }
 
index c9d52dc83e4854e5d9c5f1d6a33c2f91b67f56e1..18abea8027943023c0756e8d36d1ee30b70a5c30 100644 (file)
@@ -55,7 +55,8 @@ static int join_transaction(struct btrfs_root *root)
                BUG_ON(!cur_trans);
                root->fs_info->generation++;
                root->fs_info->running_transaction = cur_trans;
-               cur_trans->num_writers = 0;
+               cur_trans->num_writers = 1;
+               cur_trans->num_joined = 0;
                cur_trans->transid = root->fs_info->generation;
                init_waitqueue_head(&cur_trans->writer_wait);
                init_waitqueue_head(&cur_trans->commit_wait);
@@ -65,8 +66,11 @@ static int join_transaction(struct btrfs_root *root)
                cur_trans->start_time = get_seconds();
                list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
                init_bit_radix(&cur_trans->dirty_pages);
+       } else {
+               cur_trans->num_writers++;
+               cur_trans->num_joined++;
        }
-       cur_trans->num_writers++;
+
        return 0;
 }
 
@@ -428,12 +432,14 @@ static int drop_dirty_roots(struct btrfs_root *tree_root,
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root)
 {
-       int ret = 0;
+       unsigned long joined = 0;
+       unsigned long timeout = 1;
        struct btrfs_transaction *cur_trans;
        struct btrfs_transaction *prev_trans = NULL;
        struct list_head dirty_fs_roots;
        struct radix_tree_root pinned_copy;
        DEFINE_WAIT(wait);
+       int ret;
 
        init_bit_radix(&pinned_copy);
        INIT_LIST_HEAD(&dirty_fs_roots);
@@ -448,7 +454,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                mutex_unlock(&root->fs_info->fs_mutex);
                ret = wait_for_commit(root, cur_trans);
                BUG_ON(ret);
+
+               mutex_lock(&root->fs_info->trans_mutex);
                put_transaction(cur_trans);
+               mutex_unlock(&root->fs_info->trans_mutex);
+
                mutex_lock(&root->fs_info->fs_mutex);
                return 0;
        }
@@ -463,26 +473,35 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                        mutex_unlock(&root->fs_info->trans_mutex);
 
                        wait_for_commit(root, prev_trans);
-                       put_transaction(prev_trans);
 
                        mutex_lock(&root->fs_info->fs_mutex);
                        mutex_lock(&root->fs_info->trans_mutex);
+                       put_transaction(prev_trans);
                }
        }
-       while (trans->transaction->num_writers > 1) {
+
+       do {
+               joined = cur_trans->num_joined;
                WARN_ON(cur_trans != trans->transaction);
-               prepare_to_wait(&trans->transaction->writer_wait, &wait,
+               prepare_to_wait(&cur_trans->writer_wait, &wait,
                                TASK_UNINTERRUPTIBLE);
-               if (trans->transaction->num_writers <= 1)
-                       break;
+
+               if (cur_trans->num_writers > 1)
+                       timeout = MAX_SCHEDULE_TIMEOUT;
+               else
+                       timeout = 1;
+
                mutex_unlock(&root->fs_info->fs_mutex);
                mutex_unlock(&root->fs_info->trans_mutex);
-               schedule();
+
+               schedule_timeout(timeout);
+
                mutex_lock(&root->fs_info->fs_mutex);
                mutex_lock(&root->fs_info->trans_mutex);
-               finish_wait(&trans->transaction->writer_wait, &wait);
-       }
-       finish_wait(&trans->transaction->writer_wait, &wait);
+               finish_wait(&cur_trans->writer_wait, &wait);
+       } while (cur_trans->num_writers > 1 ||
+                (cur_trans->num_joined != joined));
+
        WARN_ON(cur_trans != trans->transaction);
        ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
                              &dirty_fs_roots);
@@ -511,6 +530,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        btrfs_finish_extent_commit(trans, root, &pinned_copy);
        mutex_lock(&root->fs_info->trans_mutex);
        cur_trans->commit_done = 1;
+       root->fs_info->last_trans_committed = cur_trans->transid;
        wake_up(&cur_trans->commit_wait);
        put_transaction(cur_trans);
        put_transaction(cur_trans);
index d5f491d3757eae1ee7a04bf96483f3d3f6f7f8ed..e451783a1a4dcb93474699e7ca7a2b575daa8131 100644 (file)
@@ -23,6 +23,7 @@
 struct btrfs_transaction {
        u64 transid;
        unsigned long num_writers;
+       unsigned long num_joined;
        int in_commit;
        int use_count;
        int commit_done;
@@ -57,6 +58,12 @@ static inline void btrfs_update_inode_block_group(struct
        BTRFS_I(inode)->block_group = trans->block_group;
 }
 
+static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
+                                             struct inode *inode)
+{
+       BTRFS_I(inode)->last_trans = trans->transaction->transid;
+}
+
 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root);
 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,