Btrfs: place ordered operations on a per transaction list
authorJosef Bacik <jbacik@fusionio.com>
Wed, 13 Feb 2013 16:09:14 +0000 (11:09 -0500)
committerJosef Bacik <jbacik@fusionio.com>
Wed, 20 Feb 2013 17:59:57 +0000 (12:59 -0500)
Miao made the ordered operations stuff run async, which introduced a
deadlock where we could get somebody (sync) racing in and committing the
transaction while a commit was already happening.  The new committer would
try and flush ordered operations which would hang waiting for the commit to
finish because it is done asynchronously and no longer inherits the callers
trans handle.  To fix this we need to make the ordered operations list a per
transaction list.  We can get new inodes added to the ordered operation list
by truncating them and then having another process writing to them, so this
makes it so that anybody trying to add an ordered operation _must_ start a
transaction in order to add itself to the list, which will keep new inodes
from getting added to the ordered operations list after we start committing.
This should fix the deadlock and also keeps us from doing a lot more work
than we need to during commit.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fusionio.com>
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/file.c
fs/btrfs/ordered-data.c
fs/btrfs/ordered-data.h
fs/btrfs/transaction.c
fs/btrfs/transaction.h

index 14f01dc70ea63d7d62ddb5bbec6444662f9a02b9..961ff2986341a777d2cf589202f833d70bf2f7f1 100644 (file)
@@ -1407,13 +1407,6 @@ struct btrfs_fs_info {
         */
        struct list_head delalloc_inodes;
 
-       /*
-        * special rename and truncate targets that must be on disk before
-        * we're allowed to commit.  This is basically the ext3 style
-        * data=ordered list.
-        */
-       struct list_head ordered_operations;
-
        /*
         * there is a pool of worker threads for checksumming during writes
         * and a pool for checksumming after reads.  This is because readers
index 9fcce54ecde451aa3ace778d1834e085b93b1945..e511d9f78c193cf57445d10d1e45f03b61685398 100644 (file)
@@ -56,7 +56,8 @@ static void end_workqueue_fn(struct btrfs_work *work);
 static void free_fs_root(struct btrfs_root *root);
 static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
                                    int read_only);
-static void btrfs_destroy_ordered_operations(struct btrfs_root *root);
+static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
+                                            struct btrfs_root *root);
 static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
 static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
                                      struct btrfs_root *root);
@@ -2029,7 +2030,6 @@ int open_ctree(struct super_block *sb,
        INIT_LIST_HEAD(&fs_info->dead_roots);
        INIT_LIST_HEAD(&fs_info->delayed_iputs);
        INIT_LIST_HEAD(&fs_info->delalloc_inodes);
-       INIT_LIST_HEAD(&fs_info->ordered_operations);
        INIT_LIST_HEAD(&fs_info->caching_block_groups);
        spin_lock_init(&fs_info->delalloc_lock);
        spin_lock_init(&fs_info->trans_lock);
@@ -3538,7 +3538,8 @@ void btrfs_error_commit_super(struct btrfs_root *root)
        btrfs_cleanup_transaction(root);
 }
 
-static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
+static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
+                                            struct btrfs_root *root)
 {
        struct btrfs_inode *btrfs_inode;
        struct list_head splice;
@@ -3548,7 +3549,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
        mutex_lock(&root->fs_info->ordered_operations_mutex);
        spin_lock(&root->fs_info->ordered_extent_lock);
 
-       list_splice_init(&root->fs_info->ordered_operations, &splice);
+       list_splice_init(&t->ordered_operations, &splice);
        while (!list_empty(&splice)) {
                btrfs_inode = list_entry(splice.next, struct btrfs_inode,
                                         ordered_operations);
@@ -3829,7 +3830,7 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
        while (!list_empty(&list)) {
                t = list_entry(list.next, struct btrfs_transaction, list);
 
-               btrfs_destroy_ordered_operations(root);
+               btrfs_destroy_ordered_operations(t, root);
 
                btrfs_destroy_ordered_extents(root);
 
index 75d0fe134be31cb6b23e9a0091263cd993a1f48c..b12ba52c45055a72a66c25bd87c0970c99bc1d95 100644 (file)
@@ -1628,7 +1628,20 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
         */
        if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
                               &BTRFS_I(inode)->runtime_flags)) {
-               btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode);
+               struct btrfs_trans_handle *trans;
+               struct btrfs_root *root = BTRFS_I(inode)->root;
+
+               /*
+                * We need to block on a committing transaction to keep us from
+                * throwing a ordered operation on to the list and causing
+                * something like sync to deadlock trying to flush out this
+                * inode.
+                */
+               trans = btrfs_start_transaction(root, 0);
+               if (IS_ERR(trans))
+                       return PTR_ERR(trans);
+               btrfs_add_ordered_operation(trans, BTRFS_I(inode)->root, inode);
+               btrfs_end_transaction(trans, root);
                if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
                        filemap_flush(inode->i_mapping);
        }
index 9489fa96e3edd254d716d9708b8ee836237c1d1b..dc08d77b717ea47f0eb7d43e351153f556c75eaa 100644 (file)
@@ -612,10 +612,12 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
  * extra check to make sure the ordered operation list really is empty
  * before we return
  */
-int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
+int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
+                                struct btrfs_root *root, int wait)
 {
        struct btrfs_inode *btrfs_inode;
        struct inode *inode;
+       struct btrfs_transaction *cur_trans = trans->transaction;
        struct list_head splice;
        struct list_head works;
        struct btrfs_delalloc_work *work, *next;
@@ -626,7 +628,7 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
 
        mutex_lock(&root->fs_info->ordered_operations_mutex);
        spin_lock(&root->fs_info->ordered_extent_lock);
-       list_splice_init(&root->fs_info->ordered_operations, &splice);
+       list_splice_init(&cur_trans->ordered_operations, &splice);
        while (!list_empty(&splice)) {
                btrfs_inode = list_entry(splice.next, struct btrfs_inode,
                                   ordered_operations);
@@ -643,7 +645,7 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
 
                if (!wait)
                        list_add_tail(&BTRFS_I(inode)->ordered_operations,
-                                     &root->fs_info->ordered_operations);
+                                     &cur_trans->ordered_operations);
                spin_unlock(&root->fs_info->ordered_extent_lock);
 
                work = btrfs_alloc_delalloc_work(inode, wait, 1);
@@ -653,7 +655,7 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
                                list_add_tail(&btrfs_inode->ordered_operations,
                                              &splice);
                        list_splice_tail(&splice,
-                                        &root->fs_info->ordered_operations);
+                                        &cur_trans->ordered_operations);
                        spin_unlock(&root->fs_info->ordered_extent_lock);
                        ret = -ENOMEM;
                        goto out;
@@ -1033,6 +1035,7 @@ out:
 void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root, struct inode *inode)
 {
+       struct btrfs_transaction *cur_trans = trans->transaction;
        u64 last_mod;
 
        last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans);
@@ -1047,7 +1050,7 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
        spin_lock(&root->fs_info->ordered_extent_lock);
        if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
                list_add_tail(&BTRFS_I(inode)->ordered_operations,
-                             &root->fs_info->ordered_operations);
+                             &cur_trans->ordered_operations);
        }
        spin_unlock(&root->fs_info->ordered_extent_lock);
 }
index d523dbd2314dd0ca68fa64b5a8cf308d8452dbfb..267ac99095f63fe6a6855f6f97f97c40d0e7e501 100644 (file)
@@ -197,7 +197,8 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
 int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
                                struct btrfs_ordered_extent *ordered);
 int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
-int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
+int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
+                                struct btrfs_root *root, int wait);
 void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
                                 struct inode *inode);
index d574d830a1c4b8ee9d19c67716006bcf0ddd88f8..0c87d18d18814ec2424997778a3d520b3f16de4b 100644 (file)
@@ -157,6 +157,7 @@ loop:
        spin_lock_init(&cur_trans->delayed_refs.lock);
 
        INIT_LIST_HEAD(&cur_trans->pending_snapshots);
+       INIT_LIST_HEAD(&cur_trans->ordered_operations);
        list_add_tail(&cur_trans->list, &fs_info->trans_list);
        extent_io_tree_init(&cur_trans->dirty_pages,
                             fs_info->btree_inode->i_mapping);
@@ -1456,7 +1457,7 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
         * it here and no for sure that nothing new will be added
         * to the list
         */
-       ret = btrfs_run_ordered_operations(root, 1);
+       ret = btrfs_run_ordered_operations(trans, root, 1);
 
        return ret;
 }
@@ -1479,7 +1480,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        int should_grow = 0;
        unsigned long now = get_seconds();
 
-       ret = btrfs_run_ordered_operations(root, 0);
+       ret = btrfs_run_ordered_operations(trans, root, 0);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                btrfs_end_transaction(trans, root);
index 46628210e5d822a9517adcfda5c01e3e6b5f43e3..3f772fd0191ac1a4ef63c2b33660b4fc39dd4a0b 100644 (file)
@@ -43,6 +43,7 @@ struct btrfs_transaction {
        wait_queue_head_t writer_wait;
        wait_queue_head_t commit_wait;
        struct list_head pending_snapshots;
+       struct list_head ordered_operations;
        struct btrfs_delayed_ref_root delayed_refs;
        int aborted;
 };