Btrfs: introduce per-subvolume ordered extent list
author Miao Xie <miaox@cn.fujitsu.com>
Wed, 15 May 2013 07:48:23 +0000 (07:48 +0000)
committer Josef Bacik <jbacik@fusionio.com>
Fri, 14 Jun 2013 15:29:41 +0000 (11:29 -0400)
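The reason for introducing the per-subvolume ordered extent list is the
same as for the per-subvolume delalloc inode list.

Each root now keeps its own ordered_extents list, protected by its own
ordered_extent_lock, and fs_info->ordered_roots (protected by
ordered_root_lock) links the roots that currently have ordered extents.
Callers that need to wait on every root use the new
btrfs_wait_all_ordered_extents().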

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
fs/btrfs/ctree.h
fs/btrfs/dev-replace.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/inode.c
fs/btrfs/ordered-data.c
fs/btrfs/ordered-data.h
fs/btrfs/relocation.c
fs/btrfs/super.c
fs/btrfs/transaction.c

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 43c0735339402289e46c5068c78520c78efe14ce..905f7c6c82f37d37524c58a7992fd55e81f439f8 100644 (file)
@@ -1437,17 +1437,18 @@ struct btrfs_fs_info {
        atomic_t open_ioctl_trans;
 
        /*
-        * this is used by the balancing code to wait for all the pending
-        * ordered extents
+        * this is used to protect the following list -- ordered_roots.
         */
-       spinlock_t ordered_extent_lock;
+       spinlock_t ordered_root_lock;
 
        /*
-        * all of the data=ordered extents pending writeback
+        * all fs/file tree roots in which there are data=ordered extents
+        * pending writeback are added into this list.
+        *
         * these can span multiple transactions and basically include
         * every dirty data page that isn't from nodatacow
         */
-       struct list_head ordered_extents;
+       struct list_head ordered_roots;
 
        spinlock_t delalloc_root_lock;
        /* all fs/file tree roots that have delalloc inodes. */
@@ -1753,6 +1754,20 @@ struct btrfs_root {
        struct list_head delalloc_inodes;
        struct list_head delalloc_root;
        u64 nr_delalloc_inodes;
+       /*
+        * this is used by the balancing code to wait for all the pending
+        * ordered extents
+        */
+       spinlock_t ordered_extent_lock;
+
+       /*
+        * all of the data=ordered extents pending writeback
+        * these can span multiple transactions and basically include
+        * every dirty data page that isn't from nodatacow
+        */
+       struct list_head ordered_extents;
+       struct list_head ordered_root;
+       u64 nr_ordered_extents;
 };
 
 struct btrfs_ioctl_defrag_range_args {
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 2af312b6fb1fb24da5679c1bc2732dd0accfe969..4253ad580e391489c71c671df92450f048d29aae 100644 (file)
@@ -400,7 +400,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
        args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
        btrfs_dev_replace_unlock(dev_replace);
 
-       btrfs_wait_ordered_extents(root, 0);
+       btrfs_wait_all_ordered_extents(root->fs_info, 0);
 
        /* force writing the updated state information to disk */
        trans = btrfs_start_transaction(root, 0);
@@ -475,7 +475,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
                mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
                return ret;
        }
-       btrfs_wait_ordered_extents(root, 0);
+       btrfs_wait_all_ordered_extents(root->fs_info, 0);
 
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 2748c7ccdd5154194207b7af0dda5b15fd782226..0f873872d1f81b2b92a69c3d091d1085131bf1df 100644 (file)
@@ -1192,6 +1192,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        root->last_trans = 0;
        root->highest_objectid = 0;
        root->nr_delalloc_inodes = 0;
+       root->nr_ordered_extents = 0;
        root->name = NULL;
        root->inode_tree = RB_ROOT;
        INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
@@ -1202,11 +1203,14 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        INIT_LIST_HEAD(&root->root_list);
        INIT_LIST_HEAD(&root->delalloc_inodes);
        INIT_LIST_HEAD(&root->delalloc_root);
+       INIT_LIST_HEAD(&root->ordered_extents);
+       INIT_LIST_HEAD(&root->ordered_root);
        INIT_LIST_HEAD(&root->logged_list[0]);
        INIT_LIST_HEAD(&root->logged_list[1]);
        spin_lock_init(&root->orphan_lock);
        spin_lock_init(&root->inode_lock);
        spin_lock_init(&root->delalloc_lock);
+       spin_lock_init(&root->ordered_extent_lock);
        spin_lock_init(&root->accounting_lock);
        spin_lock_init(&root->log_extents_lock[0]);
        spin_lock_init(&root->log_extents_lock[1]);
@@ -2193,8 +2197,8 @@ int open_ctree(struct super_block *sb,
        fs_info->thread_pool_size = min_t(unsigned long,
                                          num_online_cpus() + 2, 8);
 
-       INIT_LIST_HEAD(&fs_info->ordered_extents);
-       spin_lock_init(&fs_info->ordered_extent_lock);
+       INIT_LIST_HEAD(&fs_info->ordered_roots);
+       spin_lock_init(&fs_info->ordered_root_lock);
        fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
                                        GFP_NOFS);
        if (!fs_info->delayed_root) {
@@ -3683,7 +3687,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
        INIT_LIST_HEAD(&splice);
 
        mutex_lock(&root->fs_info->ordered_operations_mutex);
-       spin_lock(&root->fs_info->ordered_extent_lock);
+       spin_lock(&root->fs_info->ordered_root_lock);
 
        list_splice_init(&t->ordered_operations, &splice);
        while (!list_empty(&splice)) {
@@ -3691,14 +3695,14 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
                                         ordered_operations);
 
                list_del_init(&btrfs_inode->ordered_operations);
-               spin_unlock(&root->fs_info->ordered_extent_lock);
+               spin_unlock(&root->fs_info->ordered_root_lock);
 
                btrfs_invalidate_inodes(btrfs_inode->root);
 
-               spin_lock(&root->fs_info->ordered_extent_lock);
+               spin_lock(&root->fs_info->ordered_root_lock);
        }
 
-       spin_unlock(&root->fs_info->ordered_extent_lock);
+       spin_unlock(&root->fs_info->ordered_root_lock);
        mutex_unlock(&root->fs_info->ordered_operations_mutex);
 }
 
@@ -3706,15 +3710,36 @@ static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
 {
        struct btrfs_ordered_extent *ordered;
 
-       spin_lock(&root->fs_info->ordered_extent_lock);
+       spin_lock(&root->ordered_extent_lock);
        /*
         * This will just short circuit the ordered completion stuff which will
         * make sure the ordered extent gets properly cleaned up.
         */
-       list_for_each_entry(ordered, &root->fs_info->ordered_extents,
+       list_for_each_entry(ordered, &root->ordered_extents,
                            root_extent_list)
                set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
-       spin_unlock(&root->fs_info->ordered_extent_lock);
+       spin_unlock(&root->ordered_extent_lock);
+}
+
+static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_root *root;
+       struct list_head splice;
+
+       INIT_LIST_HEAD(&splice);
+
+       spin_lock(&fs_info->ordered_root_lock);
+       list_splice_init(&fs_info->ordered_roots, &splice);
+       while (!list_empty(&splice)) {
+               root = list_first_entry(&splice, struct btrfs_root,
+                                       ordered_root);
+               list_del_init(&root->ordered_root);
+
+               btrfs_destroy_ordered_extents(root);
+
+               cond_resched_lock(&fs_info->ordered_root_lock);
+       }
+       spin_unlock(&fs_info->ordered_root_lock);
 }
 
 int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
@@ -3977,7 +4002,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
 
                btrfs_destroy_ordered_operations(t, root);
 
-               btrfs_destroy_ordered_extents(root);
+               btrfs_destroy_all_ordered_extents(root->fs_info);
 
                btrfs_destroy_delayed_refs(t, root);
 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f8ff06834e796bfcb1ed63654224c0ae6ecac28f..4ec8305fe0789b2d59d33ada8d112e4a6fd5ac7d 100644 (file)
@@ -3901,7 +3901,7 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
                 */
                btrfs_start_all_delalloc_inodes(root->fs_info, 0);
                if (!current->journal_info)
-                       btrfs_wait_ordered_extents(root, 0);
+                       btrfs_wait_all_ordered_extents(root->fs_info, 0);
        }
 }
 
@@ -3931,7 +3931,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
        if (delalloc_bytes == 0) {
                if (trans)
                        return;
-               btrfs_wait_ordered_extents(root, 0);
+               btrfs_wait_all_ordered_extents(root->fs_info, 0);
                return;
        }
 
@@ -3959,7 +3959,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 
                loops++;
                if (wait_ordered && !trans) {
-                       btrfs_wait_ordered_extents(root, 0);
+                       btrfs_wait_all_ordered_extents(root->fs_info, 0);
                } else {
                        time_left = schedule_timeout_killable(1);
                        if (time_left)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 18191f193b47b7f6bf60591baf2eed1081357517..51520755f4dc7b7241c1d0325d0632955fecdf30 100644 (file)
@@ -7991,9 +7991,9 @@ void btrfs_destroy_inode(struct inode *inode)
         */
        smp_mb();
        if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
-               spin_lock(&root->fs_info->ordered_extent_lock);
+               spin_lock(&root->fs_info->ordered_root_lock);
                list_del_init(&BTRFS_I(inode)->ordered_operations);
-               spin_unlock(&root->fs_info->ordered_extent_lock);
+               spin_unlock(&root->fs_info->ordered_root_lock);
        }
 
        if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 1ddd728541eea20d747c002f2890cfacd2eb19b4..665c640e3ea6448a68a0152ba843f14b8c6adfc5 100644 (file)
@@ -24,6 +24,7 @@
 #include "transaction.h"
 #include "btrfs_inode.h"
 #include "extent_io.h"
+#include "disk-io.h"
 
 static struct kmem_cache *btrfs_ordered_extent_cache;
 
@@ -184,6 +185,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
                                      u64 start, u64 len, u64 disk_len,
                                      int type, int dio, int compress_type)
 {
+       struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_ordered_inode_tree *tree;
        struct rb_node *node;
        struct btrfs_ordered_extent *entry;
@@ -227,10 +229,18 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
                ordered_data_tree_panic(inode, -EEXIST, file_offset);
        spin_unlock_irq(&tree->lock);
 
-       spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
+       spin_lock(&root->ordered_extent_lock);
        list_add_tail(&entry->root_extent_list,
-                     &BTRFS_I(inode)->root->fs_info->ordered_extents);
-       spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
+                     &root->ordered_extents);
+       root->nr_ordered_extents++;
+       if (root->nr_ordered_extents == 1) {
+               spin_lock(&root->fs_info->ordered_root_lock);
+               BUG_ON(!list_empty(&root->ordered_root));
+               list_add_tail(&root->ordered_root,
+                             &root->fs_info->ordered_roots);
+               spin_unlock(&root->fs_info->ordered_root_lock);
+       }
+       spin_unlock(&root->ordered_extent_lock);
 
        return 0;
 }
@@ -516,8 +526,9 @@ void btrfs_remove_ordered_extent(struct inode *inode,
        set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
        spin_unlock_irq(&tree->lock);
 
-       spin_lock(&root->fs_info->ordered_extent_lock);
+       spin_lock(&root->ordered_extent_lock);
        list_del_init(&entry->root_extent_list);
+       root->nr_ordered_extents--;
 
        trace_btrfs_ordered_extent_remove(inode, entry);
 
@@ -530,7 +541,14 @@ void btrfs_remove_ordered_extent(struct inode *inode,
            !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
                list_del_init(&BTRFS_I(inode)->ordered_operations);
        }
-       spin_unlock(&root->fs_info->ordered_extent_lock);
+
+       if (!root->nr_ordered_extents) {
+               spin_lock(&root->fs_info->ordered_root_lock);
+               BUG_ON(list_empty(&root->ordered_root));
+               list_del_init(&root->ordered_root);
+               spin_unlock(&root->fs_info->ordered_root_lock);
+       }
+       spin_unlock(&root->ordered_extent_lock);
        wake_up(&entry->wait);
 }
 
@@ -550,7 +568,6 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
 void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
 {
        struct list_head splice, works;
-       struct list_head *cur;
        struct btrfs_ordered_extent *ordered, *next;
        struct inode *inode;
 
@@ -558,35 +575,34 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
        INIT_LIST_HEAD(&works);
 
        mutex_lock(&root->fs_info->ordered_operations_mutex);
-       spin_lock(&root->fs_info->ordered_extent_lock);
-       list_splice_init(&root->fs_info->ordered_extents, &splice);
+       spin_lock(&root->ordered_extent_lock);
+       list_splice_init(&root->ordered_extents, &splice);
        while (!list_empty(&splice)) {
-               cur = splice.next;
-               ordered = list_entry(cur, struct btrfs_ordered_extent,
-                                    root_extent_list);
-               list_del_init(&ordered->root_extent_list);
-               atomic_inc(&ordered->refs);
-
+               ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
+                                          root_extent_list);
+               list_move_tail(&ordered->root_extent_list,
+                              &root->ordered_extents);
                /*
                 * the inode may be getting freed (in sys_unlink path).
                 */
                inode = igrab(ordered->inode);
+               if (!inode) {
+                       cond_resched_lock(&root->ordered_extent_lock);
+                       continue;
+               }
 
-               spin_unlock(&root->fs_info->ordered_extent_lock);
+               atomic_inc(&ordered->refs);
+               spin_unlock(&root->ordered_extent_lock);
 
-               if (inode) {
-                       ordered->flush_work.func = btrfs_run_ordered_extent_work;
-                       list_add_tail(&ordered->work_list, &works);
-                       btrfs_queue_worker(&root->fs_info->flush_workers,
-                                          &ordered->flush_work);
-               } else {
-                       btrfs_put_ordered_extent(ordered);
-               }
+               ordered->flush_work.func = btrfs_run_ordered_extent_work;
+               list_add_tail(&ordered->work_list, &works);
+               btrfs_queue_worker(&root->fs_info->flush_workers,
+                                  &ordered->flush_work);
 
                cond_resched();
-               spin_lock(&root->fs_info->ordered_extent_lock);
+               spin_lock(&root->ordered_extent_lock);
        }
-       spin_unlock(&root->fs_info->ordered_extent_lock);
+       spin_unlock(&root->ordered_extent_lock);
 
        list_for_each_entry_safe(ordered, next, &works, work_list) {
                list_del_init(&ordered->work_list);
@@ -604,6 +620,33 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
        mutex_unlock(&root->fs_info->ordered_operations_mutex);
 }
 
+void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info,
+                                   int delay_iput)
+{
+       struct btrfs_root *root;
+       struct list_head splice;
+
+       INIT_LIST_HEAD(&splice);
+
+       spin_lock(&fs_info->ordered_root_lock);
+       list_splice_init(&fs_info->ordered_roots, &splice);
+       while (!list_empty(&splice)) {
+               root = list_first_entry(&splice, struct btrfs_root,
+                                       ordered_root);
+               root = btrfs_grab_fs_root(root);
+               BUG_ON(!root);
+               list_move_tail(&root->ordered_root,
+                              &fs_info->ordered_roots);
+               spin_unlock(&fs_info->ordered_root_lock);
+
+               btrfs_wait_ordered_extents(root, delay_iput);
+               btrfs_put_fs_root(root);
+
+               spin_lock(&fs_info->ordered_root_lock);
+       }
+       spin_unlock(&fs_info->ordered_root_lock);
+}
+
 /*
  * this is used during transaction commit to write all the inodes
  * added to the ordered operation list.  These files must be fully on
@@ -629,7 +672,7 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
        INIT_LIST_HEAD(&works);
 
        mutex_lock(&root->fs_info->ordered_operations_mutex);
-       spin_lock(&root->fs_info->ordered_extent_lock);
+       spin_lock(&root->fs_info->ordered_root_lock);
        list_splice_init(&cur_trans->ordered_operations, &splice);
        while (!list_empty(&splice)) {
                btrfs_inode = list_entry(splice.next, struct btrfs_inode,
@@ -648,17 +691,17 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
                if (!wait)
                        list_add_tail(&BTRFS_I(inode)->ordered_operations,
                                      &cur_trans->ordered_operations);
-               spin_unlock(&root->fs_info->ordered_extent_lock);
+               spin_unlock(&root->fs_info->ordered_root_lock);
 
                work = btrfs_alloc_delalloc_work(inode, wait, 1);
                if (!work) {
-                       spin_lock(&root->fs_info->ordered_extent_lock);
+                       spin_lock(&root->fs_info->ordered_root_lock);
                        if (list_empty(&BTRFS_I(inode)->ordered_operations))
                                list_add_tail(&btrfs_inode->ordered_operations,
                                              &splice);
                        list_splice_tail(&splice,
                                         &cur_trans->ordered_operations);
-                       spin_unlock(&root->fs_info->ordered_extent_lock);
+                       spin_unlock(&root->fs_info->ordered_root_lock);
                        ret = -ENOMEM;
                        goto out;
                }
@@ -667,9 +710,9 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
                                   &work->work);
 
                cond_resched();
-               spin_lock(&root->fs_info->ordered_extent_lock);
+               spin_lock(&root->fs_info->ordered_root_lock);
        }
-       spin_unlock(&root->fs_info->ordered_extent_lock);
+       spin_unlock(&root->fs_info->ordered_root_lock);
 out:
        list_for_each_entry_safe(work, next, &works, list) {
                list_del_init(&work->list);
@@ -1055,12 +1098,12 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
        if (last_mod < root->fs_info->last_trans_committed)
                return;
 
-       spin_lock(&root->fs_info->ordered_extent_lock);
+       spin_lock(&root->fs_info->ordered_root_lock);
        if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
                list_add_tail(&BTRFS_I(inode)->ordered_operations,
                              &cur_trans->ordered_operations);
        }
-       spin_unlock(&root->fs_info->ordered_extent_lock);
+       spin_unlock(&root->fs_info->ordered_root_lock);
 }
 
 int __init ordered_data_init(void)
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 58b0e3b0ebadb633b22916f3a50015c64c24a109..d082d43e00e59c3dc0f55ff5c2097a5d67b930b7 100644 (file)
@@ -204,6 +204,8 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
                                 struct inode *inode);
 void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput);
+void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info,
+                                   int delay_iput);
 void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode);
 void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid);
 void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index f6e1b54f05d8849d63dc4eaf7cd44b84cec77596..aa559f1161df542b1ec3d27d2a37ddd2c481fc8a 100644 (file)
@@ -4164,7 +4164,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
                err = ret;
                goto out;
        }
-       btrfs_wait_ordered_extents(fs_info->tree_root, 0);
+       btrfs_wait_all_ordered_extents(fs_info, 0);
 
        while (1) {
                mutex_lock(&fs_info->cleaner_mutex);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2a6a908d00179f1345c74494345d79a54018eae0..41d81bee583dae436a8b5c70cc761685d79290ee 100644 (file)
@@ -862,7 +862,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
                return 0;
        }
 
-       btrfs_wait_ordered_extents(root, 1);
+       btrfs_wait_all_ordered_extents(fs_info, 0);
 
        trans = btrfs_attach_transaction_barrier(root);
        if (IS_ERR(trans)) {
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 4b6311181412362074a16fcd10effee037039753..2b17213571a0c7f1e9ae3f2b812b4acdade26705 100644 (file)
@@ -1505,7 +1505,7 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
                ret = btrfs_start_all_delalloc_inodes(root->fs_info, 1);
                if (ret)
                        return ret;
-               btrfs_wait_ordered_extents(root, 1);
+               btrfs_wait_all_ordered_extents(root->fs_info, 1);
        }
 
        ret = btrfs_run_delayed_items(trans, root);