Btrfs: async delayed refs
author Chris Mason <clm@fb.com>
Thu, 22 May 2014 23:18:52 +0000 (16:18 -0700)
committer Chris Mason <clm@fb.com>
Tue, 10 Jun 2014 00:20:58 +0000 (17:20 -0700)
Delayed extent operations are triggered during transaction commits.
The goal is to queue up a healthy batch of changes to the extent
allocation tree and run through them in bulk.

This farms them off to async helper threads.  The goal is to have the
bulk of the delayed operations being done in the background, but this is
also important to limit our stack footprint.

Signed-off-by: Chris Mason <clm@fb.com>
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/inode.c
fs/btrfs/transaction.c

index 4896d7a947ebfe5eb22fee7188899d3a06216dcd..02895a126ab940d1a0ac7d412f55e9799a84d573 100644 (file)
@@ -1546,6 +1546,9 @@ struct btrfs_fs_info {
         */
        struct btrfs_workqueue *fixup_workers;
        struct btrfs_workqueue *delayed_workers;
+
+       /* the extent workers do delayed refs on the extent allocation tree */
+       struct btrfs_workqueue *extent_workers;
        struct task_struct *transaction_kthread;
        struct task_struct *cleaner_kthread;
        int thread_pool_size;
@@ -3268,6 +3271,8 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, unsigned long count);
+int btrfs_async_run_delayed_refs(struct btrfs_root *root,
+                                unsigned long count, int wait);
 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root, u64 bytenr,
index 0c0fa78ef4525b1a1c8999fad619fb898ed7609d..8bb4aa19898fb2b73a039b7f3ffd61fb70dd15f0 100644 (file)
@@ -2069,6 +2069,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
        btrfs_destroy_workqueue(fs_info->readahead_workers);
        btrfs_destroy_workqueue(fs_info->flush_workers);
        btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers);
+       btrfs_destroy_workqueue(fs_info->extent_workers);
 }
 
 static void free_root_extent_buffers(struct btrfs_root *root)
@@ -2586,6 +2587,10 @@ int open_ctree(struct super_block *sb,
                btrfs_alloc_workqueue("readahead", flags, max_active, 2);
        fs_info->qgroup_rescan_workers =
                btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
+       fs_info->extent_workers =
+               btrfs_alloc_workqueue("extent-refs", flags,
+                                     min_t(u64, fs_devices->num_devices,
+                                           max_active), 8);
 
        if (!(fs_info->workers && fs_info->delalloc_workers &&
              fs_info->submit_workers && fs_info->flush_workers &&
@@ -2595,6 +2600,7 @@ int open_ctree(struct super_block *sb,
              fs_info->endio_freespace_worker && fs_info->rmw_workers &&
              fs_info->caching_workers && fs_info->readahead_workers &&
              fs_info->fixup_workers && fs_info->delayed_workers &&
+             fs_info->fixup_workers && fs_info->extent_workers &&
              fs_info->qgroup_rescan_workers)) {
                err = -ENOMEM;
                goto fail_sb_buffer;
index bb5b3067ddc3b505a5cc6b3dcf8044d4adbeabd6..6caddd5970e4b8acd52709b2d399f32484c63f4f 100644 (file)
@@ -2674,15 +2674,94 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
        u64 num_entries =
                atomic_read(&trans->transaction->delayed_refs.num_entries);
        u64 avg_runtime;
+       u64 val;
 
        smp_mb();
        avg_runtime = fs_info->avg_delayed_ref_runtime;
+       val = num_entries * avg_runtime;
        if (num_entries * avg_runtime >= NSEC_PER_SEC)
                return 1;
+       if (val >= NSEC_PER_SEC / 2)
+               return 2;
 
        return btrfs_check_space_for_delayed_refs(trans, root);
 }
 
+struct async_delayed_refs {
+       struct btrfs_root *root;        /* root the worker joins a transaction on */
+       int count;                      /* count handed to btrfs_run_delayed_refs() */
+       int error;                      /* first error hit by the worker, 0 if none */
+       int sync;                       /* nonzero: submitter waits on 'wait' and frees this */
+       struct completion wait;         /* completed by the worker when sync is set */
+       struct btrfs_work work;         /* work item queued on the extent_workers queue */
+};
+
+static void delayed_ref_async_start(struct btrfs_work *work) /* extent_workers handler */
+{
+       struct async_delayed_refs *async;
+       struct btrfs_trans_handle *trans;
+       int ret;
+
+       async = container_of(work, struct async_delayed_refs, work); /* request embedding this work item */
+
+       trans = btrfs_join_transaction(async->root);
+       if (IS_ERR(trans)) {
+               async->error = PTR_ERR(trans);
+               goto done;      /* no handle to end; still signal or free below */
+       }
+
+       /*
+        * trans->sync means that when we call end_transaction, we won't
+        * wait on delayed refs
+        */
+       trans->sync = true;
+       ret = btrfs_run_delayed_refs(trans, async->root, async->count);
+       if (ret)
+               async->error = ret;
+
+       ret = btrfs_end_transaction(trans, async->root);
+       if (ret && !async->error)       /* keep the earlier error if there was one */
+               async->error = ret;
+done:
+       if (async->sync)
+               complete(&async->wait); /* submitter is waiting; it reads error and frees async */
+       else
+               kfree(async);           /* nobody is waiting, so the worker frees the request */
+}
+
+int btrfs_async_run_delayed_refs(struct btrfs_root *root,
+                                unsigned long count, int wait) /* wait != 0: block for the worker's result */
+{
+       struct async_delayed_refs *async;
+       int ret;
+
+       async = kmalloc(sizeof(*async), GFP_NOFS);
+       if (!async)
+               return -ENOMEM;
+
+       async->root = root->fs_info->tree_root; /* the worker always uses the tree root */
+       async->count = count;   /* NOTE(review): unsigned long narrowed to the int field */
+       async->error = 0;
+       if (wait)
+               async->sync = 1;
+       else
+               async->sync = 0;
+       init_completion(&async->wait);
+
+       btrfs_init_work(&async->work, delayed_ref_async_start,
+                       NULL, NULL);
+
+       btrfs_queue_work(root->fs_info->extent_workers, &async->work);
+
+       if (wait) {
+               wait_for_completion(&async->wait);
+               ret = async->error;
+               kfree(async);   /* synchronous requests are freed by the submitter */
+               return ret;
+       }
+       return 0;       /* async case: the worker frees the request; its error is not returned */
+}
+
 /*
  * this starts processing the delayed reference count updates and
  * extent insertions we have queued up so far.  count can be
index 992aae6c00b06713f18ab473cb63a6d03ef6e445..38d1e7b976d800caf17c38eee629d3ecb6119689 100644 (file)
@@ -2678,6 +2678,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                trans = NULL;
                goto out_unlock;
        }
+
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
        if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
index 3aafbde8b637f44c0622e8dc9bd224f369c2d065..1c54e2eb74abac9e2530467e77bb6164890fc45c 100644 (file)
@@ -697,6 +697,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
        unsigned long cur = trans->delayed_ref_updates;
        int lock = (trans->type != TRANS_JOIN_NOLOCK);
        int err = 0;
+       int must_run_delayed_refs = 0;
 
        if (trans->use_count > 1) {
                trans->use_count--;
@@ -711,10 +712,18 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
                btrfs_create_pending_block_groups(trans, root);
 
        trans->delayed_ref_updates = 0;
-       if (!trans->sync && btrfs_should_throttle_delayed_refs(trans, root)) {
+       if (!trans->sync) {
+               must_run_delayed_refs =
+                       btrfs_should_throttle_delayed_refs(trans, root);
                cur = max_t(unsigned long, cur, 32);
-               trans->delayed_ref_updates = 0;
-               btrfs_run_delayed_refs(trans, root, cur);
+
+               /*
+                * don't make the caller wait if they are from a NOLOCK
+                * or ATTACH transaction, it will deadlock with commit
+                */
+               if (must_run_delayed_refs == 1 &&
+                   (trans->type & (__TRANS_JOIN_NOLOCK | __TRANS_ATTACH)))
+                       must_run_delayed_refs = 2;
        }
 
        if (trans->qgroup_reserved) {
@@ -775,6 +784,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
        assert_qgroups_uptodate(trans);
 
        kmem_cache_free(btrfs_trans_handle_cachep, trans);
+       if (must_run_delayed_refs) {
+               btrfs_async_run_delayed_refs(root, cur,
+                                            must_run_delayed_refs == 1);
+       }
        return err;
 }