Btrfs: Replace the transaction work queue with kthreads
author Chris Mason <chris.mason@oracle.com>
Wed, 25 Jun 2008 20:01:31 +0000 (16:01 -0400)
committer Chris Mason <chris.mason@oracle.com>
Thu, 25 Sep 2008 15:04:03 +0000 (11:04 -0400)
This creates one kthread for commits and one kthread for
deleting old snapshots.  All the work queues are removed.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h
fs/btrfs/volumes.c

index 5edbcc09b3cc191ffa26b76dda76f3974d159fab..40f0e0cb804bd07f84f40d51d33dcfa7377f687f 100644 (file)
@@ -1352,6 +1352,8 @@ again:
                                                free_extent_buffer(tmp);
                                        goto again;
                                } else {
+                                       if (tmp)
+                                               free_extent_buffer(tmp);
                                        b = read_node_slot(root, b, slot);
                                }
                        }
@@ -3048,7 +3050,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
                free_extent_buffer(c);
                path->nodes[level] = next;
                path->slots[level] = 0;
-               path->locks[level] = 1;
+               if (!path->skip_locking)
+                       path->locks[level] = 1;
                if (!level)
                        break;
                if (level == 1 && path->locks[1] && path->reada)
index e9bbb53eda63b6f19753f18fe80e20fa03ed53dd..244fe86bcc55f27635738b59fc46f3011b01ad9b 100644 (file)
@@ -23,7 +23,6 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/fs.h>
-#include <linux/workqueue.h>
 #include <linux/completion.h>
 #include <linux/backing-dev.h>
 #include <asm/kmap_types.h>
@@ -519,15 +518,14 @@ struct btrfs_fs_info {
        struct backing_dev_info bdi;
        spinlock_t hash_lock;
        struct mutex trans_mutex;
+       struct mutex transaction_kthread_mutex;
+       struct mutex cleaner_mutex;
        struct mutex alloc_mutex;
        struct mutex chunk_mutex;
        struct mutex drop_mutex;
        struct list_head trans_list;
        struct list_head hashers;
        struct list_head dead_roots;
-       struct list_head end_io_work_list;
-       struct work_struct end_io_work;
-       spinlock_t end_io_work_lock;
        atomic_t nr_async_submits;
 
        /*
@@ -543,13 +541,10 @@ struct btrfs_fs_info {
        struct btrfs_workers workers;
        struct btrfs_workers endio_workers;
        struct btrfs_workers submit_workers;
+       struct task_struct *transaction_kthread;
+       struct task_struct *cleaner_kthread;
        int thread_pool_size;
 
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-       struct work_struct trans_work;
-#else
-       struct delayed_work trans_work;
-#endif
        struct kobject super_kobj;
        struct completion kobj_unregister;
        int do_barriers;
index 52569b57692defe4936549bc289c3e6e82ea7d5d..31ca9f89388dceac0dc8619b9845abb065f7d03c 100644 (file)
@@ -16,6 +16,7 @@
  * Boston, MA 021110-1307, USA.
  */
 
+#include <linux/version.h>
 #include <linux/fs.h>
 #include <linux/blkdev.h>
 #include <linux/scatterlist.h>
 #include <linux/writeback.h>
 #include <linux/buffer_head.h> // for block_sync_page
 #include <linux/workqueue.h>
+#include <linux/kthread.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
+# include <linux/freezer.h>
+#else
+# include <linux/sched.h>
+#endif
 #include "crc32c.h"
 #include "ctree.h"
 #include "disk-io.h"
@@ -1100,6 +1107,87 @@ static void end_workqueue_fn(struct btrfs_work *work)
 #endif
 }
 
+/* Kernel thread: runs btrfs_clean_old_snapshots() under cleaner_mutex each time it is woken. */
+static int cleaner_kthread(void *arg)
+{
+       struct btrfs_root *root = arg;  /* thread argument is a btrfs_root pointer */
+
+       do {
+               smp_mb();
+               if (root->fs_info->closing)
+                       break;
+
+               vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
+               mutex_lock(&root->fs_info->cleaner_mutex);
+               btrfs_clean_old_snapshots(root);
+               mutex_unlock(&root->fs_info->cleaner_mutex);
+
+               if (freezing(current)) {
+                       refrigerator();
+               } else {
+                       smp_mb();
+                       if (root->fs_info->closing)
+                               break;
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       if (!kthread_should_stop()) /* re-check after changing state so a stop wake-up is not lost */
+                               schedule();
+                       __set_current_state(TASK_RUNNING);
+               }
+       } while (!kthread_should_stop());
+       return 0;
+}
+
+/* Kernel thread: commits the running transaction once it is 30s old, then wakes the cleaner thread. */
+static int transaction_kthread(void *arg)
+{
+       struct btrfs_root *root = arg;
+       struct btrfs_trans_handle *trans;
+       struct btrfs_transaction *cur;
+       unsigned long now;
+       unsigned long delay;
+       int ret;
+
+       do {
+               smp_mb();
+               if (root->fs_info->closing)
+                       break;
+               delay = HZ * 30; /* default sleep; shortened below when a transaction is still young */
+               vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
+               mutex_lock(&root->fs_info->transaction_kthread_mutex);
+               mutex_lock(&root->fs_info->trans_mutex);
+               cur = root->fs_info->running_transaction;
+               if (!cur) {
+                       mutex_unlock(&root->fs_info->trans_mutex);
+                       goto sleep;
+               }
+               now = get_seconds();
+               if (now < cur->start_time || now - cur->start_time < 30) {
+                       mutex_unlock(&root->fs_info->trans_mutex);
+                       delay = HZ * 5; /* not old enough to commit yet: check again sooner */
+                       goto sleep;
+               }
+               mutex_unlock(&root->fs_info->trans_mutex);
+               btrfs_defrag_dirty_roots(root->fs_info);
+               trans = btrfs_start_transaction(root, 1);
+               ret = btrfs_commit_transaction(trans, root);
+sleep:
+               wake_up_process(root->fs_info->cleaner_kthread);
+               mutex_unlock(&root->fs_info->transaction_kthread_mutex);
+
+               if (freezing(current)) {
+                       refrigerator();
+               } else {
+                       if (root->fs_info->closing)
+                               break;
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       if (!kthread_should_stop()) /* re-check after changing state so a stop wake-up is not lost */
+                               schedule_timeout(delay);
+                       __set_current_state(TASK_RUNNING);
+               }
+       } while (!kthread_should_stop());
+       return 0;
+}
+
 struct btrfs_root *open_ctree(struct super_block *sb,
                              struct btrfs_fs_devices *fs_devices,
                              char *options)
@@ -1189,11 +1277,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                             fs_info->btree_inode->i_mapping, GFP_NOFS);
        fs_info->do_barriers = 1;
 
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-       INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info);
-#else
-       INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner);
-#endif
        BTRFS_I(fs_info->btree_inode)->root = tree_root;
        memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
               sizeof(struct btrfs_key));
@@ -1204,6 +1287,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        mutex_init(&fs_info->drop_mutex);
        mutex_init(&fs_info->alloc_mutex);
        mutex_init(&fs_info->chunk_mutex);
+       mutex_init(&fs_info->transaction_kthread_mutex);
+       mutex_init(&fs_info->cleaner_mutex);
 
 #if 0
        ret = add_hasher(fs_info, "crc32c");
@@ -1247,7 +1332,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        btrfs_start_workers(&fs_info->submit_workers, 1);
        btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
 
-
        err = -EINVAL;
        if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) {
                printk("Btrfs: wanted %llu devices, but found %llu\n",
@@ -1341,9 +1425,22 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        fs_info->data_alloc_profile = (u64)-1;
        fs_info->metadata_alloc_profile = (u64)-1;
        fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
+       fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
+                                              "btrfs-cleaner");
+       if (!fs_info->cleaner_kthread)
+               goto fail_extent_root;
+
+       fs_info->transaction_kthread = kthread_run(transaction_kthread,
+                                                  tree_root,
+                                                  "btrfs-transaction");
+       if (!fs_info->transaction_kthread)
+               goto fail_trans_kthread;
+
 
        return tree_root;
 
+fail_trans_kthread:
+       kthread_stop(fs_info->cleaner_kthread);
 fail_extent_root:
        free_extent_buffer(extent_root->node);
 fail_tree_root:
@@ -1562,8 +1659,11 @@ int close_ctree(struct btrfs_root *root)
        fs_info->closing = 1;
        smp_mb();
 
-       btrfs_transaction_flush_work(root);
+       kthread_stop(root->fs_info->transaction_kthread);
+       kthread_stop(root->fs_info->cleaner_kthread);
+
        btrfs_defrag_dirty_roots(root->fs_info);
+       btrfs_clean_old_snapshots(root);
        trans = btrfs_start_transaction(root, 1);
        ret = btrfs_commit_transaction(trans, root);
        /* run commit again to  drop the original snapshot */
@@ -1574,8 +1674,6 @@ int close_ctree(struct btrfs_root *root)
 
        write_ctree_super(NULL, root);
 
-       btrfs_transaction_flush_work(root);
-
        if (fs_info->delalloc_bytes) {
                printk("btrfs: at unmount delalloc count %Lu\n",
                       fs_info->delalloc_bytes);
index 6274f30031db0c4cd2ba89b729153e59c4994733..89cc4f611869b82d62e026a0d831e016297fd5d5 100644 (file)
@@ -1216,15 +1216,16 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
        if (ret == -ENOSPC) {
 printk("space info full %Lu\n", flags);
                space_info->full = 1;
-               goto out;
+               goto out_unlock;
        }
        BUG_ON(ret);
 
        ret = btrfs_make_block_group(trans, extent_root, 0, flags,
                     BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
        BUG_ON(ret);
-out:
+out_unlock:
        mutex_unlock(&extent_root->fs_info->chunk_mutex);
+out:
        return 0;
 }
 
@@ -2274,7 +2275,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
                        free_extent_buffer(next);
                        mutex_unlock(&root->fs_info->alloc_mutex);
 
-                       reada_walk_down(root, cur, path->slots[*level]);
+                       if (path->slots[*level] == 0)
+                               reada_walk_down(root, cur, path->slots[*level]);
 
                        next = read_tree_block(root, bytenr, blocksize,
                                               ptr_gen);
@@ -2446,8 +2448,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
                        break;
                if (wret < 0)
                        ret = wret;
-               ret = -EAGAIN;
-               break;
        }
        for (i = 0; i <= orig_level; i++) {
                if (path->nodes[i]) {
index b61ded7a20c943770b02a1bc665e9463804efef2..726d6871fa13f78133d8fa0ffde50e887d98b16e 100644 (file)
@@ -340,7 +340,6 @@ static int btrfs_fill_super(struct super_block * sb,
                goto fail_close;
 
        sb->s_root = root_dentry;
-       btrfs_transaction_queue_work(tree_root, HZ * 30);
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25)
        save_mount_options(sb, data);
@@ -416,9 +415,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
                goto error_free_subvol_name;
 
        bdev = fs_devices->latest_bdev;
-       btrfs_lock_volumes();
        s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices);
-       btrfs_unlock_volumes();
        if (IS_ERR(s))
                goto error_s;
 
@@ -530,13 +527,15 @@ out:
 static void btrfs_write_super_lockfs(struct super_block *sb)
 {
        struct btrfs_root *root = btrfs_sb(sb);
-       btrfs_transaction_flush_work(root);
+       mutex_lock(&root->fs_info->transaction_kthread_mutex);
+       mutex_lock(&root->fs_info->cleaner_mutex);
 }
 
 static void btrfs_unlockfs(struct super_block *sb)
 {
        struct btrfs_root *root = btrfs_sb(sb);
-       btrfs_transaction_queue_work(root, HZ * 30);
+       mutex_unlock(&root->fs_info->cleaner_mutex);
+       mutex_unlock(&root->fs_info->transaction_kthread_mutex);
 }
 
 static struct super_operations btrfs_super_ops = {
@@ -589,10 +588,9 @@ static int __init init_btrfs_fs(void)
        if (err)
                return err;
 
-       btrfs_init_transaction_sys();
        err = btrfs_init_cachep();
        if (err)
-               goto free_transaction_sys;
+               goto free_sysfs;
 
        err = extent_io_init();
        if (err)
@@ -618,15 +616,13 @@ free_extent_io:
        extent_io_exit();
 free_cachep:
        btrfs_destroy_cachep();
-free_transaction_sys:
-       btrfs_exit_transaction_sys();
+free_sysfs:
        btrfs_exit_sysfs();
        return err;
 }
 
 static void __exit exit_btrfs_fs(void)
 {
-       btrfs_exit_transaction_sys();
        btrfs_destroy_cachep();
        extent_map_exit();
        extent_io_exit();
index 69ed5f85a38792d87e23a7e62385e6a3602f4a9d..0c53ff775b9230386e9cd37a110643e188344798 100644 (file)
@@ -29,8 +29,6 @@ static int total_trans = 0;
 extern struct kmem_cache *btrfs_trans_handle_cachep;
 extern struct kmem_cache *btrfs_transaction_cachep;
 
-static struct workqueue_struct *trans_wq;
-
 #define BTRFS_ROOT_TRANS_TAG 0
 #define BTRFS_ROOT_DEFRAG_TAG 1
 
@@ -807,81 +805,15 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
 {
        struct list_head dirty_roots;
        INIT_LIST_HEAD(&dirty_roots);
-
+again:
        mutex_lock(&root->fs_info->trans_mutex);
        list_splice_init(&root->fs_info->dead_roots, &dirty_roots);
        mutex_unlock(&root->fs_info->trans_mutex);
 
        if (!list_empty(&dirty_roots)) {
                drop_dirty_roots(root, &dirty_roots);
+               goto again;
        }
        return 0;
 }
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-void btrfs_transaction_cleaner(void *p)
-#else
-void btrfs_transaction_cleaner(struct work_struct *work)
-#endif
-{
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-       struct btrfs_fs_info *fs_info = p;
-#else
-       struct btrfs_fs_info *fs_info = container_of(work,
-                                                    struct btrfs_fs_info,
-                                                    trans_work.work);
-
-#endif
-       struct btrfs_root *root = fs_info->tree_root;
-       struct btrfs_transaction *cur;
-       struct btrfs_trans_handle *trans;
-       unsigned long now;
-       unsigned long delay = HZ * 30;
-       int ret;
-
-       smp_mb();
-       if (root->fs_info->closing)
-               goto out;
-
-       mutex_lock(&root->fs_info->trans_mutex);
-       cur = root->fs_info->running_transaction;
-       if (!cur) {
-               mutex_unlock(&root->fs_info->trans_mutex);
-               goto out;
-       }
-       now = get_seconds();
-       if (now < cur->start_time || now - cur->start_time < 30) {
-               mutex_unlock(&root->fs_info->trans_mutex);
-               delay = HZ * 5;
-               goto out;
-       }
-       mutex_unlock(&root->fs_info->trans_mutex);
-       btrfs_defrag_dirty_roots(root->fs_info);
-       trans = btrfs_start_transaction(root, 1);
-       ret = btrfs_commit_transaction(trans, root);
-out:
-       btrfs_clean_old_snapshots(root);
-       btrfs_transaction_queue_work(root, delay);
-}
-
-void btrfs_transaction_queue_work(struct btrfs_root *root, int delay)
-{
-       if (!root->fs_info->closing)
-               queue_delayed_work(trans_wq, &root->fs_info->trans_work, delay);
-}
-
-void btrfs_transaction_flush_work(struct btrfs_root *root)
-{
-       cancel_delayed_work(&root->fs_info->trans_work);
-       flush_workqueue(trans_wq);
-}
-
-void __init btrfs_init_transaction_sys(void)
-{
-       trans_wq = create_workqueue("btrfs-transaction");
-}
-
-void btrfs_exit_transaction_sys(void)
-{
-       destroy_workqueue(trans_wq);
-}
 
index 52559b51b181abc9b4b36f0445b990dc395236d1..e1e5a06b65f455d8df65819ec064e54c365af71d 100644 (file)
@@ -82,16 +82,6 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
 int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root);
 
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-void btrfs_transaction_cleaner(void *p);
-#else
-void btrfs_transaction_cleaner(struct work_struct *work);
-#endif
-
-void btrfs_transaction_flush_work(struct btrfs_root *root);
-void btrfs_transaction_queue_work(struct btrfs_root *root, int delay);
-void btrfs_init_transaction_sys(void);
-void btrfs_exit_transaction_sys(void);
 int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest,
                        struct list_head *dead_list);
 int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info);
index 869864ddcc294ff1c2a0879aedd03654d5cda28a..4e7cee27aab565cef76d1e4041e643382d2438b1 100644 (file)
@@ -271,13 +271,17 @@ again:
        list_for_each(cur, head) {
                device = list_entry(cur, struct btrfs_device, dev_list);
                if (!device->in_fs_metadata) {
-                       if (device->bdev) {
-                               close_bdev_excl(device->bdev);
-                               fs_devices->open_devices--;
-                       }
+                       struct block_device *bdev;
                        list_del(&device->dev_list);
                        list_del(&device->dev_alloc_list);
                        fs_devices->num_devices--;
+                       if (device->bdev) {
+                               bdev = device->bdev;
+                               fs_devices->open_devices--;
+                               mutex_unlock(&uuid_mutex);
+                               close_bdev_excl(bdev);
+                               mutex_lock(&uuid_mutex);
+                       }
                        kfree(device->name);
                        kfree(device);
                        goto again;