btrfs: delayed-ref: Use list to replace the ref_root in ref_head.
author	Qu Wenruo <quwenruo@cn.fujitsu.com>
Mon, 30 Mar 2015 09:03:00 +0000 (17:03 +0800)
committer	Chris Mason <clm@fb.com>
Wed, 10 Jun 2015 16:25:03 +0000 (09:25 -0700)
This patch replaces the rbtree used in ref_head with a list.
This has the following advantage:
1) Easier merge logic.
With the new list implementation, we only need to care about merging
the new ref_node with the tail ref_node of the ref_head.
That can be done easily at insert time, so there is no need for a
dedicated merge pass in run_delayed_refs().  A minimal sketch of the
idea is included below, just before the diff.

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>
fs/btrfs/backref.c
fs/btrfs/delayed-ref.c
fs/btrfs/delayed-ref.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
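
The following is a minimal user-space sketch of the tail-merge-at-insert
idea described above.  It is illustrative only: demo_ref, demo_head and
demo_add are made-up names, the ref "key" is collapsed to a pair of ints,
and the locking and the add/drop ref_mod arithmetic of the real helper are
left out.  The kernel helper add_delayed_ref_tail_merge() added in
fs/btrfs/delayed-ref.c below has the same overall shape.

/*
 * Illustrative sketch (not kernel code): append a ref to a per-head list,
 * or merge it into the current tail when the keys match.
 */
#include <stdio.h>
#include <stdlib.h>

struct demo_ref {
	int type;			/* stands in for the full ref key */
	int action;			/* add vs. drop */
	int ref_mod;
	struct demo_ref *prev, *next;
};

struct demo_head {
	struct demo_ref list;		/* circular list head, like list_head */
};

static void demo_init(struct demo_head *h)
{
	h->list.prev = h->list.next = &h->list;
}

/* Return 1 if @ref was merged into the tail, 0 if it was appended. */
static int demo_add(struct demo_head *h, struct demo_ref *ref)
{
	struct demo_ref *tail = h->list.prev;

	if (tail != &h->list && tail->type == ref->type &&
	    tail->action == ref->action) {
		tail->ref_mod += ref->ref_mod;	/* merge with the tail */
		free(ref);
		return 1;
	}
	ref->prev = tail;			/* append at the tail */
	ref->next = &h->list;
	tail->next = ref;
	h->list.prev = ref;
	return 0;
}

int main(void)
{
	struct demo_head head;
	int i;

	demo_init(&head);
	for (i = 0; i < 3; i++) {
		struct demo_ref *r = calloc(1, sizeof(*r));

		r->type = 1;
		r->action = 1;
		r->ref_mod = 1;
		printf("insert %d: %s\n", i,
		       demo_add(&head, r) ? "merged" : "appended");
	}
	printf("tail ref_mod = %d\n", head.list.prev->ref_mod);
	return 0;
}

Running it prints "appended" once and "merged" twice, and the single tail
node ends up with ref_mod == 3: duplicate refs are folded at insert time
instead of in a later merge pass, which is the effect the patch is after.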

fs/btrfs/backref.c
index a3316f1707e6d801ed195c2585ef0805ad9edd4a..49bc5a41c1f814a30fad6a02e9fd0c3e9f81bc96 100644
@@ -574,8 +574,8 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
                              struct list_head *prefs, u64 *total_refs,
                              u64 inum)
 {
+       struct btrfs_delayed_ref_node *node;
        struct btrfs_delayed_extent_op *extent_op = head->extent_op;
-       struct rb_node *n = &head->node.rb_node;
        struct btrfs_key key;
        struct btrfs_key op_key = {0};
        int sgn;
@@ -585,12 +585,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
                btrfs_disk_key_to_cpu(&op_key, &extent_op->key);
 
        spin_lock(&head->lock);
-       n = rb_first(&head->ref_root);
-       while (n) {
-               struct btrfs_delayed_ref_node *node;
-               node = rb_entry(n, struct btrfs_delayed_ref_node,
-                               rb_node);
-               n = rb_next(n);
+       list_for_each_entry(node, &head->ref_list, list) {
                if (node->seq > seq)
                        continue;
 

fs/btrfs/delayed-ref.c
index 8f8ed7d20bac5f4e058fc693d0fdc24fde7abeda..4dbc31636d14da73665fd24840f1d7fff8c2ca85 100644
@@ -268,7 +268,7 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
                rb_erase(&head->href_node, &delayed_refs->href_root);
        } else {
                assert_spin_locked(&head->lock);
-               rb_erase(&ref->rb_node, &head->ref_root);
+               list_del(&ref->list);
        }
        ref->in_tree = 0;
        btrfs_put_delayed_ref(ref);
@@ -328,48 +328,6 @@ static int merge_ref(struct btrfs_trans_handle *trans,
        return done;
 }
 
-void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
-                             struct btrfs_fs_info *fs_info,
-                             struct btrfs_delayed_ref_root *delayed_refs,
-                             struct btrfs_delayed_ref_head *head)
-{
-       struct rb_node *node;
-       u64 seq = 0;
-
-       assert_spin_locked(&head->lock);
-       /*
-        * We don't have too much refs to merge in the case of delayed data
-        * refs.
-        */
-       if (head->is_data)
-               return;
-
-       spin_lock(&fs_info->tree_mod_seq_lock);
-       if (!list_empty(&fs_info->tree_mod_seq_list)) {
-               struct seq_list *elem;
-
-               elem = list_first_entry(&fs_info->tree_mod_seq_list,
-                                       struct seq_list, list);
-               seq = elem->seq;
-       }
-       spin_unlock(&fs_info->tree_mod_seq_lock);
-
-       node = rb_first(&head->ref_root);
-       while (node) {
-               struct btrfs_delayed_ref_node *ref;
-
-               ref = rb_entry(node, struct btrfs_delayed_ref_node,
-                              rb_node);
-               /* We can't merge refs that are outside of our seq count */
-               if (seq && ref->seq >= seq)
-                       break;
-               if (merge_ref(trans, delayed_refs, head, ref, seq))
-                       node = rb_first(&head->ref_root);
-               else
-                       node = rb_next(&ref->rb_node);
-       }
-}
-
 int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
                            struct btrfs_delayed_ref_root *delayed_refs,
                            u64 seq)
@@ -484,6 +442,74 @@ update_existing_ref(struct btrfs_trans_handle *trans,
        }
 }
 
+/*
+ * Helper to insert the ref_node to the tail or merge with tail.
+ *
+ * Return 0 for insert.
+ * Return >0 for merge.
+ */
+static int
+add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
+                          struct btrfs_delayed_ref_root *root,
+                          struct btrfs_delayed_ref_head *href,
+                          struct btrfs_delayed_ref_node *ref)
+{
+       struct btrfs_delayed_ref_node *exist;
+       int mod;
+       int ret = 0;
+
+       spin_lock(&href->lock);
+       /* Check whether we can merge the tail node with ref */
+       if (list_empty(&href->ref_list))
+               goto add_tail;
+       exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node,
+                          list);
+       /* No need to compare bytenr nor is_head */
+       if (exist->type != ref->type || exist->no_quota != ref->no_quota ||
+           exist->seq != ref->seq)
+               goto add_tail;
+
+       if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY ||
+            exist->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
+           comp_tree_refs(btrfs_delayed_node_to_tree_ref(exist),
+                          btrfs_delayed_node_to_tree_ref(ref),
+                          ref->type))
+               goto add_tail;
+       if ((exist->type == BTRFS_EXTENT_DATA_REF_KEY ||
+            exist->type == BTRFS_SHARED_DATA_REF_KEY) &&
+           comp_data_refs(btrfs_delayed_node_to_data_ref(exist),
+                          btrfs_delayed_node_to_data_ref(ref)))
+               goto add_tail;
+
+       /* Now we are sure we can merge */
+       ret = 1;
+       if (exist->action == ref->action) {
+               mod = ref->ref_mod;
+       } else {
+               /* Need to change action */
+               if (exist->ref_mod < ref->ref_mod) {
+                       exist->action = ref->action;
+                       mod = -exist->ref_mod;
+                       exist->ref_mod = ref->ref_mod;
+               } else
+                       mod = -ref->ref_mod;
+       }
+       exist->ref_mod += mod;
+
+       /* remove existing tail if its ref_mod is zero */
+       if (exist->ref_mod == 0)
+               drop_delayed_ref(trans, root, href, exist);
+       spin_unlock(&href->lock);
+       return ret;
+
+add_tail:
+       list_add_tail(&ref->list, &href->ref_list);
+       atomic_inc(&root->num_entries);
+       trans->delayed_ref_updates++;
+       spin_unlock(&href->lock);
+       return ret;
+}
+
 /*
  * helper function to update the accounting in the head ref
  * existing and update must have the same bytenr
@@ -618,7 +644,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
        head_ref = btrfs_delayed_node_to_head(ref);
        head_ref->must_insert_reserved = must_insert_reserved;
        head_ref->is_data = is_data;
-       head_ref->ref_root = RB_ROOT;
+       INIT_LIST_HEAD(&head_ref->ref_list);
        head_ref->processing = 0;
        head_ref->total_ref_mod = count_mod;
 
@@ -659,10 +685,10 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
                     u64 num_bytes, u64 parent, u64 ref_root, int level,
                     int action, int no_quota)
 {
-       struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_tree_ref *full_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        u64 seq = 0;
+       int ret;
 
        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;
@@ -693,21 +719,14 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 
        trace_add_delayed_tree_ref(ref, full_ref, action);
 
-       spin_lock(&head_ref->lock);
-       existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
-       if (existing) {
-               update_existing_ref(trans, delayed_refs, head_ref, existing,
-                                   ref);
-               /*
-                * we've updated the existing ref, free the newly
-                * allocated ref
-                */
+       ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
+
+       /*
+        * XXX: memory should be freed at the same level allocated.
+        * But bad practice is anywhere... Follow it now. Need cleanup.
+        */
+       if (ret > 0)
                kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
-       } else {
-               atomic_inc(&delayed_refs->num_entries);
-               trans->delayed_ref_updates++;
-       }
-       spin_unlock(&head_ref->lock);
 }
 
 /*
@@ -721,10 +740,10 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                     u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
                     u64 offset, int action, int no_quota)
 {
-       struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_data_ref *full_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        u64 seq = 0;
+       int ret;
 
        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;
@@ -758,21 +777,10 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 
        trace_add_delayed_data_ref(ref, full_ref, action);
 
-       spin_lock(&head_ref->lock);
-       existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
-       if (existing) {
-               update_existing_ref(trans, delayed_refs, head_ref, existing,
-                                   ref);
-               /*
-                * we've updated the existing ref, free the newly
-                * allocated ref
-                */
+       ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
+
+       if (ret > 0)
                kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
-       } else {
-               atomic_inc(&delayed_refs->num_entries);
-               trans->delayed_ref_updates++;
-       }
-       spin_unlock(&head_ref->lock);
 }
 
 /*

fs/btrfs/delayed-ref.h
index 5eb0892396d017ba95645f7ab179a1c8a3696253..362ca57cfeb7d7f74b1639e2627390a09a5a2c0f 100644
 #define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
 #define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */
 
+/*
+ * XXX: Qu: I really hate the design that ref_head and tree/data ref shares the
+ * same ref_node structure.
+ * Ref_head is in a higher logic level than tree/data ref, and duplicated
+ * bytenr/num_bytes in ref_node is really a waste or memory, they should be
+ * referred from ref_head.
+ * This gets more disgusting after we use list to store tree/data ref in
+ * ref_head. Must clean this mess up later.
+ */
 struct btrfs_delayed_ref_node {
+       /*
+        * ref_head use rb tree, stored in ref_root->href.
+        * indexed by bytenr
+        */
        struct rb_node rb_node;
 
+       /*data/tree ref use list, stored in ref_head->ref_list. */
+       struct list_head list;
+
        /* the starting bytenr of the extent */
        u64 bytenr;
 
@@ -83,7 +99,7 @@ struct btrfs_delayed_ref_head {
        struct mutex mutex;
 
        spinlock_t lock;
-       struct rb_root ref_root;
+       struct list_head ref_list;
 
        struct rb_node href_node;
 

fs/btrfs/disk-io.c
index 7f8377871283d7e85aed636aede80f8367afda91..695363ae1c2875d0e25991a724f549e951fef5f4 100644
@@ -4062,6 +4062,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 
        while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
                struct btrfs_delayed_ref_head *head;
+               struct btrfs_delayed_ref_node *tmp;
                bool pin_bytes = false;
 
                head = rb_entry(node, struct btrfs_delayed_ref_head,
@@ -4077,11 +4078,10 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
                        continue;
                }
                spin_lock(&head->lock);
-               while ((node = rb_first(&head->ref_root)) != NULL) {
-                       ref = rb_entry(node, struct btrfs_delayed_ref_node,
-                                      rb_node);
+               list_for_each_entry_safe_reverse(ref, tmp, &head->ref_list,
+                                                list) {
                        ref->in_tree = 0;
-                       rb_erase(&ref->rb_node, &head->ref_root);
+                       list_del(&ref->list);
                        atomic_dec(&delayed_refs->num_entries);
                        btrfs_put_delayed_ref(ref);
                }

fs/btrfs/extent-tree.c
index 4eefabcc838f16fb81f23312b04721f3a69b2d4b..adf0eedde50273d017b78522f2ac68e0d2c3b3db 100644
@@ -2323,28 +2323,14 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
        return ret;
 }
 
-static noinline struct btrfs_delayed_ref_node *
+static inline struct btrfs_delayed_ref_node *
 select_delayed_ref(struct btrfs_delayed_ref_head *head)
 {
-       struct rb_node *node;
-       struct btrfs_delayed_ref_node *ref, *last = NULL;;
+       if (list_empty(&head->ref_list))
+               return NULL;
 
-       /*
-        * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
-        * this prevents ref count from going down to zero when
-        * there still are pending delayed ref.
-        */
-       node = rb_first(&head->ref_root);
-       while (node) {
-               ref = rb_entry(node, struct btrfs_delayed_ref_node,
-                               rb_node);
-               if (ref->action == BTRFS_ADD_DELAYED_REF)
-                       return ref;
-               else if (last == NULL)
-                       last = ref;
-               node = rb_next(node);
-       }
-       return last;
+       return list_entry(head->ref_list.next, struct btrfs_delayed_ref_node,
+                         list);
 }
 
 /*
@@ -2396,16 +2382,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                        }
                }
 
-               /*
-                * We need to try and merge add/drops of the same ref since we
-                * can run into issues with relocate dropping the implicit ref
-                * and then it being added back again before the drop can
-                * finish.  If we merged anything we need to re-loop so we can
-                * get a good ref.
-                */
                spin_lock(&locked_ref->lock);
-               btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
-                                        locked_ref);
 
                /*
                 * locked_ref is the head node, so we have to go one
@@ -2482,7 +2459,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                        spin_unlock(&locked_ref->lock);
                        spin_lock(&delayed_refs->lock);
                        spin_lock(&locked_ref->lock);
-                       if (rb_first(&locked_ref->ref_root) ||
+                       if (!list_empty(&locked_ref->ref_list) ||
                            locked_ref->extent_op) {
                                spin_unlock(&locked_ref->lock);
                                spin_unlock(&delayed_refs->lock);
@@ -2496,7 +2473,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                } else {
                        actual_count++;
                        ref->in_tree = 0;
-                       rb_erase(&ref->rb_node, &locked_ref->ref_root);
+                       list_del(&ref->list);
                }
                atomic_dec(&delayed_refs->num_entries);
 
@@ -2905,7 +2882,6 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
        struct btrfs_delayed_ref_node *ref;
        struct btrfs_delayed_data_ref *data_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
-       struct rb_node *node;
        int ret = 0;
 
        delayed_refs = &trans->transaction->delayed_refs;
@@ -2934,11 +2910,7 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
        spin_unlock(&delayed_refs->lock);
 
        spin_lock(&head->lock);
-       node = rb_first(&head->ref_root);
-       while (node) {
-               ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-               node = rb_next(node);
-
+       list_for_each_entry(ref, &head->ref_list, list) {
                /* If it's a shared ref we know a cross reference exists */
                if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
                        ret = 1;
@@ -6448,7 +6420,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
                goto out_delayed_unlock;
 
        spin_lock(&head->lock);
-       if (rb_first(&head->ref_root))
+       if (!list_empty(&head->ref_list))
                goto out;
 
        if (head->extent_op) {