Btrfs: bugfix: ignore the wrong key for indirect tree block backrefs
authorJan Schmidt <list.btrfs@jan-o-sch.net>
Tue, 15 May 2012 15:55:51 +0000 (17:55 +0200)
committerJan Schmidt <list.btrfs@jan-o-sch.net>
Sat, 26 May 2012 10:17:51 +0000 (12:17 +0200)
The key we store with a tree block backref is only a hint. It is set when
the ref is created and can remain correct for a long time. As the tree is
rebalanced, however, eventually the key no longer points to the correct
destination.

With this patch, we change find_parent_nodes to no longer add keys unless it
knows for sure they're correct (e.g. because they're for an extent data
backref). Then when we later encounter a backref ref with no parent and no
key set, we grab the block and take the first key from the block itself.

Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
fs/btrfs/backref.c

index c69a846999bf7601bd37fdada1d32d1d29221101..366978c5cdd38b4eeb405b07abf33710c3bb8c1b 100644 (file)
 struct __prelim_ref {
        struct list_head list;
        u64 root_id;
-       struct btrfs_key key;
+       struct btrfs_key key_for_search;
        int level;
        int count;
        u64 parent;
        u64 wanted_disk_byte;
 };
 
+/*
+ * the rules for all callers of this function are:
+ * - obtaining the parent is the goal
+ * - if you add a key, you must know that it is a correct key
+ * - if you cannot add the parent or a correct key, then we will look into the
+ *   block later to set a correct key
+ *
+ * delayed refs
+ * ============
+ *        backref type | shared | indirect | shared | indirect
+ * information         |   tree |     tree |   data |     data
+ * --------------------+--------+----------+--------+----------
+ *      parent logical |    y   |     -    |    -   |     -
+ *      key to resolve |    -   |     y    |    y   |     y
+ *  tree block logical |    -   |     -    |    -   |     -
+ *  root for resolving |    y   |     y    |    y   |     y
+ *
+ * - column 1:       we've the parent -> done
+ * - column 2, 3, 4: we use the key to find the parent
+ *
+ * on disk refs (inline or keyed)
+ * ==============================
+ *        backref type | shared | indirect | shared | indirect
+ * information         |   tree |     tree |   data |     data
+ * --------------------+--------+----------+--------+----------
+ *      parent logical |    y   |     -    |    y   |     -
+ *      key to resolve |    -   |     -    |    -   |     y
+ *  tree block logical |    y   |     y    |    y   |     y
+ *  root for resolving |    -   |     y    |    y   |     y
+ *
+ * - column 1, 3: we've the parent -> done
+ * - column 2:    we take the first key from the block to find the parent
+ *                (see __add_missing_keys)
+ * - column 4:    we use the key to find the parent
+ *
+ * additional information that's available but not required to find the parent
+ * block might help in merging entries to gain some speed.
+ */
+
 static int __add_prelim_ref(struct list_head *head, u64 root_id,
-                           struct btrfs_key *key, int level, u64 parent,
-                           u64 wanted_disk_byte, int count)
+                           struct btrfs_key *key, int level,
+                           u64 parent, u64 wanted_disk_byte, int count)
 {
        struct __prelim_ref *ref;
 
@@ -50,9 +89,9 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
 
        ref->root_id = root_id;
        if (key)
-               ref->key = *key;
+               ref->key_for_search = *key;
        else
-               memset(&ref->key, 0, sizeof(ref->key));
+               memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));
 
        ref->level = level;
        ref->count = count;
@@ -152,12 +191,13 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
                goto out;
 
        path->lowest_level = level;
-       ret = btrfs_search_slot(NULL, root, &ref->key, path, 0, 0);
+       ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path, 0, 0);
        pr_debug("search slot in root %llu (level %d, ref count %d) returned "
                 "%d for key (%llu %u %llu)\n",
                 (unsigned long long)ref->root_id, level, ref->count, ret,
-                (unsigned long long)ref->key.objectid, ref->key.type,
-                (unsigned long long)ref->key.offset);
+                (unsigned long long)ref->key_for_search.objectid,
+                ref->key_for_search.type,
+                (unsigned long long)ref->key_for_search.offset);
        if (ret < 0)
                goto out;
 
@@ -248,10 +288,65 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
        return ret;
 }
 
+static inline int ref_for_same_block(struct __prelim_ref *ref1,
+                                    struct __prelim_ref *ref2)
+{
+       if (ref1->level != ref2->level)
+               return 0;
+       if (ref1->root_id != ref2->root_id)
+               return 0;
+       if (ref1->key_for_search.type != ref2->key_for_search.type)
+               return 0;
+       if (ref1->key_for_search.objectid != ref2->key_for_search.objectid)
+               return 0;
+       if (ref1->key_for_search.offset != ref2->key_for_search.offset)
+               return 0;
+       if (ref1->parent != ref2->parent)
+               return 0;
+
+       return 1;
+}
+
+/*
+ * read tree blocks and add keys where required.
+ */
+static int __add_missing_keys(struct btrfs_fs_info *fs_info,
+                             struct list_head *head)
+{
+       struct list_head *pos;
+       struct extent_buffer *eb;
+
+       list_for_each(pos, head) {
+               struct __prelim_ref *ref;
+               ref = list_entry(pos, struct __prelim_ref, list);
+
+               if (ref->parent)
+                       continue;
+               if (ref->key_for_search.type)
+                       continue;
+               BUG_ON(!ref->wanted_disk_byte);
+               eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte,
+                                    fs_info->tree_root->leafsize, 0);
+               BUG_ON(!eb);
+               btrfs_tree_read_lock(eb);
+               if (btrfs_header_level(eb) == 0)
+                       btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0);
+               else
+                       btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0);
+               btrfs_tree_read_unlock(eb);
+               free_extent_buffer(eb);
+       }
+       return 0;
+}
+
 /*
  * merge two lists of backrefs and adjust counts accordingly
  *
  * mode = 1: merge identical keys, if key is set
+ *    FIXME: if we add more keys in __add_prelim_ref, we can merge more here.
+ *           additionally, we could even add a key range for the blocks we
+ *           looked into to merge even more (-> replace unresolved refs by those
+ *           having a parent).
  * mode = 2: merge identical parents
  */
 static int __merge_refs(struct list_head *head, int mode)
@@ -265,20 +360,21 @@ static int __merge_refs(struct list_head *head, int mode)
 
                ref1 = list_entry(pos1, struct __prelim_ref, list);
 
-               if (mode == 1 && ref1->key.type == 0)
-                       continue;
                for (pos2 = pos1->next, n2 = pos2->next; pos2 != head;
                     pos2 = n2, n2 = pos2->next) {
                        struct __prelim_ref *ref2;
+                       struct __prelim_ref *xchg;
 
                        ref2 = list_entry(pos2, struct __prelim_ref, list);
 
                        if (mode == 1) {
-                               if (memcmp(&ref1->key, &ref2->key,
-                                          sizeof(ref1->key)) ||
-                                   ref1->level != ref2->level ||
-                                   ref1->root_id != ref2->root_id)
+                               if (!ref_for_same_block(ref1, ref2))
                                        continue;
+                               if (!ref1->parent && ref2->parent) {
+                                       xchg = ref1;
+                                       ref1 = ref2;
+                                       ref2 = xchg;
+                               }
                                ref1->count += ref2->count;
                        } else {
                                if (ref1->parent != ref2->parent)
@@ -298,16 +394,17 @@ static int __merge_refs(struct list_head *head, int mode)
  * smaller or equal that seq to the list
  */
 static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
-                             struct btrfs_key *info_key,
                              struct list_head *prefs)
 {
        struct btrfs_delayed_extent_op *extent_op = head->extent_op;
        struct rb_node *n = &head->node.rb_node;
+       struct btrfs_key key;
+       struct btrfs_key op_key = {0};
        int sgn;
        int ret = 0;
 
        if (extent_op && extent_op->update_key)
-               btrfs_disk_key_to_cpu(info_key, &extent_op->key);
+               btrfs_disk_key_to_cpu(&op_key, &extent_op->key);
 
        while ((n = rb_prev(n))) {
                struct btrfs_delayed_ref_node *node;
@@ -339,7 +436,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
                        struct btrfs_delayed_tree_ref *ref;
 
                        ref = btrfs_delayed_node_to_tree_ref(node);
-                       ret = __add_prelim_ref(prefs, ref->root, info_key,
+                       ret = __add_prelim_ref(prefs, ref->root, &op_key,
                                               ref->level + 1, 0, node->bytenr,
                                               node->ref_mod * sgn);
                        break;
@@ -348,7 +445,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
                        struct btrfs_delayed_tree_ref *ref;
 
                        ref = btrfs_delayed_node_to_tree_ref(node);
-                       ret = __add_prelim_ref(prefs, ref->root, info_key,
+                       ret = __add_prelim_ref(prefs, ref->root, NULL,
                                               ref->level + 1, ref->parent,
                                               node->bytenr,
                                               node->ref_mod * sgn);
@@ -356,8 +453,6 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
                }
                case BTRFS_EXTENT_DATA_REF_KEY: {
                        struct btrfs_delayed_data_ref *ref;
-                       struct btrfs_key key;
-
                        ref = btrfs_delayed_node_to_data_ref(node);
 
                        key.objectid = ref->objectid;
@@ -370,7 +465,6 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
                }
                case BTRFS_SHARED_DATA_REF_KEY: {
                        struct btrfs_delayed_data_ref *ref;
-                       struct btrfs_key key;
 
                        ref = btrfs_delayed_node_to_data_ref(node);
 
@@ -396,8 +490,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
  */
 static int __add_inline_refs(struct btrfs_fs_info *fs_info,
                             struct btrfs_path *path, u64 bytenr,
-                            struct btrfs_key *info_key, int *info_level,
-                            struct list_head *prefs)
+                            int *info_level, struct list_head *prefs)
 {
        int ret = 0;
        int slot;
@@ -426,12 +519,9 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
 
        if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                struct btrfs_tree_block_info *info;
-               struct btrfs_disk_key disk_key;
 
                info = (struct btrfs_tree_block_info *)ptr;
                *info_level = btrfs_tree_block_level(leaf, info);
-               btrfs_tree_block_key(leaf, info, &disk_key);
-               btrfs_disk_key_to_cpu(info_key, &disk_key);
                ptr += sizeof(struct btrfs_tree_block_info);
                BUG_ON(ptr > end);
        } else {
@@ -449,7 +539,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
 
                switch (type) {
                case BTRFS_SHARED_BLOCK_REF_KEY:
-                       ret = __add_prelim_ref(prefs, 0, info_key,
+                       ret = __add_prelim_ref(prefs, 0, NULL,
                                                *info_level + 1, offset,
                                                bytenr, 1);
                        break;
@@ -464,8 +554,9 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
                        break;
                }
                case BTRFS_TREE_BLOCK_REF_KEY:
-                       ret = __add_prelim_ref(prefs, offset, info_key,
-                                              *info_level + 1, 0, bytenr, 1);
+                       ret = __add_prelim_ref(prefs, offset, NULL,
+                                              *info_level + 1, 0,
+                                              bytenr, 1);
                        break;
                case BTRFS_EXTENT_DATA_REF_KEY: {
                        struct btrfs_extent_data_ref *dref;
@@ -479,8 +570,8 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
                        key.type = BTRFS_EXTENT_DATA_KEY;
                        key.offset = btrfs_extent_data_ref_offset(leaf, dref);
                        root = btrfs_extent_data_ref_root(leaf, dref);
-                       ret = __add_prelim_ref(prefs, root, &key, 0, 0, bytenr,
-                                               count);
+                       ret = __add_prelim_ref(prefs, root, &key, 0, 0,
+                                              bytenr, count);
                        break;
                }
                default:
@@ -498,8 +589,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
  */
 static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
                            struct btrfs_path *path, u64 bytenr,
-                           struct btrfs_key *info_key, int info_level,
-                           struct list_head *prefs)
+                           int info_level, struct list_head *prefs)
 {
        struct btrfs_root *extent_root = fs_info->extent_root;
        int ret;
@@ -529,7 +619,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
 
                switch (key.type) {
                case BTRFS_SHARED_BLOCK_REF_KEY:
-                       ret = __add_prelim_ref(prefs, 0, info_key,
+                       ret = __add_prelim_ref(prefs, 0, NULL,
                                                info_level + 1, key.offset,
                                                bytenr, 1);
                        break;
@@ -545,8 +635,9 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
                        break;
                }
                case BTRFS_TREE_BLOCK_REF_KEY:
-                       ret = __add_prelim_ref(prefs, key.offset, info_key,
-                                               info_level + 1, 0, bytenr, 1);
+                       ret = __add_prelim_ref(prefs, key.offset, NULL,
+                                              info_level + 1, 0,
+                                              bytenr, 1);
                        break;
                case BTRFS_EXTENT_DATA_REF_KEY: {
                        struct btrfs_extent_data_ref *dref;
@@ -562,7 +653,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
                        key.offset = btrfs_extent_data_ref_offset(leaf, dref);
                        root = btrfs_extent_data_ref_root(leaf, dref);
                        ret = __add_prelim_ref(prefs, root, &key, 0, 0,
-                                               bytenr, count);
+                                              bytenr, count);
                        break;
                }
                default:
@@ -588,7 +679,6 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 {
        struct btrfs_key key;
        struct btrfs_path *path;
-       struct btrfs_key info_key = { 0 };
        struct btrfs_delayed_ref_root *delayed_refs = NULL;
        struct btrfs_delayed_ref_head *head;
        int info_level = 0;
@@ -647,8 +737,7 @@ again:
                                btrfs_put_delayed_ref(&head->node);
                                goto again;
                        }
-                       ret = __add_delayed_refs(head, seq, &info_key,
-                                                &prefs_delayed);
+                       ret = __add_delayed_refs(head, seq, &prefs_delayed);
                        if (ret) {
                                spin_unlock(&delayed_refs->lock);
                                goto out;
@@ -668,10 +757,10 @@ again:
                if (key.objectid == bytenr &&
                    key.type == BTRFS_EXTENT_ITEM_KEY) {
                        ret = __add_inline_refs(fs_info, path, bytenr,
-                                               &info_key, &info_level, &prefs);
+                                               &info_level, &prefs);
                        if (ret)
                                goto out;
-                       ret = __add_keyed_refs(fs_info, path, bytenr, &info_key,
+                       ret = __add_keyed_refs(fs_info, path, bytenr,
                                               info_level, &prefs);
                        if (ret)
                                goto out;
@@ -679,16 +768,12 @@ again:
        }
        btrfs_release_path(path);
 
-       /*
-        * when adding the delayed refs above, the info_key might not have
-        * been known yet. Go over the list and replace the missing keys
-        */
-       list_for_each_entry(ref, &prefs_delayed, list) {
-               if ((ref->key.offset | ref->key.type | ref->key.objectid) == 0)
-                       memcpy(&ref->key, &info_key, sizeof(ref->key));
-       }
        list_splice_init(&prefs_delayed, &prefs);
 
+       ret = __add_missing_keys(fs_info, &prefs);
+       if (ret)
+               goto out;
+
        ret = __merge_refs(&prefs, 1);
        if (ret)
                goto out;