dm btree: introduce cursor api
author: Joe Thornber <ejt@redhat.com>
Thu, 15 Sep 2016 14:49:24 +0000 (10:49 -0400)
committer: Mike Snitzer <snitzer@redhat.com>
Thu, 22 Sep 2016 15:15:04 +0000 (11:15 -0400)
This uses prefetching to speed up iteration through a btree.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
drivers/md/persistent-data/dm-btree.c
drivers/md/persistent-data/dm-btree.h

index 2cc1877804c2e72b46d8dfe501cfd357fd1e418b..20a40329d84abd46c7491936e974967bd2bfd0d4 100644 (file)
@@ -994,3 +994,165 @@ int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
        return walk_node(info, root, fn, context);
 }
 EXPORT_SYMBOL_GPL(dm_btree_walk);
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Issues a prefetch for every block referenced by the node that is on top
+ * of the cursor stack.
+ *
+ * The entries of an internal node are always read as __le64 child block
+ * numbers (find_leaf() follows them that way), so they can be prefetched
+ * whatever the tree's value type is.  Only when the top node is a leaf do
+ * the values have to be le64 block indexes, so the value size is asserted
+ * for leaves only.  Asserting it unconditionally made a cursor opened with
+ * prefetch_leaves == false hit BUG_ON() as soon as it pushed an internal
+ * node of a tree whose values are not 8 bytes wide.
+ *
+ * Must only be called with at least one node on the stack.
+ */
+static void prefetch_values(struct dm_btree_cursor *c)
+{
+       unsigned i, nr;
+       __le64 value_le;
+       struct cursor_node *n = c->nodes + c->depth - 1;
+       struct btree_node *bn = dm_block_data(n->b);
+       struct dm_block_manager *bm = dm_tm_get_bm(c->info->tm);
+
+       /* Leaf values are only block numbers if they are le64 sized. */
+       if (le32_to_cpu(bn->header.flags) & LEAF_NODE)
+               BUG_ON(c->info->value_type.size != sizeof(value_le));
+
+       nr = le32_to_cpu(bn->header.nr_entries);
+       for (i = 0; i < nr; i++) {
+               /* memcpy rather than a cast: the value may be unaligned. */
+               memcpy(&value_le, value_ptr(bn, i), sizeof(value_le));
+               dm_bm_prefetch(bm, le64_to_cpu(value_le));
+       }
+}
+
+/*
+ * Reports whether the node on top of the cursor stack is a leaf.  The
+ * stack must not be empty.
+ */
+static bool leaf_node(struct dm_btree_cursor *c)
+{
+       struct btree_node *top = dm_block_data(c->nodes[c->depth - 1].b);
+       uint32_t flags = le32_to_cpu(top->header.flags);
+
+       return flags & LEAF_NODE;
+}
+
+/*
+ * Read locks block @b and makes it the new top of the cursor stack, with
+ * its index reset to the first entry.  Blocks referenced by the new node
+ * are prefetched if it is an internal node, or if the cursor was opened
+ * with prefetch_leaves set.
+ *
+ * Returns 0 on success, -EINVAL if the stack is full, or the error from
+ * bn_read_lock().  The stack is left unchanged on failure.
+ */
+static int push_node(struct dm_btree_cursor *c, dm_block_t b)
+{
+       struct cursor_node *slot;
+       bool want_prefetch;
+       int err;
+
+       if (c->depth >= DM_BTREE_CURSOR_MAX_DEPTH - 1) {
+               DMERR("couldn't push cursor node, stack depth too high");
+               return -EINVAL;
+       }
+
+       slot = &c->nodes[c->depth];
+       err = bn_read_lock(c->info, b, &slot->b);
+       if (err)
+               return err;
+
+       slot->index = 0;
+       c->depth++;
+
+       /* leaf_node() inspects the node that has just been pushed. */
+       want_prefetch = c->prefetch_leaves || !leaf_node(c);
+       if (want_prefetch)
+               prefetch_values(c);
+
+       return 0;
+}
+
+/*
+ * Removes the top node from the cursor stack and unlocks its block.  The
+ * stack must not be empty.
+ */
+static void pop_node(struct dm_btree_cursor *c)
+{
+       struct cursor_node *top = &c->nodes[--c->depth];
+
+       unlock_block(c->info, top->b);
+}
+
+/*
+ * Steps the top node's index to its next entry.  A node whose entries are
+ * used up is popped and the step is retried on its parent.
+ *
+ * Returns 0 once some node has a valid next entry, or -ENODATA when the
+ * stack has been emptied (nothing left to visit).
+ */
+static int inc_or_backtrack(struct dm_btree_cursor *c)
+{
+       while (c->depth) {
+               struct cursor_node *top = &c->nodes[c->depth - 1];
+               struct btree_node *node = dm_block_data(top->b);
+
+               if (++top->index < le32_to_cpu(node->header.nr_entries))
+                       return 0;
+
+               pop_node(c);
+       }
+
+       return -ENODATA;
+}
+
+/*
+ * Descends from the node on top of the stack until a leaf is on top,
+ * following at each internal node the child selected by that node's
+ * current index.  The stack must not be empty on entry.
+ *
+ * Returns 0 when a leaf with at least one entry has been reached,
+ * -ENODATA if the leaf reached has no entries, or the error from
+ * push_node().  Nodes already on the stack stay there on failure.
+ */
+static int find_leaf(struct dm_btree_cursor *c)
+{
+       for (;;) {
+               struct cursor_node *top = &c->nodes[c->depth - 1];
+               struct btree_node *node = dm_block_data(top->b);
+               __le64 child_le;
+               int err;
+
+               if (le32_to_cpu(node->header.flags) & LEAF_NODE) {
+                       if (!le32_to_cpu(node->header.nr_entries))
+                               return -ENODATA;
+                       return 0;
+               }
+
+               /* Internal node: its value is the child's block number. */
+               memcpy(&child_le, value_ptr(node, top->index), sizeof(child_le));
+               err = push_node(c, le64_to_cpu(child_le));
+               if (err) {
+                       DMERR("push_node failed");
+                       return err;
+               }
+       }
+}
+
+/*
+ * Initialises @c for the btree described by @info and rooted at @root,
+ * then positions it on the first entry of the first leaf.
+ *
+ * @prefetch_leaves: also prefetch the blocks named by leaf values; the
+ *                   tree's values must then be le64 block indexes.
+ *
+ * Returns 0 on success, -ENODATA if the first leaf has no entries, or
+ * another negative error code if a node could not be pushed.
+ *
+ * On any failure every node that was pushed is unlocked again before
+ * returning, so a caller that bails out on error does not leak read
+ * locks.  The cursor is left with depth 0, which makes a subsequent
+ * dm_btree_cursor_end() a harmless no-op.
+ */
+int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root,
+                         bool prefetch_leaves, struct dm_btree_cursor *c)
+{
+       int r;
+
+       c->info = info;
+       c->root = root;
+       c->depth = 0;
+       c->prefetch_leaves = prefetch_leaves;
+
+       /* Nothing is held yet if pushing the root itself fails. */
+       r = push_node(c, root);
+       if (r)
+               return r;
+
+       r = find_leaf(c);
+       if (r) {
+               /* Unwind the root and any children find_leaf() pushed. */
+               while (c->depth)
+                       pop_node(c);
+       }
+
+       return r;
+}
+EXPORT_SYMBOL_GPL(dm_btree_cursor_begin);
+
+/*
+ * Finishes with the cursor: pops, and thereby unlocks, every node that is
+ * still on its stack.  Does nothing if the stack is already empty.
+ */
+void dm_btree_cursor_end(struct dm_btree_cursor *c)
+{
+       while (c->depth != 0)
+               pop_node(c);
+}
+EXPORT_SYMBOL_GPL(dm_btree_cursor_end);
+
+/*
+ * Moves the cursor on to the next leaf entry, descending into the next
+ * leaf when the current one is used up.
+ *
+ * Returns 0 on success, -ENODATA when there are no further entries, or
+ * the error reported by find_leaf().
+ */
+int dm_btree_cursor_next(struct dm_btree_cursor *c)
+{
+       int err;
+
+       err = inc_or_backtrack(c);
+       if (err)
+               return err;
+
+       err = find_leaf(c);
+       if (err)
+               DMERR("find_leaf failed");
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(dm_btree_cursor_next);
+
+/*
+ * Reads the entry the cursor is currently positioned on.
+ *
+ * @key:      receives the key, converted to cpu endianness.
+ * @value_le: receives the raw value exactly as stored in the node; must
+ *            have room for info->value_type.size bytes.
+ *
+ * Returns 0 on success, -ENODATA if the cursor holds no node, or -EINVAL
+ * if the node on top of the stack is an internal node.
+ */
+int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le)
+{
+       struct cursor_node *top;
+       struct btree_node *node;
+
+       if (!c->depth)
+               return -ENODATA;
+
+       top = &c->nodes[c->depth - 1];
+       node = dm_block_data(top->b);
+
+       if (le32_to_cpu(node->header.flags) & INTERNAL_NODE)
+               return -EINVAL;
+
+       *key = le64_to_cpu(*key_ptr(node, top->index));
+       memcpy(value_le, value_ptr(node, top->index), c->info->value_type.size);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(dm_btree_cursor_get_value);
index c74301fa5a379517b9c37e7e98ab380d97f3b9b0..db9bd26adf31f54f78ba7470658a1fa8bcf067ac 100644 (file)
@@ -176,4 +176,39 @@ int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
                  int (*fn)(void *context, uint64_t *keys, void *leaf),
                  void *context);
 
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Cursor API.  This does not follow the rolling lock convention.  Since we
+ * know the order that values are required we can issue prefetches to speed
+ * up iteration.  Use on a single level btree only.
+ */
+#define DM_BTREE_CURSOR_MAX_DEPTH 16 /* size of dm_btree_cursor.nodes[]; limits how deep a tree the cursor can descend */
+
+struct cursor_node { /* one entry of the cursor's stack of nodes */
+       struct dm_block *b; /* block holding this node; locked by push_node(), unlocked by pop_node() */
+       unsigned index; /* entry within the node that the cursor is currently on */
+};
+
+struct dm_btree_cursor { /* iteration state; set up by dm_btree_cursor_begin() */
+       struct dm_btree_info *info; /* btree description passed to dm_btree_cursor_begin() */
+       dm_block_t root; /* root block passed to dm_btree_cursor_begin() */
+
+       bool prefetch_leaves; /* also prefetch the blocks named by leaf values */
+       unsigned depth; /* number of nodes[] entries in use; 0 means no node is held */
+       struct cursor_node nodes[DM_BTREE_CURSOR_MAX_DEPTH]; /* stack from the root down; nodes[depth - 1] is the current node */
+};
+
+/*
+ * Creates a fresh cursor.  If prefetch_leaves is set then it is assumed
+ * the btree contains block indexes that will be prefetched.  The cursor is
+ * quite large, so you probably don't want to put it on the stack.
+ */
+int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root,
+                         bool prefetch_leaves, struct dm_btree_cursor *c); /* 0 with the cursor on the first leaf entry; -ENODATA if that leaf is empty; other negative errno on failure */
+void dm_btree_cursor_end(struct dm_btree_cursor *c); /* unlocks every node the cursor still holds; always pair with a begin */
+int dm_btree_cursor_next(struct dm_btree_cursor *c); /* 0 after moving to the next leaf entry; -ENODATA when there are no more entries */
+int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le); /* *key is cpu-endian, value_le gets the raw stored value; -ENODATA if no node is held, -EINVAL on an internal node */
+
+
 #endif /* _LINUX_DM_BTREE_H */