lib: radix-tree: add entry deletion support to __radix_tree_replace()
authorJohannes Weiner <hannes@cmpxchg.org>
Tue, 13 Dec 2016 00:43:46 +0000 (16:43 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 13 Dec 2016 02:55:08 +0000 (18:55 -0800)
Page cache shadow entry handling will be a lot simpler when it can use a
single generic replacement function for pages, shadow entries, and
emptying slots.

Make __radix_tree_replace() properly account insertions and deletions in
node->count and garbage collect nodes as they become empty.  Then
re-implement radix_tree_delete() on top of it.

Link: http://lkml.kernel.org/r/20161117193058.GC23430@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <mawilcox@linuxonhyperv.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
lib/radix-tree.c

index f91d5b0af654291bc559c64841da15e9066c6840..5d8930f3b3d8678262ce7c4551d0c1ced49b41a1 100644 (file)
@@ -538,6 +538,107 @@ out:
        return maxshift + RADIX_TREE_MAP_SHIFT;
 }
 
+/**
+ *     radix_tree_shrink    -    shrink radix tree to minimum height
+ *     @root           radix tree root
+ */
+static inline bool radix_tree_shrink(struct radix_tree_root *root)
+{
+       bool shrunk = false;
+
+       for (;;) {
+               struct radix_tree_node *node = root->rnode;
+               struct radix_tree_node *child;
+
+               if (!radix_tree_is_internal_node(node))
+                       break;
+               node = entry_to_node(node);
+
+               /*
+                * The candidate node has more than one child, or its child
+                * is not at the leftmost slot, or the child is a multiorder
+                * entry, we cannot shrink.
+                */
+               if (node->count != 1)
+                       break;
+               child = node->slots[0];
+               if (!child)
+                       break;
+               if (!radix_tree_is_internal_node(child) && node->shift)
+                       break;
+
+               if (radix_tree_is_internal_node(child))
+                       entry_to_node(child)->parent = NULL;
+
+               /*
+                * We don't need rcu_assign_pointer(), since we are simply
+                * moving the node from one part of the tree to another: if it
+                * was safe to dereference the old pointer to it
+                * (node->slots[0]), it will be safe to dereference the new
+                * one (root->rnode) as far as dependent read barriers go.
+                */
+               root->rnode = child;
+
+               /*
+                * We have a dilemma here. The node's slot[0] must not be
+                * NULLed in case there are concurrent lookups expecting to
+                * find the item. However if this was a bottom-level node,
+                * then it may be subject to the slot pointer being visible
+                * to callers dereferencing it. If item corresponding to
+                * slot[0] is subsequently deleted, these callers would expect
+                * their slot to become empty sooner or later.
+                *
+                * For example, lockless pagecache will look up a slot, deref
+                * the page pointer, and if the page has 0 refcount it means it
+                * was concurrently deleted from pagecache so try the deref
+                * again. Fortunately there is already a requirement for logic
+                * to retry the entire slot lookup -- the indirect pointer
+                * problem (replacing direct root node with an indirect pointer
+                * also results in a stale slot). So tag the slot as indirect
+                * to force callers to retry.
+                */
+               if (!radix_tree_is_internal_node(child))
+                       node->slots[0] = RADIX_TREE_RETRY;
+
+               radix_tree_node_free(node);
+               shrunk = true;
+       }
+
+       return shrunk;
+}
+
+static bool delete_node(struct radix_tree_root *root,
+                       struct radix_tree_node *node)
+{
+       bool deleted = false;
+
+       do {
+               struct radix_tree_node *parent;
+
+               if (node->count) {
+                       if (node == entry_to_node(root->rnode))
+                               deleted |= radix_tree_shrink(root);
+                       return deleted;
+               }
+
+               parent = node->parent;
+               if (parent) {
+                       parent->slots[node->offset] = NULL;
+                       parent->count--;
+               } else {
+                       root_tag_clear_all(root);
+                       root->rnode = NULL;
+               }
+
+               radix_tree_node_free(node);
+               deleted = true;
+
+               node = parent;
+       } while (node);
+
+       return deleted;
+}
+
 /**
  *     __radix_tree_create     -       create a slot in a radix tree
  *     @root:          radix tree root
@@ -759,18 +860,20 @@ static void replace_slot(struct radix_tree_root *root,
                         bool warn_typeswitch)
 {
        void *old = rcu_dereference_raw(*slot);
-       int exceptional;
+       int count, exceptional;
 
        WARN_ON_ONCE(radix_tree_is_internal_node(item));
-       WARN_ON_ONCE(!!item - !!old);
 
+       count = !!item - !!old;
        exceptional = !!radix_tree_exceptional_entry(item) -
                      !!radix_tree_exceptional_entry(old);
 
-       WARN_ON_ONCE(warn_typeswitch && exceptional);
+       WARN_ON_ONCE(warn_typeswitch && (count || exceptional));
 
-       if (node)
+       if (node) {
+               node->count += count;
                node->exceptional += exceptional;
+       }
 
        rcu_assign_pointer(*slot, item);
 }
@@ -790,12 +893,14 @@ void __radix_tree_replace(struct radix_tree_root *root,
                          void **slot, void *item)
 {
        /*
-        * This function supports replacing exceptional entries, but
-        * that needs accounting against the node unless the slot is
-        * root->rnode.
+        * This function supports replacing exceptional entries and
+        * deleting entries, but that needs accounting against the
+        * node unless the slot is root->rnode.
         */
        replace_slot(root, node, slot, item,
                     !node && slot != (void **)&root->rnode);
+
+       delete_node(root, node);
 }
 
 /**
@@ -810,8 +915,8 @@ void __radix_tree_replace(struct radix_tree_root *root,
  *
  * NOTE: This cannot be used to switch between non-entries (empty slots),
  * regular entries, and exceptional entries, as that requires accounting
- * inside the radix tree node. When switching from one type of entry to
- * another, use __radix_tree_lookup() and __radix_tree_replace().
+ * inside the radix tree node. When switching from one type of entry or
+ * deleting, use __radix_tree_lookup() and __radix_tree_replace().
  */
 void radix_tree_replace_slot(struct radix_tree_root *root,
                             void **slot, void *item)
@@ -1466,75 +1571,6 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
 }
 #endif /* CONFIG_SHMEM && CONFIG_SWAP */
 
-/**
- *     radix_tree_shrink    -    shrink radix tree to minimum height
- *     @root           radix tree root
- */
-static inline bool radix_tree_shrink(struct radix_tree_root *root)
-{
-       bool shrunk = false;
-
-       for (;;) {
-               struct radix_tree_node *node = root->rnode;
-               struct radix_tree_node *child;
-
-               if (!radix_tree_is_internal_node(node))
-                       break;
-               node = entry_to_node(node);
-
-               /*
-                * The candidate node has more than one child, or its child
-                * is not at the leftmost slot, or the child is a multiorder
-                * entry, we cannot shrink.
-                */
-               if (node->count != 1)
-                       break;
-               child = node->slots[0];
-               if (!child)
-                       break;
-               if (!radix_tree_is_internal_node(child) && node->shift)
-                       break;
-
-               if (radix_tree_is_internal_node(child))
-                       entry_to_node(child)->parent = NULL;
-
-               /*
-                * We don't need rcu_assign_pointer(), since we are simply
-                * moving the node from one part of the tree to another: if it
-                * was safe to dereference the old pointer to it
-                * (node->slots[0]), it will be safe to dereference the new
-                * one (root->rnode) as far as dependent read barriers go.
-                */
-               root->rnode = child;
-
-               /*
-                * We have a dilemma here. The node's slot[0] must not be
-                * NULLed in case there are concurrent lookups expecting to
-                * find the item. However if this was a bottom-level node,
-                * then it may be subject to the slot pointer being visible
-                * to callers dereferencing it. If item corresponding to
-                * slot[0] is subsequently deleted, these callers would expect
-                * their slot to become empty sooner or later.
-                *
-                * For example, lockless pagecache will look up a slot, deref
-                * the page pointer, and if the page has 0 refcount it means it
-                * was concurrently deleted from pagecache so try the deref
-                * again. Fortunately there is already a requirement for logic
-                * to retry the entire slot lookup -- the indirect pointer
-                * problem (replacing direct root node with an indirect pointer
-                * also results in a stale slot). So tag the slot as indirect
-                * to force callers to retry.
-                */
-               if (!radix_tree_is_internal_node(child))
-                       node->slots[0] = RADIX_TREE_RETRY;
-
-               radix_tree_node_free(node);
-               shrunk = true;
-       }
-
-       return shrunk;
-}
-
 /**
  *     __radix_tree_delete_node    -    try to free node after clearing a slot
  *     @root:          radix tree root
@@ -1549,33 +1585,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root)
 bool __radix_tree_delete_node(struct radix_tree_root *root,
                              struct radix_tree_node *node)
 {
-       bool deleted = false;
-
-       do {
-               struct radix_tree_node *parent;
-
-               if (node->count) {
-                       if (node == entry_to_node(root->rnode))
-                               deleted |= radix_tree_shrink(root);
-                       return deleted;
-               }
-
-               parent = node->parent;
-               if (parent) {
-                       parent->slots[node->offset] = NULL;
-                       parent->count--;
-               } else {
-                       root_tag_clear_all(root);
-                       root->rnode = NULL;
-               }
-
-               radix_tree_node_free(node);
-               deleted = true;
-
-               node = parent;
-       } while (node);
-
-       return deleted;
+       return delete_node(root, node);
 }
 
 static inline void delete_sibling_entries(struct radix_tree_node *node,
@@ -1632,12 +1642,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
                node_tag_clear(root, node, tag, offset);
 
        delete_sibling_entries(node, node_to_entry(slot), offset);
-       node->slots[offset] = NULL;
-       node->count--;
-       if (radix_tree_exceptional_entry(entry))
-               node->exceptional--;
-
-       __radix_tree_delete_node(root, node);
+       __radix_tree_replace(root, node, slot, NULL);
 
        return entry;
 }