lib: radix-tree: check accounting of existing slot replacement users
authorJohannes Weiner <hannes@cmpxchg.org>
Tue, 13 Dec 2016 00:43:43 +0000 (16:43 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 13 Dec 2016 02:55:08 +0000 (18:55 -0800)
The bug in khugepaged fixed earlier in this series shows that radix tree
slot replacement is fragile; and it will become more so when not only
NULL<->!NULL transitions need to be caught but transitions from and to
exceptional entries as well.  We need checks.

Re-implement radix_tree_replace_slot() on top of the sanity-checked
__radix_tree_replace().  This requires existing callers to also pass the
radix tree root, but it'll warn us when somebody replaces slots with
contents that need proper accounting (transitions between NULL entries,
real entries, exceptional entries) and where a replacement through the
slot pointer would corrupt the radix tree node counts.

Link: http://lkml.kernel.org/r/20161117193021.GB23430@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Suggested-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <mawilcox@linuxonhyperv.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
arch/s390/mm/gmap.c
drivers/sh/intc/virq.c
fs/dax.c
include/linux/radix-tree.h
lib/radix-tree.c
mm/filemap.c
mm/khugepaged.c
mm/migrate.c
mm/truncate.c
tools/testing/radix-tree/multiorder.c

index 3ba622702ce47cd2a908150f6b37e2801be47557..ec1f0dedb948df0766a4849bd429f0fb70eece27 100644 (file)
@@ -1015,7 +1015,7 @@ static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
        if (slot) {
                rmap->next = radix_tree_deref_slot_protected(slot,
                                                        &sg->guest_table_lock);
-               radix_tree_replace_slot(slot, rmap);
+               radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
        } else {
                rmap->next = NULL;
                radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,
index e7899624aa0b637d8602aad96fb7e7439590a286..35bbe288ddb4ebe11eccebce4ca776b1e47a1340 100644 (file)
@@ -254,7 +254,7 @@ restart:
 
                radix_tree_tag_clear(&d->tree, entry->enum_id,
                                     INTC_TAG_VIRQ_NEEDS_ALLOC);
-               radix_tree_replace_slot((void **)entries[i],
+               radix_tree_replace_slot(&d->tree, (void **)entries[i],
                                        &intc_irq_xlate[irq]);
        }
 
index db78bae0dc0f8daa7a6bd19a76f5ebb8f747e39b..85930c2a2749fcc33c3d6cf3d60dabfc4cc07c07 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -342,7 +342,7 @@ static inline void *lock_slot(struct address_space *mapping, void **slot)
                radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
 
        entry |= RADIX_DAX_ENTRY_LOCK;
-       radix_tree_replace_slot(slot, (void *)entry);
+       radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
        return (void *)entry;
 }
 
@@ -356,7 +356,7 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
                radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
 
        entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
-       radix_tree_replace_slot(slot, (void *)entry);
+       radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
        return (void *)entry;
 }
 
index 7ced8a70cc8b4712e1aab71393a5a22018a13da9..2d1b9b8be983a4c6b6e98d19b4209cbc1d367ee8 100644 (file)
@@ -249,20 +249,6 @@ static inline int radix_tree_exception(void *arg)
        return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK);
 }
 
-/**
- * radix_tree_replace_slot     - replace item in a slot
- * @pslot:     pointer to slot, returned by radix_tree_lookup_slot
- * @item:      new item to store in the slot.
- *
- * For use with radix_tree_lookup_slot().  Caller must hold tree write locked
- * across slot lookup and replacement.
- */
-static inline void radix_tree_replace_slot(void **pslot, void *item)
-{
-       BUG_ON(radix_tree_is_internal_node(item));
-       rcu_assign_pointer(*pslot, item);
-}
-
 int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
                        unsigned order, struct radix_tree_node **nodep,
                        void ***slotp);
@@ -280,6 +266,8 @@ void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
 void __radix_tree_replace(struct radix_tree_root *root,
                          struct radix_tree_node *node,
                          void **slot, void *item);
+void radix_tree_replace_slot(struct radix_tree_root *root,
+                            void **slot, void *item);
 bool __radix_tree_delete_node(struct radix_tree_root *root,
                              struct radix_tree_node *node);
 void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
index 7885796d35ae74bb615b567f2b1b0bae87562500..f91d5b0af654291bc559c64841da15e9066c6840 100644 (file)
@@ -753,19 +753,10 @@ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
 }
 EXPORT_SYMBOL(radix_tree_lookup);
 
-/**
- * __radix_tree_replace                - replace item in a slot
- * @root:      radix tree root
- * @node:      pointer to tree node
- * @slot:      pointer to slot in @node
- * @item:      new item to store in the slot.
- *
- * For use with __radix_tree_lookup().  Caller must hold tree write locked
- * across slot lookup and replacement.
- */
-void __radix_tree_replace(struct radix_tree_root *root,
-                         struct radix_tree_node *node,
-                         void **slot, void *item)
+static void replace_slot(struct radix_tree_root *root,
+                        struct radix_tree_node *node,
+                        void **slot, void *item,
+                        bool warn_typeswitch)
 {
        void *old = rcu_dereference_raw(*slot);
        int exceptional;
@@ -776,7 +767,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
        exceptional = !!radix_tree_exceptional_entry(item) -
                      !!radix_tree_exceptional_entry(old);
 
-       WARN_ON_ONCE(exceptional && !node && slot != (void **)&root->rnode);
+       WARN_ON_ONCE(warn_typeswitch && exceptional);
 
        if (node)
                node->exceptional += exceptional;
@@ -784,6 +775,50 @@ void __radix_tree_replace(struct radix_tree_root *root,
        rcu_assign_pointer(*slot, item);
 }
 
+/**
+ * __radix_tree_replace                - replace item in a slot
+ * @root:      radix tree root
+ * @node:      pointer to tree node
+ * @slot:      pointer to slot in @node
+ * @item:      new item to store in the slot.
+ *
+ * For use with __radix_tree_lookup().  Caller must hold tree write locked
+ * across slot lookup and replacement.
+ */
+void __radix_tree_replace(struct radix_tree_root *root,
+                         struct radix_tree_node *node,
+                         void **slot, void *item)
+{
+       /*
+        * This function supports replacing exceptional entries, but
+        * that needs accounting against the node unless the slot is
+        * root->rnode.
+        */
+       replace_slot(root, node, slot, item,
+                    !node && slot != (void **)&root->rnode);
+}
+
+/**
+ * radix_tree_replace_slot     - replace item in a slot
+ * @root:      radix tree root
+ * @slot:      pointer to slot
+ * @item:      new item to store in the slot.
+ *
+ * For use with radix_tree_lookup_slot(), radix_tree_gang_lookup_slot(),
+ * radix_tree_gang_lookup_tag_slot().  Caller must hold tree write locked
+ * across slot lookup and replacement.
+ *
+ * NOTE: This cannot be used to switch between non-entries (empty slots),
+ * regular entries, and exceptional entries, as that requires accounting
+ * inside the radix tree node. When switching from one type of entry to
+ * another, use __radix_tree_lookup() and __radix_tree_replace().
+ */
+void radix_tree_replace_slot(struct radix_tree_root *root,
+                            void **slot, void *item)
+{
+       replace_slot(root, NULL, slot, item, true);
+}
+
 /**
  *     radix_tree_tag_set - set a tag on a radix tree node
  *     @root:          radix tree root
index caa779f8797fcba05e856732779ca411d8b2781a..1ba726aef708ae4e4200b42c7670a05fada9715b 100644 (file)
@@ -147,7 +147,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
                                                      false);
                }
        }
-       radix_tree_replace_slot(slot, page);
+       radix_tree_replace_slot(&mapping->page_tree, slot, page);
        mapping->nrpages++;
        if (node) {
                workingset_node_pages_inc(node);
@@ -196,7 +196,7 @@ static void page_cache_tree_delete(struct address_space *mapping,
                        shadow = NULL;
                }
 
-               radix_tree_replace_slot(slot, shadow);
+               radix_tree_replace_slot(&mapping->page_tree, slot, shadow);
 
                if (!node)
                        break;
index 5d7c006373d30580f97d336c1cbb60edc7927735..7a50c726c5ae130cf66c1b0197eccf9d6645b73f 100644 (file)
@@ -1426,7 +1426,7 @@ static void collapse_shmem(struct mm_struct *mm,
                list_add_tail(&page->lru, &pagelist);
 
                /* Finally, replace with the new page. */
-               radix_tree_replace_slot(slot,
+               radix_tree_replace_slot(&mapping->page_tree, slot,
                                new_page + (index % HPAGE_PMD_NR));
 
                slot = radix_tree_iter_next(&iter);
@@ -1538,7 +1538,8 @@ tree_unlocked:
                        /* Unfreeze the page. */
                        list_del(&page->lru);
                        page_ref_unfreeze(page, 2);
-                       radix_tree_replace_slot(slot, page);
+                       radix_tree_replace_slot(&mapping->page_tree,
+                                               slot, page);
                        spin_unlock_irq(&mapping->tree_lock);
                        putback_lru_page(page);
                        unlock_page(page);
index 66ce6b490b13a9394411addc61ca43f819d5132b..0ed24b1fa77b89cd49738390cda724c5e74008f8 100644 (file)
@@ -482,7 +482,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
                SetPageDirty(newpage);
        }
 
-       radix_tree_replace_slot(pslot, newpage);
+       radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
 
        /*
         * Drop cache reference from old page by unfreezing
@@ -556,7 +556,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 
        get_page(newpage);
 
-       radix_tree_replace_slot(pslot, newpage);
+       radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
 
        page_ref_unfreeze(page, expected_count - 1);
 
index 8d8c62d89e6d101bce9373c20258d1bbc9df7fa2..3c631c357873202f1277caa15e2baf26c97a35ca 100644 (file)
@@ -49,7 +49,7 @@ static void clear_exceptional_entry(struct address_space *mapping,
                goto unlock;
        if (*slot != entry)
                goto unlock;
-       radix_tree_replace_slot(slot, NULL);
+       radix_tree_replace_slot(&mapping->page_tree, slot, NULL);
        mapping->nrexceptional--;
        if (!node)
                goto unlock;
index 05d7bc4889711d44d57d9b58224b21cd079b6c45..d1be94667a30cdcbad8a2dca86832bc73d5ed70d 100644 (file)
@@ -146,7 +146,7 @@ static void multiorder_check(unsigned long index, int order)
 
        slot = radix_tree_lookup_slot(&tree, index);
        free(*slot);
-       radix_tree_replace_slot(slot, item2);
+       radix_tree_replace_slot(&tree, slot, item2);
        for (i = min; i < max; i++) {
                struct item *item = item_lookup(&tree, i);
                assert(item != 0);