From d37cacc5adace7f3e0824e1f559192ad7299d029 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 17 Dec 2016 08:18:17 -0500 Subject: [PATCH] ida: Use exceptional entries for small IDAs We can use the root entry as a bitmap and save allocating a 128 byte bitmap for an IDA that contains only a few entries (30 on a 32-bit machine, 62 on a 64-bit machine). This costs about 300 bytes of kernel text on x86-64, so as long as 3 IDAs fall into this category, this is a net win for memory consumption. Thanks to Rasmus Villemoes for his work documenting the problem and collecting statistics on IDAs. Signed-off-by: Matthew Wilcox --- lib/idr.c | 87 +++++++++++++++++++++++++-- lib/radix-tree.c | 8 +++ tools/testing/radix-tree/idr-test.c | 93 ++++++++++++++++++++++++++++- 3 files changed, 181 insertions(+), 7 deletions(-) diff --git a/lib/idr.c b/lib/idr.c index 2abd7769c430..7d25e240bc5a 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -194,6 +194,39 @@ EXPORT_SYMBOL(idr_replace); * limitation, it should be quite straightforward to raise the maximum. */ +/* + * Developer's notes: + * + * The IDA uses the functionality provided by the IDR & radix tree to store + * bitmaps in each entry. The IDR_FREE tag means there is at least one bit + * free, unlike the IDR where it means at least one entry is free. + * + * I considered telling the radix tree that each slot is an order-10 node + * and storing the bit numbers in the radix tree, but the radix tree can't + * allow a single multiorder entry at index 0, which would significantly + * increase memory consumption for the IDA. So instead we divide the index + * by the number of bits in the leaf bitmap before doing a radix tree lookup. + * + * As an optimisation, if there are only a few low bits set in any given + * leaf, instead of allocating a 128-byte bitmap, we use the 'exceptional + * entry' functionality of the radix tree to store BITS_PER_LONG - 2 bits + * directly in the entry. By being really tricksy, we could store + * BITS_PER_LONG - 1 bits, but there're diminishing returns after optimising + * for 0-3 allocated IDs. + * + * We allow the radix tree 'exceptional' count to get out of date. Nothing + * in the IDA nor the radix tree code checks it. If it becomes important + * to maintain an accurate exceptional count, switch the rcu_assign_pointer() + * calls to radix_tree_iter_replace() which will correct the exceptional + * count. + * + * The IDA always requires a lock to alloc/free. If we add a 'test_bit' + * equivalent, it will still need locking. Going to RCU lookup would require + * using RCU to free bitmaps, and that's not trivial without embedding an + * RCU head in the bitmap, which adds a 2-pointer overhead to each 128-byte + * bitmap, which is excessive. + */ + #define IDA_MAX (0x80000000U / IDA_BITMAP_BITS) /** @@ -221,11 +254,12 @@ int ida_get_new_above(struct ida *ida, int start, int *id) struct radix_tree_iter iter; struct ida_bitmap *bitmap; unsigned long index; - unsigned bit; + unsigned bit, ebit; int new; index = start / IDA_BITMAP_BITS; bit = start % IDA_BITMAP_BITS; + ebit = bit + RADIX_TREE_EXCEPTIONAL_SHIFT; slot = radix_tree_iter_init(&iter, index); for (;;) { @@ -240,10 +274,29 @@ int ida_get_new_above(struct ida *ida, int start, int *id) return PTR_ERR(slot); } } - if (iter.index > index) + if (iter.index > index) { bit = 0; + ebit = RADIX_TREE_EXCEPTIONAL_SHIFT; + } new = iter.index * IDA_BITMAP_BITS; bitmap = rcu_dereference_raw(*slot); + if (radix_tree_exception(bitmap)) { + unsigned long tmp = (unsigned long)bitmap; + ebit = find_next_zero_bit(&tmp, BITS_PER_LONG, ebit); + if (ebit < BITS_PER_LONG) { + tmp |= 1UL << ebit; + rcu_assign_pointer(*slot, (void *)tmp); + *id = new + ebit - RADIX_TREE_EXCEPTIONAL_SHIFT; + return 0; + } + bitmap = this_cpu_xchg(ida_bitmap, NULL); + if (!bitmap) + return -EAGAIN; + memset(bitmap, 0, sizeof(*bitmap)); + bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT; + rcu_assign_pointer(*slot, bitmap); + } + if (bitmap) { bit = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, bit); @@ -261,6 +314,14 @@ int ida_get_new_above(struct ida *ida, int start, int *id) new += bit; if (new < 0) return -ENOSPC; + if (ebit < BITS_PER_LONG) { + bitmap = (void *)((1UL << ebit) | + RADIX_TREE_EXCEPTIONAL_ENTRY); + radix_tree_iter_replace(root, &iter, slot, + bitmap); + *id = new; + return 0; + } bitmap = this_cpu_xchg(ida_bitmap, NULL); if (!bitmap) return -EAGAIN; @@ -287,6 +348,7 @@ void ida_remove(struct ida *ida, int id) unsigned long index = id / IDA_BITMAP_BITS; unsigned offset = id % IDA_BITMAP_BITS; struct ida_bitmap *bitmap; + unsigned long *btmp; struct radix_tree_iter iter; void **slot; @@ -295,12 +357,24 @@ void ida_remove(struct ida *ida, int id) goto err; bitmap = rcu_dereference_raw(*slot); - if (!test_bit(offset, bitmap->bitmap)) + if (radix_tree_exception(bitmap)) { + btmp = (unsigned long *)slot; + offset += RADIX_TREE_EXCEPTIONAL_SHIFT; + if (offset >= BITS_PER_LONG) + goto err; + } else { + btmp = bitmap->bitmap; + } + if (!test_bit(offset, btmp)) goto err; - __clear_bit(offset, bitmap->bitmap); + __clear_bit(offset, btmp); radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE); - if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) { + if (radix_tree_exception(bitmap)) { + if (rcu_dereference_raw(*slot) == + (void *)RADIX_TREE_EXCEPTIONAL_ENTRY) + radix_tree_iter_delete(&ida->ida_rt, &iter, slot); + } else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) { kfree(bitmap); radix_tree_iter_delete(&ida->ida_rt, &iter, slot); } @@ -326,7 +400,8 @@ void ida_destroy(struct ida *ida) radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) { struct ida_bitmap *bitmap = rcu_dereference_raw(*slot); - kfree(bitmap); + if (!radix_tree_exception(bitmap)) + kfree(bitmap); radix_tree_iter_delete(&ida->ida_rt, &iter, slot); } } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 7b9f8515033e..14130ab197c0 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -338,6 +338,14 @@ static void dump_ida_node(void *entry, unsigned long index) for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) dump_ida_node(node->slots[i], index | (i << node->shift)); + } else if (radix_tree_exceptional_entry(entry)) { + pr_debug("ida excp: %p offset %d indices %lu-%lu data %lx\n", + entry, (int)(index & RADIX_TREE_MAP_MASK), + index * IDA_BITMAP_BITS, + index * IDA_BITMAP_BITS + BITS_PER_LONG - + RADIX_TREE_EXCEPTIONAL_SHIFT, + (unsigned long)entry >> + RADIX_TREE_EXCEPTIONAL_SHIFT); } else { struct ida_bitmap *bitmap = entry; diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 4dffad9284e0..59081122c63d 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -11,6 +11,7 @@ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. */ +#include #include #include #include @@ -214,7 +215,7 @@ void ida_check_nomem(void) DEFINE_IDA(ida); int id, err; - err = ida_get_new(&ida, &id); + err = ida_get_new_above(&ida, 256, &id); assert(err == -EAGAIN); err = ida_get_new_above(&ida, 1UL << 30, &id); assert(err == -EAGAIN); @@ -246,6 +247,66 @@ void ida_check_leaf(void) assert(ida_is_empty(&ida)); } +/* + * Check handling of conversions between exceptional entries and full bitmaps. + */ +void ida_check_conv(void) +{ + DEFINE_IDA(ida); + int id; + unsigned long i; + + for (i = 0; i < IDA_BITMAP_BITS * 2; i += IDA_BITMAP_BITS) { + assert(ida_pre_get(&ida, GFP_KERNEL)); + assert(!ida_get_new_above(&ida, i + 1, &id)); + assert(id == i + 1); + assert(!ida_get_new_above(&ida, i + BITS_PER_LONG, &id)); + assert(id == i + BITS_PER_LONG); + ida_remove(&ida, i + 1); + ida_remove(&ida, i + BITS_PER_LONG); + assert(ida_is_empty(&ida)); + } + + assert(ida_pre_get(&ida, GFP_KERNEL)); + + for (i = 0; i < IDA_BITMAP_BITS * 2; i++) { + assert(ida_pre_get(&ida, GFP_KERNEL)); + assert(!ida_get_new(&ida, &id)); + assert(id == i); + } + + for (i = IDA_BITMAP_BITS * 2; i > 0; i--) { + ida_remove(&ida, i - 1); + } + assert(ida_is_empty(&ida)); + + for (i = 0; i < IDA_BITMAP_BITS + BITS_PER_LONG - 4; i++) { + assert(ida_pre_get(&ida, GFP_KERNEL)); + assert(!ida_get_new(&ida, &id)); + assert(id == i); + } + + for (i = IDA_BITMAP_BITS + BITS_PER_LONG - 4; i > 0; i--) { + ida_remove(&ida, i - 1); + } + assert(ida_is_empty(&ida)); + + radix_tree_cpu_dead(1); + for (i = 0; i < 1000000; i++) { + int err = ida_get_new(&ida, &id); + if (err == -EAGAIN) { + assert((i % IDA_BITMAP_BITS) == (BITS_PER_LONG - 2)); + assert(ida_pre_get(&ida, GFP_KERNEL)); + err = ida_get_new(&ida, &id); + } else { + assert((i % IDA_BITMAP_BITS) != (BITS_PER_LONG - 2)); + } + assert(!err); + assert(id == i); + } + ida_destroy(&ida); +} + /* * Check allocations up to and slightly above the maximum allowed (2^31-1) ID. * Allocating up to 2^31-1 should succeed, and then allocating the next one @@ -273,6 +334,34 @@ void ida_check_max(void) } } +void ida_check_random(void) +{ + DEFINE_IDA(ida); + DECLARE_BITMAP(bitmap, 2048); + int id; + unsigned int i; + time_t s = time(NULL); + + repeat: + memset(bitmap, 0, sizeof(bitmap)); + for (i = 0; i < 100000; i++) { + int i = rand(); + int bit = i & 2047; + if (test_bit(bit, bitmap)) { + __clear_bit(bit, bitmap); + ida_remove(&ida, bit); + } else { + __set_bit(bit, bitmap); + ida_pre_get(&ida, GFP_KERNEL); + assert(!ida_get_new_above(&ida, bit, &id)); + assert(id == bit); + } + } + ida_destroy(&ida); + if (time(NULL) < s + 10) + goto repeat; +} + void ida_checks(void) { DEFINE_IDA(ida); @@ -337,6 +426,8 @@ void ida_checks(void) ida_check_leaf(); ida_check_max(); + ida_check_conv(); + ida_check_random(); radix_tree_cpu_dead(1); } -- 2.20.1