Btrfs: Reduce contention on the root node
author Chris Mason <chris.mason@oracle.com>
Wed, 25 Jun 2008 20:14:04 +0000 (16:14 -0400)
committer Chris Mason <chris.mason@oracle.com>
Thu, 25 Sep 2008 15:04:04 +0000 (11:04 -0400)
This calls unlock_up sooner in btrfs_search_slot in order to decrease the
amount of work done with the higher level tree locks held.

Also, it changes btrfs_tree_lock to spin for a bit against the page lock
before scheduling.  This makes a big difference in context switch rate under
highly contended workloads.

Longer term, a better locking structure is needed than the page lock.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/ctree.c
fs/btrfs/locking.c

index 0cb80f32a9c7aef48db4699ee88416343cabceec..c6759fc1004a14f247889e1bc13784edbc34b69f 100644 (file)
@@ -1313,16 +1313,13 @@ again:
                                slot = p->slots[level];
                                BUG_ON(btrfs_header_nritems(b) == 1);
                        }
+                       unlock_up(p, level, lowest_unlock);
+
                        /* this is only true while dropping a snapshot */
                        if (level == lowest_level) {
-                               unlock_up(p, level, lowest_unlock);
                                break;
                        }
 
-                       if (should_reada)
-                               reada_for_search(root, p, level, slot,
-                                                key->objectid);
-
                        blocknr = btrfs_node_blockptr(b, slot);
                        gen = btrfs_node_ptr_generation(b, slot);
                        blocksize = btrfs_level_size(root, level - 1);
@@ -1340,6 +1337,11 @@ again:
                                        btrfs_release_path(NULL, p);
                                        if (tmp)
                                                free_extent_buffer(tmp);
+                                       if (should_reada)
+                                               reada_for_search(root, p,
+                                                                level, slot,
+                                                                key->objectid);
+
                                        tmp = read_tree_block(root, blocknr,
                                                         blocksize, gen);
                                        if (tmp)
@@ -1348,12 +1350,15 @@ again:
                                } else {
                                        if (tmp)
                                                free_extent_buffer(tmp);
+                                       if (should_reada)
+                                               reada_for_search(root, p,
+                                                                level, slot,
+                                                                key->objectid);
                                        b = read_node_slot(root, b, slot);
                                }
                        }
                        if (!p->skip_locking)
                                btrfs_tree_lock(b);
-                       unlock_up(p, level, lowest_unlock);
                } else {
                        p->slots[level] = slot;
                        if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
index 80813a307b4b38a6a511af61f5a49c4475125406..058a506a0dd887649441dc1432f895d99fc67d16 100644 (file)
 
 int btrfs_tree_lock(struct extent_buffer *eb)
 {
+       int i;
+
+       if (!TestSetPageLocked(eb->first_page))
+               return 0;
+       for (i = 0; i < 512; i++) {
+               cpu_relax();
+               if (!TestSetPageLocked(eb->first_page))
+                       return 0;
+       }
+       cpu_relax();
        lock_page(eb->first_page);
        return 0;
 }