return path;
}
+/*
+ * set all locked nodes in the path to blocking locks. This should
+ * be done before scheduling
+ */
+noinline void btrfs_set_path_blocking(struct btrfs_path *p)
+{
+ int i;
+ for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
+ if (p->nodes[i] && p->locks[i])
+ btrfs_set_lock_blocking(p->nodes[i]);
+ }
+}
+
+/*
+ * reset all the locked nodes in the patch to spinning locks.
+ */
+noinline void btrfs_clear_path_blocking(struct btrfs_path *p)
+{
+ int i;
+ for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
+ if (p->nodes[i] && p->locks[i])
+ btrfs_clear_lock_blocking(p->nodes[i]);
+ }
+}
+
/* this also releases the path */
void btrfs_free_path(struct btrfs_path *p)
{
if (IS_ERR(cow))
return PTR_ERR(cow);
+ /* cow is set to blocking by btrfs_init_new_buffer */
+
copy_extent_buffer(cow, buf, 0, 0, cow->len);
btrfs_set_header_bytenr(cow, cow->start);
btrfs_set_header_generation(cow, trans->transid);
}
search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
+
+ if (parent)
+ btrfs_set_lock_blocking(parent);
+ btrfs_set_lock_blocking(buf);
+
ret = __btrfs_cow_block(trans, root, buf, parent,
parent_slot, cow_ret, search_start, 0,
prealloc_dest);
if (parent_nritems == 1)
return 0;
+ btrfs_set_lock_blocking(parent);
+
for (i = start_slot; i < end_slot; i++) {
int close = 1;
search_start = last_block;
btrfs_tree_lock(cur);
+ btrfs_set_lock_blocking(cur);
err = __btrfs_cow_block(trans, root, cur, parent, i,
&cur, search_start,
min(16 * blocksize,
return 0;
mid = path->nodes[level];
+
WARN_ON(!path->locks[level]);
WARN_ON(btrfs_header_generation(mid) != trans->transid);
/* promote the child to a root */
child = read_node_slot(root, mid, 0);
btrfs_tree_lock(child);
+ btrfs_set_lock_blocking(child);
BUG_ON(!child);
ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0);
BUG_ON(ret);
add_root_to_dirty_list(root);
btrfs_tree_unlock(child);
+
path->locks[level] = 0;
path->nodes[level] = NULL;
clean_tree_block(trans, root, mid);
left = read_node_slot(root, parent, pslot - 1);
if (left) {
btrfs_tree_lock(left);
+ btrfs_set_lock_blocking(left);
wret = btrfs_cow_block(trans, root, left,
parent, pslot - 1, &left, 0);
if (wret) {
right = read_node_slot(root, parent, pslot + 1);
if (right) {
btrfs_tree_lock(right);
+ btrfs_set_lock_blocking(right);
wret = btrfs_cow_block(trans, root, right,
parent, pslot + 1, &right, 0);
if (wret) {
u32 left_nr;
btrfs_tree_lock(left);
+ btrfs_set_lock_blocking(left);
+
left_nr = btrfs_header_nritems(left);
if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
wret = 1;
*/
if (right) {
u32 right_nr;
+
btrfs_tree_lock(right);
+ btrfs_set_lock_blocking(right);
+
right_nr = btrfs_header_nritems(right);
if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
wret = 1;
}
}
+/*
+ * returns -EAGAIN if it had to drop the path, or zero if everything was in
+ * cache
+ */
+static noinline int reada_for_balance(struct btrfs_root *root,
+ struct btrfs_path *path, int level)
+{
+ int slot;
+ int nritems;
+ struct extent_buffer *parent;
+ struct extent_buffer *eb;
+ u64 gen;
+ u64 block1 = 0;
+ u64 block2 = 0;
+ int ret = 0;
+ int blocksize;
+
+ parent = path->nodes[level - 1];
+ if (!parent)
+ return 0;
+
+ nritems = btrfs_header_nritems(parent);
+ slot = path->slots[level];
+ blocksize = btrfs_level_size(root, level);
+
+ if (slot > 0) {
+ block1 = btrfs_node_blockptr(parent, slot - 1);
+ gen = btrfs_node_ptr_generation(parent, slot - 1);
+ eb = btrfs_find_tree_block(root, block1, blocksize);
+ if (eb && btrfs_buffer_uptodate(eb, gen))
+ block1 = 0;
+ free_extent_buffer(eb);
+ }
+ if (slot < nritems) {
+ block2 = btrfs_node_blockptr(parent, slot + 1);
+ gen = btrfs_node_ptr_generation(parent, slot + 1);
+ eb = btrfs_find_tree_block(root, block2, blocksize);
+ if (eb && btrfs_buffer_uptodate(eb, gen))
+ block2 = 0;
+ free_extent_buffer(eb);
+ }
+ if (block1 || block2) {
+ ret = -EAGAIN;
+ btrfs_release_path(root, path);
+ if (block1)
+ readahead_tree_block(root, block1, blocksize, 0);
+ if (block2)
+ readahead_tree_block(root, block2, blocksize, 0);
+
+ if (block1) {
+ eb = read_tree_block(root, block1, blocksize, 0);
+ free_extent_buffer(eb);
+ }
+ if (block1) {
+ eb = read_tree_block(root, block2, blocksize, 0);
+ free_extent_buffer(eb);
+ }
+ }
+ return ret;
+}
+
+
/*
* when we walk down the tree, it is usually safe to unlock the higher layers
* in the tree. The exceptions are when our path goes through slot 0, because
}
}
+/*
+ * This releases any locks held in the path starting at level and
+ * going all the way up to the root.
+ *
+ * btrfs_search_slot will keep the lock held on higher nodes in a few
+ * corner cases, such as COW of the block at slot zero in the node. This
+ * ignores those rules, and it should only be called when there are no
+ * more updates to be done higher up in the tree.
+ */
+noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
+{
+ int i;
+
+ if (path->keep_locks || path->lowest_level)
+ return;
+
+ for (i = level; i < BTRFS_MAX_LEVEL; i++) {
+ if (!path->nodes[i])
+ break;
+ if (!path->locks[i])
+ break;
+ btrfs_tree_unlock(path->nodes[i]);
+ path->locks[i] = 0;
+ }
+}
+
/*
* look for key in the tree. path is filled in with nodes along the way
* if key is found, we return zero and you can find the item in the leaf
*/
if (prealloc_block.objectid &&
prealloc_block.offset != b->len) {
+ btrfs_set_path_blocking(p);
btrfs_free_reserved_extent(root,
prealloc_block.objectid,
prealloc_block.offset);
goto again;
}
+ btrfs_set_path_blocking(p);
+
wret = btrfs_cow_block(trans, root, b,
p->nodes[level + 1],
p->slots[level + 1],
if (!p->skip_locking)
p->locks[level] = 1;
+ btrfs_clear_path_blocking(p);
+
+ /*
+ * we have a lock on b and as long as we aren't changing
+ * the tree, there is no way to for the items in b to change.
+ * It is safe to drop the lock on our parent before we
+ * go through the expensive btree search on b.
+ *
+ * If cow is true, then we might be changing slot zero,
+ * which may require changing the parent. So, we can't
+ * drop the lock until after we know which slot we're
+ * operating on.
+ */
+ if (!cow)
+ btrfs_unlock_up_safe(p, level + 1);
+
ret = check_block(root, p, level);
if (ret) {
ret = -1;
}
ret = bin_search(b, key, level, &slot);
+
if (level != 0) {
if (ret && slot > 0)
slot -= 1;
if ((p->search_for_split || ins_len > 0) &&
btrfs_header_nritems(b) >=
BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
- int sret = split_node(trans, root, p, level);
+ int sret;
+
+ sret = reada_for_balance(root, p, level);
+ if (sret)
+ goto again;
+
+ btrfs_set_path_blocking(p);
+ sret = split_node(trans, root, p, level);
+ btrfs_clear_path_blocking(p);
+
BUG_ON(sret > 0);
if (sret) {
ret = sret;
b = p->nodes[level];
slot = p->slots[level];
} else if (ins_len < 0) {
- int sret = balance_level(trans, root, p,
- level);
+ int sret;
+
+ sret = reada_for_balance(root, p, level);
+ if (sret)
+ goto again;
+
+ btrfs_set_path_blocking(p);
+ sret = balance_level(trans, root, p, level);
+ btrfs_clear_path_blocking(p);
+
if (sret) {
ret = sret;
goto done;
* of the btree by dropping locks before
* we read.
*/
- if (level > 1) {
+ if (level > 0) {
btrfs_release_path(NULL, p);
if (tmp)
free_extent_buffer(tmp);
free_extent_buffer(tmp);
goto again;
} else {
+ btrfs_set_path_blocking(p);
if (tmp)
free_extent_buffer(tmp);
if (should_reada)
b = read_node_slot(root, b, slot);
}
}
- if (!p->skip_locking)
- btrfs_tree_lock(b);
+ if (!p->skip_locking) {
+ int lret;
+
+ btrfs_clear_path_blocking(p);
+ lret = btrfs_try_spin_lock(b);
+
+ if (!lret) {
+ btrfs_set_path_blocking(p);
+ btrfs_tree_lock(b);
+ btrfs_clear_path_blocking(p);
+ }
+ }
} else {
p->slots[level] = slot;
if (ins_len > 0 &&
btrfs_leaf_free_space(root, b) < ins_len) {
- int sret = split_leaf(trans, root, key,
+ int sret;
+
+ btrfs_set_path_blocking(p);
+ sret = split_leaf(trans, root, key,
p, ins_len, ret == 0);
+ btrfs_clear_path_blocking(p);
+
BUG_ON(sret > 0);
if (sret) {
ret = sret;
}
ret = 1;
done:
+ /*
+ * we don't really know what they plan on doing with the path
+ * from here on, so for now just mark it as blocking
+ */
+ btrfs_set_path_blocking(p);
if (prealloc_block.objectid) {
btrfs_free_reserved_extent(root,
prealloc_block.objectid,
prealloc_block.offset);
}
-
return ret;
}
ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0);
BUG_ON(ret);
+ btrfs_set_lock_blocking(eb);
+
parent = eb;
while (1) {
level = btrfs_header_level(parent);
eb = read_tree_block(root, bytenr, blocksize,
generation);
btrfs_tree_lock(eb);
+ btrfs_set_lock_blocking(eb);
}
/*
eb = read_tree_block(root, bytenr, blocksize,
generation);
btrfs_tree_lock(eb);
+ btrfs_set_lock_blocking(eb);
}
ret = btrfs_cow_block(trans, root, eb, parent, slot,
right = read_node_slot(root, upper, slot + 1);
btrfs_tree_lock(right);
+ btrfs_set_lock_blocking(right);
+
free_space = btrfs_leaf_free_space(root, right);
if (free_space < data_size)
goto out_unlock;
left = read_node_slot(root, path->nodes[1], slot - 1);
btrfs_tree_lock(left);
+ btrfs_set_lock_blocking(left);
+
free_space = btrfs_leaf_free_space(root, left);
if (free_space < data_size) {
ret = 1;
path->keep_locks = 0;
BUG_ON(ret);
+ /*
+ * make sure any changes to the path from split_leaf leave it
+ * in a blocking state
+ */
+ btrfs_set_path_blocking(path);
+
leaf = path->nodes[0];
BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
BUG();
}
out:
+ btrfs_unlock_up_safe(path, 1);
return ret;
}
*/
if (slot >= nritems) {
path->slots[level] = slot;
+ btrfs_set_path_blocking(path);
sret = btrfs_find_next_key(root, path, min_key, level,
cache_only, min_trans);
if (sret == 0) {
btrfs_release_path(root, path);
goto again;
} else {
+ btrfs_clear_path_blocking(path);
goto out;
}
}
unlock_up(path, level, 1);
goto out;
}
+ btrfs_set_path_blocking(path);
cur = read_node_slot(root, cur, slot);
btrfs_tree_lock(cur);
+
path->locks[level - 1] = 1;
path->nodes[level - 1] = cur;
unlock_up(path, level, 1);
+ btrfs_clear_path_blocking(path);
}
out:
if (ret == 0)
memcpy(min_key, &found_key, sizeof(found_key));
+ btrfs_set_path_blocking(path);
return ret;
}
if (ret < 0)
return ret;
+ btrfs_set_path_blocking(path);
nritems = btrfs_header_nritems(path->nodes[0]);
/*
* by releasing the path above we dropped all our locks. A balance
free_extent_buffer(next);
}
+ /* the path was set to blocking above */
if (level == 1 && (path->locks[1] || path->skip_locking) &&
path->reada)
reada_for_search(root, path, level, slot, 0);
if (!path->skip_locking) {
WARN_ON(!btrfs_tree_locked(c));
btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
}
break;
}
path->locks[level] = 1;
if (!level)
break;
+
+ btrfs_set_path_blocking(path);
if (level == 1 && path->locks[1] && path->reada)
reada_for_search(root, path, level, slot, 0);
next = read_node_slot(root, next, 0);
if (!path->skip_locking) {
WARN_ON(!btrfs_tree_locked(path->nodes[level]));
btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
}
}
done:
while (1) {
if (path->slots[0] == 0) {
+ btrfs_set_path_blocking(path);
ret = btrfs_prev_leaf(root, path);
if (ret != 0)
return ret;
struct btrfs_path *btrfs_alloc_path(void);
void btrfs_free_path(struct btrfs_path *p);
void btrfs_init_path(struct btrfs_path *p);
+void btrfs_set_path_blocking(struct btrfs_path *p);
+void btrfs_clear_path_blocking(struct btrfs_path *p);
+void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
+
int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_path *path, int slot, int nr);
int btrfs_del_leaf(struct btrfs_trans_handle *trans,
ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
if (ret == 0)
- buf->flags |= EXTENT_UPTODATE;
+ set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
else
WARN_ON(1);
return buf;
if (btrfs_header_generation(buf) ==
root->fs_info->running_transaction->transid) {
WARN_ON(!btrfs_tree_locked(buf));
+
+ /* ugh, clear_extent_buffer_dirty can be expensive */
+ btrfs_set_lock_blocking(buf);
+
clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
buf);
}
u64 transid = btrfs_header_generation(buf);
struct inode *btree_inode = root->fs_info->btree_inode;
+ btrfs_set_lock_blocking(buf);
+
WARN_ON(!btrfs_tree_locked(buf));
if (transid != root->fs_info->generation) {
printk(KERN_CRIT "btrfs transid mismatch buffer %llu, "
int ret;
ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
if (ret == 0)
- buf->flags |= EXTENT_UPTODATE;
+ set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
return ret;
}
btrfs_set_header_generation(buf, trans->transid);
btrfs_tree_lock(buf);
clean_tree_block(trans, root, buf);
+
+ btrfs_set_lock_blocking(buf);
btrfs_set_buffer_uptodate(buf);
+
if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
set_extent_dirty(&root->dirty_log_pages, buf->start,
buf->start + buf->len - 1, GFP_NOFS);
buf->start + buf->len - 1, GFP_NOFS);
}
trans->blocks_used++;
+ /* this returns a buffer locked for blocking */
return buf;
}
next = read_tree_block(root, bytenr, blocksize, ptr_gen);
btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize,
&refs);
eb = kmem_cache_zalloc(extent_buffer_cache, mask);
eb->start = start;
eb->len = len;
- mutex_init(&eb->mutex);
+ spin_lock_init(&eb->lock);
+ init_waitqueue_head(&eb->lock_wq);
+
#if LEAK_DEBUG
spin_lock_irqsave(&leak_lock, flags);
list_add(&eb->leak_list, &buffers);
unlock_page(p);
}
if (uptodate)
- eb->flags |= EXTENT_UPTODATE;
- eb->flags |= EXTENT_BUFFER_FILLED;
+ set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
spin_lock(&tree->buffer_lock);
exists = buffer_tree_insert(tree, start, &eb->rb_node);
unsigned long num_pages;
num_pages = num_extent_pages(eb->start, eb->len);
- eb->flags &= ~EXTENT_UPTODATE;
+ clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
GFP_NOFS);
struct page *page;
int pg_uptodate = 1;
- if (eb->flags & EXTENT_UPTODATE)
+ if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
return 1;
ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
struct bio *bio = NULL;
unsigned long bio_flags = 0;
- if (eb->flags & EXTENT_UPTODATE)
+ if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
return 0;
if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
}
if (all_uptodate) {
if (start_i == 0)
- eb->flags |= EXTENT_UPTODATE;
+ set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
goto unlock_exit;
}
}
if (!ret)
- eb->flags |= EXTENT_UPTODATE;
+ set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
return ret;
unlock_exit:
unmap_extent_buffer(eb, eb->map_token, km);
eb->map_token = NULL;
save = 1;
- WARN_ON(!mutex_is_locked(&eb->mutex));
}
err = map_private_extent_buffer(eb, start, min_len, token, map,
map_start, map_len, km);
/* flags for bio submission */
#define EXTENT_BIO_COMPRESSED 1
+/* these are bit numbers for test/set bit */
+#define EXTENT_BUFFER_UPTODATE 0
+#define EXTENT_BUFFER_BLOCKING 1
+
/*
* page->private values. Every page that is controlled by the extent
* map has page->private set to one.
unsigned long map_start;
unsigned long map_len;
struct page *first_page;
+ unsigned long bflags;
atomic_t refs;
- int flags;
struct list_head leak_list;
struct rb_node rb_node;
- struct mutex mutex;
+
+ /* the spinlock is used to protect most operations */
+ spinlock_t lock;
+
+ /*
+ * when we keep the lock held while blocking, waiters go onto
+ * the wq
+ */
+ wait_queue_head_t lock_wq;
};
struct extent_map_tree;
#include "tree-log.h"
#include "ref-cache.h"
#include "compression.h"
+#include "locking.h"
struct btrfs_iget_args {
u64 ino;
BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
+
BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0,
alloc_group_block, 0);
btrfs_free_path(path);
goto failed;
}
+ btrfs_unlock_up_safe(path, 1);
leaf = path->nodes[0];
inode_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_item);
#include "locking.h"
/*
- * locks the per buffer mutex in an extent buffer. This uses adaptive locks
- * and the spin is not tuned very extensively. The spinning does make a big
- * difference in almost every workload, but spinning for the right amount of
- * time needs some help.
- *
- * In general, we want to spin as long as the lock holder is doing btree
- * searches, and we should give up if they are in more expensive code.
+ * btrfs_header_level() isn't free, so don't call it when lockdep isn't
+ * on
*/
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static inline void spin_nested(struct extent_buffer *eb)
+{
+ spin_lock_nested(&eb->lock, BTRFS_MAX_LEVEL - btrfs_header_level(eb));
+}
+#else
+static inline void spin_nested(struct extent_buffer *eb)
+{
+ spin_lock(&eb->lock);
+}
+#endif
-int btrfs_tree_lock(struct extent_buffer *eb)
+/*
+ * Setting a lock to blocking will drop the spinlock and set the
+ * flag that forces other procs who want the lock to wait. After
+ * this you can safely schedule with the lock held.
+ */
+void btrfs_set_lock_blocking(struct extent_buffer *eb)
{
- int i;
+ if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
+ set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
+ spin_unlock(&eb->lock);
+ }
+ /* exit with the spin lock released and the bit set */
+}
- if (mutex_trylock(&eb->mutex))
- return 0;
+/*
+ * clearing the blocking flag will take the spinlock again.
+ * After this you can't safely schedule
+ */
+void btrfs_clear_lock_blocking(struct extent_buffer *eb)
+{
+ if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
+ spin_nested(eb);
+ clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
+ smp_mb__after_clear_bit();
+ }
+ /* exit with the spin lock held */
+}
+
+/*
+ * unfortunately, many of the places that currently set a lock to blocking
+ * don't end up blocking for every long, and often they don't block
+ * at all. For a dbench 50 run, if we don't spin one the blocking bit
+ * at all, the context switch rate can jump up to 400,000/sec or more.
+ *
+ * So, we're still stuck with this crummy spin on the blocking bit,
+ * at least until the most common causes of the short blocks
+ * can be dealt with.
+ */
+static int btrfs_spin_on_block(struct extent_buffer *eb)
+{
+ int i;
for (i = 0; i < 512; i++) {
cpu_relax();
- if (mutex_trylock(&eb->mutex))
+ if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
+ return 1;
+ if (need_resched())
+ break;
+ }
+ return 0;
+}
+
+/*
+ * This is somewhat different from trylock. It will take the
+ * spinlock but if it finds the lock is set to blocking, it will
+ * return without the lock held.
+ *
+ * returns 1 if it was able to take the lock and zero otherwise
+ *
+ * After this call, scheduling is not safe without first calling
+ * btrfs_set_lock_blocking()
+ */
+int btrfs_try_spin_lock(struct extent_buffer *eb)
+{
+ int i;
+
+ spin_nested(eb);
+ if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
+ return 1;
+ spin_unlock(&eb->lock);
+
+ /* spin for a bit on the BLOCKING flag */
+ for (i = 0; i < 2; i++) {
+ if (!btrfs_spin_on_block(eb))
+ break;
+
+ spin_nested(eb);
+ if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
+ return 1;
+ spin_unlock(&eb->lock);
+ }
+ return 0;
+}
+
+/*
+ * the autoremove wake function will return 0 if it tried to wake up
+ * a process that was already awake, which means that process won't
+ * count as an exclusive wakeup. The waitq code will continue waking
+ * procs until it finds one that was actually sleeping.
+ *
+ * For btrfs, this isn't quite what we want. We want a single proc
+ * to be notified that the lock is ready for taking. If that proc
+ * already happen to be awake, great, it will loop around and try for
+ * the lock.
+ *
+ * So, btrfs_wake_function always returns 1, even when the proc that we
+ * tried to wake up was already awake.
+ */
+static int btrfs_wake_function(wait_queue_t *wait, unsigned mode,
+ int sync, void *key)
+{
+ autoremove_wake_function(wait, mode, sync, key);
+ return 1;
+}
+
+/*
+ * returns with the extent buffer spinlocked.
+ *
+ * This will spin and/or wait as required to take the lock, and then
+ * return with the spinlock held.
+ *
+ * After this call, scheduling is not safe without first calling
+ * btrfs_set_lock_blocking()
+ */
+int btrfs_tree_lock(struct extent_buffer *eb)
+{
+ DEFINE_WAIT(wait);
+ wait.func = btrfs_wake_function;
+
+ while(1) {
+ spin_nested(eb);
+
+ /* nobody is blocking, exit with the spinlock held */
+ if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
return 0;
+
+ /*
+ * we have the spinlock, but the real owner is blocking.
+ * wait for them
+ */
+ spin_unlock(&eb->lock);
+
+ /*
+ * spin for a bit, and if the blocking flag goes away,
+ * loop around
+ */
+ if (btrfs_spin_on_block(eb))
+ continue;
+
+ prepare_to_wait_exclusive(&eb->lock_wq, &wait,
+ TASK_UNINTERRUPTIBLE);
+
+ if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
+ schedule();
+
+ finish_wait(&eb->lock_wq, &wait);
}
- cpu_relax();
- mutex_lock_nested(&eb->mutex, BTRFS_MAX_LEVEL - btrfs_header_level(eb));
return 0;
}
+/*
+ * Very quick trylock, this does not spin or schedule. It returns
+ * 1 with the spinlock held if it was able to take the lock, or it
+ * returns zero if it was unable to take the lock.
+ *
+ * After this call, scheduling is not safe without first calling
+ * btrfs_set_lock_blocking()
+ */
int btrfs_try_tree_lock(struct extent_buffer *eb)
{
- return mutex_trylock(&eb->mutex);
+ if (spin_trylock(&eb->lock)) {
+ if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
+ /*
+ * we've got the spinlock, but the real owner is
+ * blocking. Drop the spinlock and return failure
+ */
+ spin_unlock(&eb->lock);
+ return 0;
+ }
+ return 1;
+ }
+ /* someone else has the spinlock giveup */
+ return 0;
}
int btrfs_tree_unlock(struct extent_buffer *eb)
{
- mutex_unlock(&eb->mutex);
+ /*
+ * if we were a blocking owner, we don't have the spinlock held
+ * just clear the bit and look for waiters
+ */
+ if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
+ smp_mb__after_clear_bit();
+ else
+ spin_unlock(&eb->lock);
+
+ if (waitqueue_active(&eb->lock_wq))
+ wake_up(&eb->lock_wq);
return 0;
}
int btrfs_tree_locked(struct extent_buffer *eb)
{
- return mutex_is_locked(&eb->mutex);
+ return test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags) ||
+ spin_is_locked(&eb->lock);
}
/*
{
int i;
struct extent_buffer *eb;
+
for (i = level; i <= level + 1 && i < BTRFS_MAX_LEVEL; i++) {
eb = path->nodes[i];
if (!eb)
break;
smp_mb();
- if (!list_empty(&eb->mutex.wait_list))
+ if (spin_is_contended(&eb->lock) ||
+ waitqueue_active(&eb->lock_wq))
return 1;
}
return 0;
int btrfs_tree_lock(struct extent_buffer *eb);
int btrfs_tree_unlock(struct extent_buffer *eb);
int btrfs_tree_locked(struct extent_buffer *eb);
+
int btrfs_try_tree_lock(struct extent_buffer *eb);
+int btrfs_try_spin_lock(struct extent_buffer *eb);
+
int btrfs_path_lock_waiting(struct btrfs_path *path, int level);
+
+void btrfs_set_lock_blocking(struct extent_buffer *eb);
+void btrfs_clear_lock_blocking(struct extent_buffer *eb);
#endif
u32 nritems;
root_node = btrfs_lock_root_node(root);
+ btrfs_set_lock_blocking(root_node);
nritems = btrfs_header_nritems(root_node);
root->defrag_max.objectid = 0;
/* from above we know this is not a leaf */
btrfs_tree_lock(next);
clean_tree_block(trans, root, next);
+ btrfs_set_lock_blocking(next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
next = path->nodes[*level];
btrfs_tree_lock(next);
clean_tree_block(trans, root, next);
+ btrfs_set_lock_blocking(next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
btrfs_tree_lock(next);
clean_tree_block(trans, root, next);
+ btrfs_set_lock_blocking(next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
btrfs_tree_lock(next);
clean_tree_block(trans, log, next);
+ btrfs_set_lock_blocking(next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);