Btrfs: leave btree locks spinning more often
author Chris Mason <chris.mason@oracle.com>
Fri, 13 Mar 2009 15:00:37 +0000 (11:00 -0400)
committer Chris Mason <chris.mason@oracle.com>
Tue, 24 Mar 2009 20:14:28 +0000 (16:14 -0400)
btrfs_mark_buffer_dirty would set dirty bits in the extent_io tree
for the buffers it was dirtying.  This may require a kmalloc, so it
was not atomic.  So, anyone who called btrfs_mark_buffer_dirty had to
set any btree locks they were holding to blocking first.

This commit changes dirty tracking for extent buffers to just use a flag
in the extent buffer.  Now that we have one and only one extent buffer
per page, this can be safely done without losing dirty bits along the way.

This also introduces a path->leave_spinning flag that callers of
btrfs_search_slot can use to indicate they will properly deal with a
path returned where all the locks are spinning instead of blocking.

Many of the btree search callers now expect spinning paths,
resulting in better btree concurrency overall.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
14 files changed:
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/dir-item.c
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/file-item.c
fs/btrfs/file.c
fs/btrfs/inode-item.c
fs/btrfs/inode.c
fs/btrfs/locking.c
fs/btrfs/tree-log.c

index 3764248bdc059d4ec05bf5cfe1d3b146d7559c24..8686a3d2ab3a06bccefba2978836aae2db701252 100644 (file)
@@ -1684,7 +1684,8 @@ done:
         * we don't really know what they plan on doing with the path
         * from here on, so for now just mark it as blocking
         */
-       btrfs_set_path_blocking(p);
+       if (!p->leave_spinning)
+               btrfs_set_path_blocking(p);
        return ret;
 }
 
@@ -3032,26 +3033,27 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
                return -EAGAIN;
        }
 
+       btrfs_set_path_blocking(path);
        ret = split_leaf(trans, root, &orig_key, path,
                         sizeof(struct btrfs_item), 1);
        path->keep_locks = 0;
        BUG_ON(ret);
 
+       btrfs_unlock_up_safe(path, 1);
+       leaf = path->nodes[0];
+       BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
+
+split:
        /*
         * make sure any changes to the path from split_leaf leave it
         * in a blocking state
         */
        btrfs_set_path_blocking(path);
 
-       leaf = path->nodes[0];
-       BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
-
-split:
        item = btrfs_item_nr(leaf, path->slots[0]);
        orig_offset = btrfs_item_offset(leaf, item);
        item_size = btrfs_item_size(leaf, item);
 
-
        buf = kmalloc(item_size, GFP_NOFS);
        read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
                            path->slots[0]), item_size);
@@ -3545,7 +3547,6 @@ setup_items_for_insert(struct btrfs_trans_handle *trans,
        }
 
        btrfs_set_header_nritems(leaf, nritems + nr);
-       btrfs_mark_buffer_dirty(leaf);
 
        ret = 0;
        if (slot == 0) {
@@ -3553,6 +3554,8 @@ setup_items_for_insert(struct btrfs_trans_handle *trans,
                btrfs_cpu_key_to_disk(&disk_key, cpu_key);
                ret = fixup_low_keys(trans, root, path, &disk_key, 1);
        }
+       btrfs_unlock_up_safe(path, 1);
+       btrfs_mark_buffer_dirty(leaf);
 
        if (btrfs_leaf_free_space(root, leaf) < 0) {
                btrfs_print_leaf(root, leaf);
@@ -3596,7 +3599,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
                               total_data, total_size, nr);
 
 out:
-       btrfs_unlock_up_safe(path, 1);
        return ret;
 }
 
@@ -3792,6 +3794,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                        slot = path->slots[1];
                        extent_buffer_get(leaf);
 
+                       btrfs_set_path_blocking(path);
                        wret = push_leaf_left(trans, root, path, 1, 1);
                        if (wret < 0 && wret != -ENOSPC)
                                ret = wret;
index 08d9f8d15538f6a26b2259b510c899bd449089d1..4ddce91cf3f9e655038f9a2bfd06de4482c8d28c 100644 (file)
@@ -401,15 +401,16 @@ struct btrfs_path {
        int locks[BTRFS_MAX_LEVEL];
        int reada;
        /* keep some upper locks as we walk down */
-       int keep_locks;
-       int skip_locking;
        int lowest_level;
 
        /*
         * set by btrfs_split_item, tells search_slot to keep all locks
         * and to force calls to keep space in the nodes
         */
-       int search_for_split;
+       unsigned int search_for_split:1;
+       unsigned int keep_locks:1;
+       unsigned int skip_locking:1;
+       unsigned int leave_spinning:1;
 };
 
 /*
@@ -779,6 +780,11 @@ struct btrfs_fs_info {
        atomic_t throttle_gen;
 
        u64 total_pinned;
+
+       /* protected by the delalloc lock, used to keep from writing
+        * metadata until there is a nice batch
+        */
+       u64 dirty_metadata_bytes;
        struct list_head dirty_cowonly_roots;
 
        struct btrfs_fs_devices *fs_devices;
index 926a0b287a7d752994fc5a11894f21e91d5561f8..1d70236ba00c7bf4abf5181d2efd3b20d5867e2e 100644 (file)
@@ -145,7 +145,10 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
        key.objectid = dir;
        btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
        key.offset = btrfs_name_hash(name, name_len);
+
        path = btrfs_alloc_path();
+       path->leave_spinning = 1;
+
        data_size = sizeof(*dir_item) + name_len;
        dir_item = insert_with_overflow(trans, root, path, &key, data_size,
                                        name, name_len);
index 1f1d89b188189ab5db429ba88457412b2d6503f6..9244cd7313d451f69ebac1df57fb6a16aff548af 100644 (file)
@@ -668,14 +668,31 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 static int btree_writepage(struct page *page, struct writeback_control *wbc)
 {
        struct extent_io_tree *tree;
+       struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
+       struct extent_buffer *eb;
+       int was_dirty;
+
        tree = &BTRFS_I(page->mapping->host)->io_tree;
+       if (!(current->flags & PF_MEMALLOC)) {
+               return extent_write_full_page(tree, page,
+                                             btree_get_extent, wbc);
+       }
 
-       if (current->flags & PF_MEMALLOC) {
-               redirty_page_for_writepage(wbc, page);
-               unlock_page(page);
-               return 0;
+       redirty_page_for_writepage(wbc, page);
+       eb = btrfs_find_tree_block(root, page_offset(page),
+                                     PAGE_CACHE_SIZE);
+       WARN_ON(!eb);
+
+       was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
+       if (!was_dirty) {
+               spin_lock(&root->fs_info->delalloc_lock);
+               root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE;
+               spin_unlock(&root->fs_info->delalloc_lock);
        }
-       return extent_write_full_page(tree, page, btree_get_extent, wbc);
+       free_extent_buffer(eb);
+
+       unlock_page(page);
+       return 0;
 }
 
 static int btree_writepages(struct address_space *mapping,
@@ -684,15 +701,15 @@ static int btree_writepages(struct address_space *mapping,
        struct extent_io_tree *tree;
        tree = &BTRFS_I(mapping->host)->io_tree;
        if (wbc->sync_mode == WB_SYNC_NONE) {
+               struct btrfs_root *root = BTRFS_I(mapping->host)->root;
                u64 num_dirty;
-               u64 start = 0;
                unsigned long thresh = 32 * 1024 * 1024;
 
                if (wbc->for_kupdate)
                        return 0;
 
-               num_dirty = count_range_bits(tree, &start, (u64)-1,
-                                            thresh, EXTENT_DIRTY);
+               /* this is a bit racy, but that's ok */
+               num_dirty = root->fs_info->dirty_metadata_bytes;
                if (num_dirty < thresh)
                        return 0;
        }
@@ -859,9 +876,17 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
            root->fs_info->running_transaction->transid) {
                btrfs_assert_tree_locked(buf);
 
-               /* ugh, clear_extent_buffer_dirty can be expensive */
-               btrfs_set_lock_blocking(buf);
+               if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
+                       spin_lock(&root->fs_info->delalloc_lock);
+                       if (root->fs_info->dirty_metadata_bytes >= buf->len)
+                               root->fs_info->dirty_metadata_bytes -= buf->len;
+                       else
+                               WARN_ON(1);
+                       spin_unlock(&root->fs_info->delalloc_lock);
+               }
 
+               /* ugh, clear_extent_buffer_dirty needs to lock the page */
+               btrfs_set_lock_blocking(buf);
                clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
                                          buf);
        }
@@ -2348,8 +2373,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
        struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
        u64 transid = btrfs_header_generation(buf);
        struct inode *btree_inode = root->fs_info->btree_inode;
-
-       btrfs_set_lock_blocking(buf);
+       int was_dirty;
 
        btrfs_assert_tree_locked(buf);
        if (transid != root->fs_info->generation) {
@@ -2360,7 +2384,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
                        (unsigned long long)root->fs_info->generation);
                WARN_ON(1);
        }
-       set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf);
+       was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
+                                           buf);
+       if (!was_dirty) {
+               spin_lock(&root->fs_info->delalloc_lock);
+               root->fs_info->dirty_metadata_bytes += buf->len;
+               spin_unlock(&root->fs_info->delalloc_lock);
+       }
 }
 
 void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
@@ -2400,6 +2430,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
 int btree_lock_page_hook(struct page *page)
 {
        struct inode *inode = page->mapping->host;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct extent_buffer *eb;
        unsigned long len;
@@ -2415,6 +2446,16 @@ int btree_lock_page_hook(struct page *page)
 
        btrfs_tree_lock(eb);
        btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
+
+       if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+               spin_lock(&root->fs_info->delalloc_lock);
+               if (root->fs_info->dirty_metadata_bytes >= eb->len)
+                       root->fs_info->dirty_metadata_bytes -= eb->len;
+               else
+                       WARN_ON(1);
+               spin_unlock(&root->fs_info->delalloc_lock);
+       }
+
        btrfs_tree_unlock(eb);
        free_extent_buffer(eb);
 out:
index 95029db227be5767d17eeab2a11cb36b63709849..c958ecbc19168ef0e7ef8e7bdd123f5f527b55f7 100644 (file)
@@ -72,6 +72,7 @@ int btrfs_insert_dev_radix(struct btrfs_root *root,
 void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
 int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
 void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
+void btrfs_mark_buffer_dirty_nonblocking(struct extent_buffer *buf);
 int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
 int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
 int wait_on_tree_block_writeback(struct btrfs_root *root,
index a421c32c6cfefe36ce0a1892e5462e2b8228f5b8..8933d15a240fa0bdc079598d99e7ee739e12728d 100644 (file)
@@ -56,9 +56,6 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
                                         int ref_mod);
 static int update_reserved_extents(struct btrfs_root *root,
                                   u64 bytenr, u64 num, int reserve);
-static int pin_down_bytes(struct btrfs_trans_handle *trans,
-                         struct btrfs_root *root,
-                         u64 bytenr, u64 num_bytes, int is_data);
 static int update_block_group(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              u64 bytenr, u64 num_bytes, int alloc,
@@ -618,6 +615,7 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
        } else {
                goto out;
        }
+       btrfs_unlock_up_safe(path, 1);
        btrfs_mark_buffer_dirty(path->nodes[0]);
 out:
        btrfs_release_path(root, path);
@@ -760,6 +758,7 @@ static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
                return -ENOMEM;
 
        path->reada = 1;
+       path->leave_spinning = 1;
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = num_bytes;
@@ -767,8 +766,10 @@ static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
        /* first find the extent item and update its reference count */
        ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
                                path, 0, 1);
-       if (ret < 0)
+       if (ret < 0) {
+               btrfs_set_path_blocking(path);
                return ret;
+       }
 
        if (ret > 0) {
                WARN_ON(1);
@@ -791,11 +792,15 @@ static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
 
        refs = btrfs_extent_refs(l, item);
        btrfs_set_extent_refs(l, item, refs + refs_to_add);
+       btrfs_unlock_up_safe(path, 1);
+
        btrfs_mark_buffer_dirty(path->nodes[0]);
 
        btrfs_release_path(root->fs_info->extent_root, path);
 
        path->reada = 1;
+       path->leave_spinning = 1;
+
        /* now insert the actual backref */
        ret = insert_extent_backref(trans, root->fs_info->extent_root,
                                    path, bytenr, parent,
@@ -2050,6 +2055,8 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
                clear_extent_dirty(&fs_info->pinned_extents,
                                bytenr, bytenr + num - 1, GFP_NOFS);
        }
+       mutex_unlock(&root->fs_info->pinned_mutex);
+
        while (num > 0) {
                cache = btrfs_lookup_block_group(fs_info, bytenr);
                BUG_ON(!cache);
@@ -2141,8 +2148,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
        u64 end;
        int ret;
 
-       mutex_lock(&root->fs_info->pinned_mutex);
        while (1) {
+               mutex_lock(&root->fs_info->pinned_mutex);
                ret = find_first_extent_bit(unpin, 0, &start, &end,
                                            EXTENT_DIRTY);
                if (ret)
@@ -2150,14 +2157,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 
                ret = btrfs_discard_extent(root, start, end + 1 - start);
 
+               /* unlocks the pinned mutex */
                btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
                clear_extent_dirty(unpin, start, end, GFP_NOFS);
 
-               if (need_resched()) {
-                       mutex_unlock(&root->fs_info->pinned_mutex);
-                       cond_resched();
-                       mutex_lock(&root->fs_info->pinned_mutex);
-               }
+               cond_resched();
        }
        mutex_unlock(&root->fs_info->pinned_mutex);
        return ret;
@@ -2165,7 +2169,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 
 static int pin_down_bytes(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root,
-                         u64 bytenr, u64 num_bytes, int is_data)
+                         struct btrfs_path *path,
+                         u64 bytenr, u64 num_bytes, int is_data,
+                         struct extent_buffer **must_clean)
 {
        int err = 0;
        struct extent_buffer *buf;
@@ -2191,15 +2197,16 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
                    header_owner != BTRFS_DATA_RELOC_TREE_OBJECTID &&
                    header_transid == trans->transid &&
                    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
-                       clean_tree_block(NULL, root, buf);
-                       btrfs_tree_unlock(buf);
-                       free_extent_buffer(buf);
+                       *must_clean = buf;
                        return 1;
                }
                btrfs_tree_unlock(buf);
        }
        free_extent_buffer(buf);
 pinit:
+       btrfs_set_path_blocking(path);
+       mutex_lock(&root->fs_info->pinned_mutex);
+       /* unlocks the pinned mutex */
        btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
 
        BUG_ON(err < 0);
@@ -2236,6 +2243,7 @@ static int __free_extent(struct btrfs_trans_handle *trans,
                return -ENOMEM;
 
        path->reada = 1;
+       path->leave_spinning = 1;
        ret = lookup_extent_backref(trans, extent_root, path,
                                    bytenr, parent, root_objectid,
                                    ref_generation, owner_objectid, 1);
@@ -2261,6 +2269,7 @@ static int __free_extent(struct btrfs_trans_handle *trans,
                                                    refs_to_drop);
                        BUG_ON(ret);
                        btrfs_release_path(extent_root, path);
+                       path->leave_spinning = 1;
                        ret = btrfs_search_slot(trans, extent_root,
                                                &key, path, -1, 1);
                        if (ret) {
@@ -2318,6 +2327,7 @@ static int __free_extent(struct btrfs_trans_handle *trans,
                /* if refs are 0, we need to setup the path for deletion */
                if (refs == 0) {
                        btrfs_release_path(extent_root, path);
+                       path->leave_spinning = 1;
                        ret = btrfs_search_slot(trans, extent_root, &key, path,
                                                -1, 1);
                        BUG_ON(ret);
@@ -2327,16 +2337,18 @@ static int __free_extent(struct btrfs_trans_handle *trans,
        if (refs == 0) {
                u64 super_used;
                u64 root_used;
+               struct extent_buffer *must_clean = NULL;
 
                if (pin) {
-                       mutex_lock(&root->fs_info->pinned_mutex);
-                       ret = pin_down_bytes(trans, root, bytenr, num_bytes,
-                               owner_objectid >= BTRFS_FIRST_FREE_OBJECTID);
-                       mutex_unlock(&root->fs_info->pinned_mutex);
+                       ret = pin_down_bytes(trans, root, path,
+                               bytenr, num_bytes,
+                               owner_objectid >= BTRFS_FIRST_FREE_OBJECTID,
+                               &must_clean);
                        if (ret > 0)
                                mark_free = 1;
                        BUG_ON(ret < 0);
                }
+
                /* block accounting for super block */
                spin_lock(&info->delalloc_lock);
                super_used = btrfs_super_bytes_used(&info->super_copy);
@@ -2348,11 +2360,27 @@ static int __free_extent(struct btrfs_trans_handle *trans,
                btrfs_set_root_used(&root->root_item,
                                           root_used - num_bytes);
                spin_unlock(&info->delalloc_lock);
+
+               /*
+                * it is going to be very rare for someone to be waiting
+                * on the block we're freeing.  del_items might need to
+                * schedule, so rather than get fancy, just force it
+                * to blocking here
+                */
+               if (must_clean)
+                       btrfs_set_lock_blocking(must_clean);
+
                ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
                                      num_to_del);
                BUG_ON(ret);
                btrfs_release_path(extent_root, path);
 
+               if (must_clean) {
+                       clean_tree_block(NULL, root, must_clean);
+                       btrfs_tree_unlock(must_clean);
+                       free_extent_buffer(must_clean);
+               }
+
                if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
                        ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
                        BUG_ON(ret);
@@ -2480,8 +2508,9 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
        if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID &&
            owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
                mutex_lock(&root->fs_info->pinned_mutex);
+
+               /* unlocks the pinned mutex */
                btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
-               mutex_unlock(&root->fs_info->pinned_mutex);
                update_reserved_extents(root, bytenr, num_bytes, 0);
                ret = 0;
        } else {
@@ -2931,6 +2960,7 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
        path = btrfs_alloc_path();
        BUG_ON(!path);
 
+       path->leave_spinning = 1;
        ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
                                       sizes, 2);
        BUG_ON(ret);
@@ -5435,6 +5465,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
 
+       path->leave_spinning = 1;
        ret = btrfs_insert_empty_inode(trans, root, path, objectid);
        if (ret)
                goto out;
index ebe6b29e60698156250708766c70bb74b903bfb6..08085af089e27827eba49898146ebe63c6623b07 100644 (file)
@@ -3124,20 +3124,15 @@ void free_extent_buffer(struct extent_buffer *eb)
 int clear_extent_buffer_dirty(struct extent_io_tree *tree,
                              struct extent_buffer *eb)
 {
-       int set;
        unsigned long i;
        unsigned long num_pages;
        struct page *page;
 
-       u64 start = eb->start;
-       u64 end = start + eb->len - 1;
-
-       set = clear_extent_dirty(tree, start, end, GFP_NOFS);
        num_pages = num_extent_pages(eb->start, eb->len);
 
        for (i = 0; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
-               if (!set && !PageDirty(page))
+               if (!PageDirty(page))
                        continue;
 
                lock_page(page);
@@ -3146,22 +3141,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
                else
                        set_page_private(page, EXTENT_PAGE_PRIVATE);
 
-               /*
-                * if we're on the last page or the first page and the
-                * block isn't aligned on a page boundary, do extra checks
-                * to make sure we don't clean page that is partially dirty
-                */
-               if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
-                   ((i == num_pages - 1) &&
-                    ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
-                       start = (u64)page->index << PAGE_CACHE_SHIFT;
-                       end  = start + PAGE_CACHE_SIZE - 1;
-                       if (test_range_bit(tree, start, end,
-                                          EXTENT_DIRTY, 0)) {
-                               unlock_page(page);
-                               continue;
-                       }
-               }
                clear_page_dirty_for_io(page);
                spin_lock_irq(&page->mapping->tree_lock);
                if (!PageDirty(page)) {
@@ -3187,29 +3166,13 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree,
 {
        unsigned long i;
        unsigned long num_pages;
+       int was_dirty = 0;
 
+       was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
        num_pages = num_extent_pages(eb->start, eb->len);
-       for (i = 0; i < num_pages; i++) {
-               struct page *page = extent_buffer_page(eb, i);
-               /* writepage may need to do something special for the
-                * first page, we have to make sure page->private is
-                * properly set.  releasepage may drop page->private
-                * on us if the page isn't already dirty.
-                */
-               lock_page(page);
-               if (i == 0) {
-                       set_page_extent_head(page, eb->len);
-               } else if (PagePrivate(page) &&
-                          page->private != EXTENT_PAGE_PRIVATE) {
-                       set_page_extent_mapped(page);
-               }
+       for (i = 0; i < num_pages; i++)
                __set_page_dirty_nobuffers(extent_buffer_page(eb, i));
-               set_extent_dirty(tree, page_offset(page),
-                                page_offset(page) + PAGE_CACHE_SIZE - 1,
-                                GFP_NOFS);
-               unlock_page(page);
-       }
-       return 0;
+       return was_dirty;
 }
 
 int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
@@ -3789,6 +3752,10 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
                ret = 0;
                goto out;
        }
+       if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+               ret = 0;
+               goto out;
+       }
        /* at this point we can safely release the extent buffer */
        num_pages = num_extent_pages(eb->start, eb->len);
        for (i = 0; i < num_pages; i++)
index 1f9df88afbf6922e9f4af87972f05521ed15e862..5bc20abf3f3d340b22248802e4c8d7d72f4dc4bb 100644 (file)
@@ -25,6 +25,7 @@
 /* these are bit numbers for test/set bit */
 #define EXTENT_BUFFER_UPTODATE 0
 #define EXTENT_BUFFER_BLOCKING 1
+#define EXTENT_BUFFER_DIRTY 2
 
 /*
  * page->private values.  Every page that is controlled by the extent
@@ -254,6 +255,8 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
                              struct extent_buffer *eb);
 int set_extent_buffer_dirty(struct extent_io_tree *tree,
                             struct extent_buffer *eb);
+int test_extent_buffer_dirty(struct extent_io_tree *tree,
+                            struct extent_buffer *eb);
 int set_extent_buffer_uptodate(struct extent_io_tree *tree,
                               struct extent_buffer *eb);
 int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
index 964652435fd1a5f40e061cfe1c3a7dd1d1198248..9b99886562d0b2c8ea9e883f3f2623b0ec6d8674 100644 (file)
@@ -52,6 +52,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
        file_key.offset = pos;
        btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY);
 
+       path->leave_spinning = 1;
        ret = btrfs_insert_empty_item(trans, root, path, &file_key,
                                      sizeof(*item));
        if (ret < 0)
@@ -523,6 +524,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
                key.offset = end_byte - 1;
                key.type = BTRFS_EXTENT_CSUM_KEY;
 
+               path->leave_spinning = 1;
                ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
                if (ret > 0) {
                        if (path->slots[0] == 0)
@@ -757,8 +759,10 @@ insert:
        } else {
                ins_size = csum_size;
        }
+       path->leave_spinning = 1;
        ret = btrfs_insert_empty_item(trans, root, path, &file_key,
                                      ins_size);
+       path->leave_spinning = 0;
        if (ret < 0)
                goto fail_unlock;
        if (ret != 0) {
@@ -776,7 +780,6 @@ found:
        item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
                                      btrfs_item_size_nr(leaf, path->slots[0]));
        eb_token = NULL;
-       cond_resched();
 next_sector:
 
        if (!eb_token ||
@@ -817,9 +820,9 @@ next_sector:
                eb_token = NULL;
        }
        btrfs_mark_buffer_dirty(path->nodes[0]);
-       cond_resched();
        if (total_bytes < sums->len) {
                btrfs_release_path(root, path);
+               cond_resched();
                goto again;
        }
 out:
index c800754976452a3932bac22e1803b1e589079389..f06c275644b761b23df8f0b7d069be0499d556e2 100644 (file)
@@ -606,6 +606,7 @@ next_slot:
                        btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
 
                        btrfs_release_path(root, path);
+                       path->leave_spinning = 1;
                        ret = btrfs_insert_empty_item(trans, root, path, &ins,
                                                      sizeof(*extent));
                        BUG_ON(ret);
@@ -639,7 +640,9 @@ next_slot:
                                                        ram_bytes);
                        btrfs_set_file_extent_type(leaf, extent, found_type);
 
+                       btrfs_unlock_up_safe(path, 1);
                        btrfs_mark_buffer_dirty(path->nodes[0]);
+                       btrfs_set_lock_blocking(path->nodes[0]);
 
                        if (disk_bytenr != 0) {
                                ret = btrfs_update_extent_ref(trans, root,
@@ -652,6 +655,7 @@ next_slot:
 
                                BUG_ON(ret);
                        }
+                       path->leave_spinning = 0;
                        btrfs_release_path(root, path);
                        if (disk_bytenr != 0)
                                inode_add_bytes(inode, extent_end - end);
index 3d46fa1f29a4d75be94f116e148a0f50321973f6..6b627c6118081ccea87d1acaf92000f995dda946 100644 (file)
@@ -73,6 +73,8 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
 
+       path->leave_spinning = 1;
+
        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        if (ret > 0) {
                ret = -ENOENT;
@@ -127,6 +129,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
 
+       path->leave_spinning = 1;
        ret = btrfs_insert_empty_item(trans, root, path, &key,
                                      ins_len);
        if (ret == -EEXIST) {
index c427011dc4534fee755eef4981b064873095273c..b83a45dc717eb6472c86584af2f999cddd0d1ca9 100644 (file)
@@ -134,6 +134,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
 
+       path->leave_spinning = 1;
        btrfs_set_trans_block_group(trans, inode);
 
        key.objectid = inode->i_ino;
@@ -167,9 +168,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
                        cur_size = min_t(unsigned long, compressed_size,
                                       PAGE_CACHE_SIZE);
 
-                       kaddr = kmap(cpage);
+                       kaddr = kmap_atomic(cpage, KM_USER0);
                        write_extent_buffer(leaf, kaddr, ptr, cur_size);
-                       kunmap(cpage);
+                       kunmap_atomic(kaddr, KM_USER0);
 
                        i++;
                        ptr += cur_size;
@@ -1452,6 +1453,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
        path = btrfs_alloc_path();
        BUG_ON(!path);
 
+       path->leave_spinning = 1;
        ret = btrfs_drop_extents(trans, root, inode, file_pos,
                                 file_pos + num_bytes, file_pos, &hint);
        BUG_ON(ret);
@@ -1474,6 +1476,10 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
        btrfs_set_file_extent_compression(leaf, fi, compression);
        btrfs_set_file_extent_encryption(leaf, fi, encryption);
        btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding);
+
+       btrfs_unlock_up_safe(path, 1);
+       btrfs_set_lock_blocking(leaf);
+
        btrfs_mark_buffer_dirty(leaf);
 
        inode_add_bytes(inode, num_bytes);
@@ -1486,8 +1492,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
                                          root->root_key.objectid,
                                          trans->transid, inode->i_ino, &ins);
        BUG_ON(ret);
-
        btrfs_free_path(path);
+
        return 0;
 }
 
@@ -2118,6 +2124,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
 
        path = btrfs_alloc_path();
        BUG_ON(!path);
+       path->leave_spinning = 1;
        ret = btrfs_lookup_inode(trans, root, path,
                                 &BTRFS_I(inode)->location, 1);
        if (ret) {
@@ -2164,6 +2171,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
                goto err;
        }
 
+       path->leave_spinning = 1;
        di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
                                    name, name_len, -1);
        if (IS_ERR(di)) {
@@ -2515,6 +2523,7 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
        key.type = (u8)-1;
 
 search_again:
+       path->leave_spinning = 1;
        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        if (ret < 0)
                goto error;
@@ -2661,6 +2670,7 @@ delete:
                        break;
                }
                if (found_extent) {
+                       btrfs_set_path_blocking(path);
                        ret = btrfs_free_extent(trans, root, extent_start,
                                                extent_num_bytes,
                                                leaf->start, root_owner,
@@ -3466,6 +3476,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        sizes[0] = sizeof(struct btrfs_inode_item);
        sizes[1] = name_len + sizeof(*ref);
 
+       path->leave_spinning = 1;
        ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
        if (ret != 0)
                goto fail;
index 6d8db2f5c38d0d485199120f5c170492099eb54d..a5310c0f41e24f089bfd78e3a2f0939d3957f717 100644 (file)
@@ -96,11 +96,12 @@ int btrfs_try_spin_lock(struct extent_buffer *eb)
 {
        int i;
 
-       spin_nested(eb);
-       if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-               return 1;
-       spin_unlock(&eb->lock);
-
+       if (btrfs_spin_on_block(eb)) {
+               spin_nested(eb);
+               if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
+                       return 1;
+               spin_unlock(&eb->lock);
+       }
        /* spin for a bit on the BLOCKING flag */
        for (i = 0; i < 2; i++) {
                cpu_relax();
index 9c462fbd60fac14ad17f192afb08c37794295d36..a93934fc93bd2885846d51a222b3ee7fdf1d8364 100644 (file)
@@ -203,7 +203,6 @@ static int process_one_buffer(struct btrfs_root *log,
                mutex_lock(&log->fs_info->pinned_mutex);
                btrfs_update_pinned_extents(log->fs_info->extent_root,
                                            eb->start, eb->len, 1);
-               mutex_unlock(&log->fs_info->pinned_mutex);
        }
 
        if (btrfs_buffer_uptodate(eb, gen)) {