Btrfs: allocator optimizations, truncate readahead
authorChris Mason <chris.mason@oracle.com>
Fri, 18 May 2007 17:28:27 +0000 (13:28 -0400)
committerDavid Woodhouse <dwmw2@hera.kernel.org>
Fri, 18 May 2007 17:28:27 +0000 (13:28 -0400)
Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/super.c

index d8caeba5d758ddd8e21930a89a9d90cd4a404334..5830b4333d83d4ef1d64293cd0652f4e23dcf420 100644 (file)
@@ -1081,7 +1081,7 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh)
 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
                                                 struct btrfs_block_group_cache
                                                 *hint, u64 search_start,
-                                                int data);
+                                                int data, int owner);
 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root);
 struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
index 678456d1e2f9c4a68d813eafcac6d705114b7637..4c0262b57664bd1a347d5bffa595811459b76486 100644 (file)
@@ -257,24 +257,30 @@ static struct address_space_operations btree_aops = {
 int readahead_tree_block(struct btrfs_root *root, u64 blocknr)
 {
        struct buffer_head *bh = NULL;
+       int ret = 0;
 
        bh = btrfs_find_create_tree_block(root, blocknr);
        if (!bh)
                return 0;
-       if (buffer_uptodate(bh))
+       if (buffer_uptodate(bh)) {
+               ret = 1;
                goto done;
-       if (test_set_buffer_locked(bh))
+       }
+       if (test_set_buffer_locked(bh)) {
+               ret = 1;
                goto done;
+       }
        if (!buffer_uptodate(bh)) {
                get_bh(bh);
                bh->b_end_io = end_buffer_read_sync;
                submit_bh(READ, bh);
        } else {
                unlock_buffer(bh);
+               ret = 1;
        }
 done:
        brelse(bh);
-       return 0;
+       return ret;
 }
 
 struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr)
index 1497ff98f0d3b5967e4f9ba2039c08b1fde421f3..e3c6bfea3751e32d69a7143c857d2ce9b3a771a3 100644 (file)
@@ -12,6 +12,33 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct
 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
                               btrfs_root *extent_root);
 
+static void reada_extent_leaves(struct btrfs_root *root,
+                               struct btrfs_path *path, u64 limit)
+{
+       struct btrfs_node *node;
+       int i;
+       int nritems;
+       u64 item_objectid;
+       u64 blocknr;
+       int slot;
+       int ret;
+
+       if (!path->nodes[1])
+               return;
+       node = btrfs_buffer_node(path->nodes[1]);
+       slot = path->slots[1] + 1;
+       nritems = btrfs_header_nritems(&node->header);
+       for (i = slot; i < nritems && i < slot + 8; i++) {
+               item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
+               if (item_objectid > limit)
+                       break;
+               blocknr = btrfs_node_blockptr(node, i);
+               ret = readahead_tree_block(root, blocknr);
+               if (ret)
+                       break;
+       }
+}
+
 static int cache_block_group(struct btrfs_root *root,
                             struct btrfs_block_group_cache *block_group)
 {
@@ -24,6 +51,7 @@ static int cache_block_group(struct btrfs_root *root,
        u64 i;
        u64 last = 0;
        u64 hole_size;
+       u64 limit;
        int found = 0;
 
        root = root->fs_info->extent_root;
@@ -46,14 +74,17 @@ printk("cache block group %Lu\n", block_group->key.objectid);
                return ret;
        if (ret && path->slots[0] > 0)
                path->slots[0]--;
+       limit = block_group->key.objectid + block_group->key.offset;
+       reada_extent_leaves(root, path, limit);
        while(1) {
                leaf = btrfs_buffer_leaf(path->nodes[0]);
                slot = path->slots[0];
                if (slot >= btrfs_header_nritems(&leaf->header)) {
+                       reada_extent_leaves(root, path, limit);
                        ret = btrfs_next_leaf(root, path);
-                       if (ret == 0)
+                       if (ret == 0) {
                                continue;
-                       else {
+                       else {
                                if (found) {
                                        hole_size = block_group->key.objectid +
                                                block_group->key.offset - last;
@@ -187,7 +218,7 @@ new_group:
                return max((*cache_ret)->last_alloc, search_start);
        }
        cache = btrfs_find_block_group(root, cache,
-                                      last + cache->key.offset - 1, 0);
+                                      last + cache->key.offset - 1, 0, 0);
        *cache_ret = cache;
        goto again;
 }
@@ -195,7 +226,7 @@ new_group:
 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
                                                 struct btrfs_block_group_cache
                                                 *hint, u64 search_start,
-                                                int data)
+                                                int data, int owner)
 {
        struct btrfs_block_group_cache *cache[8];
        struct btrfs_block_group_cache *found_group = NULL;
@@ -207,6 +238,10 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
        int i;
        int ret;
        int full_search = 0;
+       int factor = 8;
+
+       if (!owner)
+               factor = 5;
 
        if (data)
                radix = &info->block_group_data_radix;
@@ -219,14 +254,14 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
                if (shint->data == data) {
                        used = btrfs_block_group_used(&shint->item);
                        if (used + shint->pinned <
-                           (shint->key.offset * 8) / 10) {
+                           (shint->key.offset * factor) / 10) {
                                return shint;
                        }
                }
        }
        if (hint && hint->data == data) {
                used = btrfs_block_group_used(&hint->item);
-               if (used + hint->pinned < (hint->key.offset * 8) / 10) {
+               if (used + hint->pinned < (hint->key.offset * factor) / 10) {
                        return hint;
                }
                if (used >= (hint->key.offset * 8) / 10) {
@@ -261,7 +296,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
                                cache[i]->key.offset;
                        used = btrfs_block_group_used(&cache[i]->item);
                        if (used + cache[i]->pinned <
-                           (cache[i]->key.offset * 8) / 10) {
+                           (cache[i]->key.offset * factor) / 10) {
                                found_group = cache[i];
                                goto found;
                        }
@@ -272,6 +307,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
                                                     BTRFS_BLOCK_GROUP_AVAIL);
                        }
                }
+               cond_resched();
        }
        last = hint_last;
 again:
@@ -295,13 +331,16 @@ again:
                                                     BTRFS_BLOCK_GROUP_AVAIL);
                        }
                }
+               cond_resched();
        }
        if (!full_search) {
+printk("find block group doing full search data %d start %Lu\n", data, search_start);
                last = search_start;
                full_search = 1;
                goto again;
        }
        if (!found_group) {
+printk("find block group bailing to zero data %d\n", data);
                ret = radix_tree_gang_lookup(radix,
                                             (void **)&found_group, 0, 1);
                BUG_ON(ret != 1);
@@ -554,8 +593,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                                                      blocknr + i);
                                }
                        }
-                       if (old_val < (cache->key.offset * 6) / 10 &&
-                           old_val + num >= (cache->key.offset * 6) / 10) {
+                       if (old_val < (cache->key.offset * 5) / 10 &&
+                           old_val + num >= (cache->key.offset * 5) / 10) {
 printk("group %Lu now available\n", cache->key.objectid);
                                radix_tree_tag_set(cache->radix,
                                                   cache->key.objectid +
@@ -842,6 +881,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
        int level;
        struct btrfs_block_group_cache *block_group;
        int full_scan = 0;
+       u64 limit;
 
        path = btrfs_alloc_path();
        ins->flags = 0;
@@ -858,11 +898,11 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
        if (search_start) {
                block_group = lookup_block_group(info, search_start);
                block_group = btrfs_find_block_group(root, block_group,
-                                                    search_start, data);
+                                                    search_start, data, 1);
        } else {
                block_group = btrfs_find_block_group(root,
                                                     trans->block_group, 0,
-                                                    data);
+                                                    data, 1);
        }
 
 check_failed:
@@ -916,6 +956,12 @@ check_failed:
                                info->extent_tree_prealloc_nr = 0;
                                total_found = 0;
                        }
+                       if (start_found)
+                               limit = last_block +
+                                       block_group->key.offset / 2;
+                       else
+                               limit = search_start +
+                                       block_group->key.offset / 2;
                        ret = btrfs_next_leaf(root, path);
                        if (ret == 0)
                                continue;
@@ -960,6 +1006,7 @@ check_failed:
                }
 next:
                path->slots[0]++;
+               cond_resched();
        }
        // FIXME -ENOSPC
 check_pending:
@@ -1049,7 +1096,8 @@ printk("doing full scan!\n");
        block_group = lookup_block_group(info, search_start);
        if (!full_scan)
                block_group = btrfs_find_block_group(root, block_group,
-                                                    search_start, data);
+                                                    search_start, data, 0);
+       cond_resched();
        goto check_failed;
 
 error:
@@ -1102,7 +1150,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
         * in the correct block group.
         */
        if (data) {
-               ret = find_free_extent(trans, root, 0, search_start,
+               ret = find_free_extent(trans, root, 0, 0,
                                       search_end, &prealloc_key, 0);
                if (ret) {
                        return ret;
@@ -1173,7 +1221,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
        struct buffer_head *buf;
 
        ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
-                                1, 0, (unsigned long)-1, &ins, 0);
+                                1, hint, (unsigned long)-1, &ins, 0);
        if (ret) {
                BUG();
                return NULL;
index eaa48f09d1c4530a49a46c94e95268c27f5893c7..0f79490123cf66384bdf2f094d167a345eb29e2d 100644 (file)
@@ -351,6 +351,35 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans,
        return ret;
 }
 
+static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path,
+                          u64 objectid)
+{
+       struct btrfs_node *node;
+       int i;
+       int nritems;
+       u64 item_objectid;
+       u64 blocknr;
+       int slot;
+       int ret;
+
+       if (!path->nodes[1])
+               return;
+       node = btrfs_buffer_node(path->nodes[1]);
+       slot = path->slots[1];
+       if (slot == 0)
+               return;
+       nritems = btrfs_header_nritems(&node->header);
+       for (i = slot - 1; i >= 0; i--) {
+               item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
+               if (item_objectid != objectid)
+                       break;
+               blocknr = btrfs_node_blockptr(node, i);
+               ret = readahead_tree_block(root, blocknr);
+               if (ret)
+                       break;
+       }
+}
+
 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root,
                                   struct inode *inode)
@@ -386,6 +415,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
                        BUG_ON(path->slots[0] == 0);
                        path->slots[0]--;
                }
+               reada_truncate(root, path, inode->i_ino);
                leaf = btrfs_buffer_leaf(path->nodes[0]);
                found_key = &leaf->items[path->slots[0]].key;
                if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
@@ -587,28 +617,30 @@ printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_r
        return d_splice_alias(inode, dentry);
 }
 
-static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path)
+static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path,
+                        u64 objectid)
 {
        struct btrfs_node *node;
        int i;
-       int nritems;
-       u64 objectid;
+       u32 nritems;
        u64 item_objectid;
        u64 blocknr;
        int slot;
+       int ret;
 
        if (!path->nodes[1])
                return;
        node = btrfs_buffer_node(path->nodes[1]);
        slot = path->slots[1];
-       objectid = btrfs_disk_key_objectid(&node->ptrs[slot].key);
        nritems = btrfs_header_nritems(&node->header);
-       for (i = slot; i < nritems; i++) {
+       for (i = slot + 1; i < nritems; i++) {
                item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
                if (item_objectid != objectid)
                        break;
                blocknr = btrfs_node_blockptr(node, i);
-               readahead_tree_block(root, blocknr);
+               ret = readahead_tree_block(root, blocknr);
+               if (ret)
+                       break;
        }
 }
 
@@ -646,21 +678,20 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
        if (ret < 0)
                goto err;
        advance = 0;
-       reada_leaves(root, path);
+       reada_leaves(root, path, inode->i_ino);
        while(1) {
                leaf = btrfs_buffer_leaf(path->nodes[0]);
                nritems = btrfs_header_nritems(&leaf->header);
                slot = path->slots[0];
                if (advance || slot >= nritems) {
                        if (slot >= nritems -1) {
+                               reada_leaves(root, path, inode->i_ino);
                                ret = btrfs_next_leaf(root, path);
                                if (ret)
                                        break;
                                leaf = btrfs_buffer_leaf(path->nodes[0]);
                                nritems = btrfs_header_nritems(&leaf->header);
                                slot = path->slots[0];
-                               if (path->slots[1] == 0)
-                                       reada_leaves(root, path);
                        } else {
                                slot++;
                                path->slots[0]++;
@@ -805,13 +836,18 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        struct btrfs_inode_item inode_item;
        struct btrfs_key *location;
        int ret;
+       int owner;
 
        inode = new_inode(root->fs_info->sb);
        if (!inode)
                return ERR_PTR(-ENOMEM);
 
        BTRFS_I(inode)->root = root;
-       group = btrfs_find_block_group(root, group, 0, 0);
+       if (mode & S_IFDIR)
+               owner = 0;
+       else
+               owner = 1;
+       group = btrfs_find_block_group(root, group, 0, 0, owner);
        BTRFS_I(inode)->block_group = group;
 
        inode->i_uid = current->fsuid;
@@ -1562,7 +1598,7 @@ failed:
 static int drop_extents(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root,
                          struct inode *inode,
-                         u64 start, u64 end)
+                         u64 start, u64 end, u64 *hint_block)
 {
        int ret;
        struct btrfs_key key;
@@ -1659,17 +1695,14 @@ static int drop_extents(struct btrfs_trans_handle *trans,
                                new_num = (start - key.offset) >>
                                        inode->i_blkbits;
                                old_num = btrfs_file_extent_num_blocks(extent);
+                               *hint_block =
+                                       btrfs_file_extent_disk_blocknr(extent);
                                inode->i_blocks -= (old_num - new_num) << 3;
                                btrfs_set_file_extent_num_blocks(extent,
                                                                 new_num);
                                mark_buffer_dirty(path->nodes[0]);
                        } else {
                                WARN_ON(1);
-                               /*
-                               ret = btrfs_truncate_item(trans, root, path,
-                                                         start - key.offset);
-                               BUG_ON(ret);
-                               */
                        }
                }
                if (!keep) {
@@ -1683,6 +1716,8 @@ static int drop_extents(struct btrfs_trans_handle *trans,
                                      btrfs_file_extent_disk_num_blocks(extent);
                                extent_num_blocks =
                                      btrfs_file_extent_num_blocks(extent);
+                               *hint_block =
+                                       btrfs_file_extent_disk_blocknr(extent);
                        }
                        ret = btrfs_del_item(trans, root, path);
                        BUG_ON(ret);
@@ -1831,6 +1866,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
        u64 start_pos;
        u64 num_blocks;
        u64 alloc_extent_start;
+       u64 hint_block;
        struct btrfs_trans_handle *trans;
        struct btrfs_key ins;
        pinned[0] = NULL;
@@ -1871,6 +1907,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
        }
        if (first_index != last_index &&
            (last_index << PAGE_CACHE_SHIFT) < inode->i_size &&
+           pos + count < inode->i_size &&
            (count & (PAGE_CACHE_SIZE - 1))) {
                pinned[1] = grab_cache_page(inode->i_mapping, last_index);
                if (!PageUptodate(pinned[1])) {
@@ -1892,18 +1929,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
        btrfs_set_trans_block_group(trans, inode);
        /* FIXME blocksize != 4096 */
        inode->i_blocks += num_blocks << 3;
+       hint_block = 0;
        if (start_pos < inode->i_size) {
                /* FIXME blocksize != pagesize */
                ret = drop_extents(trans, root, inode,
                                   start_pos,
                                   (pos + count + root->blocksize -1) &
-                                  ~((u64)root->blocksize - 1));
+                                  ~((u64)root->blocksize - 1), &hint_block);
                BUG_ON(ret);
        }
        if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
            pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
                ret = btrfs_alloc_extent(trans, root, inode->i_ino,
-                                        num_blocks, 1, (u64)-1, &ins, 1);
+                                        num_blocks, hint_block, (u64)-1,
+                                        &ins, 1);
                BUG_ON(ret);
                ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
                                       start_pos, ins.objectid, ins.offset);
@@ -2455,7 +2494,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type,
                           btrfs_fill_super, mnt);
 }
 
-
 static int btrfs_getattr(struct vfsmount *mnt,
                         struct dentry *dentry, struct kstat *stat)
 {