end_io_wq->work.flags = 0;
if (bio->bi_rw & REQ_WRITE) {
- if (end_io_wq->metadata)
+ if (end_io_wq->metadata == 1)
btrfs_queue_worker(&fs_info->endio_meta_write_workers,
&end_io_wq->work);
+ else if (end_io_wq->metadata == 2)
+ btrfs_queue_worker(&fs_info->endio_freespace_worker,
+ &end_io_wq->work);
else
btrfs_queue_worker(&fs_info->endio_write_workers,
&end_io_wq->work);
}
}
+/*
+ * For the metadata arg you want
+ *
+ * 0 - if data
+ * 1 - if normal metadta
+ * 2 - if writing to the free space cache area
+ */
int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
int metadata)
{
btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
fs_info->thread_pool_size,
&fs_info->generic_worker);
+ btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
+ 1, &fs_info->generic_worker);
/*
* endios are largely parallel and should have a very
btrfs_start_workers(&fs_info->endio_meta_workers, 1);
btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
btrfs_start_workers(&fs_info->endio_write_workers, 1);
+ btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
btrfs_stop_workers(&fs_info->endio_meta_workers);
btrfs_stop_workers(&fs_info->endio_meta_write_workers);
btrfs_stop_workers(&fs_info->endio_write_workers);
+ btrfs_stop_workers(&fs_info->endio_freespace_worker);
btrfs_stop_workers(&fs_info->submit_workers);
fail_iput:
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
btrfs_stop_workers(&fs_info->endio_meta_workers);
btrfs_stop_workers(&fs_info->endio_meta_write_workers);
btrfs_stop_workers(&fs_info->endio_write_workers);
+ btrfs_stop_workers(&fs_info->endio_freespace_worker);
btrfs_stop_workers(&fs_info->submit_workers);
btrfs_close_devices(fs_info->fs_devices);
#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
+static void recalculate_thresholds(struct btrfs_block_group_cache
+ *block_group);
+static int link_free_space(struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_space *info);
+
struct inode *lookup_free_space_inode(struct btrfs_root *root,
struct btrfs_block_group_cache
*block_group, struct btrfs_path *path)
return btrfs_update_inode(trans, root, inode);
}
+int btrfs_write_out_cache(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_path *path)
+{
+ struct btrfs_free_space_header *header;
+ struct extent_buffer *leaf;
+ struct inode *inode;
+ struct rb_node *node;
+ struct list_head *pos, *n;
+ struct page *page;
+ struct extent_state *cached_state = NULL;
+ struct list_head bitmap_list;
+ struct btrfs_key key;
+ u64 bytes = 0;
+ u32 *crc, *checksums;
+ pgoff_t index = 0, last_index = 0;
+ unsigned long first_page_offset;
+ int num_checksums;
+ int entries = 0;
+ int bitmaps = 0;
+ int ret = 0;
+
+ root = root->fs_info->tree_root;
+
+ INIT_LIST_HEAD(&bitmap_list);
+
+ spin_lock(&block_group->lock);
+ if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
+ spin_unlock(&block_group->lock);
+ return 0;
+ }
+ spin_unlock(&block_group->lock);
+
+ inode = lookup_free_space_inode(root, block_group, path);
+ if (IS_ERR(inode))
+ return 0;
+
+ if (!i_size_read(inode)) {
+ iput(inode);
+ return 0;
+ }
+
+ last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+ filemap_write_and_wait(inode->i_mapping);
+ btrfs_wait_ordered_range(inode, inode->i_size &
+ ~(root->sectorsize - 1), (u64)-1);
+
+ /* We need a checksum per page. */
+ num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
+ crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
+ if (!crc) {
+ iput(inode);
+ return 0;
+ }
+
+ /* Since the first page has all of our checksums and our generation we
+ * need to calculate the offset into the page that we can start writing
+ * our entries.
+ */
+ first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+
+ node = rb_first(&block_group->free_space_offset);
+ if (!node)
+ goto out_free;
+
+ /*
+ * Lock all pages first so we can lock the extent safely.
+ *
+ * NOTE: Because we hold the ref the entire time we're going to write to
+ * the page find_get_page should never fail, so we don't do a check
+ * after find_get_page at this point. Just putting this here so people
+ * know and don't freak out.
+ */
+ while (index <= last_index) {
+ page = grab_cache_page(inode->i_mapping, index);
+ if (!page) {
+ pgoff_t i = 0;
+
+ while (i < index) {
+ page = find_get_page(inode->i_mapping, i);
+ unlock_page(page);
+ page_cache_release(page);
+ page_cache_release(page);
+ i++;
+ }
+ goto out_free;
+ }
+ index++;
+ }
+
+ index = 0;
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
+ 0, &cached_state, GFP_NOFS);
+
+ /* Write out the extent entries */
+ do {
+ struct btrfs_free_space_entry *entry;
+ void *addr;
+ unsigned long offset = 0;
+ unsigned long start_offset = 0;
+
+ if (index == 0) {
+ start_offset = first_page_offset;
+ offset = start_offset;
+ }
+
+ page = find_get_page(inode->i_mapping, index);
+
+ addr = kmap(page);
+ entry = addr + start_offset;
+
+ memset(addr, 0, PAGE_CACHE_SIZE);
+ while (1) {
+ struct btrfs_free_space *e;
+
+ e = rb_entry(node, struct btrfs_free_space, offset_index);
+ entries++;
+
+ entry->offset = cpu_to_le64(e->offset);
+ entry->bytes = cpu_to_le64(e->bytes);
+ if (e->bitmap) {
+ entry->type = BTRFS_FREE_SPACE_BITMAP;
+ list_add_tail(&e->list, &bitmap_list);
+ bitmaps++;
+ } else {
+ entry->type = BTRFS_FREE_SPACE_EXTENT;
+ }
+ node = rb_next(node);
+ if (!node)
+ break;
+ offset += sizeof(struct btrfs_free_space_entry);
+ if (offset + sizeof(struct btrfs_free_space_entry) >=
+ PAGE_CACHE_SIZE)
+ break;
+ entry++;
+ }
+ *crc = ~(u32)0;
+ *crc = btrfs_csum_data(root, addr + start_offset, *crc,
+ PAGE_CACHE_SIZE - start_offset);
+ kunmap(page);
+
+ btrfs_csum_final(*crc, (char *)crc);
+ crc++;
+
+ bytes += PAGE_CACHE_SIZE;
+
+ ClearPageChecked(page);
+ set_page_extent_mapped(page);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+
+ /*
+ * We need to release our reference we got for grab_cache_page,
+ * except for the first page which will hold our checksums, we
+ * do that below.
+ */
+ if (index != 0) {
+ unlock_page(page);
+ page_cache_release(page);
+ }
+
+ page_cache_release(page);
+
+ index++;
+ } while (node);
+
+ /* Write out the bitmaps */
+ list_for_each_safe(pos, n, &bitmap_list) {
+ void *addr;
+ struct btrfs_free_space *entry =
+ list_entry(pos, struct btrfs_free_space, list);
+
+ page = find_get_page(inode->i_mapping, index);
+
+ addr = kmap(page);
+ memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
+ *crc = ~(u32)0;
+ *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
+ kunmap(page);
+ btrfs_csum_final(*crc, (char *)crc);
+ crc++;
+ bytes += PAGE_CACHE_SIZE;
+
+ ClearPageChecked(page);
+ set_page_extent_mapped(page);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+ page_cache_release(page);
+ list_del_init(&entry->list);
+ index++;
+ }
+
+ /* Zero out the rest of the pages just to make sure */
+ while (index <= last_index) {
+ void *addr;
+
+ page = find_get_page(inode->i_mapping, index);
+
+ addr = kmap(page);
+ memset(addr, 0, PAGE_CACHE_SIZE);
+ kunmap(page);
+ ClearPageChecked(page);
+ set_page_extent_mapped(page);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+ page_cache_release(page);
+ bytes += PAGE_CACHE_SIZE;
+ index++;
+ }
+
+ btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
+
+ /* Write the checksums and trans id to the first page */
+ {
+ void *addr;
+ u64 *gen;
+
+ page = find_get_page(inode->i_mapping, 0);
+
+ addr = kmap(page);
+ memcpy(addr, checksums, sizeof(u32) * num_checksums);
+ gen = addr + (sizeof(u32) * num_checksums);
+ *gen = trans->transid;
+ kunmap(page);
+ ClearPageChecked(page);
+ set_page_extent_mapped(page);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+ page_cache_release(page);
+ }
+ BTRFS_I(inode)->generation = trans->transid;
+
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
+ i_size_read(inode) - 1, &cached_state, GFP_NOFS);
+
+ filemap_write_and_wait(inode->i_mapping);
+
+ key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+ key.offset = block_group->key.objectid;
+ key.type = 0;
+
+ ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+ if (ret < 0) {
+ ret = 0;
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
+ goto out_free;
+ }
+ leaf = path->nodes[0];
+ if (ret > 0) {
+ struct btrfs_key found_key;
+ BUG_ON(!path->slots[0]);
+ path->slots[0]--;
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
+ found_key.offset != block_group->key.objectid) {
+ ret = 0;
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING, 0, 0, NULL,
+ GFP_NOFS);
+ btrfs_release_path(root, path);
+ goto out_free;
+ }
+ }
+ header = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_free_space_header);
+ btrfs_set_free_space_entries(leaf, header, entries);
+ btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
+ btrfs_set_free_space_generation(leaf, header, trans->transid);
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_release_path(root, path);
+
+ ret = 1;
+
+out_free:
+ if (ret == 0) {
+ invalidate_inode_pages2_range(inode->i_mapping, 0, index);
+ spin_lock(&block_group->lock);
+ block_group->disk_cache_state = BTRFS_DC_ERROR;
+ spin_unlock(&block_group->lock);
+ BTRFS_I(inode)->generation = 0;
+ }
+ kfree(checksums);
+ btrfs_update_inode(trans, root, inode);
+ iput(inode);
+ return ret;
+}
+
static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
u64 offset)
{
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0;
+ BUG_ON(root == root->fs_info->tree_root);
trans = btrfs_join_transaction(root, 1);
BUG_ON(!trans);
btrfs_set_trans_block_group(trans, inode);
int type;
int nocow;
int check_prev = 1;
+ bool nolock = false;
path = btrfs_alloc_path();
BUG_ON(!path);
- trans = btrfs_join_transaction(root, 1);
+ if (root == root->fs_info->tree_root) {
+ nolock = true;
+ trans = btrfs_join_transaction_nolock(root, 1);
+ } else {
+ trans = btrfs_join_transaction(root, 1);
+ }
BUG_ON(!trans);
cow_start = (u64)-1;
BUG_ON(ret);
}
- ret = btrfs_end_transaction(trans, root);
- BUG_ON(ret);
+ if (nolock) {
+ ret = btrfs_end_transaction_nolock(trans, root);
+ BUG_ON(ret);
+ } else {
+ ret = btrfs_end_transaction(trans, root);
+ BUG_ON(ret);
+ }
btrfs_free_path(path);
return 0;
}
if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
+ int do_list = (root->root_key.objectid !=
+ BTRFS_ROOT_TREE_OBJECTID);
if (*bits & EXTENT_FIRST_DELALLOC)
*bits &= ~EXTENT_FIRST_DELALLOC;
spin_lock(&root->fs_info->delalloc_lock);
BTRFS_I(inode)->delalloc_bytes += len;
root->fs_info->delalloc_bytes += len;
- if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+ if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
&root->fs_info->delalloc_inodes);
}
if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
+ int do_list = (root->root_key.objectid !=
+ BTRFS_ROOT_TREE_OBJECTID);
if (*bits & EXTENT_FIRST_DELALLOC)
*bits &= ~EXTENT_FIRST_DELALLOC;
if (*bits & EXTENT_DO_ACCOUNTING)
btrfs_delalloc_release_metadata(inode, len);
- if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
+ if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
+ && do_list)
btrfs_free_reserved_data_space(inode, len);
spin_lock(&root->fs_info->delalloc_lock);
root->fs_info->delalloc_bytes -= len;
BTRFS_I(inode)->delalloc_bytes -= len;
- if (BTRFS_I(inode)->delalloc_bytes == 0 &&
+ if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
list_del_init(&BTRFS_I(inode)->delalloc_inodes);
}
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
- ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+ if (root == root->fs_info->tree_root)
+ ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
+ else
+ ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
BUG_ON(ret);
if (!(rw & REQ_WRITE)) {
struct extent_state *cached_state = NULL;
int compressed = 0;
int ret;
+ bool nolock = false;
ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
end - start + 1);
return 0;
BUG_ON(!ordered_extent);
+ nolock = (root == root->fs_info->tree_root);
+
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
BUG_ON(!list_empty(&ordered_extent->list));
ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
if (!ret) {
- trans = btrfs_join_transaction(root, 1);
+ if (nolock)
+ trans = btrfs_join_transaction_nolock(root, 1);
+ else
+ trans = btrfs_join_transaction(root, 1);
+ BUG_ON(!trans);
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
ret = btrfs_update_inode(trans, root, inode);
ordered_extent->file_offset + ordered_extent->len - 1,
0, &cached_state, GFP_NOFS);
- trans = btrfs_join_transaction(root, 1);
+ if (nolock)
+ trans = btrfs_join_transaction_nolock(root, 1);
+ else
+ trans = btrfs_join_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
out:
- btrfs_delalloc_release_metadata(inode, ordered_extent->len);
- if (trans)
- btrfs_end_transaction(trans, root);
+ if (nolock) {
+ if (trans)
+ btrfs_end_transaction_nolock(trans, root);
+ } else {
+ btrfs_delalloc_release_metadata(inode, ordered_extent->len);
+ if (trans)
+ btrfs_end_transaction(trans, root);
+ }
+
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
/* once for the tree */