Btrfs: Add support for online device removal
author: Chris Mason <chris.mason@oracle.com>
Wed, 7 May 2008 15:43:44 +0000 (11:43 -0400)
committer: Chris Mason <chris.mason@oracle.com>
Thu, 25 Sep 2008 15:04:02 +0000 (11:04 -0400)
This required a few structural changes to the code that manages bdev pointers:

The VFS super block now gets an anon-bdev instead of a pointer to the
lowest bdev.  This allows us to avoid swapping the super block bdev pointer
around at run time.

The code to read in the super block no longer goes through the extent
buffer interface.  Things got ugly keeping the mapping constant.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/inode.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h

index 3b6f8524a4ad6932aaa4dc0162d752a006297305..33ab165591c5f746d80bde012f50b0e7cdd1abb7 100644 (file)
@@ -505,7 +505,7 @@ struct btrfs_fs_info {
        u64 alloc_start;
        struct btrfs_transaction *running_transaction;
        struct btrfs_super_block super_copy;
-       struct extent_buffer *sb_buffer;
+       struct btrfs_super_block super_for_commit;
        struct block_device *__bdev;
        struct super_block *sb;
        struct inode *btree_inode;
@@ -1208,6 +1208,7 @@ BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64);
 
 /* struct btrfs_super_block */
 BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64);
 BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block,
                         generation, 64);
 BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64);
index fabc31b334b66aeda4307b434888c7a117855691..9d5424ad01a3f9de0ad8ab4218aab3b988c5ce87 100644 (file)
@@ -78,9 +78,13 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
 
        spin_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, start, len);
-       spin_unlock(&em_tree->lock);
-       if (em)
+       if (em) {
+               em->bdev =
+                       BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
+               spin_unlock(&em_tree->lock);
                goto out;
+       }
+       spin_unlock(&em_tree->lock);
 
        em = alloc_extent_map(GFP_NOFS);
        if (!em) {
@@ -90,7 +94,7 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
        em->start = 0;
        em->len = (u64)-1;
        em->block_start = 0;
-       em->bdev = inode->i_sb->s_bdev;
+       em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 
        spin_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em);
@@ -435,11 +439,6 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
        ret = btrfs_bio_wq_end_io(root->fs_info, bio, 1);
        BUG_ON(ret);
 
-       if (offset == BTRFS_SUPER_INFO_OFFSET) {
-               bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
-               submit_bio(rw, bio);
-               return 0;
-       }
        return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num);
 }
 
@@ -587,8 +586,7 @@ static int close_all_devices(struct btrfs_fs_info *fs_info)
        list = &fs_info->fs_devices->devices;
        list_for_each(next, list) {
                device = list_entry(next, struct btrfs_device, dev_list);
-               if (device->bdev && device->bdev != fs_info->sb->s_bdev)
-                       close_bdev_excl(device->bdev);
+               close_bdev_excl(device->bdev);
                device->bdev = NULL;
        }
        return 0;
@@ -1118,6 +1116,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        u32 leafsize;
        u32 blocksize;
        u32 stripesize;
+       struct buffer_head *bh;
        struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root),
                                                 GFP_NOFS);
        struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root),
@@ -1153,7 +1152,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        spin_lock_init(&fs_info->new_trans_lock);
 
        init_completion(&fs_info->kobj_unregister);
-       sb_set_blocksize(sb, BTRFS_SUPER_INFO_SIZE);
        fs_info->tree_root = tree_root;
        fs_info->extent_root = extent_root;
        fs_info->chunk_root = chunk_root;
@@ -1170,6 +1168,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        fs_info->btree_inode->i_ino = 1;
        fs_info->btree_inode->i_nlink = 1;
 
+       sb->s_blocksize = 4096;
+       sb->s_blocksize_bits = blksize_bits(4096);
+
        /*
         * we set the i_size on the btree inode to the max possible int.
         * the real end of the address space is determined by all of
@@ -1229,19 +1230,16 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        __setup_root(4096, 4096, 4096, 4096, tree_root,
                     fs_info, BTRFS_ROOT_TREE_OBJECTID);
 
-       fs_info->sb_buffer = read_tree_block(tree_root,
-                                            BTRFS_SUPER_INFO_OFFSET,
-                                            4096);
 
-       if (!fs_info->sb_buffer)
+       bh = __bread(fs_devices->latest_bdev,
+                    BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
+       if (!bh)
                goto fail_iput;
 
-       read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0,
-                          sizeof(fs_info->super_copy));
+       memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy));
+       brelse(bh);
 
-       read_extent_buffer(fs_info->sb_buffer, fs_info->fsid,
-                          (unsigned long)btrfs_super_fsid(fs_info->sb_buffer),
-                          BTRFS_FSID_SIZE);
+       memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE);
 
        disk_super = &fs_info->super_copy;
        if (!btrfs_super_root(disk_super))
@@ -1263,7 +1261,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        tree_root->leafsize = leafsize;
        tree_root->sectorsize = sectorsize;
        tree_root->stripesize = stripesize;
-       sb_set_blocksize(sb, sectorsize);
+
+       sb->s_blocksize = sectorsize;
+       sb->s_blocksize_bits = blksize_bits(sectorsize);
 
        if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
                    sizeof(disk_super->magic))) {
@@ -1339,7 +1339,6 @@ fail_tree_root:
 fail_sys_array:
        mutex_unlock(&fs_info->fs_mutex);
 fail_sb_buffer:
-       free_extent_buffer(fs_info->sb_buffer);
        extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
 fail_iput:
        iput(fs_info->btree_inode);
@@ -1380,41 +1379,44 @@ int write_all_supers(struct btrfs_root *root)
        struct list_head *cur;
        struct list_head *head = &root->fs_info->fs_devices->devices;
        struct btrfs_device *dev;
-       struct extent_buffer *sb;
+       struct btrfs_super_block *sb;
        struct btrfs_dev_item *dev_item;
        struct buffer_head *bh;
        int ret;
        int do_barriers;
        int max_errors;
        int total_errors = 0;
+       u32 crc;
+       u64 flags;
 
        max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1;
        do_barriers = !btrfs_test_opt(root, NOBARRIER);
 
-       sb = root->fs_info->sb_buffer;
-       dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block,
-                                                     dev_item);
+       sb = &root->fs_info->super_for_commit;
+       dev_item = &sb->dev_item;
        list_for_each(cur, head) {
                dev = list_entry(cur, struct btrfs_device, dev_list);
-               btrfs_set_device_type(sb, dev_item, dev->type);
-               btrfs_set_device_id(sb, dev_item, dev->devid);
-               btrfs_set_device_total_bytes(sb, dev_item, dev->total_bytes);
-               btrfs_set_device_bytes_used(sb, dev_item, dev->bytes_used);
-               btrfs_set_device_io_align(sb, dev_item, dev->io_align);
-               btrfs_set_device_io_width(sb, dev_item, dev->io_width);
-               btrfs_set_device_sector_size(sb, dev_item, dev->sector_size);
-               write_extent_buffer(sb, dev->uuid,
-                                   (unsigned long)btrfs_device_uuid(dev_item),
-                                   BTRFS_UUID_SIZE);
-
-               btrfs_set_header_flag(sb, BTRFS_HEADER_FLAG_WRITTEN);
-               csum_tree_block(root, sb, 0);
-
-               bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET /
-                             root->fs_info->sb->s_blocksize,
+               btrfs_set_stack_device_type(dev_item, dev->type);
+               btrfs_set_stack_device_id(dev_item, dev->devid);
+               btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes);
+               btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used);
+               btrfs_set_stack_device_io_align(dev_item, dev->io_align);
+               btrfs_set_stack_device_io_width(dev_item, dev->io_width);
+               btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
+               memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE);
+               flags = btrfs_super_flags(sb);
+               btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
+
+
+               crc = ~(u32)0;
+               crc = btrfs_csum_data(root, (char *)sb + BTRFS_CSUM_SIZE, crc,
+                                     BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
+               btrfs_csum_final(crc, sb->csum);
+
+               bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET / 4096,
                              BTRFS_SUPER_INFO_SIZE);
 
-               read_extent_buffer(sb, bh->b_data, 0, BTRFS_SUPER_INFO_SIZE);
+               memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
                dev->pending_io = bh;
 
                get_bh(bh);
@@ -1483,15 +1485,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
        int ret;
 
        ret = write_all_supers(root);
-#if 0
-       if (!btrfs_test_opt(root, NOBARRIER))
-               blkdev_issue_flush(sb->s_bdev, NULL);
-       set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, super);
-       ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping,
-                                    super->start, super->len);
-       if (!btrfs_test_opt(root, NOBARRIER))
-               blkdev_issue_flush(sb->s_bdev, NULL);
-#endif
        return ret;
 }
 
@@ -1570,8 +1563,6 @@ int close_ctree(struct btrfs_root *root)
        if (root->fs_info->dev_root->node);
                free_extent_buffer(root->fs_info->dev_root->node);
 
-       free_extent_buffer(fs_info->sb_buffer);
-
        btrfs_free_block_groups(root->fs_info);
        del_fs_roots(fs_info);
 
@@ -1652,7 +1643,7 @@ void btrfs_throttle(struct btrfs_root *root)
 {
        struct backing_dev_info *bdi;
 
-       bdi = root->fs_info->sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
+       bdi = &root->fs_info->bdi;
        if (root->fs_info->throttles && bdi_write_congested(bdi)) {
 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
                congestion_wait(WRITE, HZ/20);
index f94794a993291aff636177cf4bd7a7f46913347a..c0e67bde8428547c9649896cb8ba1e0ba37058ae 100644 (file)
@@ -147,6 +147,8 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
        u64 end;
        int ret;
 
+       bytenr = max_t(u64, bytenr,
+                      BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
        block_group_cache = &info->block_group_cache;
        ret = find_first_extent_bit(block_group_cache,
                                    bytenr, &start, &end,
@@ -1059,16 +1061,25 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
        }
 }
 
-static u64 reduce_alloc_profile(u64 flags)
+static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
 {
+       u64 num_devices = root->fs_info->fs_devices->num_devices;
+
+       if (num_devices == 1)
+               flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
+       if (num_devices < 4)
+               flags &= ~BTRFS_BLOCK_GROUP_RAID10;
+
        if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
            (flags & (BTRFS_BLOCK_GROUP_RAID1 |
-                     BTRFS_BLOCK_GROUP_RAID10)))
+                     BTRFS_BLOCK_GROUP_RAID10))) {
                flags &= ~BTRFS_BLOCK_GROUP_DUP;
+       }
 
        if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
-           (flags & BTRFS_BLOCK_GROUP_RAID10))
+           (flags & BTRFS_BLOCK_GROUP_RAID10)) {
                flags &= ~BTRFS_BLOCK_GROUP_RAID1;
+       }
 
        if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
            ((flags & BTRFS_BLOCK_GROUP_RAID1) |
@@ -1078,7 +1089,6 @@ static u64 reduce_alloc_profile(u64 flags)
        return flags;
 }
 
-
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
                          struct btrfs_root *extent_root, u64 alloc_bytes,
                          u64 flags)
@@ -1089,7 +1099,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
        u64 num_bytes;
        int ret;
 
-       flags = reduce_alloc_profile(flags);
+       flags = reduce_alloc_profile(extent_root, flags);
 
        space_info = __find_space_info(extent_root->fs_info, flags);
        if (!space_info) {
@@ -1169,6 +1179,21 @@ static int update_block_group(struct btrfs_trans_handle *trans,
        return 0;
 }
 
+static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
+{
+       u64 start;
+       u64 end;
+       int ret;
+       ret = find_first_extent_bit(&root->fs_info->block_group_cache,
+                                   search_start, &start, &end,
+                                   BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
+                                   BLOCK_GROUP_SYSTEM);
+       if (ret)
+               return 0;
+       return start;
+}
+
+
 static int update_pinned_extents(struct btrfs_root *root,
                                u64 bytenr, u64 num, int pin)
 {
@@ -1185,16 +1210,25 @@ static int update_pinned_extents(struct btrfs_root *root,
        }
        while (num > 0) {
                cache = btrfs_lookup_block_group(fs_info, bytenr);
-               WARN_ON(!cache);
-               len = min(num, cache->key.offset -
-                         (bytenr - cache->key.objectid));
+               if (!cache) {
+                       u64 first = first_logical_byte(root, bytenr);
+                       WARN_ON(first < bytenr);
+                       len = min(first - bytenr, num);
+               } else {
+                       len = min(num, cache->key.offset -
+                                 (bytenr - cache->key.objectid));
+               }
                if (pin) {
-                       cache->pinned += len;
-                       cache->space_info->bytes_pinned += len;
+                       if (cache) {
+                               cache->pinned += len;
+                               cache->space_info->bytes_pinned += len;
+                       }
                        fs_info->total_pinned += len;
                } else {
-                       cache->pinned -= len;
-                       cache->space_info->bytes_pinned -= len;
+                       if (cache) {
+                               cache->pinned -= len;
+                               cache->space_info->bytes_pinned -= len;
+                       }
                        fs_info->total_pinned -= len;
                }
                bytenr += len;
@@ -1547,7 +1581,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
                                     int data)
 {
        int ret;
-       u64 orig_search_start = search_start;
+       u64 orig_search_start;
        struct btrfs_root * root = orig_root->fs_info->extent_root;
        struct btrfs_fs_info *info = root->fs_info;
        u64 total_needed = num_bytes;
@@ -1577,6 +1611,9 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
                }
        }
 
+       search_start = max(search_start, first_logical_byte(root, 0));
+       orig_search_start = search_start;
+
        if (search_end == (u64)-1)
                search_end = btrfs_super_total_bytes(&info->super_copy);
 
@@ -1751,7 +1788,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
                data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
        }
 again:
-       data = reduce_alloc_profile(data);
+       data = reduce_alloc_profile(root, data);
        if (root->ref_cows) {
                if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
                        ret = do_chunk_alloc(trans, root->fs_info->extent_root,
@@ -2309,6 +2346,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
        struct file_ra_state *ra;
        unsigned long total_read = 0;
        unsigned long ra_pages;
+       struct btrfs_trans_handle *trans;
 
        ra = kzalloc(sizeof(*ra), GFP_NOFS);
 
@@ -2326,9 +2364,13 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
                                       calc_ra(i, last_index, ra_pages));
                }
                total_read++;
+               if (((u64)i << PAGE_CACHE_SHIFT) > inode->i_size)
+                       goto truncate_racing;
+
                page = grab_cache_page(inode->i_mapping, i);
-               if (!page)
+               if (!page) {
                        goto out_unlock;
+               }
                if (!PageUptodate(page)) {
                        btrfs_readpage(NULL, page);
                        lock_page(page);
@@ -2350,20 +2392,33 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
 
                lock_extent(io_tree, page_start, page_end, GFP_NOFS);
 
-               set_page_dirty(page);
                set_extent_delalloc(io_tree, page_start,
                                    page_end, GFP_NOFS);
+               set_page_dirty(page);
 
                unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
                unlock_page(page);
                page_cache_release(page);
-               balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
        }
+       balance_dirty_pages_ratelimited_nr(inode->i_mapping,
+                                          total_read);
 
 out_unlock:
        kfree(ra);
+       trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
+       if (trans) {
+               btrfs_add_ordered_inode(inode);
+               btrfs_end_transaction(trans, BTRFS_I(inode)->root);
+               mark_inode_dirty(inode);
+       }
        mutex_unlock(&inode->i_mutex);
        return 0;
+
+truncate_racing:
+       vmtruncate(inode, inode->i_size);
+       balance_dirty_pages_ratelimited_nr(inode->i_mapping,
+                                          total_read);
+       goto out_unlock;
 }
 
 /*
@@ -2466,6 +2521,27 @@ out:
        return 0;
 }
 
+static int noinline del_extent_zero(struct btrfs_root *extent_root,
+                                   struct btrfs_path *path,
+                                   struct btrfs_key *extent_key)
+{
+       int ret;
+       struct btrfs_trans_handle *trans;
+
+       trans = btrfs_start_transaction(extent_root, 1);
+       ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
+       if (ret > 0) {
+               ret = -EIO;
+               goto out;
+       }
+       if (ret < 0)
+               goto out;
+       ret = btrfs_del_item(trans, extent_root, path);
+out:
+       btrfs_end_transaction(trans, extent_root);
+       return ret;
+}
+
 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
                                        struct btrfs_path *path,
                                        struct btrfs_key *extent_key)
@@ -2477,6 +2553,10 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root,
        u32 item_size;
        int ret = 0;
 
+       if (extent_key->objectid == 0) {
+               ret = del_extent_zero(extent_root, path, extent_key);
+               goto out;
+       }
        key.objectid = extent_key->objectid;
        key.type = BTRFS_EXTENT_REF_KEY;
        key.offset = 0;
@@ -2490,15 +2570,24 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root,
                ret = 0;
                leaf = path->nodes[0];
                nritems = btrfs_header_nritems(leaf);
-               if (path->slots[0] == nritems)
-                       goto out;
+               if (path->slots[0] == nritems) {
+                       ret = btrfs_next_leaf(extent_root, path);
+                       if (ret > 0) {
+                               ret = 0;
+                               goto out;
+                       }
+                       if (ret < 0)
+                               goto out;
+               }
 
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-               if (found_key.objectid != extent_key->objectid)
+               if (found_key.objectid != extent_key->objectid) {
                        break;
+               }
 
-               if (found_key.type != BTRFS_EXTENT_REF_KEY)
+               if (found_key.type != BTRFS_EXTENT_REF_KEY) {
                        break;
+               }
 
                key.offset = found_key.offset + 1;
                item_size = btrfs_item_size_nr(leaf, path->slots[0]);
@@ -2519,7 +2608,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
        u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
                BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
 
-       num_devices = btrfs_super_num_devices(&root->fs_info->super_copy);
+       num_devices = root->fs_info->fs_devices->num_devices;
        if (num_devices == 1) {
                stripped |= BTRFS_BLOCK_GROUP_DUP;
                stripped = flags & ~stripped;
@@ -2535,9 +2624,6 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
                return flags;
        } else {
                /* they already had raid on here, just return */
-               if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
-                   (flags & BTRFS_BLOCK_GROUP_RAID1)) {
-               }
                if (flags & stripped)
                        return flags;
 
@@ -2570,7 +2656,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
        struct extent_buffer *leaf;
        u32 nritems;
        int ret;
-       int progress = 0;
+       int progress;
 
        shrink_block_group = btrfs_lookup_block_group(root->fs_info,
                                                      shrink_start);
@@ -2597,6 +2683,7 @@ again:
        shrink_block_group->ro = 1;
 
        total_found = 0;
+       progress = 0;
        key.objectid = shrink_start;
        key.offset = 0;
        key.type = 0;
index f7beb9b0d37aa6ed8e9c39eb0d20b7de528df769..b437d3bdf95e7ba3ce3223b7862c9892609bbe1e 100644 (file)
@@ -2194,6 +2194,8 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
 again:
        spin_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, start, len);
+       if (em)
+               em->bdev = root->fs_info->fs_devices->latest_bdev;
        spin_unlock(&em_tree->lock);
 
        if (em) {
@@ -2212,7 +2214,7 @@ again:
 
        em->start = EXTENT_MAP_HOLE;
        em->len = (u64)-1;
-       em->bdev = inode->i_sb->s_bdev;
+       em->bdev = root->fs_info->fs_devices->latest_bdev;
        ret = btrfs_lookup_file_extent(trans, root, path,
                                       objectid, start, trans != NULL);
        if (ret < 0) {
@@ -3101,6 +3103,27 @@ out:
        return ret;
 }
 
+long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
+{
+       struct btrfs_ioctl_vol_args *vol_args;
+       int ret;
+
+       vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
+
+       if (!vol_args)
+               return -ENOMEM;
+
+       if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
+               ret = -EFAULT;
+               goto out;
+       }
+       ret = btrfs_rm_device(root, vol_args->name);
+
+out:
+       kfree(vol_args);
+       return ret;
+}
+
 int dup_item_to_inode(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root,
                       struct btrfs_path *path,
@@ -3294,6 +3317,8 @@ long btrfs_ioctl(struct file *file, unsigned int
                return btrfs_ioctl_resize(root, (void __user *)arg);
        case BTRFS_IOC_ADD_DEV:
                return btrfs_ioctl_add_dev(root, (void __user *)arg);
+       case BTRFS_IOC_RM_DEV:
+               return btrfs_ioctl_rm_dev(root, (void __user *)arg);
        case BTRFS_IOC_BALANCE:
                return btrfs_balance(root->fs_info->dev_root);
        case BTRFS_IOC_CLONE:
index 7153dfaa34047b1537fc1054252ffdbb9902bcab..020e5a83e31f8b29584a594be935f31b1e2a71aa 100644 (file)
@@ -315,24 +315,12 @@ static void btrfs_write_super(struct super_block *sb)
        sb->s_dirt = 0;
 }
 
-/*
- * This is almost a copy of get_sb_bdev in fs/super.c.
- * We need the local copy to allow direct mounting of
- * subvolumes, but this could be easily integrated back
- * into the generic version.  --hch
- */
-
-/* start copy & paste */
-static int set_bdev_super(struct super_block *s, void *data)
+static int btrfs_test_super(struct super_block *s, void *data)
 {
-       s->s_bdev = data;
-       s->s_dev = s->s_bdev->bd_dev;
-       return 0;
-}
+       struct btrfs_fs_devices *test_fs_devices = data;
+       struct btrfs_root *root = btrfs_sb(s);
 
-static int test_bdev_super(struct super_block *s, void *data)
-{
-       return (void *)s->s_bdev == data;
+       return root->fs_info->fs_devices == test_fs_devices;
 }
 
 int btrfs_get_sb_bdev(struct file_system_type *fs_type,
@@ -354,14 +342,9 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type,
                return error;
 
        bdev = fs_devices->lowest_bdev;
-       /*
-        * once the super is inserted into the list by sget, s_umount
-        * will protect the lockfs code from trying to start a snapshot
-        * while we are mounting
-        */
-       down(&bdev->bd_mount_sem);
-       s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
-       up(&bdev->bd_mount_sem);
+       btrfs_lock_volumes();
+       s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices);
+       btrfs_unlock_volumes();
        if (IS_ERR(s))
                goto error_s;
 
@@ -373,13 +356,11 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type,
                        goto error_bdev;
                }
 
-               close_bdev_excl(bdev);
        } else {
                char b[BDEVNAME_SIZE];
 
                s->s_flags = flags;
                strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
-               sb_set_blocksize(s, block_size(bdev));
                error = btrfs_fill_super(s, fs_devices, data,
                                         flags & MS_SILENT ? 1 : 0);
                if (error) {
@@ -458,7 +439,7 @@ static struct file_system_type btrfs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "btrfs",
        .get_sb         = btrfs_get_sb,
-       .kill_sb        = kill_block_super,
+       .kill_sb        = kill_anon_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
 
index 9826942fa18afea20a62c15be3bb5e63c6b3686c..57746c11eae35c251806c28990b8428e9a2e13f8 100644 (file)
@@ -738,9 +738,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                                   chunk_root->node->start);
        btrfs_set_super_chunk_root_level(&root->fs_info->super_copy,
                                         btrfs_header_level(chunk_root->node));
-       write_extent_buffer(root->fs_info->sb_buffer,
-                           &root->fs_info->super_copy, 0,
-                           sizeof(root->fs_info->super_copy));
+       memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
+              sizeof(root->fs_info->super_copy));
 
        btrfs_copy_pinned(root, pinned_copy);
 
index b38187573108e4263c02552c62746a790a2308a3..55da5f0c56e3dddd9a14d7ac633b46d95135fef6 100644 (file)
@@ -45,6 +45,16 @@ struct map_lookup {
 static DEFINE_MUTEX(uuid_mutex);
 static LIST_HEAD(fs_uuids);
 
+void btrfs_lock_volumes(void)
+{
+       mutex_lock(&uuid_mutex);
+}
+
+void btrfs_unlock_volumes(void)
+{
+       mutex_unlock(&uuid_mutex);
+}
+
 int btrfs_cleanup_fs_uuids(void)
 {
        struct btrfs_fs_devices *fs_devices;
@@ -193,12 +203,14 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
                        ret = PTR_ERR(bdev);
                        goto fail;
                }
+               set_blocksize(bdev, 4096);
                if (device->devid == fs_devices->latest_devid)
                        fs_devices->latest_bdev = bdev;
                if (device->devid == fs_devices->lowest_devid) {
                        fs_devices->lowest_bdev = bdev;
                }
                device->bdev = bdev;
+
        }
        mutex_unlock(&uuid_mutex);
        return 0;
@@ -393,6 +405,9 @@ int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
        struct btrfs_path *path;
        struct btrfs_root *root = device->dev_root;
        struct btrfs_key key;
+       struct btrfs_key found_key;
+       struct extent_buffer *leaf = NULL;
+       struct btrfs_dev_extent *extent = NULL;
 
        path = btrfs_alloc_path();
        if (!path)
@@ -403,8 +418,25 @@ int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
        key.type = BTRFS_DEV_EXTENT_KEY;
 
        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+       if (ret > 0) {
+               ret = btrfs_previous_item(root, path, key.objectid,
+                                         BTRFS_DEV_EXTENT_KEY);
+               BUG_ON(ret);
+               leaf = path->nodes[0];
+               btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+               extent = btrfs_item_ptr(leaf, path->slots[0],
+                                       struct btrfs_dev_extent);
+               BUG_ON(found_key.offset > start || found_key.offset +
+                      btrfs_dev_extent_length(leaf, extent) < start);
+               ret = 0;
+       } else if (ret == 0) {
+               leaf = path->nodes[0];
+               extent = btrfs_item_ptr(leaf, path->slots[0],
+                                       struct btrfs_dev_extent);
+       }
        BUG_ON(ret);
 
+       device->bytes_used -= btrfs_dev_extent_length(leaf, extent);
        ret = btrfs_del_item(trans, root, path);
        BUG_ON(ret);
 
@@ -593,6 +625,170 @@ out:
        return ret;
 }
 
+/*
+ * Delete the dev item for 'device' from the chunk tree and drop the device
+ * from the in-memory device lists, inside a transaction of its own.
+ *
+ * The chunk root is always used, whichever root the caller passes in.
+ * Any cached bdev pointer (lowest_bdev, latest_bdev, sb->s_bdev) that
+ * still refers to the departing device is switched to the first device
+ * left on the list, and the total_bytes / num_devices counters kept in
+ * super_copy are reduced accordingly.
+ *
+ * Returns 0 on success, -ENOMEM if no path could be allocated, -ENOENT if
+ * the dev item does not exist, or the error from the tree search or the
+ * item deletion.  Once the transaction has been started it is committed
+ * on every exit path, including the error ones.
+ */
+static int btrfs_rm_dev_item(struct btrfs_root *root,
+                            struct btrfs_device *device)
+{
+       struct block_device *old_bdev = device->bdev;
+       struct btrfs_fs_devices *fs_devices;
+       struct btrfs_super_block *super;
+       struct btrfs_trans_handle *trans;
+       struct btrfs_device *first_left;
+       struct btrfs_fs_info *fs_info;
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       u64 fs_bytes;
+       u64 dev_count;
+       int ret;
+
+       /* dev items live in the chunk tree */
+       root = root->fs_info->chunk_root;
+       fs_info = root->fs_info;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       trans = btrfs_start_transaction(root, 1);
+
+       key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+       key.type = BTRFS_DEV_ITEM_KEY;
+       key.offset = device->devid;
+
+       ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+       if (ret) {
+               /* a positive result means there is no exact match */
+               if (ret > 0)
+                       ret = -ENOENT;
+               goto out;
+       }
+
+       ret = btrfs_del_item(trans, root, path);
+       if (ret)
+               goto out;
+
+       /*
+        * at this point, the device is zero sized.  We want to
+        * remove it from the devices list and zero out the old super
+        */
+       list_del_init(&device->dev_list);
+       list_del_init(&device->dev_alloc_list);
+
+       /* repoint every cached bdev that still names the removed device */
+       fs_devices = fs_info->fs_devices;
+       first_left = list_entry(fs_devices->devices.next,
+                               struct btrfs_device, dev_list);
+       if (fs_devices->lowest_bdev == old_bdev)
+               fs_devices->lowest_bdev = first_left->bdev;
+       if (fs_info->sb->s_bdev == old_bdev)
+               fs_info->sb->s_bdev = first_left->bdev;
+       if (fs_devices->latest_bdev == old_bdev)
+               fs_devices->latest_bdev = first_left->bdev;
+
+       /* account for the lost capacity and device in the super copy */
+       super = &fs_info->super_copy;
+
+       fs_bytes = btrfs_super_total_bytes(super);
+       btrfs_set_super_total_bytes(super, fs_bytes - device->total_bytes);
+
+       dev_count = btrfs_super_num_devices(super);
+       btrfs_set_super_num_devices(super, dev_count - 1);
+out:
+       btrfs_free_path(path);
+       btrfs_commit_transaction(trans, root);
+       return ret;
+}
+
+/*
+ * Remove the device named by 'device_path' from a mounted filesystem.
+ *
+ * root:        any root of the filesystem; only root->fs_info is used.
+ * device_path: path of the block device to remove.
+ *
+ * fs_mutex and uuid_mutex are held for the whole operation.  The request
+ * is refused with -EINVAL when it would leave a raid10 filesystem with
+ * fewer than four devices or a raid1 filesystem with fewer than two.
+ *
+ * The device is opened exclusively and its super block is read to find
+ * the devid; the super must carry the btrfs magic and this filesystem's
+ * fsid, and the devid must match a known device, otherwise -ENOENT is
+ * returned.  The device is then shrunk to size zero, its dev item is
+ * deleted, and the magic in its on-disk super block is cleared so it is
+ * no longer detected as part of the filesystem.
+ *
+ * Returns 0 on success, -EIO if the super block cannot be read, or the
+ * error from open_bdev_excl(), btrfs_shrink_device() or
+ * btrfs_rm_dev_item().
+ */
+int btrfs_rm_device(struct btrfs_root *root, char *device_path)
+{
+       struct btrfs_device *device;
+       struct block_device *bdev;
+       struct buffer_head *bh;
+       struct btrfs_super_block *disk_super;
+       u64 all_avail;
+       u64 devid;
+       int ret = 0;
+
+       mutex_lock(&root->fs_info->fs_mutex);
+       mutex_lock(&uuid_mutex);
+
+       /* union of every block group profile currently in use */
+       all_avail = root->fs_info->avail_data_alloc_bits |
+               root->fs_info->avail_system_alloc_bits |
+               root->fs_info->avail_metadata_alloc_bits;
+
+       if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
+           root->fs_info->fs_devices->num_devices <= 4) {
+               printk("btrfs: unable to go below four devices on raid10\n");
+               ret = -EINVAL;
+               goto out;
+       }
+
+       if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
+           root->fs_info->fs_devices->num_devices <= 2) {
+               printk("btrfs: unable to go below two devices on raid1\n");
+               ret = -EINVAL;
+               goto out;
+       }
+
+       bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder);
+       if (IS_ERR(bdev)) {
+               ret = PTR_ERR(bdev);
+               goto out;
+       }
+
+       /* read the device's own super block to learn which devid it is */
+       bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
+       if (!bh) {
+               ret = -EIO;
+               goto error_close;
+       }
+       disk_super = (struct btrfs_super_block *)bh->b_data;
+       if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
+           sizeof(disk_super->magic))) {
+               ret = -ENOENT;
+               goto error_brelse;
+       }
+       if (memcmp(disk_super->fsid, root->fs_info->fsid, BTRFS_FSID_SIZE)) {
+               ret = -ENOENT;
+               goto error_brelse;
+       }
+       devid = le64_to_cpu(disk_super->dev_item.devid);
+       device = btrfs_find_device(root, devid, NULL);
+       if (!device) {
+               ret = -ENOENT;
+               goto error_brelse;
+       }
+
+       /*
+        * The count is dropped before the shrink, as before; every failure
+        * from here on must go through error_undo so it is put back.
+        */
+       root->fs_info->fs_devices->num_devices--;
+
+       ret = btrfs_shrink_device(device, 0);
+       if (ret)
+               goto error_undo;
+
+       ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
+       if (ret)
+               goto error_undo;
+
+       /* make sure this device isn't detected as part of the FS anymore */
+       memset(&disk_super->magic, 0, sizeof(disk_super->magic));
+       set_buffer_dirty(bh);
+       sync_dirty_buffer(bh);
+
+       brelse(bh);
+
+       /* one close for the device struct or super_block */
+       close_bdev_excl(device->bdev);
+
+       /* one close for us: release the handle opened at the top */
+       close_bdev_excl(bdev);
+
+       kfree(device->name);
+       kfree(device);
+       ret = 0;
+       goto out;
+
+error_undo:
+       /* the device is still part of the filesystem, count it again */
+       root->fs_info->fs_devices->num_devices++;
+error_brelse:
+       brelse(bh);
+error_close:
+       close_bdev_excl(bdev);
+out:
+       mutex_unlock(&uuid_mutex);
+       mutex_unlock(&root->fs_info->fs_mutex);
+       return ret;
+}
+
 int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 {
        struct btrfs_trans_handle *trans;
@@ -831,13 +1027,17 @@ int btrfs_relocate_chunk(struct btrfs_root *root,
        em = lookup_extent_mapping(em_tree, chunk_offset, 1);
        spin_unlock(&em_tree->lock);
 
-       BUG_ON(em->start > chunk_offset || em->start + em->len < chunk_offset);
+       BUG_ON(em->start > chunk_offset ||
+              em->start + em->len < chunk_offset);
        map = (struct map_lookup *)em->bdev;
 
        for (i = 0; i < map->num_stripes; i++) {
                ret = btrfs_free_dev_extent(trans, map->stripes[i].dev,
                                            map->stripes[i].physical);
                BUG_ON(ret);
+
+               ret = btrfs_update_device(trans, map->stripes[i].dev);
+               BUG_ON(ret);
        }
        ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid,
                               chunk_offset);
@@ -847,11 +1047,8 @@ int btrfs_relocate_chunk(struct btrfs_root *root,
        if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
                ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
                BUG_ON(ret);
-               goto out;
        }
 
-
-
        spin_lock(&em_tree->lock);
        remove_extent_mapping(em_tree, em);
        kfree(map);
@@ -861,7 +1058,6 @@ int btrfs_relocate_chunk(struct btrfs_root *root,
        free_extent_map(em);
        spin_unlock(&em_tree->lock);
 
-out:
        /* once for us */
        free_extent_map(em);
 
@@ -1449,7 +1645,7 @@ again:
                return 0;
 
        if (!em) {
-               printk("unable to find logical %Lu\n", logical);
+               printk("unable to find logical %Lu len %Lu\n", logical, *length);
                BUG();
        }
 
@@ -1712,6 +1908,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
 
        logical = key->offset;
        length = btrfs_chunk_length(leaf, chunk);
+
        spin_lock(&map_tree->map_tree.lock);
        em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
        spin_unlock(&map_tree->map_tree.lock);
@@ -1845,7 +2042,7 @@ int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf)
 int btrfs_read_sys_array(struct btrfs_root *root)
 {
        struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
-       struct extent_buffer *sb = root->fs_info->sb_buffer;
+       struct extent_buffer *sb;
        struct btrfs_disk_key *disk_key;
        struct btrfs_chunk *chunk;
        u8 *ptr;
@@ -1857,6 +2054,12 @@ int btrfs_read_sys_array(struct btrfs_root *root)
        u32 cur;
        struct btrfs_key key;
 
+       sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET,
+                                         BTRFS_SUPER_INFO_SIZE);
+       if (!sb)
+               return -ENOMEM;
+       btrfs_set_buffer_uptodate(sb);
+       write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
        array_size = btrfs_super_sys_array_size(super_copy);
 
        ptr = super_copy->sys_chunk_array;
@@ -1867,8 +2070,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
                disk_key = (struct btrfs_disk_key *)ptr;
                btrfs_disk_key_to_cpu(&key, disk_key);
 
-               len = sizeof(*disk_key);
-               ptr += len;
+               len = sizeof(*disk_key); ptr += len;
                sb_ptr += len;
                cur += len;
 
@@ -1887,6 +2089,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
                sb_ptr += len;
                cur += len;
        }
+       free_extent_buffer(sb);
        return ret;
 }
 
index a9663e92bb14d7d6b07e40b302fa7f2c6b208ce3..0f94a69e6eb654ead1094d4efe68c6be64947c61 100644 (file)
@@ -125,6 +125,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
 int btrfs_add_device(struct btrfs_trans_handle *trans,
                     struct btrfs_root *root,
                     struct btrfs_device *device);
+int btrfs_rm_device(struct btrfs_root *root, char *device_path);
 int btrfs_cleanup_fs_uuids(void);
 int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len);
 int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
@@ -136,4 +137,6 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
 int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
 int btrfs_init_new_device(struct btrfs_root *root, char *path);
 int btrfs_balance(struct btrfs_root *dev_root);
+void btrfs_unlock_volumes(void);
+void btrfs_lock_volumes(void);
 #endif