Btrfs: Delete csum items when freeing extents
authorChris Mason <chris.mason@oracle.com>
Wed, 10 Dec 2008 14:10:46 +0000 (09:10 -0500)
committerChris Mason <chris.mason@oracle.com>
Wed, 10 Dec 2008 14:10:46 +0000 (09:10 -0500)
This finishes off the new checksumming code by removing csum items
for extents that are no longer in use.

The trick is doing it without racing because a single csum item may
hold csums for more than one extent.  Extra checks are added to
btrfs_csum_file_blocks to make sure that we are using the correct
csum item after dropping locks.

A new btrfs_split_item is added to split a single csum item so it
can be split without dropping the leaf lock.  This is used to
remove csum bytes from the middle of an item.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/file-item.c

index 19c0dd33b1e8e5f7639c8cc98f70b7c2c07e7bec..c0c95cccbb5bcdb42ca023090d6bb1a60ca17640 100644 (file)
@@ -1512,7 +1512,8 @@ cow_done:
                        if (ret && slot > 0)
                                slot -= 1;
                        p->slots[level] = slot;
-                       if (ins_len > 0 && btrfs_header_nritems(b) >=
+                       if ((p->search_for_split || ins_len > 0) &&
+                           btrfs_header_nritems(b) >=
                            BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
                                int sret = split_node(trans, root, p, level);
                                BUG_ON(sret > 0);
@@ -1596,7 +1597,8 @@ cow_done:
                                        goto done;
                                }
                        }
-                       unlock_up(p, level, lowest_unlock);
+                       if (!p->search_for_split)
+                               unlock_up(p, level, lowest_unlock);
                        goto done;
                }
        }
@@ -2636,11 +2638,11 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
        int num_doubles = 0;
        struct btrfs_disk_key disk_key;
 
-       if (extend)
+       if (extend && data_size)
                space_needed = data_size;
 
        /* first try to make some room by pushing left and right */
-       if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
+       if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
                wret = push_leaf_right(trans, root, path, data_size, 0);
                if (wret < 0) {
                        return wret;
@@ -2721,7 +2723,7 @@ again:
        } else {
                if (leaf_space_used(l, 0, mid + 1) + space_needed >
                        BTRFS_LEAF_DATA_SIZE(root)) {
-                       if (!extend && slot == 0) {
+                       if (!extend && data_size && slot == 0) {
                                btrfs_cpu_key_to_disk(&disk_key, ins_key);
                                btrfs_set_header_nritems(right, 0);
                                wret = insert_ptr(trans, root, path,
@@ -2742,7 +2744,7 @@ again:
                                }
                                btrfs_mark_buffer_dirty(right);
                                return ret;
-                       } else if (extend && slot == 0) {
+                       } else if ((extend || !data_size) && slot == 0) {
                                mid = 1;
                        } else {
                                mid = slot;
@@ -2827,6 +2829,123 @@ again:
        return ret;
 }
 
+/*
+ * This function splits a single item into two items,
+ * giving 'new_key' to the new item and splitting the
+ * old one at split_offset (from the start of the item).
+ *
+ * The path may be released by this operation.  After
+ * the split, the path is pointing to the old item.  The
+ * new item is going to be in the same node as the old one.
+ *
+ * Note, the item being split must be smaller enough to live alone on
+ * a tree block with room for one extra struct btrfs_item
+ *
+ * This allows us to split the item in place, keeping a lock on the
+ * leaf the entire time.
+ */
+int btrfs_split_item(struct btrfs_trans_handle *trans,
+                    struct btrfs_root *root,
+                    struct btrfs_path *path,
+                    struct btrfs_key *new_key,
+                    unsigned long split_offset)
+{
+       u32 item_size;
+       struct extent_buffer *leaf;
+       struct btrfs_key orig_key;
+       struct btrfs_item *item;
+       struct btrfs_item *new_item;
+       int ret = 0;
+       int slot;
+       u32 nritems;
+       u32 orig_offset;
+       struct btrfs_disk_key disk_key;
+       char *buf;
+
+       leaf = path->nodes[0];
+       btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]);
+       if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item))
+               goto split;
+
+       item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+       btrfs_release_path(root, path);
+
+       path->search_for_split = 1;
+       path->keep_locks = 1;
+
+       ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1);
+       path->search_for_split = 0;
+
+       /* if our item isn't there or got smaller, return now */
+       if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0],
+                                                       path->slots[0])) {
+               path->keep_locks = 0;
+               return -EAGAIN;
+       }
+
+       ret = split_leaf(trans, root, &orig_key, path, 0, 0);
+       path->keep_locks = 0;
+       BUG_ON(ret);
+
+       BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
+       leaf = path->nodes[0];
+
+split:
+       item = btrfs_item_nr(leaf, path->slots[0]);
+       orig_offset = btrfs_item_offset(leaf, item);
+       item_size = btrfs_item_size(leaf, item);
+
+
+       buf = kmalloc(item_size, GFP_NOFS);
+       read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
+                           path->slots[0]), item_size);
+       slot = path->slots[0] + 1;
+       leaf = path->nodes[0];
+
+       nritems = btrfs_header_nritems(leaf);
+
+       if (slot != nritems) {
+               /* shift the items */
+               memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
+                             btrfs_item_nr_offset(slot),
+                             (nritems - slot) * sizeof(struct btrfs_item));
+
+       }
+
+       btrfs_cpu_key_to_disk(&disk_key, new_key);
+       btrfs_set_item_key(leaf, &disk_key, slot);
+
+       new_item = btrfs_item_nr(leaf, slot);
+
+       btrfs_set_item_offset(leaf, new_item, orig_offset);
+       btrfs_set_item_size(leaf, new_item, item_size - split_offset);
+
+       btrfs_set_item_offset(leaf, item,
+                             orig_offset + item_size - split_offset);
+       btrfs_set_item_size(leaf, item, split_offset);
+
+       btrfs_set_header_nritems(leaf, nritems + 1);
+
+       /* write the data for the start of the original item */
+       write_extent_buffer(leaf, buf,
+                           btrfs_item_ptr_offset(leaf, path->slots[0]),
+                           split_offset);
+
+       /* write the data for the new item */
+       write_extent_buffer(leaf, buf + split_offset,
+                           btrfs_item_ptr_offset(leaf, slot),
+                           item_size - split_offset);
+       btrfs_mark_buffer_dirty(leaf);
+
+       ret = 0;
+       if (btrfs_leaf_free_space(root, leaf) < 0) {
+               btrfs_print_leaf(root, leaf);
+               BUG();
+       }
+       kfree(buf);
+       return ret;
+}
+
 /*
  * make the item pointed to by the path smaller.  new_size indicates
  * how small to make it, and from_end tells us if we just chop bytes
index f72b43819349a9124dcb2a511a1bf59422be2f19..5b0c79d22c099f37786024675e6c67142771d2ff 100644 (file)
@@ -409,6 +409,12 @@ struct btrfs_path {
        int keep_locks;
        int skip_locking;
        int lowest_level;
+
+       /*
+        * set by btrfs_split_item, tells search_slot to keep all locks
+        * and to force calls to keep space in the nodes
+        */
+       int search_for_split;
 };
 
 /*
@@ -1816,6 +1822,11 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root,
                        struct btrfs_path *path,
                        u32 new_size, int from_end);
+int btrfs_split_item(struct btrfs_trans_handle *trans,
+                    struct btrfs_root *root,
+                    struct btrfs_path *path,
+                    struct btrfs_key *new_key,
+                    unsigned long split_offset);
 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
                      *root, struct btrfs_key *key, struct btrfs_path *p, int
                      ins_len, int cow);
@@ -1953,6 +1964,8 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
                       struct btrfs_key *location, int mod);
 
 /* file-item.c */
+int btrfs_del_csums(struct btrfs_trans_handle *trans,
+                   struct btrfs_root *root, u64 bytenr, u64 len);
 int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
                          struct bio *bio, u32 *dst);
 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
index 803647bc8400990e192a7bde7fb57fbff728683f..cc74316dc42666bc89ff991e66fdcd4940e7b084 100644 (file)
@@ -2484,7 +2484,6 @@ static int __free_extent(struct btrfs_trans_handle *trans,
                                mark_free = 1;
                        BUG_ON(ret < 0);
                }
-
                /* block accounting for super block */
                spin_lock_irq(&info->delalloc_lock);
                super_used = btrfs_super_bytes_used(&info->super_copy);
@@ -2504,6 +2503,11 @@ static int __free_extent(struct btrfs_trans_handle *trans,
                                         mark_free);
                BUG_ON(ret);
 
+               if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
+                       ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
+                       BUG_ON(ret);
+               }
+
 #ifdef BIO_RW_DISCARD
                /* Tell the block device(s) that the sectors can be discarded */
                ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
index a3ad2ce00116a6a8e7e8e27093f5f0a81bd7e6b9..3ebef871ee6cf11e9be16d02504ae6ebd4c519da 100644 (file)
@@ -310,6 +310,179 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
        return 0;
 }
 
+/*
+ * helper function for csum removal, this expects the
+ * key to describe the csum pointed to by the path, and it expects
+ * the csum to overlap the range [bytenr, len]
+ *
+ * The csum should not be entirely contained in the range and the
+ * range should not be entirely contained in the csum.
+ *
+ * This calls btrfs_truncate_item with the correct args based on the
+ * overlap, and fixes up the key as required.
+ */
+static noinline int truncate_one_csum(struct btrfs_trans_handle *trans,
+                                     struct btrfs_root *root,
+                                     struct btrfs_path *path,
+                                     struct btrfs_key *key,
+                                     u64 bytenr, u64 len)
+{
+       struct extent_buffer *leaf;
+       u16 csum_size =
+               btrfs_super_csum_size(&root->fs_info->super_copy);
+       u64 csum_end;
+       u64 end_byte = bytenr + len;
+       u32 blocksize_bits = root->fs_info->sb->s_blocksize_bits;
+       int ret;
+
+       leaf = path->nodes[0];
+       csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
+       csum_end <<= root->fs_info->sb->s_blocksize_bits;
+       csum_end += key->offset;
+
+       if (key->offset < bytenr && csum_end <= end_byte) {
+               /*
+                *         [ bytenr - len ]
+                *         [   ]
+                *   [csum     ]
+                *   A simple truncate off the end of the item
+                */
+               u32 new_size = (bytenr - key->offset) >> blocksize_bits;
+               new_size *= csum_size;
+               ret = btrfs_truncate_item(trans, root, path, new_size, 1);
+               BUG_ON(ret);
+       } else if (key->offset >= bytenr && csum_end > end_byte &&
+                  end_byte > key->offset) {
+               /*
+                *         [ bytenr - len ]
+                *                 [ ]
+                *                 [csum     ]
+                * we need to truncate from the beginning of the csum
+                */
+               u32 new_size = (csum_end - end_byte) >> blocksize_bits;
+               new_size *= csum_size;
+
+               ret = btrfs_truncate_item(trans, root, path, new_size, 0);
+               BUG_ON(ret);
+
+               key->offset = end_byte;
+               ret = btrfs_set_item_key_safe(trans, root, path, key);
+               BUG_ON(ret);
+       } else {
+               BUG();
+       }
+       return 0;
+}
+
+/*
+ * deletes the csum items from the csum tree for a given
+ * range of bytes.
+ */
+int btrfs_del_csums(struct btrfs_trans_handle *trans,
+                   struct btrfs_root *root, u64 bytenr, u64 len)
+{
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       u64 end_byte = bytenr + len;
+       u64 csum_end;
+       struct extent_buffer *leaf;
+       int ret;
+       u16 csum_size =
+               btrfs_super_csum_size(&root->fs_info->super_copy);
+       int blocksize_bits = root->fs_info->sb->s_blocksize_bits;
+
+       root = root->fs_info->csum_root;
+
+       path = btrfs_alloc_path();
+
+       while(1) {
+               key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
+               key.offset = end_byte - 1;
+               key.type = BTRFS_EXTENT_CSUM_KEY;
+
+               ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+               if (ret > 0) {
+                       if (path->slots[0] == 0)
+                               goto out;
+                       path->slots[0]--;
+               }
+               leaf = path->nodes[0];
+               btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+               if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
+                   key.type != BTRFS_EXTENT_CSUM_KEY) {
+                       break;
+               }
+
+               if (key.offset >= end_byte)
+                       break;
+
+               csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
+               csum_end <<= blocksize_bits;
+               csum_end += key.offset;
+
+               /* this csum ends before we start, we're done */
+               if (csum_end <= bytenr)
+                       break;
+
+               /* delete the entire item, it is inside our range */
+               if (key.offset >= bytenr && csum_end <= end_byte) {
+                       ret = btrfs_del_item(trans, root, path);
+                       BUG_ON(ret);
+               } else if (key.offset < bytenr && csum_end > end_byte) {
+                       unsigned long offset;
+                       unsigned long shift_len;
+                       unsigned long item_offset;
+                       /*
+                        *        [ bytenr - len ]
+                        *     [csum                ]
+                        *
+                        * Our bytes are in the middle of the csum,
+                        * we need to split this item and insert a new one.
+                        *
+                        * But we can't drop the path because the
+                        * csum could change, get removed, extended etc.
+                        *
+                        * The trick here is the max size of a csum item leaves
+                        * enough room in the tree block for a single
+                        * item header.  So, we split the item in place,
+                        * adding a new header pointing to the existing
+                        * bytes.  Then we loop around again and we have
+                        * a nicely formed csum item that we can neatly
+                        * truncate.
+                        */
+                       offset = (bytenr - key.offset) >> blocksize_bits;
+                       offset *= csum_size;
+
+                       shift_len = (len >> blocksize_bits) * csum_size;
+
+                       item_offset = btrfs_item_ptr_offset(leaf,
+                                                           path->slots[0]);
+
+                       memset_extent_buffer(leaf, 0, item_offset + offset,
+                                            shift_len);
+                       key.offset = bytenr;
+
+                       /*
+                        * btrfs_split_item returns -EAGAIN when the
+                        * item changed size or key
+                        */
+                       ret = btrfs_split_item(trans, root, path, &key, offset);
+                       BUG_ON(ret && ret != -EAGAIN);
+
+                       key.offset = end_byte - 1;
+               } else {
+                       ret = truncate_one_csum(trans, root, path,
+                                               &key, bytenr, len);
+                       BUG_ON(ret);
+               }
+               btrfs_release_path(root, path);
+       }
+out:
+       btrfs_free_path(path);
+       return 0;
+}
+
 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           struct btrfs_ordered_sum *sums)
@@ -396,28 +569,40 @@ again:
                                csum_size, 1);
        if (ret < 0)
                goto fail_unlock;
-       if (ret == 0) {
-               BUG();
-       }
-       if (path->slots[0] == 0) {
-               goto insert;
+
+       if (ret > 0) {
+               if (path->slots[0] == 0)
+                       goto insert;
+               path->slots[0]--;
        }
-       path->slots[0]--;
+
        leaf = path->nodes[0];
        btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
        csum_offset = (bytenr - found_key.offset) >>
                        root->fs_info->sb->s_blocksize_bits;
+
        if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY ||
            found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
            csum_offset >= MAX_CSUM_ITEMS(root, csum_size)) {
                goto insert;
        }
+
        if (csum_offset >= btrfs_item_size_nr(leaf, path->slots[0]) /
            csum_size) {
                u32 diff = (csum_offset + 1) * csum_size;
+
+               /*
+                * is the item big enough already?  we dropped our lock
+                * before and need to recheck
+                */
+               if (diff < btrfs_item_size_nr(leaf, path->slots[0]))
+                       goto csum;
+
                diff = diff - btrfs_item_size_nr(leaf, path->slots[0]);
-               if (diff != csum_size)
+               if (diff != csum_size) {
                        goto insert;
+               }
+
                ret = btrfs_extend_item(trans, root, path, diff);
                BUG_ON(ret);
                goto csum;
@@ -518,30 +703,3 @@ out:
 fail_unlock:
        goto out;
 }
-
-int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
-                       struct btrfs_root *root, struct btrfs_path *path,
-                       u64 isize)
-{
-       struct btrfs_key key;
-       struct extent_buffer *leaf = path->nodes[0];
-       int slot = path->slots[0];
-       int ret;
-       u32 new_item_size;
-       u64 new_item_span;
-       u64 blocks;
-
-       btrfs_item_key_to_cpu(leaf, &key, slot);
-       if (isize <= key.offset)
-               return 0;
-       new_item_span = isize - key.offset;
-       blocks = (new_item_span + root->sectorsize - 1) >>
-               root->fs_info->sb->s_blocksize_bits;
-       new_item_size = blocks *
-               btrfs_super_csum_size(&root->fs_info->super_copy);
-       if (new_item_size >= btrfs_item_size_nr(leaf, slot))
-               return 0;
-       ret = btrfs_truncate_item(trans, root, path, new_item_size, 1);
-       BUG_ON(ret);
-       return ret;
-}