return 1;
if (k1.objectid < k2->objectid)
return -1;
- if (k1.flags > k2->flags)
- return 1;
- if (k1.flags < k2->flags)
- return -1;
if (k1.offset > k2->offset)
return 1;
if (k1.offset < k2->offset)
return -1;
+ if (k1.flags > k2->flags)
+ return 1;
+ if (k1.flags < k2->flags)
+ return -1;
return 0;
}
BUG();
ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
if (ret == 0) {
- btrfs_release_path(root, path);
return -EEXIST;
}
if (ret < 0)
*/
#define BTRFS_NAME_LEN 255
+/* 32 bytes in various csum fields */
+#define BTRFS_CSUM_SIZE 32
+
/*
* the key defines the order in the tree, and so it also defines (optimal)
* block layout. objectid corresponds to the inode number. The flags
*/
+/*
+ * packed key with little-endian fields.  This change moves flags behind
+ * offset, so the members are now objectid, offset, flags.
+ */
struct btrfs_disk_key {
__le64 objectid;
- __le32 flags;
__le64 offset;
+ __le32 flags;
} __attribute__ ((__packed__));
+/*
+ * same three members as struct btrfs_disk_key but with plain u64/u32
+ * fields; flags moves behind offset here as well.
+ */
struct btrfs_key {
u64 objectid;
- u32 flags;
u64 offset;
+ u32 flags;
} __attribute__ ((__packed__));
/*
* every tree block (leaf or node) starts with this header.
*/
struct btrfs_header {
- __le32 csum[8];
+ u8 csum[BTRFS_CSUM_SIZE];
u8 fsid[16]; /* FS specific uuid */
__le64 blocknr; /* which block this node is supposed to live in */
__le64 generation;
* it currently lacks any block count etc etc
*/
struct btrfs_super_block {
- __le32 csum[8];
+ u8 csum[BTRFS_CSUM_SIZE];
/* the first 3 fields must match struct btrfs_header */
u8 fsid[16]; /* FS specific uuid */
__le64 blocknr; /* this block number */
} __attribute__ ((__packed__));
+/*
+ * packed little-endian timestamp.  sec is widened from 32 to 64 bits by
+ * this change, nsec stays 32 bits.
+ */
struct btrfs_inode_timespec {
- __le32 sec;
+ __le64 sec;
__le32 nsec;
} __attribute__ ((__packed__));
__le64 num_blocks;
} __attribute__ ((__packed__));
+/* item body holding BTRFS_CSUM_SIZE bytes of checksum */
+struct btrfs_csum_item {
+ u8 csum[BTRFS_CSUM_SIZE];
+} __attribute__ ((__packed__));
+
+/*
+ * item body consisting of a single disk key.
+ * NOTE(review): presumably the key under which the inode lives -- confirm.
+ */
struct btrfs_inode_map_item {
struct btrfs_disk_key key;
} __attribute__ ((__packed__));
* a (hopefully) huge chunk of disk
*/
#define BTRFS_EXTENT_DATA_KEY 4
+/*
+ * csum items have the checksums for data in the extents
+ */
+#define BTRFS_CSUM_ITEM_KEY 5
+
/*
* root items point to tree roots. They are typically in the root
* tree used by the super block to find all the other trees
*/
-#define BTRFS_ROOT_ITEM_KEY 5
+#define BTRFS_ROOT_ITEM_KEY 6
/*
* extent items are in the extent map tree. These record which blocks
* are used, and how many references there are to each block
*/
-#define BTRFS_EXTENT_ITEM_KEY 6
+#define BTRFS_EXTENT_ITEM_KEY 7
/*
* the inode map records which inode numbers are in use and where
* they actually live on disk
*/
-#define BTRFS_INODE_MAP_ITEM_KEY 7
+#define BTRFS_INODE_MAP_ITEM_KEY 8
/*
* string items are for debugging. They just store a short string of
* data in the FS
*/
-#define BTRFS_STRING_ITEM_KEY 8
+#define BTRFS_STRING_ITEM_KEY 9
static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i)
{
i->compat_flags = cpu_to_le16(val);
}
-static inline u32 btrfs_timespec_sec(struct btrfs_inode_timespec *ts)
+/* read the (now 64 bit) little-endian sec field in cpu byte order */
+static inline u64 btrfs_timespec_sec(struct btrfs_inode_timespec *ts)
{
- return le32_to_cpu(ts->sec);
+ return le64_to_cpu(ts->sec);
}
+/* store val into the (now 64 bit) sec field as little-endian */
static inline void btrfs_set_timespec_sec(struct btrfs_inode_timespec *ts,
- u32 val)
+ u64 val)
{
- ts->sec = cpu_to_le32(val);
+ ts->sec = cpu_to_le64(val);
}
static inline u32 btrfs_timespec_nsec(struct btrfs_inode_timespec *ts)
ts->nsec = cpu_to_le32(val);
}
-
-
static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei)
{
return le64_to_cpu(ei->owner);
struct btrfs_root *root,
struct btrfs_path *path, u64 objectid,
u64 blocknr, int mod);
+int btrfs_csum_file_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 objectid, u64 offset,
+ char *data, size_t len);
+int btrfs_csum_verify_file_block(struct btrfs_root *root,
+ u64 objectid, u64 offset,
+ char *data, size_t len);
#endif
return 0;
}
-static int csum_tree_block(struct btrfs_root * root, struct buffer_head *bh,
- int verify)
+/*
+ * digest len bytes at data into result using the filesystem wide hash
+ * transform (root->fs_info->hash_tfm).  The digest call is made under
+ * fs_info->hash_lock.
+ *
+ * returns the value of crypto_hash_digest(); a non-zero value is also
+ * reported with printk.
+ * NOTE(review): callers in this patch pass BTRFS_CSUM_SIZE byte result
+ * buffers -- confirm the digest size never exceeds that.
+ */
+int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len,
+ char *result)
{
- struct btrfs_node *node = btrfs_buffer_node(bh);
struct scatterlist sg;
struct crypto_hash *tfm = root->fs_info->hash_tfm;
struct hash_desc desc;
int ret;
- char result[32];
desc.tfm = tfm;
desc.flags = 0;
- sg_init_one(&sg, bh->b_data + 32, bh->b_size - 32);
+ sg_init_one(&sg, data, len);
spin_lock(&root->fs_info->hash_lock);
- ret = crypto_hash_digest(&desc, &sg, bh->b_size - 32, result);
+ ret = crypto_hash_digest(&desc, &sg, len, result);
spin_unlock(&root->fs_info->hash_lock);
if (ret) {
printk("sha256 digest failed\n");
}
+ return ret;
+}
+/*
+ * checksum a tree block.  The first BTRFS_CSUM_SIZE bytes of the block
+ * hold the checksum itself and are left out of the digest.
+ *
+ * verify != 0: compare the digest with the bytes stored at the start of
+ * the block; returns 1 and logs the block number on mismatch.
+ * verify == 0: copy the digest into the header csum field.
+ *
+ * returns 0 on success or the error from btrfs_csum_data().
+ */
+static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh,
+ int verify)
+{
+ char result[BTRFS_CSUM_SIZE];
+ int ret;
+ struct btrfs_node *node;
+
+ ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE,
+ bh->b_size - BTRFS_CSUM_SIZE, result);
+ if (ret)
+ return ret;
if (verify) {
- if (memcmp(node->header.csum, result, sizeof(result)))
- printk("csum verify failed on %Lu\n", bh->b_blocknr);
- return -EINVAL;
- } else
- memcpy(node->header.csum, result, sizeof(node->header.csum));
+ if (memcmp(bh->b_data, result, BTRFS_CSUM_SIZE)) {
+ /* b_blocknr is a sector_t; its width depends on the config */
+ printk("checksum verify failed on %llu\n",
+ (unsigned long long)bh->b_blocknr);
+ return 1;
+ }
+ } else {
+ node = btrfs_buffer_node(bh);
+ memcpy(&node->header.csum, result, BTRFS_CSUM_SIZE);
+ }
return 0;
}
int write_ctree_super(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr);
+int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len,
+ char *result);
#endif
ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow);
return ret;
}
+
+/*
+ * checksum len bytes of file data and store the result in the csum item
+ * keyed by (objectid, BTRFS_CSUM_ITEM_KEY, offset).
+ *
+ * -EEXIST from the insert is not an error: the checksum is then written
+ * over the item the path points at.
+ * NOTE(review): this relies on btrfs_insert_empty_item() leaving the path
+ * on the existing item when it returns -EEXIST -- confirm.
+ *
+ * returns 0 on success, otherwise the error from the insert or from
+ * btrfs_csum_data().
+ */
+int btrfs_csum_file_block(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root,
+			  u64 objectid, u64 offset,
+			  char *data, size_t len)
+{
+	int ret;
+	struct btrfs_key file_key;
+	struct btrfs_path path;
+	struct btrfs_csum_item *item;
+
+	btrfs_init_path(&path);
+	file_key.objectid = objectid;
+	file_key.offset = offset;
+	file_key.flags = 0;
+	btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
+	ret = btrfs_insert_empty_item(trans, root, &path, &file_key,
+				      BTRFS_CSUM_SIZE);
+	if (ret != 0 && ret != -EEXIST)
+		goto fail;
+	item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0],
+			      struct btrfs_csum_item);
+	/* item->csum is u8[], btrfs_csum_data() wants a char buffer */
+	ret = btrfs_csum_data(root, data, len, (char *)item->csum);
+	mark_buffer_dirty(path.nodes[0]);
+fail:
+	btrfs_release_path(root, &path);
+	return ret;
+}
+
+/*
+ * recompute the checksum of len bytes of file data and compare it with
+ * the csum item keyed by (objectid, BTRFS_CSUM_ITEM_KEY, offset).
+ *
+ * returns 0 when the checksums match.  Any non-zero value means the data
+ * could not be verified: the non-zero result of btrfs_search_slot(), the
+ * error from btrfs_csum_data(), or 1 when the checksums differ.
+ */
+int btrfs_csum_verify_file_block(struct btrfs_root *root,
+				 u64 objectid, u64 offset,
+				 char *data, size_t len)
+{
+	int ret;
+	struct btrfs_key file_key;
+	struct btrfs_path path;
+	struct btrfs_csum_item *item;
+	char result[BTRFS_CSUM_SIZE];
+
+	btrfs_init_path(&path);
+	file_key.objectid = objectid;
+	file_key.offset = offset;
+	file_key.flags = 0;
+	btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
+	ret = btrfs_search_slot(NULL, root, &file_key, &path, 0, 0);
+	if (ret)
+		goto fail;
+	item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0],
+			      struct btrfs_csum_item);
+	ret = btrfs_csum_data(root, data, len, result);
+	WARN_ON(ret);
+	/* result holds nothing usable if the digest failed, don't compare */
+	if (ret)
+		goto fail;
+	if (memcmp(result, item->csum, BTRFS_CSUM_SIZE))
+		ret = 1;
+fail:
+	btrfs_release_path(root, &path);
+	return ret;
+}
+
struct btrfs_key key;
struct btrfs_disk_key *found_key;
struct btrfs_leaf *leaf;
- struct btrfs_file_extent_item *fi;
- u64 extent_start;
- u64 extent_num_blocks;
+ struct btrfs_file_extent_item *fi = NULL;
+ u64 extent_start = 0;
+ u64 extent_num_blocks = 0;
+ int found_extent;
/* FIXME, add redo link to tree so we don't leak on crash */
key.objectid = inode->i_ino;
key.offset = (u64)-1;
key.flags = 0;
- btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
+ btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY);
while(1) {
btrfs_init_path(&path);
ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
found_key = &leaf->items[path.slots[0]].key;
if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
break;
- if (btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY)
+ if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY &&
+ btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY)
break;
if (btrfs_disk_key_offset(found_key) < inode->i_size)
break;
- fi = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]),
- path.slots[0],
- struct btrfs_file_extent_item);
- extent_start = btrfs_file_extent_disk_blocknr(fi);
- extent_num_blocks = btrfs_file_extent_disk_num_blocks(fi);
- key.offset = btrfs_disk_key_offset(found_key) - 1;
+ if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) {
+ fi = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]),
+ path.slots[0],
+ struct btrfs_file_extent_item);
+ extent_start = btrfs_file_extent_disk_blocknr(fi);
+ extent_num_blocks =
+ btrfs_file_extent_disk_num_blocks(fi);
+ inode->i_blocks -=
+ btrfs_file_extent_num_blocks(fi) >> 9;
+ found_extent = 1;
+ } else {
+ found_extent = 0;
+ }
ret = btrfs_del_item(trans, root, &path);
BUG_ON(ret);
- inode->i_blocks -= btrfs_file_extent_num_blocks(fi) >> 9;
btrfs_release_path(root, &path);
- ret = btrfs_free_extent(trans, root, extent_start,
- extent_num_blocks, 0);
- BUG_ON(ret);
- if (key.offset + 1 == 0)
- break;
+ if (found_extent) {
+ ret = btrfs_free_extent(trans, root, extent_start,
+ extent_num_blocks, 0);
+ BUG_ON(ret);
+ }
}
btrfs_release_path(root, &path);
ret = 0;
int err = 0;
int ret;
int this_write;
+ struct inode *inode = file->f_path.dentry->d_inode;
for (i = 0; i < num_pages; i++) {
offset = pos & (PAGE_CACHE_SIZE -1);
this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
+ /* FIXME, one block at a time */
+
+ mutex_lock(&root->fs_info->fs_mutex);
+ trans = btrfs_start_transaction(root, 1);
+ btrfs_csum_file_block(trans, root, inode->i_ino,
+ pages[i]->index << PAGE_CACHE_SHIFT,
+ kmap(pages[i]), PAGE_CACHE_SIZE);
+ kunmap(pages[i]);
+ SetPageChecked(pages[i]);
+ ret = btrfs_end_transaction(trans, root);
+ BUG_ON(ret);
+ mutex_unlock(&root->fs_info->fs_mutex);
+
ret = nobh_commit_write(file, pages[i], offset,
offset + this_write);
pos += this_write;
this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
ret = nobh_prepare_write(pages[i], offset,
offset + this_write,
- btrfs_get_block_lock);
+ btrfs_get_block);
pos += this_write;
if (ret) {
err = ret;
size_t num_written = 0;
int err = 0;
int ret = 0;
- struct btrfs_trans_handle *trans;
struct inode *inode = file->f_path.dentry->d_inode;
struct btrfs_root *root = btrfs_sb(inode->i_sb);
struct page *pages[1];
size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
PAGE_CACHE_SHIFT;
- mutex_lock(&root->fs_info->fs_mutex);
- trans = btrfs_start_transaction(root, 1);
-
- ret = prepare_pages(trans, root, file, pages, num_pages,
+ ret = prepare_pages(NULL, root, file, pages, num_pages,
pos, write_bytes);
BUG_ON(ret);
ret = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, buf);
BUG_ON(ret);
- mutex_unlock(&root->fs_info->fs_mutex);
-
- ret = dirty_and_release_pages(trans, root, file, pages,
+ ret = dirty_and_release_pages(NULL, root, file, pages,
num_pages, pos, write_bytes);
BUG_ON(ret);
btrfs_drop_pages(pages, num_pages);
- ret = btrfs_end_transaction(trans, root);
-
buf += write_bytes;
count -= write_bytes;
pos += write_bytes;
return num_written ? num_written : err;
}
+/*
+ * read actor handed to do_generic_file_read(): checks the page against
+ * its stored checksum the first time it is seen, then copies up to size
+ * bytes starting at offset within the page to desc->arg.buf.
+ *
+ * A page that fails verification is logged and zero filled.  Either way
+ * the page is marked Checked so the test is not repeated.
+ *
+ * returns the number of bytes copied; on a partial copy desc->error is
+ * set to -EFAULT.
+ *
+ * NOTE(review): the write side takes fs_info->fs_mutex around the csum
+ * update, no lock is taken around the lookup here -- confirm that is
+ * intended.
+ */
+static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
+			    unsigned long offset, unsigned long size)
+{
+	char *kaddr;
+	unsigned long left, count = desc->count;
+
+	if (size > count)
+		size = count;
+
+	if (!PageChecked(page)) {
+		/* FIXME, do it per block */
+		struct inode *inode = page->mapping->host;
+		struct btrfs_root *root = btrfs_sb(inode->i_sb);
+		int ret;
+
+		kaddr = kmap(page);
+		/*
+		 * widen before shifting: page->index is an unsigned long
+		 * and the byte offset does not fit in 32 bits past 4GB
+		 */
+		ret = btrfs_csum_verify_file_block(root, inode->i_ino,
+					(u64)page->index << PAGE_CACHE_SHIFT,
+					kaddr, PAGE_CACHE_SIZE);
+		if (ret) {
+			printk("failed to verify ino %lu page %lu\n",
+			       inode->i_ino,
+			       page->index);
+			memset(kaddr, 0, PAGE_CACHE_SIZE);
+		}
+		SetPageChecked(page);
+		kunmap(page);
+	}
+	/*
+	 * Faults on the destination of a read are common, so do it before
+	 * taking the kmap.
+	 */
+	if (!fault_in_pages_writeable(desc->arg.buf, size)) {
+		kaddr = kmap_atomic(page, KM_USER0);
+		left = __copy_to_user_inatomic(desc->arg.buf,
+					       kaddr + offset, size);
+		kunmap_atomic(kaddr, KM_USER0);
+		if (left == 0)
+			goto success;
+	}
+
+	/* Do it the slow way */
+	kaddr = kmap(page);
+	left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
+	kunmap(page);
+
+	if (left) {
+		size -= left;
+		desc->error = -EFAULT;
+	}
+success:
+	desc->count = count - size;
+	desc->written += size;
+	desc->arg.buf += size;
+	return size;
+}
+
+/**
+ * btrfs_file_aio_read - filesystem read routine
+ * @iocb:	kernel I/O control block
+ * @iov:	io vector request
+ * @nr_segs:	number of segments in the iovec
+ * @pos:	current file position
+ *
+ * Validates the io vector, then reads each usable segment through
+ * do_generic_file_read() with btrfs_read_actor as the actor.  Returns the
+ * number of bytes read, or a negative errno when nothing was read.
+ */
+static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+				   unsigned long nr_segs, loff_t pos)
+{
+	size_t total = 0;
+	ssize_t done = 0;
+	unsigned long i;
+
+	for (i = 0; i < nr_segs; i++) {
+		const struct iovec *vec = iov + i;
+
+		/*
+		 * A negative segment length, or a running total that wraps
+		 * negative, makes the whole request invalid.
+		 */
+		total += vec->iov_len;
+		if (unlikely((ssize_t)(total | vec->iov_len) < 0))
+			return -EINVAL;
+		if (!access_ok(VERIFY_WRITE, vec->iov_base, vec->iov_len)) {
+			if (i == 0)
+				return -EFAULT;
+			/* This segment is no good, keep the ones before it */
+			nr_segs = i;
+			total -= vec->iov_len;
+			break;
+		}
+	}
+
+	if (total == 0)
+		return 0;
+
+	for (i = 0; i < nr_segs; i++) {
+		read_descriptor_t desc;
+
+		if (iov[i].iov_len == 0)
+			continue;
+
+		desc.written = 0;
+		desc.arg.buf = iov[i].iov_base;
+		desc.count = iov[i].iov_len;
+		desc.error = 0;
+		do_generic_file_read(iocb->ki_filp, &iocb->ki_pos, &desc,
+				     btrfs_read_actor);
+		done += desc.written;
+		if (desc.error) {
+			/* only report the error when no data was read */
+			if (!done)
+				done = desc.error;
+			break;
+		}
+	}
+	return done;
+}
+
+
static int btrfs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
static struct file_operations btrfs_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
- .aio_read = generic_file_aio_read,
+ .aio_read = btrfs_file_aio_read,
.write = btrfs_file_write,
.mmap = generic_file_mmap,
.open = generic_file_open,