this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes);
/* FIXME, one block at a time */
- mutex_lock(&root->fs_info->fs_mutex);
- trans = btrfs_start_transaction(root, 1);
- btrfs_set_trans_block_group(trans, inode);
-
bh = page_buffers(pages[i]);
if (buffer_mapped(bh) && bh->b_blocknr == 0) {
char *ptr, *kaddr;
u32 datasize;
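+ /*
+ * the transaction is only needed here to update the inline
+ * extent; data csums are now inserted at writeback time in
+ * __btrfs_write_full_page
+ */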
+ mutex_lock(&root->fs_info->fs_mutex);
+ trans = btrfs_start_transaction(root, 1);
+ btrfs_set_trans_block_group(trans, inode);
+
/* create an inline extent, and copy the data in */
path = btrfs_alloc_path();
BUG_ON(!path);
btrfs_set_file_extent_type(ei,
BTRFS_FILE_EXTENT_INLINE);
ptr = btrfs_file_extent_inline_start(ei);
+
kaddr = kmap_atomic(bh->b_page, KM_USER0);
btrfs_memcpy(root, path->nodes[0]->b_data,
ptr, kaddr + bh_offset(bh),
offset + write_bytes);
kunmap_atomic(kaddr, KM_USER0);
+
mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path);
- } else if (buffer_mapped(bh)) {
- /* csum the file data */
- btrfs_csum_file_block(trans, root, inode->i_ino,
- pages[i]->index << PAGE_CACHE_SHIFT,
- kmap(pages[i]), PAGE_CACHE_SIZE);
- kunmap(pages[i]);
+ ret = btrfs_end_transaction(trans, root);
+ BUG_ON(ret);
+ mutex_unlock(&root->fs_info->fs_mutex);
}
- SetPageChecked(pages[i]);
- ret = btrfs_end_transaction(trans, root);
- BUG_ON(ret);
- mutex_unlock(&root->fs_info->fs_mutex);
ret = btrfs_commit_write(file, pages[i], offset,
offset + this_write);
if ((pos & (PAGE_CACHE_SIZE - 1))) {
pinned[0] = grab_cache_page(inode->i_mapping, first_index);
if (!PageUptodate(pinned[0])) {
- ret = mpage_readpage(pinned[0], btrfs_get_block);
+ ret = btrfs_readpage(NULL, pinned[0]);
BUG_ON(ret);
wait_on_page_locked(pinned[0]);
} else {
if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
pinned[1] = grab_cache_page(inode->i_mapping, last_index);
if (!PageUptodate(pinned[1])) {
- ret = mpage_readpage(pinned[1], btrfs_get_block);
+ ret = btrfs_readpage(NULL, pinned[1]);
BUG_ON(ret);
wait_on_page_locked(pinned[1]);
} else {
return num_written ? num_written : err;
}
-/*
- * FIXME, do this by stuffing the csum we want in the info hanging off
- * page->private. For now, verify file csums on read
- */
-static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
- unsigned long offset, unsigned long size)
-{
- char *kaddr;
- unsigned long left, count = desc->count;
- struct inode *inode = page->mapping->host;
-
- if (size > count)
- size = count;
-
- if (!PageChecked(page)) {
- /* FIXME, do it per block */
- struct btrfs_root *root = BTRFS_I(inode)->root;
- int ret;
- struct buffer_head *bh;
-
- if (page_has_buffers(page)) {
- bh = page_buffers(page);
- if (!buffer_mapped(bh)) {
- SetPageChecked(page);
- goto checked;
- }
- }
-
- ret = btrfs_csum_verify_file_block(root,
- page->mapping->host->i_ino,
- page->index << PAGE_CACHE_SHIFT,
- kmap(page), PAGE_CACHE_SIZE);
- if (ret) {
- if (ret != -ENOENT) {
- printk("failed to verify ino %lu page %lu ret %d\n",
- page->mapping->host->i_ino,
- page->index, ret);
- memset(page_address(page), 1, PAGE_CACHE_SIZE);
- flush_dcache_page(page);
- }
- }
- SetPageChecked(page);
- kunmap(page);
- }
-checked:
- /*
- * Faults on the destination of a read are common, so do it before
- * taking the kmap.
- */
- if (!fault_in_pages_writeable(desc->arg.buf, size)) {
- kaddr = kmap_atomic(page, KM_USER0);
- left = __copy_to_user_inatomic(desc->arg.buf,
- kaddr + offset, size);
- kunmap_atomic(kaddr, KM_USER0);
- if (left == 0)
- goto success;
- }
-
- /* Do it the slow way */
- kaddr = kmap(page);
- left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
- kunmap(page);
-
- if (left) {
- size -= left;
- desc->error = -EFAULT;
- }
-success:
- desc->count = count - size;
- desc->written += size;
- desc->arg.buf += size;
- return size;
-}
-
-/**
- * btrfs_file_aio_read - filesystem read routine, with a mod to csum verify
- * @iocb: kernel I/O control block
- * @iov: io vector request
- * @nr_segs: number of segments in the iovec
- * @pos: current file position
- */
-static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
-{
- struct file *filp = iocb->ki_filp;
- ssize_t retval;
- unsigned long seg;
- size_t count;
- loff_t *ppos = &iocb->ki_pos;
-
- count = 0;
- for (seg = 0; seg < nr_segs; seg++) {
- const struct iovec *iv = &iov[seg];
-
- /*
- * If any segment has a negative length, or the cumulative
- * length ever wraps negative then return -EINVAL.
- */
- count += iv->iov_len;
- if (unlikely((ssize_t)(count|iv->iov_len) < 0))
- return -EINVAL;
- if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
- continue;
- if (seg == 0)
- return -EFAULT;
- nr_segs = seg;
- count -= iv->iov_len; /* This segment is no good */
- break;
- }
- retval = 0;
- if (count) {
- for (seg = 0; seg < nr_segs; seg++) {
- read_descriptor_t desc;
-
- desc.written = 0;
- desc.arg.buf = iov[seg].iov_base;
- desc.count = iov[seg].iov_len;
- if (desc.count == 0)
- continue;
- desc.error = 0;
- do_generic_file_read(filp, ppos, &desc,
- btrfs_read_actor);
- retval += desc.written;
- if (desc.error) {
- retval = retval ?: desc.error;
- break;
- }
- }
- }
- return retval;
-}
-
static int btrfs_sync_file(struct file *file,
struct dentry *dentry, int datasync)
{
return ret > 0 ? EIO : ret;
}
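+
+/*
+ * private vm_ops so writes through mmap hit btrfs_page_mkwrite and get
+ * run through prepare/commit write before the page is dirtied
+ */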
+static struct vm_operations_struct btrfs_file_vm_ops = {
+ .nopage = filemap_nopage,
+ .populate = filemap_populate,
+ .page_mkwrite = btrfs_page_mkwrite,
+};
+
+static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ vma->vm_ops = &btrfs_file_vm_ops;
+ file_accessed(filp);
+ return 0;
+}
+
struct file_operations btrfs_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
- .aio_read = btrfs_file_aio_read,
+ .aio_read = generic_file_aio_read,
.write = btrfs_file_write,
- .mmap = generic_file_mmap,
+ .mmap = btrfs_file_mmap,
.open = generic_file_open,
.ioctl = btrfs_ioctl,
.fsync = btrfs_sync_file,
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
+#include <linux/bit_spinlock.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
goto out;
if (!PageUptodate(page)) {
- ret = mpage_readpage(page, btrfs_get_block);
+ ret = btrfs_readpage(NULL, page);
lock_page(page);
if (!PageUptodate(page)) {
ret = -EIO;
path = btrfs_alloc_path();
BUG_ON(!path);
if (create & BTRFS_GET_BLOCK_CREATE) {
- WARN_ON(1);
- /* this almost but not quite works */
+ /*
+ * danger: this only works if the page is somehow
+ * already up to date
+ */
trans = btrfs_start_transaction(root, 1);
if (!trans) {
err = -ENOMEM;
ins.objectid, ins.offset,
ins.offset);
BUG_ON(ret);
- SetPageChecked(result->b_page);
btrfs_map_bh_to_logical(root, result, ins.objectid);
}
out:
return err;
}
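+
+/*
+ * wrapper around btrfs_get_block_lock that also looks up the csum item
+ * for this block and stashes the expected crc32 in bh->b_private so the
+ * read end_io handler can verify the data
+ */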
+int btrfs_get_block_csum(struct inode *inode, sector_t iblock,
+ struct buffer_head *result, int create)
+{
+ int ret;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct page *page = result->b_page;
+ u64 offset = ((u64)page->index << PAGE_CACHE_SHIFT) + bh_offset(result);
+ struct btrfs_csum_item *item;
+ struct btrfs_path *path = NULL;
+
+ mutex_lock(&root->fs_info->fs_mutex);
+ ret = btrfs_get_block_lock(inode, iblock, result, create);
+ if (ret)
+ goto out;
+
+ path = btrfs_alloc_path();
+ item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, offset, 0);
+ if (IS_ERR(item)) {
+ ret = PTR_ERR(item);
+ /* a csum that isn't present is a preallocated region. */
+ if (ret == -ENOENT || ret == -EFBIG)
+ ret = 0;
+ result->b_private = 0;
+ goto out;
+ }
+ memcpy((char *)&result->b_private, &item->csum, BTRFS_CRC32_SIZE);
+printk("get_block_sum file %lu offset %llu csum %X\n", inode->i_ino, (unsigned long long)offset, *(int *)(&item->csum));
+out:
+ if (path)
+ btrfs_free_path(path);
+ mutex_unlock(&root->fs_info->fs_mutex);
+ return ret;
+}
+
static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock,
struct buffer_head *result, int create)
{
return block_prepare_write(page, from, to, btrfs_get_block);
}
-static int btrfs_readpage(struct file *file, struct page *page)
+static void buffer_io_error(struct buffer_head *bh)
+{
+ char b[BDEVNAME_SIZE];
+
+ printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
+ bdevname(bh->b_bdev, b),
+ (unsigned long long)bh->b_blocknr);
+}
+
+/*
+ * I/O completion handler for btrfs_readpage() - pages which come
+ * unlocked at the end of I/O.  Verifies the csum stashed in
+ * bh->b_private against the data that was just read.
+ */
+static void btrfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
+{
+ unsigned long flags;
+ struct buffer_head *first;
+ struct buffer_head *tmp;
+ struct page *page;
+ int page_uptodate = 1;
+ struct inode *inode;
+ int ret;
+
+ BUG_ON(!buffer_async_read(bh));
+
+ page = bh->b_page;
+ inode = page->mapping->host;
+ if (uptodate) {
+ void *kaddr;
+ struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
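+ /*
+ * bh->b_private holds the expected crc32 stashed by
+ * btrfs_get_block_csum; check the data we just read against it
+ */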
+ if (bh->b_private) {
+ char csum[BTRFS_CRC32_SIZE];
+ kaddr = kmap_atomic(page, KM_IRQ0);
+ ret = btrfs_csum_data(root, kaddr + bh_offset(bh),
+ bh->b_size, csum);
+ BUG_ON(ret);
+ if (memcmp(csum, &bh->b_private, BTRFS_CRC32_SIZE)) {
+ u64 offset;
+ offset = ((u64)page->index << PAGE_CACHE_SHIFT) +
+ bh_offset(bh);
+ printk("btrfs csum failed ino %lu off %llu\n",
+ page->mapping->host->i_ino,
+ (unsigned long long)offset);
+ memset(kaddr + bh_offset(bh), 1, bh->b_size);
+ flush_dcache_page(page);
+printk("bad verify file %lu offset %llu bh_private %lX csum %X\n", inode->i_ino, (unsigned long long)offset, (unsigned long)(bh->b_private), *(int *)csum);
+ }
+ kunmap_atomic(kaddr, KM_IRQ0);
+ }
+ set_buffer_uptodate(bh);
+ } else {
+ clear_buffer_uptodate(bh);
+ if (printk_ratelimit())
+ buffer_io_error(bh);
+ SetPageError(page);
+ }
+
+ /*
+ * Be _very_ careful from here on. Bad things can happen if
+ * two buffer heads end IO at almost the same time and both
+ * decide that the page is now completely done.
+ */
+ first = page_buffers(page);
+ local_irq_save(flags);
+ bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+ clear_buffer_async_read(bh);
+ unlock_buffer(bh);
+ tmp = bh;
+ do {
+ if (!buffer_uptodate(tmp))
+ page_uptodate = 0;
+ if (buffer_async_read(tmp)) {
+ BUG_ON(!buffer_locked(tmp));
+ goto still_busy;
+ }
+ tmp = tmp->b_this_page;
+ } while (tmp != bh);
+ bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+ local_irq_restore(flags);
+
+ /*
+ * If none of the buffers had errors and they are all
+ * uptodate then we can set the page uptodate.
+ */
+ if (page_uptodate && !PageError(page))
+ SetPageUptodate(page);
+ unlock_page(page);
+ return;
+
+still_busy:
+ bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+ local_irq_restore(flags);
+ return;
+}
+
+/*
+ * Read a page asynchronously, modeled on block_read_full_page().
+ * btrfs_get_block_csum() is used so the expected csum for each block
+ * is stashed in bh->b_private for the end_io handler to verify.
+ * The unlock_buffer() and set/clear_buffer_uptodate() functions
+ * propagate buffer state into the page struct once IO has completed.
+ */
+int btrfs_readpage(struct file *file, struct page *page)
{
- return mpage_readpage(page, btrfs_get_block);
+ struct inode *inode = page->mapping->host;
+ sector_t iblock, lblock;
+ struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
+ unsigned int blocksize;
+ int nr, i;
+ int fully_mapped = 1;
+
+ BUG_ON(!PageLocked(page));
+ blocksize = 1 << inode->i_blkbits;
+ if (!page_has_buffers(page))
+ create_empty_buffers(page, blocksize, 0);
+ head = page_buffers(page);
+
+ iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
+ bh = head;
+ nr = 0;
+ i = 0;
+
+ do {
+ if (buffer_uptodate(bh))
+ continue;
+
+ if (!buffer_mapped(bh)) {
+ int err = 0;
+
+ fully_mapped = 0;
+ if (iblock < lblock) {
+ WARN_ON(bh->b_size != blocksize);
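+ /*
+ * map the block and stash its expected csum in
+ * bh->b_private for the end_io handler
+ */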
+ err = btrfs_get_block_csum(inode, iblock,
+ bh, 0);
+ if (err)
+ SetPageError(page);
+ }
+ if (!buffer_mapped(bh)) {
+ void *kaddr = kmap_atomic(page, KM_USER0);
+ memset(kaddr + i * blocksize, 0, blocksize);
+ flush_dcache_page(page);
+ kunmap_atomic(kaddr, KM_USER0);
+ if (!err)
+ set_buffer_uptodate(bh);
+ continue;
+ }
+ /*
+ * get_block() might have updated the buffer
+ * synchronously
+ */
+ if (buffer_uptodate(bh))
+ continue;
+ }
+ arr[nr++] = bh;
+ } while (i++, iblock++, (bh = bh->b_this_page) != head);
+
+ if (fully_mapped)
+ SetPageMappedToDisk(page);
+
+ if (!nr) {
+ /*
+ * All buffers are uptodate - we can set the page uptodate
+ * as well. But not if get_block() returned an error.
+ */
+ if (!PageError(page))
+ SetPageUptodate(page);
+ unlock_page(page);
+ return 0;
+ }
+
+ /* Stage two: lock the buffers */
+ for (i = 0; i < nr; i++) {
+ bh = arr[i];
+ lock_buffer(bh);
+ bh->b_end_io = btrfs_end_buffer_async_read;
+ set_buffer_async_read(bh);
+ }
+
+ /*
+ * Stage 3: start the IO. Check for uptodateness
+ * inside the buffer lock in case another process reading
+ * the underlying blockdev brought it uptodate (the sct fix).
+ */
+ for (i = 0; i < nr; i++) {
+ bh = arr[i];
+ if (buffer_uptodate(bh))
+ btrfs_end_buffer_async_read(bh, 1);
+ else
+ submit_bh(READ, bh);
+ }
+ return 0;
}
/*
struct buffer_head *bh, *head;
const unsigned blocksize = 1 << inode->i_blkbits;
int nr_underway = 0;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
BUG_ON(!PageLocked(page));
continue;
}
if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
+ struct btrfs_trans_handle *trans;
+ int ret;
+ u64 off = (u64)page->index << PAGE_CACHE_SHIFT;
+ char *kaddr;
+
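+ /*
+ * compute the crc32 of this block and insert it into the csum
+ * tree before the buffer is sent down for async write
+ */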
+ off += bh_offset(bh);
+ mutex_lock(&root->fs_info->fs_mutex);
+ trans = btrfs_start_transaction(root, 1);
+ btrfs_set_trans_block_group(trans, inode);
+ kaddr = kmap(page);
+ ret = btrfs_csum_file_block(trans, root, inode->i_ino,
+ off, kaddr + bh_offset(bh),
+ bh->b_size);
+ kunmap(page);
+ BUG_ON(ret);
+ ret = btrfs_end_transaction(trans, root);
+ BUG_ON(ret);
+ mutex_unlock(&root->fs_info->fs_mutex);
mark_buffer_async_write(bh);
} else {
unlock_buffer(bh);
return __btrfs_write_full_page(inode, page, wbc);
}
+/*
+ * btrfs_page_mkwrite() is not allowed to change the file size as it gets
+ * called from a page fault handler when a page is first dirtied. Hence we must
+ * be careful to check for EOF conditions here. We set the page up correctly
+ * for a written page which means we get ENOSPC checking when writing into
+ * holes and correct delalloc and unwritten extent mapping on filesystems that
+ * support these features.
+ *
+ * We are not allowed to take the i_mutex here so we have to play games to
+ * protect against truncate races as the page could now be beyond EOF. Because
+ * vmtruncate() writes the inode size before removing pages, once we have the
+ * page lock we can determine safely if the page is beyond EOF. If it is not
+ * beyond EOF, then the page is guaranteed safe against truncation until we
+ * unlock the page.
+ */
+int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+ struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+ unsigned long end;
+ loff_t size;
+ int ret = -EINVAL;
+
+ lock_page(page);
+ wait_on_page_writeback(page);
+printk("btrfs_page_mkwrite %lu %lu\n", page->mapping->host->i_ino, page->index);
+ size = i_size_read(inode);
+ if ((page->mapping != inode->i_mapping) ||
+ (((loff_t)page->index << PAGE_CACHE_SHIFT) > size)) {
+ /* page got truncated out from underneath us */
+ goto out_unlock;
+ }
+
+ /* page is wholly or partially inside EOF */
+ if ((((loff_t)page->index + 1) << PAGE_CACHE_SHIFT) > size)
+ end = size & ~PAGE_CACHE_MASK;
+ else
+ end = PAGE_CACHE_SIZE;
+
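+ /*
+ * run the page through prepare/commit write so blocks are
+ * mapped and the page is dirtied like a regular write
+ */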
+ ret = btrfs_prepare_write(NULL, page, 0, end);
+ if (!ret)
+ ret = btrfs_commit_write(NULL, page, 0, end);
+
+out_unlock:
+ unlock_page(page);
+ return ret;
+}
+
static void btrfs_truncate(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;