Subject: Rework btrfs_file_write to only allocate while page locks are held
author Chris Mason <chris.mason@oracle.com>
Mon, 18 Jun 2007 13:57:58 +0000 (09:57 -0400)
committer David Woodhouse <dwmw2@hera.kernel.org>
Mon, 18 Jun 2007 13:57:58 +0000 (09:57 -0400)
Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/file-item.c
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/print-tree.c
fs/btrfs/transaction.c

index 602b63dc76c39425d281edaa72bbbf3650b011c5..15cc9ec92308921b20a3bb8b79f997955bc83939 100644 (file)
@@ -541,6 +541,8 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
        else
                ret = submit_bh(WRITE, bh);
        if (ret == -EOPNOTSUPP) {
+               get_bh(bh);
+               lock_buffer(bh);
                set_buffer_uptodate(bh);
                root->fs_info->do_barriers = 0;
                ret = submit_bh(WRITE, bh);
index 5f4eaba8b85f27de6cb798f01cadc898d6b6b0b9..e0738c65bc5bd32bb18a2fbc2edf47630f3fa980 100644 (file)
@@ -1477,7 +1477,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
                        break;
                if (wret < 0)
                        ret = wret;
-               btrfs_btree_balance_dirty(root);
        }
        for (i = 0; i <= orig_level; i++) {
                if (path->nodes[i]) {
index 1068993ab1c93203e5337285bb875fbcfa011375..cf894f09f6c731d474b094788c33889255cd716f 100644 (file)
@@ -228,7 +228,6 @@ found:
                           path->nodes[0]->b_data,
                           root->fs_info->sb->s_blocksize);
        ret = btrfs_csum_data(root, data, len, &item->csum);
-// printk("file %lu offset %llu csum %X\n", objectid, (unsigned long long)offset, *(int *)(&item->csum));
        btrfs_mark_buffer_dirty(path->nodes[0]);
 fail:
        btrfs_release_path(root, path);
index de8d47b44e1269edca92a2464a357ca7915aba3e..6b455c2b3f039bf40783efc8b30a43294f4f5798 100644 (file)
@@ -207,6 +207,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
                        }
                        path->slots[0]--;
                }
+next_slot:
                keep = 0;
                bookend = 0;
                found_extent = 0;
@@ -214,39 +215,48 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
                extent = NULL;
                leaf = btrfs_buffer_leaf(path->nodes[0]);
                slot = path->slots[0];
+               ret = 0;
                btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
                if (key.offset >= end || key.objectid != inode->i_ino) {
-                       ret = 0;
                        goto out;
                }
-               if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
-                       ret = 0;
+               if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY) {
                        goto out;
                }
-               extent = btrfs_item_ptr(leaf, slot,
-                                       struct btrfs_file_extent_item);
-               found_type = btrfs_file_extent_type(extent);
-               if (found_type == BTRFS_FILE_EXTENT_REG) {
-                       extent_end = key.offset +
-                               (btrfs_file_extent_num_blocks(extent) <<
-                                inode->i_blkbits);
-                       found_extent = 1;
-               } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
-                       found_inline = 1;
-                       extent_end = key.offset +
-                            btrfs_file_extent_inline_len(leaf->items + slot);
+               if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
+                       extent = btrfs_item_ptr(leaf, slot,
+                                               struct btrfs_file_extent_item);
+                       found_type = btrfs_file_extent_type(extent);
+                       if (found_type == BTRFS_FILE_EXTENT_REG) {
+                               extent_end = key.offset +
+                                       (btrfs_file_extent_num_blocks(extent) <<
+                                        inode->i_blkbits);
+                               found_extent = 1;
+                       } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
+                               found_inline = 1;
+                               extent_end = key.offset +
+                                    btrfs_file_extent_inline_len(leaf->items +
+                                                                 slot);
+                       }
+               } else {
+                       extent_end = search_start;
                }
 
                /* we found nothing we can drop */
-               if (!found_extent && !found_inline) {
-                       ret = 0;
-                       goto out;
-               }
-
-               /* we found nothing inside the range */
-               if (search_start >= extent_end) {
-                       ret = 0;
-                       goto out;
+               if ((!found_extent && !found_inline) ||
+                   search_start >= extent_end) {
+                       int nextret;
+                       u32 nritems;
+                       nritems = btrfs_header_nritems(
+                                       btrfs_buffer_header(path->nodes[0]));
+                       if (slot >= nritems - 1) {
+                               nextret = btrfs_next_leaf(root, path);
+                               if (nextret)
+                                       goto out;
+                       } else {
+                               path->slots[0]++;
+                       }
+                       goto next_slot;
                }
 
                /* FIXME, there's only one inline extent allowed right now */
@@ -272,7 +282,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
                        WARN_ON(found_inline);
                        bookend = 1;
                }
-
                /* truncate existing extent */
                if (start > key.offset) {
                        u64 new_num;
@@ -337,10 +346,14 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
                        ins.offset = end;
                        ins.flags = 0;
                        btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
-
                        btrfs_release_path(root, path);
                        ret = btrfs_insert_empty_item(trans, root, path, &ins,
                                                      sizeof(*extent));
+
+                       if (ret) {
+                               btrfs_print_leaf(root, btrfs_buffer_leaf(path->nodes[0]));
+                               printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu\n", ret , ins.objectid, ins.flags, ins.offset, start, end, key.offset, extent_end);
+                       }
                        BUG_ON(ret);
                        extent = btrfs_item_ptr(
                                    btrfs_buffer_leaf(path->nodes[0]),
@@ -387,8 +400,7 @@ static int prepare_pages(struct btrfs_root *root,
                         loff_t pos,
                         unsigned long first_index,
                         unsigned long last_index,
-                        size_t write_bytes,
-                        u64 alloc_extent_start)
+                        size_t write_bytes)
 {
        int i;
        unsigned long index = pos >> PAGE_CACHE_SHIFT;
@@ -399,6 +411,16 @@ static int prepare_pages(struct btrfs_root *root,
        struct buffer_head *bh;
        struct buffer_head *head;
        loff_t isize = i_size_read(inode);
+       struct btrfs_trans_handle *trans;
+       u64 hint_block;
+       u64 num_blocks;
+       u64 alloc_extent_start;
+       u64 start_pos;
+       struct btrfs_key ins;
+
+       start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
+       num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >>
+                       inode->i_blkbits;
 
        memset(pages, 0, num_pages * sizeof(struct page *));
 
@@ -408,6 +430,72 @@ static int prepare_pages(struct btrfs_root *root,
                        err = -ENOMEM;
                        goto failed_release;
                }
+       }
+
+       mutex_lock(&root->fs_info->fs_mutex);
+       trans = btrfs_start_transaction(root, 1);
+       if (!trans) {
+               err = -ENOMEM;
+               mutex_unlock(&root->fs_info->fs_mutex);
+               goto out_unlock;
+       }
+       btrfs_set_trans_block_group(trans, inode);
+       /* FIXME blocksize != 4096 */
+       inode->i_blocks += num_blocks << 3;
+       hint_block = 0;
+
+       /* FIXME...EIEIO, ENOSPC and more */
+
+       /* step one, delete the existing extents in this range */
+       /* FIXME blocksize != pagesize */
+       if (start_pos < inode->i_size) {
+               err = btrfs_drop_extents(trans, root, inode,
+                        start_pos, (pos + write_bytes + root->blocksize -1) &
+                        ~((u64)root->blocksize - 1), &hint_block);
+               BUG_ON(err);
+       }
+
+       /* insert any holes we need to create */
+       if (inode->i_size < start_pos) {
+               u64 last_pos_in_file;
+               u64 hole_size;
+               u64 mask = root->blocksize - 1;
+               last_pos_in_file = (isize + mask) & ~mask;
+               hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
+               hole_size >>= inode->i_blkbits;
+               if (last_pos_in_file < start_pos) {
+                       err = btrfs_insert_file_extent(trans, root,
+                                                      inode->i_ino,
+                                                      last_pos_in_file,
+                                                      0, 0, hole_size);
+               }
+               BUG_ON(err);
+       }
+
+       /*
+        * either allocate an extent for the new bytes or setup the key
+        * to show we are doing inline data in the extent
+        */
+       if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size ||
+           pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
+               err = btrfs_alloc_extent(trans, root, inode->i_ino,
+                                        num_blocks, hint_block, (u64)-1,
+                                        &ins, 1);
+               BUG_ON(err);
+               err = btrfs_insert_file_extent(trans, root, inode->i_ino,
+                                      start_pos, ins.objectid, ins.offset,
+                                      ins.offset);
+               BUG_ON(err);
+       } else {
+               ins.offset = 0;
+               ins.objectid = 0;
+       }
+       BUG_ON(err);
+       alloc_extent_start = ins.objectid;
+       err = btrfs_end_transaction(trans, root);
+       mutex_unlock(&root->fs_info->fs_mutex);
+
+       for (i = 0; i < num_pages; i++) {
                cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
                wait_on_page_writeback(pages[i]);
                offset = pos & (PAGE_CACHE_SIZE -1);
@@ -444,6 +532,11 @@ failed_truncate:
        if (pos > isize)
                vmtruncate(inode, isize);
        return err;
+
+out_unlock:
+       mutex_unlock(&root->fs_info->fs_mutex);
+       goto failed_release;
+
 }
 
 static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
@@ -455,16 +548,14 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
        int ret = 0;
        struct inode *inode = file->f_path.dentry->d_inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct page *pages[8];
+       struct page **pages = NULL;
+       int nrptrs;
        struct page *pinned[2];
        unsigned long first_index;
        unsigned long last_index;
-       u64 start_pos;
-       u64 num_blocks;
-       u64 alloc_extent_start;
-       u64 hint_block;
-       struct btrfs_trans_handle *trans;
-       struct btrfs_key ins;
+
+       nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
+                    PAGE_CACHE_SIZE / (sizeof(struct page *)));
        pinned[0] = NULL;
        pinned[1] = NULL;
        if (file->f_flags & O_DIRECT)
@@ -482,9 +573,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
                goto out;
        file_update_time(file);
 
-       start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
-       num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
-                       inode->i_blkbits;
+       pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
 
        mutex_lock(&inode->i_mutex);
        first_index = pos >> PAGE_CACHE_SHIFT;
@@ -516,87 +605,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
                }
        }
 
-       mutex_lock(&root->fs_info->fs_mutex);
-       trans = btrfs_start_transaction(root, 1);
-       if (!trans) {
-               err = -ENOMEM;
-               mutex_unlock(&root->fs_info->fs_mutex);
-               goto out_unlock;
-       }
-       btrfs_set_trans_block_group(trans, inode);
-       /* FIXME blocksize != 4096 */
-       inode->i_blocks += num_blocks << 3;
-       hint_block = 0;
-
-       /* FIXME...EIEIO, ENOSPC and more */
-
-       /* step one, delete the existing extents in this range */
-       if (start_pos < inode->i_size) {
-               /* FIXME blocksize != pagesize */
-               ret = btrfs_drop_extents(trans, root, inode,
-                                        start_pos,
-                                        (pos + count + root->blocksize -1) &
-                                        ~((u64)root->blocksize - 1),
-                                        &hint_block);
-               BUG_ON(ret);
-       }
-
-       /* insert any holes we need to create */
-       if (inode->i_size < start_pos) {
-               u64 last_pos_in_file;
-               u64 hole_size;
-               u64 mask = root->blocksize - 1;
-               last_pos_in_file = (inode->i_size + mask) & ~mask;
-               hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
-               hole_size >>= inode->i_blkbits;
-               if (last_pos_in_file < start_pos) {
-                       ret = btrfs_insert_file_extent(trans, root,
-                                                      inode->i_ino,
-                                                      last_pos_in_file,
-                                                      0, 0, hole_size);
-               }
-               BUG_ON(ret);
-       }
-
-       /*
-        * either allocate an extent for the new bytes or setup the key
-        * to show we are doing inline data in the extent
-        */
-       if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
-           pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
-               ret = btrfs_alloc_extent(trans, root, inode->i_ino,
-                                        num_blocks, hint_block, (u64)-1,
-                                        &ins, 1);
-               BUG_ON(ret);
-               ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
-                                      start_pos, ins.objectid, ins.offset,
-                                      ins.offset);
-               BUG_ON(ret);
-       } else {
-               ins.offset = 0;
-               ins.objectid = 0;
-       }
-       BUG_ON(ret);
-       alloc_extent_start = ins.objectid;
-       ret = btrfs_end_transaction(trans, root);
-       mutex_unlock(&root->fs_info->fs_mutex);
-
        while(count > 0) {
                size_t offset = pos & (PAGE_CACHE_SIZE - 1);
-               size_t write_bytes = min(count,
-                                        (size_t)PAGE_CACHE_SIZE - offset);
+               size_t write_bytes = min(count, nrptrs * PAGE_CACHE_SIZE -
+                                        offset);
                size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
                                        PAGE_CACHE_SHIFT;
 
+               WARN_ON(num_pages > nrptrs);
                memset(pages, 0, sizeof(pages));
                ret = prepare_pages(root, file, pages, num_pages,
                                    pos, first_index, last_index,
-                                   write_bytes, alloc_extent_start);
+                                   write_bytes);
                BUG_ON(ret);
 
-               /* FIXME blocks != pagesize */
-               if (alloc_extent_start)
-                       alloc_extent_start += num_pages;
                ret = btrfs_copy_from_user(pos, num_pages,
                                           write_bytes, pages, buf);
                BUG_ON(ret);
@@ -611,13 +633,13 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
                pos += write_bytes;
                num_written += write_bytes;
 
-               balance_dirty_pages_ratelimited(inode->i_mapping);
+               balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages);
                btrfs_btree_balance_dirty(root);
                cond_resched();
        }
-out_unlock:
        mutex_unlock(&inode->i_mutex);
 out:
+       kfree(pages);
        if (pinned[0])
                page_cache_release(pinned[0]);
        if (pinned[1])
index def33ac90d777e77aa01991aa728c6b80dd44e29..94f1c28c25b951b8804b9ffee61c5a1de387f0dc 100644 (file)
@@ -962,7 +962,6 @@ void btrfs_dirty_inode(struct inode *inode)
        btrfs_update_inode(trans, root, inode);
        btrfs_end_transaction(trans, root);
        mutex_unlock(&root->fs_info->fs_mutex);
-       btrfs_btree_balance_dirty(root);
 }
 
 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
@@ -1402,7 +1401,6 @@ int btrfs_get_block_csum(struct inode *inode, sector_t iblock,
                goto out;
        }
        memcpy((char *)&result->b_private, &item->csum, BTRFS_CRC32_SIZE);
-printk("get_block_sum file %lu offset %llu csum %X\n", inode->i_ino, (unsigned long long)offset, *(int *)(&item->csum));
 out:
        if (path)
                btrfs_free_path(path);
@@ -1476,7 +1474,6 @@ static void btrfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
                                       (unsigned long long)offset);
                                memset(kaddr + bh_offset(bh), 1, bh->b_size);
                                flush_dcache_page(page);
-printk("bad verify file %lu offset %llu bh_private %lX csum %X\n", inode->i_ino, (unsigned long long)offset, (unsigned long)(bh->b_private), *(int *)csum);
                        }
                        kunmap_atomic(kaddr, KM_IRQ0);
                }
@@ -1655,6 +1652,13 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page,
 
        last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
 
+       /* no csumming allowed when from PF_MEMALLOC */
+       if (current->flags & PF_MEMALLOC) {
+               redirty_page_for_writepage(wbc, page);
+               unlock_page(page);
+               return 0;
+       }
+
        if (!page_has_buffers(page)) {
                create_empty_buffers(page, blocksize,
                                        (1 << BH_Dirty)|(1 << BH_Uptodate));
@@ -1885,7 +1889,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 
        lock_page(page);
        wait_on_page_writeback(page);
-printk("btrfs_page_mkwrite %lu %lu\n", page->mapping->host->i_ino, page->index);
        size = i_size_read(inode);
        if ((page->mapping != inode->i_mapping) ||
            ((page->index << PAGE_CACHE_SHIFT) > size)) {
index 1fa7056830083557bed092049406a00d5618cdd3..e66acd4e2745936a7a6e3149726c3974ab6812fe 100644 (file)
@@ -31,6 +31,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l)
        struct btrfs_dir_item *di;
        struct btrfs_inode_item *ii;
        struct btrfs_block_group_item *bi;
+       struct btrfs_file_extent_item *fi;
        u32 type;
 
        printk("leaf %llu total ptrs %d free space %d\n",
@@ -75,6 +76,23 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l)
                        printk("\t\textent data refs %u\n",
                                btrfs_extent_refs(ei));
                        break;
+
+               case BTRFS_EXTENT_DATA_KEY:
+                       fi = btrfs_item_ptr(l, i,
+                                           struct btrfs_file_extent_item);
+                       if (btrfs_file_extent_type(fi) ==
+                           BTRFS_FILE_EXTENT_INLINE) {
+                               printk("\t\tinline extent data size %u\n",
+                                  btrfs_file_extent_inline_len(l->items + i));
+                               break;
+                       }
+                       printk("\t\textent data disk block %llu nr %llu\n",
+                              (unsigned long long)btrfs_file_extent_disk_blocknr(fi),
+                              (unsigned long long)btrfs_file_extent_disk_num_blocks(fi));
+                       printk("\t\textent data offset %llu nr %llu\n",
+                         (unsigned long long)btrfs_file_extent_offset(fi),
+                         (unsigned long long)btrfs_file_extent_num_blocks(fi));
+                       break;
                case BTRFS_BLOCK_GROUP_ITEM_KEY:
                        bi = btrfs_item_ptr(l, i,
                                            struct btrfs_block_group_item);
index cd4f1d1403855e2fc7185605fec137cded32098c..b5b99a85763f91e76723d8c05d0b8ee94bd264c6 100644 (file)
@@ -279,6 +279,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root,
                BUG_ON(ret);
                kfree(dirty);
                mutex_unlock(&tree_root->fs_info->fs_mutex);
+               btrfs_btree_balance_dirty(tree_root);
        }
        return 0;
 }