f2fs: support in batch multi blocks preallocation
author: Chao Yu <yuchao0@huawei.com>
Mon, 9 May 2016 11:56:30 +0000 (19:56 +0800)
committer: Jaegeuk Kim <jaegeuk@kernel.org>
Wed, 11 May 2016 16:56:35 +0000 (09:56 -0700)
This patch introduces reserve_new_blocks() to preallocate multiple blocks
in a single batch operation, which avoids a lot of redundant work and
results in better performance.

In a virtual machine, with a rotational device:

time fallocate -l 32G /mnt/f2fs/file

Before:
real 0m4.584s
user 0m0.000s
sys 0m4.580s

After:
real 0m0.292s
user 0m0.000s
sys 0m0.272s

On x86, with an SSD:

time fallocate -l 500G $MNT/testfile

Before : 24.758 s
After  :  1.604 s

Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: fix bugs and add performance numbers measured in x86.]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
fs/f2fs/data.c
fs/f2fs/f2fs.h
include/trace/events/f2fs.h

index 369d953bd770b4a8e992d578ac0c547b956b99c8..b61e2d40cdfb3d7e5cc2b04d6f5caa8d720ac0d6 100644 (file)
@@ -278,6 +278,16 @@ alloc_new:
        trace_f2fs_submit_page_mbio(fio->page, fio);
 }
 
+static void __set_data_blkaddr(struct dnode_of_data *dn)
+{
+       struct f2fs_node *rn = F2FS_NODE(dn->node_page);
+       __le32 *addr_array;
+
+       /* Get physical address of data block */
+       addr_array = blkaddr_in_node(rn);
+       addr_array[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
+}
+
 /*
  * Lock ordering for the change of data block address:
  * ->data_page
@@ -286,19 +296,9 @@ alloc_new:
  */
 void set_data_blkaddr(struct dnode_of_data *dn)
 {
-       struct f2fs_node *rn;
-       __le32 *addr_array;
-       struct page *node_page = dn->node_page;
-       unsigned int ofs_in_node = dn->ofs_in_node;
-
-       f2fs_wait_on_page_writeback(node_page, NODE, true);
-
-       rn = F2FS_NODE(node_page);
-
-       /* Get physical address of data block */
-       addr_array = blkaddr_in_node(rn);
-       addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
-       if (set_page_dirty(node_page))
+       f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
+       __set_data_blkaddr(dn);
+       if (set_page_dirty(dn->node_page))
                dn->node_changed = true;
 }
 
@@ -309,24 +309,53 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
        f2fs_update_extent_cache(dn);
 }
 
-int reserve_new_block(struct dnode_of_data *dn)
+/* dn->ofs_in_node will be returned with up-to-date last block pointer */
+int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
 
+       if (!count)
+               return 0;
+
        if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
                return -EPERM;
-       if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
+       if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count)))
                return -ENOSPC;
 
-       trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);
+       trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
+                                               dn->ofs_in_node, count);
+
+       f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
+
+       for (; count > 0; dn->ofs_in_node++) {
+               block_t blkaddr =
+                       datablock_addr(dn->node_page, dn->ofs_in_node);
+               if (blkaddr == NULL_ADDR) {
+                       dn->data_blkaddr = NEW_ADDR;
+                       __set_data_blkaddr(dn);
+                       count--;
+               }
+       }
+
+       if (set_page_dirty(dn->node_page))
+               dn->node_changed = true;
 
-       dn->data_blkaddr = NEW_ADDR;
-       set_data_blkaddr(dn);
        mark_inode_dirty(dn->inode);
        sync_inode_page(dn);
        return 0;
 }
 
+/* Should keep dn->ofs_in_node unchanged */
+int reserve_new_block(struct dnode_of_data *dn)
+{
+       unsigned int ofs_in_node = dn->ofs_in_node;
+       int ret;
+
+       ret = reserve_new_blocks(dn, 1);
+       dn->ofs_in_node = ofs_in_node;
+       return ret;
+}
+
 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
 {
        bool need_put = dn->inode_page ? false : true;
@@ -545,6 +574,7 @@ static int __allocate_data_block(struct dnode_of_data *dn)
        struct node_info ni;
        int seg = CURSEG_WARM_DATA;
        pgoff_t fofs;
+       blkcnt_t count = 1;
 
        if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
                return -EPERM;
@@ -553,7 +583,7 @@ static int __allocate_data_block(struct dnode_of_data *dn)
        if (dn->data_blkaddr == NEW_ADDR)
                goto alloc;
 
-       if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
+       if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count)))
                return -ENOSPC;
 
 alloc:
@@ -621,8 +651,10 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
        struct dnode_of_data dn;
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
-       pgoff_t pgofs, end_offset;
+       pgoff_t pgofs, end_offset, end;
        int err = 0, ofs = 1;
+       unsigned int ofs_in_node, last_ofs_in_node;
+       blkcnt_t prealloc;
        struct extent_info ei;
        bool allocated = false;
        block_t blkaddr;
@@ -632,6 +664,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 
        /* it only supports block size == page size */
        pgofs = (pgoff_t)map->m_lblk;
+       end = pgofs + maxblocks;
 
        if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
                map->m_pblk = ei.blk + pgofs - ei.fofs;
@@ -659,6 +692,8 @@ next_dnode:
                goto unlock_out;
        }
 
+       prealloc = 0;
+       ofs_in_node = dn.ofs_in_node;
        end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
 
 next_block:
@@ -671,17 +706,20 @@ next_block:
                                goto sync_out;
                        }
                        if (flag == F2FS_GET_BLOCK_PRE_AIO) {
-                               if (blkaddr == NULL_ADDR)
-                                       err = reserve_new_block(&dn);
+                               if (blkaddr == NULL_ADDR) {
+                                       prealloc++;
+                                       last_ofs_in_node = dn.ofs_in_node;
+                               }
                        } else {
                                err = __allocate_data_block(&dn);
-                               if (!err)
+                               if (!err) {
                                        set_inode_flag(F2FS_I(inode),
                                                        FI_APPEND_WRITE);
+                                       allocated = true;
+                               }
                        }
                        if (err)
                                goto sync_out;
-                       allocated = true;
                        map->m_flags = F2FS_MAP_NEW;
                        blkaddr = dn.data_blkaddr;
                } else {
@@ -700,6 +738,9 @@ next_block:
                }
        }
 
+       if (flag == F2FS_GET_BLOCK_PRE_AIO)
+               goto skip;
+
        if (map->m_len == 0) {
                /* preallocated unwritten block should be mapped for fiemap. */
                if (blkaddr == NEW_ADDR)
@@ -711,32 +752,49 @@ next_block:
        } else if ((map->m_pblk != NEW_ADDR &&
                        blkaddr == (map->m_pblk + ofs)) ||
                        (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
-                       flag == F2FS_GET_BLOCK_PRE_DIO ||
-                       flag == F2FS_GET_BLOCK_PRE_AIO) {
+                       flag == F2FS_GET_BLOCK_PRE_DIO) {
                ofs++;
                map->m_len++;
        } else {
                goto sync_out;
        }
 
+skip:
        dn.ofs_in_node++;
        pgofs++;
 
-       if (map->m_len < maxblocks) {
-               if (dn.ofs_in_node < end_offset)
-                       goto next_block;
+       /* preallocate blocks in batch for one dnode page */
+       if (flag == F2FS_GET_BLOCK_PRE_AIO &&
+                       (pgofs == end || dn.ofs_in_node == end_offset)) {
 
-               if (allocated)
-                       sync_inode_page(&dn);
-               f2fs_put_dnode(&dn);
+               dn.ofs_in_node = ofs_in_node;
+               err = reserve_new_blocks(&dn, prealloc);
+               if (err)
+                       goto sync_out;
 
-               if (create) {
-                       f2fs_unlock_op(sbi);
-                       f2fs_balance_fs(sbi, allocated);
+               map->m_len += dn.ofs_in_node - ofs_in_node;
+               if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
+                       err = -ENOSPC;
+                       goto sync_out;
                }
-               allocated = false;
-               goto next_dnode;
+               dn.ofs_in_node = end_offset;
+       }
+
+       if (pgofs >= end)
+               goto sync_out;
+       else if (dn.ofs_in_node < end_offset)
+               goto next_block;
+
+       if (allocated)
+               sync_inode_page(&dn);
+       f2fs_put_dnode(&dn);
+
+       if (create) {
+               f2fs_unlock_op(sbi);
+               f2fs_balance_fs(sbi, allocated);
        }
+       allocated = false;
+       goto next_dnode;
 
 sync_out:
        if (allocated)
index 052f5a8c96f1227916be2905a9a159edec749255..1401c96724c6c2b70e84f6e16af14fd8c60fb1c0 100644 (file)
@@ -1094,7 +1094,7 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs)
 }
 
 static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
-                                struct inode *inode, blkcnt_t count)
+                                struct inode *inode, blkcnt_t *count)
 {
        block_t valid_block_count;
 
@@ -1106,14 +1106,19 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
        }
 #endif
        valid_block_count =
-               sbi->total_valid_block_count + (block_t)count;
+               sbi->total_valid_block_count + (block_t)(*count);
        if (unlikely(valid_block_count > sbi->user_block_count)) {
-               spin_unlock(&sbi->stat_lock);
-               return false;
+               *count = sbi->user_block_count - sbi->total_valid_block_count;
+               if (!*count) {
+                       spin_unlock(&sbi->stat_lock);
+                       return false;
+               }
        }
-       inode->i_blocks += count;
-       sbi->total_valid_block_count = valid_block_count;
-       sbi->alloc_valid_block_count += (block_t)count;
+       /* *count can be recalculated */
+       inode->i_blocks += *count;
+       sbi->total_valid_block_count =
+               sbi->total_valid_block_count + (block_t)(*count);
+       sbi->alloc_valid_block_count += (block_t)(*count);
        spin_unlock(&sbi->stat_lock);
        return true;
 }
@@ -1945,6 +1950,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *);
 void f2fs_submit_page_mbio(struct f2fs_io_info *);
 void set_data_blkaddr(struct dnode_of_data *);
 void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t);
+int reserve_new_blocks(struct dnode_of_data *, blkcnt_t);
 int reserve_new_block(struct dnode_of_data *);
 int f2fs_get_block(struct dnode_of_data *, pgoff_t);
 ssize_t f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *);
index 0f565845707bb338e32f619ebbfa0121b65d0fe9..497e6e80fb635548d51296c5161d2b106ed01863 100644 (file)
@@ -694,28 +694,32 @@ TRACE_EVENT(f2fs_direct_IO_exit,
                __entry->ret)
 );
 
-TRACE_EVENT(f2fs_reserve_new_block,
+TRACE_EVENT(f2fs_reserve_new_blocks,
 
-       TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node),
+       TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node,
+                                                       blkcnt_t count),
 
-       TP_ARGS(inode, nid, ofs_in_node),
+       TP_ARGS(inode, nid, ofs_in_node, count),
 
        TP_STRUCT__entry(
                __field(dev_t,  dev)
                __field(nid_t, nid)
                __field(unsigned int, ofs_in_node)
+               __field(blkcnt_t, count)
        ),
 
        TP_fast_assign(
                __entry->dev    = inode->i_sb->s_dev;
                __entry->nid    = nid;
                __entry->ofs_in_node = ofs_in_node;
+               __entry->count = count;
        ),
 
-       TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u",
+       TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u, count = %llu",
                show_dev(__entry),
                (unsigned int)__entry->nid,
-               __entry->ofs_in_node)
+               __entry->ofs_in_node,
+               (unsigned long long)__entry->count)
 );
 
 DECLARE_EVENT_CLASS(f2fs__submit_page_bio,