f2fs: fix to avoid data update racing between GC and DIO
author Chao Yu <yuchao0@huawei.com>
Wed, 13 Jul 2016 01:18:29 +0000 (09:18 +0800)
committer Jaegeuk Kim <jaegeuk@kernel.org>
Fri, 15 Jul 2016 22:21:22 +0000 (15:21 -0700)
Data in a file can be operated on by GC and DIO simultaneously, so we will
face the race cases below:

For write case:
Thread A                                Thread B
- generic_file_direct_write
 - invalidate_inode_pages2_range
 - f2fs_direct_IO
  - do_blockdev_direct_IO
   - do_direct_IO
    - get_more_blocks
                                        - f2fs_gc
                                         - do_garbage_collect
                                          - gc_data_segment
                                           - move_data_page
                                            - do_write_data_page
                                            migrate data block to new block address
   - dio_bio_submit
   update user data to old block address

For read case:
Thread A                                Thread B
- generic_file_direct_write
 - invalidate_inode_pages2_range
 - f2fs_direct_IO
  - do_blockdev_direct_IO
   - do_direct_IO
    - get_more_blocks
                                        - f2fs_balance_fs
                                         - f2fs_gc
                                          - do_garbage_collect
                                           - gc_data_segment
                                            - move_data_page
                                             - do_write_data_page
                                             migrate data block to new block address
                                          - write_checkpoint
                                           - do_checkpoint
                                            - clear_prefree_segments
                                             - f2fs_issue_discard
                                             discard old block address
   - dio_bio_submit
   update user buffer from obsolete block address

In order to fix this, DIO and GC must exclude each other when operating on
the same file.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
fs/f2fs/data.c
fs/f2fs/f2fs.h
fs/f2fs/gc.c
fs/f2fs/super.c

index 650099597dd2d8338dd7388b864fa97aee29e184..adfe47b21991de04fe5f0b073be765f35b268de1 100644 (file)
@@ -1716,6 +1716,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
        struct inode *inode = mapping->host;
        size_t count = iov_iter_count(iter);
        loff_t offset = iocb->ki_pos;
+       int rw = iov_iter_rw(iter);
        int err;
 
        err = check_direct_IO(inode, iter, offset);
@@ -1729,8 +1730,11 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 
        trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
 
+       down_read(&F2FS_I(inode)->dio_rwsem[rw]);
        err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio);
-       if (iov_iter_rw(iter) == WRITE) {
+       up_read(&F2FS_I(inode)->dio_rwsem[rw]);
+
+       if (rw == WRITE) {
                if (err > 0)
                        set_inode_flag(inode, FI_UPDATE_WRITE);
                else if (err < 0)
index b4a46b6823dc9e4d3b362e162ff4784011509d9e..211183c4e5c39be8fe6fcf3acc89d90be156506c 100644 (file)
@@ -454,6 +454,7 @@ struct f2fs_inode_info {
        struct list_head inmem_pages;   /* inmemory pages managed by f2fs */
        struct mutex inmem_lock;        /* lock for inmemory pages */
        struct extent_tree *extent_tree;        /* cached extent_tree entry */
+       struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */
 };
 
 static inline void get_extent_info(struct extent_info *ext,
index c6121378591489c29685fb76c9ef382cdb01a109..5c8acf754513fd11734894c335d816218f299e48 100644 (file)
@@ -755,12 +755,32 @@ next_step:
                /* phase 3 */
                inode = find_gc_inode(gc_list, dni.ino);
                if (inode) {
+                       struct f2fs_inode_info *fi = F2FS_I(inode);
+                       bool locked = false;
+
+                       if (S_ISREG(inode->i_mode)) {
+                               if (!down_write_trylock(&fi->dio_rwsem[READ]))
+                                       continue;
+                               if (!down_write_trylock(
+                                               &fi->dio_rwsem[WRITE])) {
+                                       up_write(&fi->dio_rwsem[READ]);
+                                       continue;
+                               }
+                               locked = true;
+                       }
+
                        start_bidx = start_bidx_of_node(nofs, inode)
                                                                + ofs_in_node;
                        if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
                                move_encrypted_block(inode, start_bidx);
                        else
                                move_data_page(inode, start_bidx, gc_type);
+
+                       if (locked) {
+                               up_write(&fi->dio_rwsem[WRITE]);
+                               up_write(&fi->dio_rwsem[READ]);
+                       }
+
                        stat_inc_data_blk_count(sbi, 1, gc_type);
                }
        }
index 451dfb4041e8854d52c579450c6793d2a1fed196..b97c065cbe744b1eed931e3dd5754d8815ef3e6c 100644 (file)
@@ -579,6 +579,8 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
        INIT_LIST_HEAD(&fi->gdirty_list);
        INIT_LIST_HEAD(&fi->inmem_pages);
        mutex_init(&fi->inmem_lock);
+       init_rwsem(&fi->dio_rwsem[READ]);
+       init_rwsem(&fi->dio_rwsem[WRITE]);
 
        /* Will be used by directory only */
        fi->i_dir_level = F2FS_SB(sb)->dir_level;