f2fs: avoid race in between GC and block exchange
author: Chao Yu <yuchao0@huawei.com>
Fri, 3 Nov 2017 02:21:05 +0000 (10:21 +0800)
committer: Jaegeuk Kim <jaegeuk@kernel.org>
Tue, 19 Dec 2017 03:38:29 +0000 (19:38 -0800)
During block exchange in {insert,collapse,move}_range, the page-block
mapping is unstable because mappings are being moved or recovered, so there
should be no concurrent cache read operation relying on such a mapping, nor
any cache write operation that could mess up the block exchange.

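So this patch makes background GC aware of that: the range operations hold
dio_rwsem[WRITE] during the block exchange, and GC skips an inode whose
dio_rwsem[WRITE] it cannot acquire with a trylock.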

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
fs/f2fs/file.c
fs/f2fs/gc.c

index 86d6dfe3faab1e2c774d7684476e085009e339f1..6034f693f92fa12dd930019748ca07cb2f763c5b 100644 (file)
@@ -1189,11 +1189,14 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        if (ret)
                goto out;
 
+       /* avoid gc operation during block exchange */
+       down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+
        truncate_pagecache(inode, offset);
 
        ret = f2fs_do_collapse(inode, pg_start, pg_end);
        if (ret)
-               goto out;
+               goto out_unlock;
 
        /* write out all moved pages, if possible */
        filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
@@ -1205,7 +1208,8 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        ret = truncate_blocks(inode, new_size, true);
        if (!ret)
                f2fs_i_size_write(inode, new_size);
-
+out_unlock:
+       up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
 out:
        up_write(&F2FS_I(inode)->i_mmap_sem);
        return ret;
@@ -1388,6 +1392,9 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
        if (ret)
                goto out;
 
+       /* avoid gc operation during block exchange */
+       down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+
        truncate_pagecache(inode, offset);
 
        pg_start = offset >> PAGE_SHIFT;
@@ -1415,6 +1422,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
 
        if (!ret)
                f2fs_i_size_write(inode, new_size);
+
+       up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
 out:
        up_write(&F2FS_I(inode)->i_mmap_sem);
        return ret;
@@ -2277,9 +2286,13 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
        }
 
        inode_lock(src);
+       down_write(&F2FS_I(src)->dio_rwsem[WRITE]);
        if (src != dst) {
-               if (!inode_trylock(dst)) {
-                       ret = -EBUSY;
+               ret = -EBUSY;
+               if (!inode_trylock(dst))
+                       goto out;
+               if (!down_write_trylock(&F2FS_I(dst)->dio_rwsem[WRITE])) {
+                       inode_unlock(dst);
                        goto out;
                }
        }
@@ -2339,9 +2352,12 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
        }
        f2fs_unlock_op(sbi);
 out_unlock:
-       if (src != dst)
+       if (src != dst) {
+               up_write(&F2FS_I(dst)->dio_rwsem[WRITE]);
                inode_unlock(dst);
+       }
 out:
+       up_write(&F2FS_I(src)->dio_rwsem[WRITE]);
        inode_unlock(src);
        return ret;
 }
index ff8f0012888d464cca5621c914b5b4e725710f8f..5d5bba462f26390512a50c4359ebc99b3b3481dc 100644 (file)
@@ -832,10 +832,17 @@ next_step:
                                continue;
                        }
 
+                       if (!down_write_trylock(
+                               &F2FS_I(inode)->dio_rwsem[WRITE])) {
+                               iput(inode);
+                               continue;
+                       }
+
                        start_bidx = start_bidx_of_node(nofs, inode);
                        data_page = get_read_data_page(inode,
                                        start_bidx + ofs_in_node, REQ_RAHEAD,
                                        true);
+                       up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
                        if (IS_ERR(data_page)) {
                                iput(inode);
                                continue;