f2fs: set fsync mark only for the last dnode
Author:     Jaegeuk Kim <jaegeuk@kernel.org>
AuthorDate: Fri, 15 Apr 2016 16:43:17 +0000 (09:43 -0700)
Commit:     Jaegeuk Kim <jaegeuk@kernel.org>
CommitDate: Tue, 26 Apr 2016 21:24:59 +0000 (14:24 -0700)
In order to provide atomic writes, we should consider a power failure occurring
in the middle of sync_node_pages during fsync.
So, this patch sets the fsync mark only on the last dnode block.

Acked-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/node.c
fs/f2fs/recovery.c

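As a rough illustration of the flow described in the message above, here is a
simplified user-space sketch (not kernel code; struct node_page and the helpers
find_last_dirty() and writeback_marking_last() are invented for this sketch):
first scan for the last dirty dnode of the inode, then write out all dirty
dnodes while setting the fsync mark only on that final page, retrying if the
marked write did not go out. The actual implementation is in the node.c hunk
below (last_fsync_dnode() plus the retry loop in fsync_node_pages()).

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Toy stand-in for a dirty dnode page; in the kernel this is struct page
 * state plus the node footer bits set by set_fsync_mark(). */
struct node_page {
	unsigned long index;
	bool dirty;
	bool fsync_mark;
};

/* Pass 1: find the last dirty dnode of the inode (cf. last_fsync_dnode()). */
static struct node_page *find_last_dirty(struct node_page *pages, size_t n)
{
	struct node_page *last = NULL;
	size_t i;

	for (i = 0; i < n; i++)
		if (pages[i].dirty)
			last = &pages[i];
	return last;
}

/* Pass 2: write out every dirty dnode, but set the fsync mark only on the
 * last one, so a power failure part-way through never leaves an earlier
 * dnode looking like the end of the fsync chain
 * (cf. fsync_node_pages() with atomic == true). */
static bool writeback_marking_last(struct node_page *pages, size_t n,
				   struct node_page *last)
{
	bool marked = false;
	size_t i;

	for (i = 0; i < n; i++) {
		struct node_page *p = &pages[i];

		if (!p->dirty && p != last)
			continue;		/* someone wrote it for us */

		if (p == last) {
			p->fsync_mark = true;	/* only the final dnode is marked */
			p->dirty = true;	/* re-dirty if another thread wrote it */
			marked = true;
		}
		p->dirty = false;		/* "writepage" */
	}
	return marked;			/* caller retries until this is true */
}

int main(void)
{
	struct node_page pages[] = {
		{ .index = 10, .dirty = true  },
		{ .index = 11, .dirty = false },
		{ .index = 12, .dirty = true  },
	};
	struct node_page *last = find_last_dirty(pages, 3);

	while (last && !writeback_marking_last(pages, 3, last))
		;	/* mirrors the "Retry to write fsync mark" path */

	if (last)
		printf("fsync mark carried by dnode idx=%lu\n", last->index);
	return 0;
}
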
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 269abe5959e7a28d2fa3606f20f5035e5afed437..ca828b0e7d6d89798c2c45cefc80c07df516713b 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -159,7 +159,6 @@ struct fsync_inode_entry {
        struct inode *inode;    /* vfs inode pointer */
        block_t blkaddr;        /* block address locating the last fsync */
        block_t last_dentry;    /* block address locating the last dentry */
-       block_t last_inode;     /* block address locating the last inode */
 };
 
 #define nats_in_cursum(jnl)            (le16_to_cpu(jnl->n_nats))
@@ -1784,7 +1783,8 @@ void ra_node_page(struct f2fs_sb_info *, nid_t);
 struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
 struct page *get_node_page_ra(struct page *, int);
 void sync_inode_page(struct dnode_of_data *);
-int fsync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *);
+int fsync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *,
+                                                               bool);
 int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *);
 bool alloc_nid(struct f2fs_sb_info *, nid_t *);
 void alloc_nid_done(struct f2fs_sb_info *, nid_t);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 60fd64c59cce4b18da8e3ff89fec161e73c48a6f..dc47d5c7b882e8fc30b282da61ef6bd2196e850e 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -182,7 +182,8 @@ static void try_to_fix_pino(struct inode *inode)
        }
 }
 
-int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
+static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
+                                               int datasync, bool atomic)
 {
        struct inode *inode = file->f_mapping->host;
        struct f2fs_inode_info *fi = F2FS_I(inode);
@@ -256,7 +257,7 @@ go_write:
                goto out;
        }
 sync_nodes:
-       ret = fsync_node_pages(sbi, ino, &wbc);
+       ret = fsync_node_pages(sbi, ino, &wbc, atomic);
        if (ret)
                goto out;
 
@@ -290,6 +291,11 @@ out:
        return ret;
 }
 
+int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
+{
+       return f2fs_do_sync_file(file, start, end, datasync, false);
+}
+
 static pgoff_t __get_first_dirty_index(struct address_space *mapping,
                                                pgoff_t pgofs, int whence)
 {
@@ -1407,7 +1413,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
                }
        }
 
-       ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0);
+       ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
 err_out:
        mnt_drop_write_file(filp);
        return ret;
@@ -1465,7 +1471,7 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
                drop_inmem_pages(inode);
        if (f2fs_is_volatile_file(inode)) {
                clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
-               ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0);
+               ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
        }
 
        mnt_drop_write_file(filp);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 8a1e21144ecbebed0449d0650ce2a3493bf2fcca..de070a524fd2c8341cbb18cebb2e61cbbee6e7d6 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1222,13 +1222,81 @@ iput_out:
        iput(inode);
 }
 
+static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
+{
+       pgoff_t index, end;
+       struct pagevec pvec;
+       struct page *last_page = NULL;
+
+       pagevec_init(&pvec, 0);
+       index = 0;
+       end = ULONG_MAX;
+
+       while (index <= end) {
+               int i, nr_pages;
+               nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
+                               PAGECACHE_TAG_DIRTY,
+                               min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+               if (nr_pages == 0)
+                       break;
+
+               for (i = 0; i < nr_pages; i++) {
+                       struct page *page = pvec.pages[i];
+
+                       if (unlikely(f2fs_cp_error(sbi))) {
+                               f2fs_put_page(last_page, 0);
+                               pagevec_release(&pvec);
+                               return ERR_PTR(-EIO);
+                       }
+
+                       if (!IS_DNODE(page) || !is_cold_node(page))
+                               continue;
+                       if (ino_of_node(page) != ino)
+                               continue;
+
+                       lock_page(page);
+
+                       if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
+continue_unlock:
+                               unlock_page(page);
+                               continue;
+                       }
+                       if (ino_of_node(page) != ino)
+                               goto continue_unlock;
+
+                       if (!PageDirty(page)) {
+                               /* someone wrote it for us */
+                               goto continue_unlock;
+                       }
+
+                       if (last_page)
+                               f2fs_put_page(last_page, 0);
+
+                       get_page(page);
+                       last_page = page;
+                       unlock_page(page);
+               }
+               pagevec_release(&pvec);
+               cond_resched();
+       }
+       return last_page;
+}
+
 int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
-                                       struct writeback_control *wbc)
+                       struct writeback_control *wbc, bool atomic)
 {
        pgoff_t index, end;
        struct pagevec pvec;
        int ret = 0;
+       struct page *last_page = NULL;
+       bool marked = false;
 
+       if (atomic) {
+               last_page = last_fsync_dnode(sbi, ino);
+               if (IS_ERR_OR_NULL(last_page))
+                       return PTR_ERR_OR_ZERO(last_page);
+       }
+retry:
        pagevec_init(&pvec, 0);
        index = 0;
        end = ULONG_MAX;
@@ -1245,6 +1313,7 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
                        struct page *page = pvec.pages[i];
 
                        if (unlikely(f2fs_cp_error(sbi))) {
+                               f2fs_put_page(last_page, 0);
                                pagevec_release(&pvec);
                                return -EIO;
                        }
@@ -1264,33 +1333,54 @@ continue_unlock:
                        if (ino_of_node(page) != ino)
                                goto continue_unlock;
 
-                       if (!PageDirty(page)) {
+                       if (!PageDirty(page) && page != last_page) {
                                /* someone wrote it for us */
                                goto continue_unlock;
                        }
 
                        f2fs_wait_on_page_writeback(page, NODE, true);
                        BUG_ON(PageWriteback(page));
-                       if (!clear_page_dirty_for_io(page))
-                               goto continue_unlock;
 
-                       set_fsync_mark(page, 1);
-                       if (IS_INODE(page))
-                               set_dentry_mark(page,
+                       if (!atomic || page == last_page) {
+                               set_fsync_mark(page, 1);
+                               if (IS_INODE(page))
+                                       set_dentry_mark(page,
                                                need_dentry_mark(sbi, ino));
+                               /* may be written by other thread */
+                               if (!PageDirty(page))
+                                       set_page_dirty(page);
+                       }
+
+                       if (!clear_page_dirty_for_io(page))
+                               goto continue_unlock;
 
                        ret = NODE_MAPPING(sbi)->a_ops->writepage(page, wbc);
                        if (ret) {
                                unlock_page(page);
+                               f2fs_put_page(last_page, 0);
+                               break;
+                       }
+                       if (page == last_page) {
+                               f2fs_put_page(page, 0);
+                               marked = true;
                                break;
                        }
                }
                pagevec_release(&pvec);
                cond_resched();
 
-               if (ret)
+               if (ret || marked)
                        break;
        }
+       if (!ret && atomic && !marked) {
+               f2fs_msg(sbi->sb, KERN_DEBUG,
+                       "Retry to write fsync mark: ino=%u, idx=%lx",
+                                       ino, last_page->index);
+               lock_page(last_page);
+               set_page_dirty(last_page);
+               unlock_page(last_page);
+               goto retry;
+       }
        return ret ? -EIO: 0;
 }
 
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 2c87c12b6f1c4f1e23d7c63b08d3363fd1b8244e..a646d3ba3b25de1aa827c72f61312568a0f8aa66 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -257,11 +257,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
                }
                entry->blkaddr = blkaddr;
 
-               if (IS_INODE(page)) {
-                       entry->last_inode = blkaddr;
-                       if (is_dent_dnode(page))
-                               entry->last_dentry = blkaddr;
-               }
+               if (IS_INODE(page) && is_dent_dnode(page))
+                       entry->last_dentry = blkaddr;
 next:
                /* check next segment */
                blkaddr = next_blkaddr_of_node(page);
@@ -521,7 +518,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head)
                 * In this case, we can lose the latest inode(x).
                 * So, call recover_inode for the inode update.
                 */
-               if (entry->last_inode == blkaddr)
+               if (IS_INODE(page))
                        recover_inode(entry->inode, page);
                if (entry->last_dentry == blkaddr) {
                        err = recover_dentry(entry->inode, page);