FROMLIST: f2fs: early updates queued for v4.18-rc1
author Jaegeuk Kim <jaegeuk@kernel.org>
Thu, 24 May 2018 20:57:26 +0000 (13:57 -0700)
committer Jaegeuk Kim <jaegeuk@google.com>
Thu, 31 May 2018 02:43:19 +0000 (19:43 -0700)
Cherry-picked from:
  origin/upstream-f2fs-stable-linux-4.14.y

72497b8d5271 ("f2fs: turn down IO priority of discard from background")
cfe6e2f69eef ("f2fs: don't split checkpoint in fstrim")
596c97672db2 ("f2fs: issue discard commands proactively in high fs utilization")
0fbb04e9980a ("f2fs: add fsync_mode=nobarrier for non-atomic files")
13e346436f4e ("f2fs: let fstrim issue discard commands in lower priority")

Signed-off-by: Jaegeuk Kim <jaegeuk@google.com>
Documentation/ABI/testing/sysfs-fs-f2fs
Documentation/filesystems/f2fs.txt
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/segment.c
fs/f2fs/super.c
fs/f2fs/sysfs.c

index 540553c933b6197e51cd81367ffbc16fbd24273d..372b88f4e706250123ef30b8a851b8c8bd02b8d2 100644 (file)
@@ -101,6 +101,7 @@ Date:               February 2015
 Contact:       "Jaegeuk Kim" <jaegeuk@kernel.org>
 Description:
                 Controls the trimming rate in batch mode.
+                <deprecated>
 
 What:          /sys/fs/f2fs/<disk>/cp_interval
 Date:          October 2015
index 4b6cf4c5e06168023db327035f149b8f5ca584a6..37d8698ca2d656e62f970b342ad84528b914a3c1 100644 (file)
@@ -182,13 +182,15 @@ whint_mode=%s          Control which write hints are passed down to block
                        passes down hints with its policy.
 alloc_mode=%s          Adjust block allocation policy, which supports "reuse"
                        and "default".
-fsync_mode=%s          Control the policy of fsync. Currently supports "posix"
-                       and "strict". In "posix" mode, which is default, fsync
-                       will follow POSIX semantics and does a light operation
-                       to improve the filesystem performance. In "strict" mode,
-                       fsync will be heavy and behaves in line with xfs, ext4
-                       and btrfs, where xfstest generic/342 will pass, but the
-                       performance will regress.
+fsync_mode=%s          Control the policy of fsync. Currently supports "posix",
+                       "strict", and "nobarrier". In "posix" mode, which is
+                       default, fsync will follow POSIX semantics and do a
+                       light operation to improve the filesystem performance.
+                       In "strict" mode, fsync will be heavy and behave in line
+                       with xfs, ext4 and btrfs, where xfstest generic/342 will
+                       pass, but the performance will regress. "nobarrier" is
+                       based on "posix", but doesn't issue flush commands for
+                       non-atomic files, like the "nobarrier" mount option.
 test_dummy_encryption  Enable dummy encryption, which provides a fake fscrypt
                        context. The fake fscrypt context is used by xfstests.
 
index f32a0c79702f998d251230871018594f1c1faa07..534872d064bc9d11242a76a81a6f1495042f2d45 100644 (file)
@@ -176,15 +176,12 @@ enum {
 #define        CP_DISCARD      0x00000010
 #define CP_TRIMMED     0x00000020
 
-#define DEF_BATCHED_TRIM_SECTIONS      2048
-#define BATCHED_TRIM_SEGMENTS(sbi)     \
-               (GET_SEG_FROM_SEC(sbi, SM_I(sbi)->trim_sections))
-#define BATCHED_TRIM_BLOCKS(sbi)       \
-               (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
 #define MAX_DISCARD_BLOCKS(sbi)                BLKS_PER_SEC(sbi)
 #define DEF_MAX_DISCARD_REQUEST                8       /* issue 8 discards per round */
+#define DEF_MAX_DISCARD_LEN            512     /* Max. 2MB per discard */
 #define DEF_MIN_DISCARD_ISSUE_TIME     50      /* 50 ms, if exists */
 #define DEF_MAX_DISCARD_ISSUE_TIME     60000   /* 60 s, if no candidates */
+#define DEF_DISCARD_URGENT_UTIL                80      /* do more discard over 80% */
 #define DEF_CP_INTERVAL                        60      /* 60 secs */
 #define DEF_IDLE_INTERVAL              5       /* 5 secs */
 
@@ -694,7 +691,8 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
 static inline bool __is_discard_mergeable(struct discard_info *back,
                                                struct discard_info *front)
 {
-       return back->lstart + back->len == front->lstart;
+       return (back->lstart + back->len == front->lstart) &&
+               (back->len + front->len < DEF_MAX_DISCARD_LEN);
 }
 
 static inline bool __is_discard_back_mergeable(struct discard_info *cur,
@@ -1080,6 +1078,7 @@ enum {
 enum fsync_mode {
        FSYNC_MODE_POSIX,       /* fsync follows posix semantics */
        FSYNC_MODE_STRICT,      /* fsync behaves in line with ext4 */
+       FSYNC_MODE_NOBARRIER,   /* fsync behaves nobarrier based on posix */
 };
 
 #ifdef CONFIG_F2FS_FS_ENCRYPTION
@@ -2774,8 +2773,6 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi);
 void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free);
 void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
 bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
-void init_discard_policy(struct discard_policy *dpolicy, int discard_type,
-                                               unsigned int granularity);
 void drop_discard_cmd(struct f2fs_sb_info *sbi);
 void stop_discard_thread(struct f2fs_sb_info *sbi);
 bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
index cb231b004e6104b2bf50e7c438ffc24676be0217..ed72fc2cc68de5fda4430da920114a0dd27b35a6 100644 (file)
@@ -306,7 +306,7 @@ sync_nodes:
        remove_ino_entry(sbi, ino, APPEND_INO);
        clear_inode_flag(inode, FI_APPEND_WRITE);
 flush_out:
-       if (!atomic)
+       if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER)
                ret = f2fs_issue_flush(sbi, inode->i_ino);
        if (!ret) {
                remove_ino_entry(sbi, ino, UPDATE_INO);
index 1e365e9138586a1c945a36689b53609c57e27c1e..bef74d628f66990853632041b5b54b460bb71219 100644 (file)
@@ -915,6 +915,39 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
 #endif
 }
 
+static void __init_discard_policy(struct f2fs_sb_info *sbi,
+                               struct discard_policy *dpolicy,
+                               int discard_type, unsigned int granularity)
+{
+       /* common policy */
+       dpolicy->type = discard_type;
+       dpolicy->sync = true;
+       dpolicy->granularity = granularity;
+
+       dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
+       dpolicy->io_aware_gran = MAX_PLIST_NUM;
+
+       if (discard_type == DPOLICY_BG) {
+               dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+               dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
+               dpolicy->io_aware = true;
+               dpolicy->sync = false;
+               if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
+                       dpolicy->granularity = 1;
+                       dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+               }
+       } else if (discard_type == DPOLICY_FORCE) {
+               dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+               dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
+               dpolicy->io_aware = false;
+       } else if (discard_type == DPOLICY_FSTRIM) {
+               dpolicy->io_aware = false;
+       } else if (discard_type == DPOLICY_UMOUNT) {
+               dpolicy->io_aware = false;
+       }
+}
+
+
 /* this function is copied from blkdev_issue_discard from block/blk-lib.c */
 static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
                                                struct discard_policy *dpolicy,
@@ -1130,68 +1163,6 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
        return 0;
 }
 
-static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
-                                       struct discard_policy *dpolicy,
-                                       unsigned int start, unsigned int end)
-{
-       struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
-       struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
-       struct rb_node **insert_p = NULL, *insert_parent = NULL;
-       struct discard_cmd *dc;
-       struct blk_plug plug;
-       int issued;
-
-next:
-       issued = 0;
-
-       mutex_lock(&dcc->cmd_lock);
-       f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
-
-       dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
-                                       NULL, start,
-                                       (struct rb_entry **)&prev_dc,
-                                       (struct rb_entry **)&next_dc,
-                                       &insert_p, &insert_parent, true);
-       if (!dc)
-               dc = next_dc;
-
-       blk_start_plug(&plug);
-
-       while (dc && dc->lstart <= end) {
-               struct rb_node *node;
-
-               if (dc->len < dpolicy->granularity)
-                       goto skip;
-
-               if (dc->state != D_PREP) {
-                       list_move_tail(&dc->list, &dcc->fstrim_list);
-                       goto skip;
-               }
-
-               __submit_discard_cmd(sbi, dpolicy, dc);
-
-               if (++issued >= dpolicy->max_requests) {
-                       start = dc->lstart + dc->len;
-
-                       blk_finish_plug(&plug);
-                       mutex_unlock(&dcc->cmd_lock);
-
-                       schedule();
-
-                       goto next;
-               }
-skip:
-               node = rb_next(&dc->rb_node);
-               dc = rb_entry_safe(node, struct discard_cmd, rb_node);
-
-               if (fatal_signal_pending(current))
-                       break;
-       }
-
-       blk_finish_plug(&plug);
-       mutex_unlock(&dcc->cmd_lock);
-}
-
 static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
                                        struct discard_policy *dpolicy)
 {
@@ -1332,7 +1303,18 @@ next:
 static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
                                                struct discard_policy *dpolicy)
 {
-       __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
+       struct discard_policy dp;
+
+       if (dpolicy) {
+               __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
+               return;
+       }
+
+       /* wait all */
+       __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
+       __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
+       __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
+       __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
 }
 
 /* This should be covered by global mutex, &sit_i->sentry_lock */
@@ -1377,11 +1359,13 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
        struct discard_policy dpolicy;
        bool dropped;
 
-       init_discard_policy(&dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity);
+       __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
+                                       dcc->discard_granularity);
        __issue_discard_cmd(sbi, &dpolicy);
        dropped = __drop_discard_cmd(sbi);
-       __wait_all_discard_cmd(sbi, &dpolicy);
 
+       /* just to make sure there are no pending discard commands */
+       __wait_all_discard_cmd(sbi, NULL);
        return dropped;
 }
 
@@ -1397,7 +1381,7 @@ static int issue_discard_thread(void *data)
        set_freezable();
 
        do {
-               init_discard_policy(&dpolicy, DPOLICY_BG,
+               __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
                                        dcc->discard_granularity);
 
                wait_event_interruptible_timeout(*q,
@@ -1415,7 +1399,7 @@ static int issue_discard_thread(void *data)
                        dcc->discard_wake = 0;
 
                if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
-                       init_discard_policy(&dpolicy, DPOLICY_FORCE, 1);
+                       __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
 
                sb_start_intwrite(sbi->sb);
 
@@ -1708,32 +1692,6 @@ skip:
        wake_up_discard_thread(sbi, false);
 }
 
-void init_discard_policy(struct discard_policy *dpolicy,
-                               int discard_type, unsigned int granularity)
-{
-       /* common policy */
-       dpolicy->type = discard_type;
-       dpolicy->sync = true;
-       dpolicy->granularity = granularity;
-
-       dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
-       dpolicy->io_aware_gran = MAX_PLIST_NUM;
-
-       if (discard_type == DPOLICY_BG) {
-               dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
-               dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
-               dpolicy->io_aware = true;
-       } else if (discard_type == DPOLICY_FORCE) {
-               dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
-               dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
-               dpolicy->io_aware = false;
-       } else if (discard_type == DPOLICY_FSTRIM) {
-               dpolicy->io_aware = false;
-       } else if (discard_type == DPOLICY_UMOUNT) {
-               dpolicy->io_aware = false;
-       }
-}
-
 static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
 {
        dev_t dev = sbi->sb->s_bdev->bd_dev;
@@ -2373,11 +2331,72 @@ bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        return has_candidate;
 }
 
+static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
+                                       struct discard_policy *dpolicy,
+                                       unsigned int start, unsigned int end)
+{
+       struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+       struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
+       struct rb_node **insert_p = NULL, *insert_parent = NULL;
+       struct discard_cmd *dc;
+       struct blk_plug plug;
+       int issued;
+
+next:
+       issued = 0;
+
+       mutex_lock(&dcc->cmd_lock);
+       f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
+
+       dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
+                                       NULL, start,
+                                       (struct rb_entry **)&prev_dc,
+                                       (struct rb_entry **)&next_dc,
+                                       &insert_p, &insert_parent, true);
+       if (!dc)
+               dc = next_dc;
+
+       blk_start_plug(&plug);
+
+       while (dc && dc->lstart <= end) {
+               struct rb_node *node;
+
+               if (dc->len < dpolicy->granularity)
+                       goto skip;
+
+               if (dc->state != D_PREP) {
+                       list_move_tail(&dc->list, &dcc->fstrim_list);
+                       goto skip;
+               }
+
+               __submit_discard_cmd(sbi, dpolicy, dc);
+
+               if (++issued >= dpolicy->max_requests) {
+                       start = dc->lstart + dc->len;
+
+                       blk_finish_plug(&plug);
+                       mutex_unlock(&dcc->cmd_lock);
+                       __wait_all_discard_cmd(sbi, NULL);
+                       congestion_wait(BLK_RW_ASYNC, HZ/50);
+                       goto next;
+               }
+skip:
+               node = rb_next(&dc->rb_node);
+               dc = rb_entry_safe(node, struct discard_cmd, rb_node);
+
+               if (fatal_signal_pending(current))
+                       break;
+       }
+
+       blk_finish_plug(&plug);
+       mutex_unlock(&dcc->cmd_lock);
+}
+
 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
 {
        __u64 start = F2FS_BYTES_TO_BLK(range->start);
        __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
-       unsigned int start_segno, end_segno, cur_segno;
+       unsigned int start_segno, end_segno;
        block_t start_block, end_block;
        struct cp_control cpc;
        struct discard_policy dpolicy;
@@ -2403,40 +2422,27 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
 
        cpc.reason = CP_DISCARD;
        cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
+       cpc.trim_start = start_segno;
+       cpc.trim_end = end_segno;
 
-       /* do checkpoint to issue discard commands safely */
-       for (cur_segno = start_segno; cur_segno <= end_segno;
-                                       cur_segno = cpc.trim_end + 1) {
-               cpc.trim_start = cur_segno;
-
-               if (sbi->discard_blks == 0)
-                       break;
-               else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi))
-                       cpc.trim_end = end_segno;
-               else
-                       cpc.trim_end = min_t(unsigned int,
-                               rounddown(cur_segno +
-                               BATCHED_TRIM_SEGMENTS(sbi),
-                               sbi->segs_per_sec) - 1, end_segno);
-
-               mutex_lock(&sbi->gc_mutex);
-               err = write_checkpoint(sbi, &cpc);
-               mutex_unlock(&sbi->gc_mutex);
-               if (err)
-                       break;
+       if (sbi->discard_blks == 0)
+               goto out;
 
-               schedule();
-       }
+       mutex_lock(&sbi->gc_mutex);
+       err = write_checkpoint(sbi, &cpc);
+       mutex_unlock(&sbi->gc_mutex);
+       if (err)
+               goto out;
 
        start_block = START_BLOCK(sbi, start_segno);
-       end_block = START_BLOCK(sbi, min(cur_segno, end_segno) + 1);
+       end_block = START_BLOCK(sbi, end_segno + 1);
 
-       init_discard_policy(&dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
+       __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
        __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block);
        trimmed = __wait_discard_cmd_range(sbi, &dpolicy,
                                        start_block, end_block);
-out:
        range->len = F2FS_BLK_TO_BYTES(trimmed);
+out:
        return err;
 }
 
@@ -3823,8 +3829,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
        sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
        sm_info->min_ssr_sections = reserved_sections(sbi);
 
-       sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
-
        INIT_LIST_HEAD(&sm_info->sit_entry_set);
 
        init_rwsem(&sm_info->curseg_lock);
index 7cbddecdf41f7669d9fff9311a9875eda50af2d5..2b79c1a7a2f2a020e52c1274734712501dfdf2a0 100644 (file)
@@ -740,6 +740,10 @@ static int parse_options(struct super_block *sb, char *options)
                        } else if (strlen(name) == 6 &&
                                        !strncmp(name, "strict", 6)) {
                                F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT;
+                       } else if (strlen(name) == 9 &&
+                                       !strncmp(name, "nobarrier", 9)) {
+                               F2FS_OPTION(sbi).fsync_mode =
+                                                       FSYNC_MODE_NOBARRIER;
                        } else {
                                kfree(name);
                                return -EINVAL;
index f33a56d6e6dd7916feee0206bfadce16f873dc78..2c53de9251becae26f05e13d06acc23239beb3ca 100644 (file)
@@ -245,6 +245,9 @@ out:
                return count;
        }
 
+       if (!strcmp(a->attr.name, "trim_sections"))
+               return -EINVAL;
+
        *ui = t;
 
        if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)