Merge 4.14.86 into android-4.14-p
[GitHub/LineageOS/android_kernel_motorola_exynos9610.git] / fs / f2fs / segment.c
index 5c698757e116dac68e005b1f5abddcfcdc8b1302..ae37019e88868dff86869df2747ce9872a49d60f 100644 (file)
@@ -181,11 +181,12 @@ bool need_SSR(struct f2fs_sb_info *sbi)
                return true;
 
        return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
-                                               2 * reserved_sections(sbi));
+                       SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
 }
 
 void register_inmem_page(struct inode *inode, struct page *page)
 {
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct f2fs_inode_info *fi = F2FS_I(inode);
        struct inmem_pages *new;
 
@@ -204,6 +205,10 @@ void register_inmem_page(struct inode *inode, struct page *page)
        mutex_lock(&fi->inmem_lock);
        get_page(page);
        list_add_tail(&new->list, &fi->inmem_pages);
+       spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+       if (list_empty(&fi->inmem_ilist))
+               list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
+       spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
        inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
        mutex_unlock(&fi->inmem_lock);
 
@@ -245,7 +250,11 @@ retry:
                                goto next;
                        }
                        get_node_info(sbi, dn.nid, &ni);
-                       f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
+                       if (cur->old_addr == NEW_ADDR) {
+                               invalidate_blocks(sbi, dn.data_blkaddr);
+                               f2fs_update_data_blkaddr(&dn, NEW_ADDR);
+                       } else
+                               f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
                                        cur->old_addr, ni.version, true, true);
                        f2fs_put_dnode(&dn);
                }
@@ -264,12 +273,41 @@ next:
        return err;
 }
 
+void drop_inmem_pages_all(struct f2fs_sb_info *sbi)
+{ /* Drop the in-memory pages of every inode found on sbi's ATOMIC_FILE inode list. */
+       struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
+       struct inode *inode;
+       struct f2fs_inode_info *fi;
+next:
+       spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+       if (list_empty(head)) { /* list drained: the only exit from this loop */
+               spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+               return;
+       }
+       fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
+       inode = igrab(&fi->vfs_inode); /* a NULL result is skipped below */
+       spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); /* released first: drop_inmem_pages() takes fi->inmem_lock, a mutex */
+
+       if (inode) {
+               drop_inmem_pages(inode); /* also unlinks fi->inmem_ilist from the list */
+               iput(inode);
+       }
+       congestion_wait(BLK_RW_ASYNC, HZ/50); /* back off before looking at the list head again */
+       cond_resched();
+       goto next;
+}
+
 void drop_inmem_pages(struct inode *inode)
 {
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct f2fs_inode_info *fi = F2FS_I(inode);
 
        mutex_lock(&fi->inmem_lock);
        __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
+       spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+       if (!list_empty(&fi->inmem_ilist))
+               list_del_init(&fi->inmem_ilist);
+       spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
        mutex_unlock(&fi->inmem_lock);
 
        clear_inode_flag(inode, FI_ATOMIC_FILE);
@@ -315,6 +353,7 @@ static int __commit_inmem_pages(struct inode *inode,
        struct inmem_pages *cur, *tmp;
        struct f2fs_io_info fio = {
                .sbi = sbi,
+               .ino = inode->i_ino,
                .type = DATA,
                .op = REQ_OP_WRITE,
                .op_flags = REQ_SYNC | REQ_PRIO,
@@ -400,6 +439,10 @@ int commit_inmem_pages(struct inode *inode)
                /* drop all uncommitted pages */
                __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
        }
+       spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+       if (!list_empty(&fi->inmem_ilist))
+               list_del_init(&fi->inmem_ilist);
+       spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
        mutex_unlock(&fi->inmem_lock);
 
        clear_inode_flag(inode, FI_ATOMIC_COMMIT);
@@ -477,7 +520,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
 static int __submit_flush_wait(struct f2fs_sb_info *sbi,
                                struct block_device *bdev)
 {
-       struct bio *bio = f2fs_bio_alloc(0);
+       struct bio *bio = f2fs_bio_alloc(sbi, 0, true);
        int ret;
 
        bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
@@ -490,15 +533,17 @@ static int __submit_flush_wait(struct f2fs_sb_info *sbi,
        return ret;
 }
 
-static int submit_flush_wait(struct f2fs_sb_info *sbi)
+static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
 {
-       int ret = __submit_flush_wait(sbi, sbi->sb->s_bdev);
+       int ret = 0;
        int i;
 
-       if (!sbi->s_ndevs || ret)
-               return ret;
+       if (!sbi->s_ndevs)
+               return __submit_flush_wait(sbi, sbi->sb->s_bdev);
 
-       for (i = 1; i < sbi->s_ndevs; i++) {
+       for (i = 0; i < sbi->s_ndevs; i++) {
+               if (!is_dirty_device(sbi, ino, i, FLUSH_INO))
+                       continue;
                ret = __submit_flush_wait(sbi, FDEV(i).bdev);
                if (ret)
                        break;
@@ -524,7 +569,9 @@ repeat:
                fcc->dispatch_list = llist_del_all(&fcc->issue_list);
                fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
 
-               ret = submit_flush_wait(sbi);
+               cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);
+
+               ret = submit_flush_wait(sbi, cmd->ino);
                atomic_inc(&fcc->issued_flush);
 
                llist_for_each_entry_safe(cmd, next,
@@ -542,7 +589,7 @@ repeat:
        goto repeat;
 }
 
-int f2fs_issue_flush(struct f2fs_sb_info *sbi)
+int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
 {
        struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
        struct flush_cmd cmd;
@@ -552,19 +599,20 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
                return 0;
 
        if (!test_opt(sbi, FLUSH_MERGE)) {
-               ret = submit_flush_wait(sbi);
+               ret = submit_flush_wait(sbi, ino);
                atomic_inc(&fcc->issued_flush);
                return ret;
        }
 
-       if (atomic_inc_return(&fcc->issing_flush) == 1) {
-               ret = submit_flush_wait(sbi);
+       if (atomic_inc_return(&fcc->issing_flush) == 1 || sbi->s_ndevs > 1) {
+               ret = submit_flush_wait(sbi, ino);
                atomic_dec(&fcc->issing_flush);
 
                atomic_inc(&fcc->issued_flush);
                return ret;
        }
 
+       cmd.ino = ino;
        init_completion(&cmd.wait);
 
        llist_add(&cmd.llnode, &fcc->issue_list);
@@ -588,7 +636,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
                } else {
                        struct flush_cmd *tmp, *next;
 
-                       ret = submit_flush_wait(sbi);
+                       ret = submit_flush_wait(sbi, ino);
 
                        llist_for_each_entry_safe(tmp, next, list, llnode) {
                                if (tmp == &cmd) {
@@ -618,7 +666,7 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
                goto init_thread;
        }
 
-       fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
+       fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
        if (!fcc)
                return -ENOMEM;
        atomic_set(&fcc->issued_flush, 0);
@@ -658,6 +706,28 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
        }
 }
 
+int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
+{ /* Flush each device 1..s_ndevs-1 whose dirty_device bit is set; returns 0 or the first flush error. */
+       int ret = 0, i;
+
+       if (!sbi->s_ndevs) /* s_ndevs == 0: nothing is flushed here */
+               return 0;
+
+       for (i = 1; i < sbi->s_ndevs; i++) { /* index 0 is not visited by this loop */
+               if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
+                       continue;
+               ret = __submit_flush_wait(sbi, FDEV(i).bdev);
+               if (ret) /* stop at the first error; bit i stays set */
+                       break;
+
+               spin_lock(&sbi->dev_lock);
+               f2fs_clear_bit(i, (char *)&sbi->dirty_device); /* cleared only after a successful flush */
+               spin_unlock(&sbi->dev_lock);
+       }
+
+       return ret;
+}
+
 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
                enum dirty_type dirty_type)
 {
@@ -799,6 +869,8 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
 
+       trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
+
        f2fs_bug_on(sbi, dc->ref);
 
        if (dc->error == -EOPNOTSUPP)
@@ -821,7 +893,7 @@ static void f2fs_submit_discard_endio(struct bio *bio)
        bio_put(bio);
 }
 
-void __check_sit_bitmap(struct f2fs_sb_info *sbi,
+static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
                                block_t start, block_t end)
 {
 #ifdef CONFIG_F2FS_CHECK_FS
@@ -848,12 +920,49 @@ void __check_sit_bitmap(struct f2fs_sb_info *sbi,
 #endif
 }
 
+static void __init_discard_policy(struct f2fs_sb_info *sbi,
+                               struct discard_policy *dpolicy,
+                               int discard_type, unsigned int granularity)
+{ /* Fill *dpolicy for discard_type, starting from the caller-supplied granularity. */
+       /* common policy */
+       dpolicy->type = discard_type;
+       dpolicy->sync = true; /* overridden to false for DPOLICY_BG below */
+       dpolicy->granularity = granularity;
+
+       dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
+       dpolicy->io_aware_gran = MAX_PLIST_NUM;
+
+       if (discard_type == DPOLICY_BG) {
+               dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+               dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
+               dpolicy->io_aware = true;
+               dpolicy->sync = false;
+               if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) { /* above the urgent threshold: granularity 1 and the short interval */
+                       dpolicy->granularity = 1;
+                       dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+               }
+       } else if (discard_type == DPOLICY_FORCE) {
+               dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+               dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
+               dpolicy->io_aware = false;
+       } else if (discard_type == DPOLICY_FSTRIM) { /* min/max_interval are not written for this type */
+               dpolicy->io_aware = false;
+       } else if (discard_type == DPOLICY_UMOUNT) { /* min/max_interval are not written for this type */
+               dpolicy->io_aware = false;
+       }
+}
+
+
 /* this function is copied from blkdev_issue_discard from block/blk-lib.c */
 static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
-                               struct discard_cmd *dc)
+                                               struct discard_policy *dpolicy,
+                                               struct discard_cmd *dc)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+       struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
+                                       &(dcc->fstrim_list) : &(dcc->wait_list);
        struct bio *bio = NULL;
+       int flag = dpolicy->sync ? REQ_SYNC : 0;
 
        if (dc->state != D_PREP)
                return;
@@ -872,9 +981,9 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
                if (bio) {
                        bio->bi_private = dc;
                        bio->bi_end_io = f2fs_submit_discard_endio;
-                       bio->bi_opf |= REQ_SYNC;
+                       bio->bi_opf |= flag;
                        submit_bio(bio);
-                       list_move_tail(&dc->list, &dcc->wait_list);
+                       list_move_tail(&dc->list, wait_list);
                        __check_sit_bitmap(sbi, dc->start, dc->start + dc->len);
 
                        f2fs_update_iostat(sbi, FS_DISCARD, 1);
@@ -891,7 +1000,7 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
                                struct rb_node *insert_parent)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
-       struct rb_node **p = &dcc->root.rb_node;
+       struct rb_node **p;
        struct rb_node *parent = NULL;
        struct discard_cmd *dc = NULL;
 
@@ -1059,58 +1168,48 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
        return 0;
 }
 
-static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond)
+static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
+                                       struct discard_policy *dpolicy)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct list_head *pend_list;
        struct discard_cmd *dc, *tmp;
        struct blk_plug plug;
-       int iter = 0, issued = 0;
-       int i;
+       int i, iter = 0, issued = 0;
        bool io_interrupted = false;
 
-       mutex_lock(&dcc->cmd_lock);
-       f2fs_bug_on(sbi,
-               !__check_rb_tree_consistence(sbi, &dcc->root));
-       blk_start_plug(&plug);
-       for (i = MAX_PLIST_NUM - 1;
-                       i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) {
+       for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
+               if (i + 1 < dpolicy->granularity)
+                       break;
                pend_list = &dcc->pend_list[i];
+
+               mutex_lock(&dcc->cmd_lock);
+               if (list_empty(pend_list))
+                       goto next;
+               f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
+               blk_start_plug(&plug);
                list_for_each_entry_safe(dc, tmp, pend_list, list) {
                        f2fs_bug_on(sbi, dc->state != D_PREP);
 
-                       /* Hurry up to finish fstrim */
-                       if (dcc->pend_list_tag[i] & P_TRIM) {
-                               __submit_discard_cmd(sbi, dc);
-                               issued++;
-
-                               if (fatal_signal_pending(current))
-                                       break;
-                               continue;
-                       }
-
-                       if (!issue_cond) {
-                               __submit_discard_cmd(sbi, dc);
-                               issued++;
-                               continue;
-                       }
-
-                       if (is_idle(sbi)) {
-                               __submit_discard_cmd(sbi, dc);
-                               issued++;
-                       } else {
+                       if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
+                                                               !is_idle(sbi)) {
                                io_interrupted = true;
+                               goto skip;
                        }
 
-                       if (++iter >= DISCARD_ISSUE_RATE)
-                               goto out;
+                       __submit_discard_cmd(sbi, dpolicy, dc);
+                       issued++;
+skip:
+                       if (++iter >= dpolicy->max_requests)
+                               break;
                }
-               if (list_empty(pend_list) && dcc->pend_list_tag[i] & P_TRIM)
-                       dcc->pend_list_tag[i] &= (~P_TRIM);
+               blk_finish_plug(&plug);
+next:
+               mutex_unlock(&dcc->cmd_lock);
+
+               if (iter >= dpolicy->max_requests)
+                       break;
        }
-out:
-       blk_finish_plug(&plug);
-       mutex_unlock(&dcc->cmd_lock);
 
        if (!issued && io_interrupted)
                issued = -1;
@@ -1118,12 +1217,13 @@ out:
        return issued;
 }
 
-static void __drop_discard_cmd(struct f2fs_sb_info *sbi)
+static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct list_head *pend_list;
        struct discard_cmd *dc, *tmp;
        int i;
+       bool dropped = false;
 
        mutex_lock(&dcc->cmd_lock);
        for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
@@ -1131,39 +1231,63 @@ static void __drop_discard_cmd(struct f2fs_sb_info *sbi)
                list_for_each_entry_safe(dc, tmp, pend_list, list) {
                        f2fs_bug_on(sbi, dc->state != D_PREP);
                        __remove_discard_cmd(sbi, dc);
+                       dropped = true;
                }
        }
        mutex_unlock(&dcc->cmd_lock);
+
+       return dropped;
 }
 
-static void __wait_one_discard_bio(struct f2fs_sb_info *sbi,
+void drop_discard_cmd(struct f2fs_sb_info *sbi)
+{ /* Non-static wrapper around __drop_discard_cmd(); its bool result is discarded. */
+       __drop_discard_cmd(sbi);
+}
+
+static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
                                                        struct discard_cmd *dc)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+       unsigned int len = 0;
 
        wait_for_completion_io(&dc->wait);
        mutex_lock(&dcc->cmd_lock);
        f2fs_bug_on(sbi, dc->state != D_DONE);
        dc->ref--;
-       if (!dc->ref)
+       if (!dc->ref) {
+               if (!dc->error)
+                       len = dc->len;
                __remove_discard_cmd(sbi, dc);
+       }
        mutex_unlock(&dcc->cmd_lock);
+
+       return len;
 }
 
-static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond)
+static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
+                                               struct discard_policy *dpolicy,
+                                               block_t start, block_t end)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
-       struct list_head *wait_list = &(dcc->wait_list);
+       struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
+                                       &(dcc->fstrim_list) : &(dcc->wait_list);
        struct discard_cmd *dc, *tmp;
        bool need_wait;
+       unsigned int trimmed = 0;
 
 next:
        need_wait = false;
 
        mutex_lock(&dcc->cmd_lock);
        list_for_each_entry_safe(dc, tmp, wait_list, list) {
-               if (!wait_cond || (dc->state == D_DONE && !dc->ref)) {
+               if (dc->lstart + dc->len <= start || end <= dc->lstart)
+                       continue;
+               if (dc->len < dpolicy->granularity)
+                       continue;
+               if (dc->state == D_DONE && !dc->ref) {
                        wait_for_completion_io(&dc->wait);
+                       if (!dc->error)
+                               trimmed += dc->len;
                        __remove_discard_cmd(sbi, dc);
                } else {
                        dc->ref++;
@@ -1174,13 +1298,32 @@ next:
        mutex_unlock(&dcc->cmd_lock);
 
        if (need_wait) {
-               __wait_one_discard_bio(sbi, dc);
+               trimmed += __wait_one_discard_bio(sbi, dc);
                goto next;
        }
+
+       return trimmed;
+}
+
+static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
+                                               struct discard_policy *dpolicy)
+{ /* Wait over block range [0, UINT_MAX) with dpolicy, or with two local policies when dpolicy is NULL. */
+       struct discard_policy dp;
+
+       if (dpolicy) {
+               __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
+               return;
+       }
+
+       /* wait all */
+       __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1); /* FSTRIM type makes the range waiter use dcc->fstrim_list */
+       __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
+       __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1); /* any other type makes it use dcc->wait_list */
+       __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
+}
 
 /* This should be covered by global mutex, &sit_i->sentry_lock */
-void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
+static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct discard_cmd *dc;
@@ -1214,23 +1357,21 @@ void stop_discard_thread(struct f2fs_sb_info *sbi)
        }
 }
 
-/* This comes from f2fs_put_super and f2fs_trim_fs */
-void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount)
-{
-       __issue_discard_cmd(sbi, false);
-       __drop_discard_cmd(sbi);
-       __wait_discard_cmd(sbi, !umount);
-}
-
-static void mark_discard_range_all(struct f2fs_sb_info *sbi)
+/* This comes from f2fs_put_super */
+bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
-       int i;
+       struct discard_policy dpolicy;
+       bool dropped;
 
-       mutex_lock(&dcc->cmd_lock);
-       for (i = 0; i < MAX_PLIST_NUM; i++)
-               dcc->pend_list_tag[i] |= P_TRIM;
-       mutex_unlock(&dcc->cmd_lock);
+       __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
+                                       dcc->discard_granularity);
+       __issue_discard_cmd(sbi, &dpolicy);
+       dropped = __drop_discard_cmd(sbi);
+
+       /* just to make sure there is no pending discard commands */
+       __wait_all_discard_cmd(sbi, NULL);
+       return dropped;
 }
 
 static int issue_discard_thread(void *data)
@@ -1238,35 +1379,41 @@ static int issue_discard_thread(void *data)
        struct f2fs_sb_info *sbi = data;
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        wait_queue_head_t *q = &dcc->discard_wait_queue;
+       struct discard_policy dpolicy;
        unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
        int issued;
 
        set_freezable();
 
        do {
+               __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
+                                       dcc->discard_granularity);
+
                wait_event_interruptible_timeout(*q,
                                kthread_should_stop() || freezing(current) ||
                                dcc->discard_wake,
                                msecs_to_jiffies(wait_ms));
                if (try_to_freeze())
                        continue;
+               if (f2fs_readonly(sbi->sb))
+                       continue;
                if (kthread_should_stop())
                        return 0;
 
-               if (dcc->discard_wake) {
+               if (dcc->discard_wake)
                        dcc->discard_wake = 0;
-                       if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
-                               mark_discard_range_all(sbi);
-               }
+
+               if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
+                       __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
 
                sb_start_intwrite(sbi->sb);
 
-               issued = __issue_discard_cmd(sbi, true);
+               issued = __issue_discard_cmd(sbi, &dpolicy);
                if (issued) {
-                       __wait_discard_cmd(sbi, true);
-                       wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
+                       __wait_all_discard_cmd(sbi, &dpolicy);
+                       wait_ms = dpolicy.min_interval;
                } else {
-                       wait_ms = DEF_MAX_DISCARD_ISSUE_TIME;
+                       wait_ms = dpolicy.max_interval;
                }
 
                sb_end_intwrite(sbi->sb);
@@ -1326,7 +1473,7 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi,
                struct block_device *bdev, block_t blkstart, block_t blklen)
 {
 #ifdef CONFIG_BLK_DEV_ZONED
-       if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
+       if (f2fs_sb_has_blkzoned(sbi->sb) &&
                                bdev_zoned_model(bdev) != BLK_ZONED_NONE)
                return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
 #endif
@@ -1524,13 +1671,12 @@ find_next:
                                        sbi->blocks_per_seg, cur_pos);
                        len = next_pos - cur_pos;
 
-                       if (f2fs_sb_mounted_blkzoned(sbi->sb) ||
+                       if (f2fs_sb_has_blkzoned(sbi->sb) ||
                            (force && len < cpc->trim_minlen))
                                goto skip;
 
                        f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
                                                                        len);
-                       cpc->trimmed += len;
                        total_len += len;
                } else {
                        next_pos = find_next_bit_le(entry->discard_map,
@@ -1562,18 +1708,16 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
                goto init_thread;
        }
 
-       dcc = kzalloc(sizeof(struct discard_cmd_control), GFP_KERNEL);
+       dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
        if (!dcc)
                return -ENOMEM;
 
        dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
        INIT_LIST_HEAD(&dcc->entry_list);
-       for (i = 0; i < MAX_PLIST_NUM; i++) {
+       for (i = 0; i < MAX_PLIST_NUM; i++)
                INIT_LIST_HEAD(&dcc->pend_list[i]);
-               if (i >= dcc->discard_granularity - 1)
-                       dcc->pend_list_tag[i] |= P_ACTIVE;
-       }
        INIT_LIST_HEAD(&dcc->wait_list);
+       INIT_LIST_HEAD(&dcc->fstrim_list);
        mutex_init(&dcc->cmd_lock);
        atomic_set(&dcc->issued_discard, 0);
        atomic_set(&dcc->issing_discard, 0);
@@ -1681,7 +1825,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
                        sbi->discard_blks--;
 
                /* don't overwrite by SSR to keep node chain */
-               if (se->type == CURSEG_WARM_NODE) {
+               if (IS_NODESEG(se->type)) {
                        if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
                                se->ckpt_valid_blocks++;
                }
@@ -1721,16 +1865,6 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
                get_sec_entry(sbi, segno)->valid_blocks += del;
 }
 
-void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
-{
-       update_sit_entry(sbi, new, 1);
-       if (GET_SEGNO(sbi, old) != NULL_SEGNO)
-               update_sit_entry(sbi, old, -1);
-
-       locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
-       locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
-}
-
 void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
 {
        unsigned int segno = GET_SEGNO(sbi, addr);
@@ -1741,14 +1875,14 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
                return;
 
        /* add it into sit main buffer */
-       mutex_lock(&sit_i->sentry_lock);
+       down_write(&sit_i->sentry_lock);
 
        update_sit_entry(sbi, addr, -1);
 
        /* add it into dirty seglist */
        locate_dirty_segment(sbi, segno);
 
-       mutex_unlock(&sit_i->sentry_lock);
+       up_write(&sit_i->sentry_lock);
 }
 
 bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
@@ -1761,7 +1895,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
        if (!is_valid_data_blkaddr(sbi, blkaddr))
                return true;
 
-       mutex_lock(&sit_i->sentry_lock);
+       down_read(&sit_i->sentry_lock);
 
        segno = GET_SEGNO(sbi, blkaddr);
        se = get_seg_entry(sbi, segno);
@@ -1770,7 +1904,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
        if (f2fs_test_bit(offset, se->ckpt_valid_map))
                is_cp = true;
 
-       mutex_unlock(&sit_i->sentry_lock);
+       up_read(&sit_i->sentry_lock);
 
        return is_cp;
 }
@@ -1828,12 +1962,8 @@ struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
 void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr)
 {
        struct page *page = grab_meta_page(sbi, blk_addr);
-       void *dst = page_address(page);
 
-       if (src)
-               memcpy(dst, src, PAGE_SIZE);
-       else
-               memset(dst, 0, PAGE_SIZE);
+       memcpy(page_address(page), src, PAGE_SIZE);
        set_page_dirty(page);
        f2fs_put_page(page, 1);
 }
@@ -1932,7 +2062,6 @@ find_other_zone:
        }
        secno = left_start;
 skip_left:
-       hint = secno;
        segno = GET_SEG_FROM_SEC(sbi, secno);
        zoneno = GET_ZONE_FROM_SEC(sbi, secno);
 
@@ -2003,6 +2132,11 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
 
        if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
                return SIT_I(sbi)->last_victim[ALLOC_NEXT];
+
+       /* find segments from 0 to reuse freed segments */
+       if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
+               return 0;
+
        return CURSEG_I(sbi, type)->segno;
 }
 
@@ -2168,12 +2302,16 @@ void allocate_new_segments(struct f2fs_sb_info *sbi)
        unsigned int old_segno;
        int i;
 
+       down_write(&SIT_I(sbi)->sentry_lock);
+
        for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
                curseg = CURSEG_I(sbi, i);
                old_segno = curseg->segno;
                SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
                locate_dirty_segment(sbi, old_segno);
        }
+
+       up_write(&SIT_I(sbi)->sentry_lock);
 }
 
 static const struct segment_allocation default_salloc_ops = {
@@ -2185,31 +2323,94 @@ bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        __u64 trim_start = cpc->trim_start;
        bool has_candidate = false;
 
-       mutex_lock(&SIT_I(sbi)->sentry_lock);
+       down_write(&SIT_I(sbi)->sentry_lock);
        for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
                if (add_discard_addrs(sbi, cpc, true)) {
                        has_candidate = true;
                        break;
                }
        }
-       mutex_unlock(&SIT_I(sbi)->sentry_lock);
+       up_write(&SIT_I(sbi)->sentry_lock);
 
        cpc->trim_start = trim_start;
        return has_candidate;
 }
 
+static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
+                                       struct discard_policy *dpolicy,
+                                       unsigned int start, unsigned int end)
+{ /* Walk dcc->root from 'start' while dc->lstart <= end, submitting D_PREP commands under dpolicy. */
+       struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+       struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
+       struct rb_node **insert_p = NULL, *insert_parent = NULL;
+       struct discard_cmd *dc;
+       struct blk_plug plug;
+       int issued;
+
+next:
+       issued = 0; /* per-pass counter, reset each time cmd_lock is retaken */
+
+       mutex_lock(&dcc->cmd_lock);
+       f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
+
+       dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
+                                       NULL, start,
+                                       (struct rb_entry **)&prev_dc,
+                                       (struct rb_entry **)&next_dc,
+                                       &insert_p, &insert_parent, true);
+       if (!dc) /* lookup found nothing at 'start': fall back to the next_dc it filled in */
+               dc = next_dc;
+
+       blk_start_plug(&plug);
+
+       while (dc && dc->lstart <= end) {
+               struct rb_node *node;
+
+               if (dc->len < dpolicy->granularity) /* shorter than the policy granularity: skip */
+                       goto skip;
+
+               if (dc->state != D_PREP) { /* not in D_PREP: move to fstrim_list instead of submitting */
+                       list_move_tail(&dc->list, &dcc->fstrim_list);
+                       goto skip;
+               }
+
+               __submit_discard_cmd(sbi, dpolicy, dc);
+
+               if (++issued >= dpolicy->max_requests) { /* batch limit reached: unlock, wait, then restart */
+                       start = dc->lstart + dc->len; /* next pass resumes just past this command */
+
+                       blk_finish_plug(&plug);
+                       mutex_unlock(&dcc->cmd_lock);
+                       __wait_all_discard_cmd(sbi, NULL); /* cmd_lock was released above; the range waiter takes it itself */
+                       congestion_wait(BLK_RW_ASYNC, HZ/50);
+                       goto next;
+               }
+skip:
+               node = rb_next(&dc->rb_node);
+               dc = rb_entry_safe(node, struct discard_cmd, rb_node);
+
+               if (fatal_signal_pending(current)) /* stop the walk early on a fatal signal */
+                       break;
+       }
+
+       blk_finish_plug(&plug);
+       mutex_unlock(&dcc->cmd_lock);
+}
+
 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
 {
        __u64 start = F2FS_BYTES_TO_BLK(range->start);
        __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
        unsigned int start_segno, end_segno;
+       block_t start_block, end_block;
        struct cp_control cpc;
+       struct discard_policy dpolicy;
+       unsigned long long trimmed = 0;
        int err = 0;
 
        if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
                return -EINVAL;
 
-       cpc.trimmed = 0;
        if (end <= MAIN_BLKADDR(sbi))
                goto out;
 
@@ -2223,36 +2424,39 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
        start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
        end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
                                                GET_SEGNO(sbi, end);
+
        cpc.reason = CP_DISCARD;
        cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
+       cpc.trim_start = start_segno;
+       cpc.trim_end = end_segno;
 
-       /* do checkpoint to issue discard commands safely */
-       for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
-               cpc.trim_start = start_segno;
+       if (sbi->discard_blks == 0)
+               goto out;
 
-               if (sbi->discard_blks == 0)
-                       break;
-               else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi))
-                       cpc.trim_end = end_segno;
-               else
-                       cpc.trim_end = min_t(unsigned int,
-                               rounddown(start_segno +
-                               BATCHED_TRIM_SEGMENTS(sbi),
-                               sbi->segs_per_sec) - 1, end_segno);
+       mutex_lock(&sbi->gc_mutex);
+       err = write_checkpoint(sbi, &cpc);
+       mutex_unlock(&sbi->gc_mutex);
+       if (err)
+               goto out;
 
-               mutex_lock(&sbi->gc_mutex);
-               err = write_checkpoint(sbi, &cpc);
-               mutex_unlock(&sbi->gc_mutex);
-               if (err)
-                       break;
+       start_block = START_BLOCK(sbi, start_segno);
+       end_block = START_BLOCK(sbi, end_segno + 1);
+
+       __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
+       __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block);
 
-               schedule();
+       /*
+        * Discard candidates have been filed. With the runtime discard mount
+        * option there is no need to wait for all of them here, since they
+        * will be issued in idle time. Only when that option is not set (the
+        * user relies on periodic fstrim instead) do we wait for the range.
+        */
+       if (!test_opt(sbi, DISCARD)) {
+               trimmed = __wait_discard_cmd_range(sbi, &dpolicy,
+                                       start_block, end_block);
+               range->len = F2FS_BLK_TO_BYTES(trimmed);
        }
-       /* It's time to issue all the filed discards */
-       mark_discard_range_all(sbi);
-       f2fs_wait_discard_bios(sbi, false);
 out:
-       range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
        return err;
 }
 
@@ -2264,6 +2468,113 @@ static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
        return false;
 }
 
+/*
+ * Translate a write-lifetime hint into a current-segment data type:
+ * WRITE_LIFE_SHORT selects the hot data log, WRITE_LIFE_EXTREME the cold
+ * data log, and every other hint falls back to the warm data log.
+ */
+int rw_hint_to_seg_type(enum rw_hint hint)
+{
+       if (hint == WRITE_LIFE_SHORT)
+               return CURSEG_HOT_DATA;
+       if (hint == WRITE_LIFE_EXTREME)
+               return CURSEG_COLD_DATA;
+       return CURSEG_WARM_DATA;
+}
+
+/* This returns write hints for each segment type. These hints will be
+ * passed down to the block layer. There are mapping tables which depend on
+ * the mount option 'whint_mode'.
+ *
+ * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
+ *
+ * 2) whint_mode=user-based. F2FS tries to pass down hints given by users.
+ *
+ * User                  F2FS                     Block
+ * ----                  ----                     -----
+ *                       META                     WRITE_LIFE_NOT_SET
+ *                       HOT_NODE                 "
+ *                       WARM_NODE                "
+ *                       COLD_NODE                "
+ * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
+ * extension list        "                        "
+ *
+ * -- buffered io
+ * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
+ * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
+ * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
+ * WRITE_LIFE_NONE       "                        "
+ * WRITE_LIFE_MEDIUM     "                        "
+ * WRITE_LIFE_LONG       "                        "
+ *
+ * -- direct io
+ * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
+ * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
+ * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
+ * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
+ * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
+ * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
+ *
+ * 3) whint_mode=fs-based. F2FS passes down hints with its policy.
+ *
+ * User                  F2FS                     Block
+ * ----                  ----                     -----
+ *                       META                     WRITE_LIFE_MEDIUM
+ *                       HOT_NODE                 WRITE_LIFE_NOT_SET
+ *                       WARM_NODE                "
+ *                       COLD_NODE                WRITE_LIFE_NONE
+ * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
+ * extension list        "                        "
+ *
+ * -- buffered io
+ * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
+ * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
+ * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_LONG
+ * WRITE_LIFE_NONE       "                        "
+ * WRITE_LIFE_MEDIUM     "                        "
+ * WRITE_LIFE_LONG       "                        "
+ *
+ * -- direct io
+ * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
+ * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
+ * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
+ * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
+ * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
+ * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
+ */
+
+enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi,
+                               enum page_type type, enum temp_type temp)
+{
+       switch (F2FS_OPTION(sbi).whint_mode) {
+       case WHINT_MODE_USER:
+               /* user-based: only hot/cold data gets an explicit hint */
+               if (type != DATA)
+                       break;
+               if (temp == HOT)
+                       return WRITE_LIFE_SHORT;
+               if (temp == COLD)
+                       return WRITE_LIFE_EXTREME;
+               break;
+       case WHINT_MODE_FS:
+               /* fs-based: hint is chosen from page type and temperature */
+               if (type == META)
+                       return WRITE_LIFE_MEDIUM;
+               if (type == NODE && temp == COLD)
+                       return WRITE_LIFE_NONE;
+               if (type != DATA)
+                       break;
+               if (temp == WARM)
+                       return WRITE_LIFE_LONG;
+               if (temp == HOT)
+                       return WRITE_LIFE_SHORT;
+               if (temp == COLD)
+                       return WRITE_LIFE_EXTREME;
+               break;
+       default:
+               break;
+       }
+
+       /* any other mode, or no specific mapping for this type/temp */
+       return WRITE_LIFE_NOT_SET;
+}
+
 static int __get_segment_type_2(struct f2fs_io_info *fio)
 {
        if (fio->type == DATA)
@@ -2296,9 +2607,10 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
 
                if (is_cold_data(fio->page) || file_is_cold(inode))
                        return CURSEG_COLD_DATA;
-               if (is_inode_flag_set(inode, FI_HOT_DATA))
+               if (file_is_hot(inode) ||
+                               is_inode_flag_set(inode, FI_HOT_DATA))
                        return CURSEG_HOT_DATA;
-               return CURSEG_WARM_DATA;
+               return rw_hint_to_seg_type(inode->i_write_hint);
        } else {
                if (IS_DNODE(fio->page))
                        return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
@@ -2311,7 +2623,7 @@ static int __get_segment_type(struct f2fs_io_info *fio)
 {
        int type = 0;
 
-       switch (fio->sbi->active_logs) {
+       switch (F2FS_OPTION(fio->sbi).active_logs) {
        case 2:
                type = __get_segment_type_2(fio);
                break;
@@ -2342,8 +2654,10 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
        struct sit_info *sit_i = SIT_I(sbi);
        struct curseg_info *curseg = CURSEG_I(sbi, type);
 
+       down_read(&SM_I(sbi)->curseg_lock);
+
        mutex_lock(&curseg->curseg_mutex);
-       mutex_lock(&sit_i->sentry_lock);
+       down_write(&sit_i->sentry_lock);
 
        *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
 
@@ -2360,15 +2674,26 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 
        stat_inc_block_count(sbi, curseg);
 
+       /*
+        * SIT information should be updated before segment allocation,
+        * since SSR needs latest valid block information.
+        */
+       update_sit_entry(sbi, *new_blkaddr, 1);
+       if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
+               update_sit_entry(sbi, old_blkaddr, -1);
+
        if (!__has_curseg_space(sbi, type))
                sit_i->s_ops->allocate_segment(sbi, type, false);
+
        /*
-        * SIT information should be updated after segment allocation,
-        * since we need to keep dirty segments precisely under SSR.
+        * segment dirty status should be updated after segment allocation,
+        * so we just need to update status only one time after previous
+        * segment being closed.
         */
-       refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
+       locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
+       locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
 
-       mutex_unlock(&sit_i->sentry_lock);
+       up_write(&sit_i->sentry_lock);
 
        if (page && IS_NODESEG(type)) {
                fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
@@ -2388,6 +2713,29 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
        }
 
        mutex_unlock(&curseg->curseg_mutex);
+
+       up_read(&SM_I(sbi)->curseg_lock);
+}
+
+/*
+ * After a successful write, remember which device received
+ * fio->new_blkaddr: once against the inode (FLUSH_INO) and once in the
+ * sbi->dirty_device bitmap. Does nothing when sbi->s_ndevs is zero.
+ */
+static void update_device_state(struct f2fs_io_info *fio)
+{
+       struct f2fs_sb_info *sbi = fio->sbi;
+       char *dirty_map = (char *)&sbi->dirty_device;
+       unsigned int dev;
+
+       if (sbi->s_ndevs == 0)
+               return;
+
+       dev = f2fs_target_device_index(sbi, fio->new_blkaddr);
+
+       /* update device state for fsync */
+       set_dirty_device(sbi, fio->ino, dev, FLUSH_INO);
+
+       /* update device state for checkpoint; skip the lock if already set */
+       if (f2fs_test_bit(dev, dirty_map))
+               return;
+
+       spin_lock(&sbi->dev_lock);
+       f2fs_set_bit(dev, dirty_map);
+       spin_unlock(&sbi->dev_lock);
 }
 
 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
@@ -2404,6 +2752,8 @@ reallocate:
        if (err == -EAGAIN) {
                fio->old_blkaddr = fio->new_blkaddr;
                goto reallocate;
+       } else if (!err) {
+               update_device_state(fio);
        }
 }
 
@@ -2413,6 +2763,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
        struct f2fs_io_info fio = {
                .sbi = sbi,
                .type = META,
+               .temp = HOT,
                .op = REQ_OP_WRITE,
                .op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
                .old_blkaddr = page->index,
@@ -2426,6 +2777,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
                fio.op_flags &= ~REQ_META;
 
        set_page_writeback(page);
+       ClearPageError(page);
        f2fs_submit_page_write(&fio);
 
        f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
@@ -2459,17 +2811,38 @@ void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio)
 int rewrite_data_page(struct f2fs_io_info *fio)
 {
        int err;
+       struct f2fs_sb_info *sbi = fio->sbi;
 
        fio->new_blkaddr = fio->old_blkaddr;
+       /* i/o temperature is needed for passing down write hints */
+       __get_segment_type(fio);
+
+       f2fs_bug_on(sbi, !IS_DATASEG(get_seg_entry(sbi,
+                       GET_SEGNO(sbi, fio->new_blkaddr))->type));
+
        stat_inc_inplace_blocks(fio->sbi);
 
        err = f2fs_submit_page_bio(fio);
+       if (!err)
+               update_device_state(fio);
 
        f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
 
        return err;
 }
 
+/*
+ * Scan the current-segment slots from CURSEG_HOT_DATA upward and return
+ * the index of the first one whose segno equals @segno, or NO_CHECK_TYPE
+ * when none of them matches.
+ */
+static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
+                                               unsigned int segno)
+{
+       int type = CURSEG_HOT_DATA;
+
+       while (type < NO_CHECK_TYPE && CURSEG_I(sbi, type)->segno != segno)
+               type++;
+
+       return type;
+}
+
 void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
                                block_t old_blkaddr, block_t new_blkaddr,
                                bool recover_curseg, bool recover_newaddr)
@@ -2485,6 +2858,8 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
        se = get_seg_entry(sbi, segno);
        type = se->type;
 
+       down_write(&SM_I(sbi)->curseg_lock);
+
        if (!recover_curseg) {
                /* for recovery flow */
                if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
@@ -2494,14 +2869,20 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
                                type = CURSEG_WARM_DATA;
                }
        } else {
-               if (!IS_CURSEG(sbi, segno))
+               if (IS_CURSEG(sbi, segno)) {
+                       /* se->type is volatile as SSR allocation */
+                       type = __f2fs_get_curseg(sbi, segno);
+                       f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
+               } else {
                        type = CURSEG_WARM_DATA;
+               }
        }
 
+       f2fs_bug_on(sbi, !IS_DATASEG(type));
        curseg = CURSEG_I(sbi, type);
 
        mutex_lock(&curseg->curseg_mutex);
-       mutex_lock(&sit_i->sentry_lock);
+       down_write(&sit_i->sentry_lock);
 
        old_cursegno = curseg->segno;
        old_blkoff = curseg->next_blkoff;
@@ -2533,8 +2914,9 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
                curseg->next_blkoff = old_blkoff;
        }
 
-       mutex_unlock(&sit_i->sentry_lock);
+       up_write(&sit_i->sentry_lock);
        mutex_unlock(&curseg->curseg_mutex);
+       up_write(&SM_I(sbi)->curseg_lock);
 }
 
 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
@@ -2581,7 +2963,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
        }
 }
 
-static int read_compacted_summaries(struct f2fs_sb_info *sbi)
+static void read_compacted_summaries(struct f2fs_sb_info *sbi)
 {
        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
        struct curseg_info *seg_i;
@@ -2638,7 +3020,6 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi)
                }
        }
        f2fs_put_page(page, 1);
-       return 0;
 }
 
 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
@@ -2684,13 +3065,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
                                ns->ofs_in_node = 0;
                        }
                } else {
-                       int err;
-
-                       err = restore_node_summary(sbi, segno, sum);
-                       if (err) {
-                               f2fs_put_page(new, 1);
-                               return err;
-                       }
+                       restore_node_summary(sbi, segno, sum);
                }
        }
 
@@ -2729,8 +3104,7 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
                                                        META_CP, true);
 
                /* restore for compacted data summary */
-               if (read_compacted_summaries(sbi))
-                       return -EINVAL;
+               read_compacted_summaries(sbi);
                type = CURSEG_HOT_NODE;
        }
 
@@ -2866,28 +3240,19 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
                                        unsigned int start)
 {
        struct sit_info *sit_i = SIT_I(sbi);
-       struct page *src_page, *dst_page;
+       struct page *page;
        pgoff_t src_off, dst_off;
-       void *src_addr, *dst_addr;
 
        src_off = current_sit_addr(sbi, start);
        dst_off = next_sit_addr(sbi, src_off);
 
-       /* get current sit block page without lock */
-       src_page = get_meta_page(sbi, src_off);
-       dst_page = grab_meta_page(sbi, dst_off);
-       f2fs_bug_on(sbi, PageDirty(src_page));
-
-       src_addr = page_address(src_page);
-       dst_addr = page_address(dst_page);
-       memcpy(dst_addr, src_addr, PAGE_SIZE);
-
-       set_page_dirty(dst_page);
-       f2fs_put_page(src_page, 1);
+       page = grab_meta_page(sbi, dst_off);
+       seg_info_to_sit_page(sbi, page, start);
 
+       set_page_dirty(page);
        set_to_next_sit(sit_i, start);
 
-       return dst_page;
+       return page;
 }
 
 static struct sit_entry_set *grab_sit_entry_set(void)
@@ -2988,7 +3353,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        bool to_journal = true;
        struct seg_entry *se;
 
-       mutex_lock(&sit_i->sentry_lock);
+       down_write(&sit_i->sentry_lock);
 
        if (!sit_i->dirty_sentries)
                goto out;
@@ -3082,7 +3447,7 @@ out:
 
                cpc->trim_start = trim_start;
        }
-       mutex_unlock(&sit_i->sentry_lock);
+       up_write(&sit_i->sentry_lock);
 
        set_prefree_as_free_segments(sbi);
 }
@@ -3096,52 +3461,54 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
        unsigned int bitmap_size;
 
        /* allocate memory for SIT information */
-       sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
+       sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
        if (!sit_i)
                return -ENOMEM;
 
        SM_I(sbi)->sit_info = sit_i;
 
-       sit_i->sentries = kvzalloc(MAIN_SEGS(sbi) *
+       sit_i->sentries = f2fs_kvzalloc(sbi, MAIN_SEGS(sbi) *
                                        sizeof(struct seg_entry), GFP_KERNEL);
        if (!sit_i->sentries)
                return -ENOMEM;
 
        bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
-       sit_i->dirty_sentries_bitmap = kvzalloc(bitmap_size, GFP_KERNEL);
+       sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, bitmap_size,
+                                                               GFP_KERNEL);
        if (!sit_i->dirty_sentries_bitmap)
                return -ENOMEM;
 
        for (start = 0; start < MAIN_SEGS(sbi); start++) {
                sit_i->sentries[start].cur_valid_map
-                       = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
+                       = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
                sit_i->sentries[start].ckpt_valid_map
-                       = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
+                       = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
                if (!sit_i->sentries[start].cur_valid_map ||
                                !sit_i->sentries[start].ckpt_valid_map)
                        return -ENOMEM;
 
 #ifdef CONFIG_F2FS_CHECK_FS
                sit_i->sentries[start].cur_valid_map_mir
-                       = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
+                       = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
                if (!sit_i->sentries[start].cur_valid_map_mir)
                        return -ENOMEM;
 #endif
 
                if (f2fs_discard_en(sbi)) {
                        sit_i->sentries[start].discard_map
-                               = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
+                               = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE,
+                                                               GFP_KERNEL);
                        if (!sit_i->sentries[start].discard_map)
                                return -ENOMEM;
                }
        }
 
-       sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
+       sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
        if (!sit_i->tmp_map)
                return -ENOMEM;
 
        if (sbi->segs_per_sec > 1) {
-               sit_i->sec_entries = kvzalloc(MAIN_SECS(sbi) *
+               sit_i->sec_entries = f2fs_kvzalloc(sbi, MAIN_SECS(sbi) *
                                        sizeof(struct sec_entry), GFP_KERNEL);
                if (!sit_i->sec_entries)
                        return -ENOMEM;
@@ -3175,7 +3542,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
        sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
        sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
        sit_i->mounted_time = ktime_get_real_seconds();
-       mutex_init(&sit_i->sentry_lock);
+       init_rwsem(&sit_i->sentry_lock);
        return 0;
 }
 
@@ -3185,19 +3552,19 @@ static int build_free_segmap(struct f2fs_sb_info *sbi)
        unsigned int bitmap_size, sec_bitmap_size;
 
        /* allocate memory for free segmap information */
-       free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
+       free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
        if (!free_i)
                return -ENOMEM;
 
        SM_I(sbi)->free_info = free_i;
 
        bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
-       free_i->free_segmap = kvmalloc(bitmap_size, GFP_KERNEL);
+       free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
        if (!free_i->free_segmap)
                return -ENOMEM;
 
        sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
-       free_i->free_secmap = kvmalloc(sec_bitmap_size, GFP_KERNEL);
+       free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
        if (!free_i->free_secmap)
                return -ENOMEM;
 
@@ -3218,7 +3585,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
        struct curseg_info *array;
        int i;
 
-       array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
+       array = f2fs_kzalloc(sbi, sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL);
        if (!array)
                return -ENOMEM;
 
@@ -3226,12 +3593,12 @@ static int build_curseg(struct f2fs_sb_info *sbi)
 
        for (i = 0; i < NR_CURSEG_TYPE; i++) {
                mutex_init(&array[i].curseg_mutex);
-               array[i].sum_blk = kzalloc(PAGE_SIZE, GFP_KERNEL);
+               array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
                if (!array[i].sum_blk)
                        return -ENOMEM;
                init_rwsem(&array[i].journal_rwsem);
-               array[i].journal = kzalloc(sizeof(struct f2fs_journal),
-                                                       GFP_KERNEL);
+               array[i].journal = f2fs_kzalloc(sbi,
+                               sizeof(struct f2fs_journal), GFP_KERNEL);
                if (!array[i].journal)
                        return -ENOMEM;
                array[i].segno = NULL_SEGNO;
@@ -3408,7 +3775,7 @@ static int init_victim_secmap(struct f2fs_sb_info *sbi)
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
        unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
 
-       dirty_i->victim_secmap = kvzalloc(bitmap_size, GFP_KERNEL);
+       dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
        if (!dirty_i->victim_secmap)
                return -ENOMEM;
        return 0;
@@ -3420,7 +3787,8 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi)
        unsigned int bitmap_size, i;
 
        /* allocate memory for dirty segments list information */
-       dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
+       dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
+                                                               GFP_KERNEL);
        if (!dirty_i)
                return -ENOMEM;
 
@@ -3430,7 +3798,8 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi)
        bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
 
        for (i = 0; i < NR_DIRTY_TYPE; i++) {
-               dirty_i->dirty_segmap[i] = kvzalloc(bitmap_size, GFP_KERNEL);
+               dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
+                                                               GFP_KERNEL);
                if (!dirty_i->dirty_segmap[i])
                        return -ENOMEM;
        }
@@ -3447,7 +3816,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
        struct sit_info *sit_i = SIT_I(sbi);
        unsigned int segno;
 
-       mutex_lock(&sit_i->sentry_lock);
+       down_write(&sit_i->sentry_lock);
 
        sit_i->min_mtime = LLONG_MAX;
 
@@ -3464,7 +3833,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
                        sit_i->min_mtime = mtime;
        }
        sit_i->max_mtime = get_mtime(sbi);
-       mutex_unlock(&sit_i->sentry_lock);
+       up_write(&sit_i->sentry_lock);
 }
 
 int build_segment_manager(struct f2fs_sb_info *sbi)
@@ -3474,7 +3843,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
        struct f2fs_sm_info *sm_info;
        int err;
 
-       sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
+       sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
        if (!sm_info)
                return -ENOMEM;
 
@@ -3497,11 +3866,12 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
        sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
        sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
        sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
-
-       sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
+       sm_info->min_ssr_sections = reserved_sections(sbi);
 
        INIT_LIST_HEAD(&sm_info->sit_entry_set);
 
+       init_rwsem(&sm_info->curseg_lock);
+
        if (!f2fs_readonly(sbi->sb)) {
                err = create_flush_cmd_control(sbi);
                if (err)