f2fs: split bio cache
authorJaegeuk Kim <jaegeuk@kernel.org>
Wed, 10 May 2017 18:18:25 +0000 (11:18 -0700)
committerJaegeuk Kim <jaegeuk@kernel.org>
Wed, 24 May 2017 04:05:39 +0000 (21:05 -0700)
Split DATA/NODE type bio cache according to different temperature,
so write IOs with the same temperature can be merged in corresponding
bio cache as much as possible, otherwise, different temperature write
IOs submitting into one bio cache will always cause split of bio.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
fs/f2fs/data.c
fs/f2fs/f2fs.h
fs/f2fs/gc.c
fs/f2fs/segment.c
fs/f2fs/segment.h
fs/f2fs/super.c
include/trace/events/f2fs.h

index 06bb2042385ea7e494afe4900361743b3489bf46..4ca0899621a0cb2fecac0e94ceb5ed3632f208e3 100644 (file)
@@ -282,27 +282,32 @@ static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
                                nid_t ino, pgoff_t idx, enum page_type type)
 {
        enum page_type btype = PAGE_TYPE_OF_BIO(type);
-       struct f2fs_bio_info *io = &sbi->write_io[btype];
-       bool ret;
+       enum temp_type temp;
+       struct f2fs_bio_info *io;
+       bool ret = false;
 
-       down_read(&io->io_rwsem);
-       ret = __has_merged_page(io, inode, ino, idx);
-       up_read(&io->io_rwsem);
+       for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
+               io = sbi->write_io[btype] + temp;
+
+               down_read(&io->io_rwsem);
+               ret = __has_merged_page(io, inode, ino, idx);
+               up_read(&io->io_rwsem);
+
+               /* TODO: use HOT temp only for meta pages now. */
+               if (ret || btype == META)
+                       break;
+       }
        return ret;
 }
 
 static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
-                               struct inode *inode, nid_t ino, pgoff_t idx,
-                               enum page_type type)
+                               enum page_type type, enum temp_type temp)
 {
        enum page_type btype = PAGE_TYPE_OF_BIO(type);
-       struct f2fs_bio_info *io = &sbi->write_io[btype];
+       struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
 
        down_write(&io->io_rwsem);
 
-       if (!__has_merged_page(io, inode, ino, idx))
-               goto out;
-
        /* change META to META_FLUSH in the checkpoint procedure */
        if (type >= META_FLUSH) {
                io->fio.type = META_FLUSH;
@@ -312,21 +317,38 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
                        io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
        }
        __submit_merged_bio(io);
-out:
        up_write(&io->io_rwsem);
 }
 
+static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
+                               struct inode *inode, nid_t ino, pgoff_t idx,
+                               enum page_type type, bool force)
+{
+       enum temp_type temp;
+
+       if (!force && !has_merged_page(sbi, inode, ino, idx, type))
+               return;
+
+       for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
+
+               __f2fs_submit_merged_write(sbi, type, temp);
+
+               /* TODO: use HOT temp only for meta pages now. */
+               if (type >= META)
+                       break;
+       }
+}
+
 void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
 {
-       __f2fs_submit_merged_write(sbi, NULL, 0, 0, type);
+       __submit_merged_write_cond(sbi, NULL, 0, 0, type, true);
 }
 
 void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
                                struct inode *inode, nid_t ino, pgoff_t idx,
                                enum page_type type)
 {
-       if (has_merged_page(sbi, inode, ino, idx, type))
-               __f2fs_submit_merged_write(sbi, inode, ino, idx, type);
+       __submit_merged_write_cond(sbi, inode, ino, idx, type, false);
 }
 
 void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
@@ -369,7 +391,7 @@ int f2fs_submit_page_write(struct f2fs_io_info *fio)
 {
        struct f2fs_sb_info *sbi = fio->sbi;
        enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
-       struct f2fs_bio_info *io = &sbi->write_io[btype];
+       struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
        struct page *bio_page;
        int err = 0;
 
@@ -405,8 +427,7 @@ alloc_new:
                io->fio = *fio;
        }
 
-       if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) <
-                                                       PAGE_SIZE) {
+       if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
                __submit_merged_bio(io);
                goto alloc_new;
        }
index 58697bd588fa24c82721d83c18805603eb17ece2..c6643783adfff690ed296a4a5e58929e60bbbd7a 100644 (file)
@@ -792,9 +792,17 @@ enum page_type {
        OPU,
 };
 
+enum temp_type {
+       HOT = 0,        /* must be zero for meta bio */
+       WARM,
+       COLD,
+       NR_TEMP_TYPE,
+};
+
 struct f2fs_io_info {
        struct f2fs_sb_info *sbi;       /* f2fs_sb_info pointer */
        enum page_type type;    /* contains DATA/NODE/META/META_FLUSH */
+       enum temp_type temp;    /* contains HOT/WARM/COLD */
        int op;                 /* contains REQ_OP_ */
        int op_flags;           /* req_flag_bits */
        block_t new_blkaddr;    /* new block address to be written */
@@ -879,7 +887,7 @@ struct f2fs_sb_info {
        struct f2fs_sm_info *sm_info;           /* segment manager */
 
        /* for bio operations */
-       struct f2fs_bio_info write_io[NR_PAGE_TYPE];    /* for write bios */
+       struct f2fs_bio_info *write_io[NR_PAGE_TYPE];   /* for write bios */
        struct mutex wio_mutex[NODE + 1];       /* bio ordering for NODE/DATA */
        int write_io_size_bits;                 /* Write IO size bits */
        mempool_t *write_io_dummy;              /* Dummy pages */
index 67b87155bc482fcdafd5e6d12a6384f049eb26a0..e2b13558a915cbed7fcc99490ed33dd974406a5b 100644 (file)
@@ -586,6 +586,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx,
        struct f2fs_io_info fio = {
                .sbi = F2FS_I_SB(inode),
                .type = DATA,
+               .temp = COLD,
                .op = REQ_OP_READ,
                .op_flags = 0,
                .encrypted_page = NULL,
@@ -712,6 +713,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
                struct f2fs_io_info fio = {
                        .sbi = F2FS_I_SB(inode),
                        .type = DATA,
+                       .temp = COLD,
                        .op = REQ_OP_WRITE,
                        .op_flags = REQ_SYNC,
                        .old_blkaddr = NULL_ADDR,
index c9f3a2faee21ff811f09a330bf2ddafe92fc169f..fcada9d03817b8a47ce69a38cd43a8cf0ebea408 100644 (file)
@@ -2084,17 +2084,29 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
 
 static int __get_segment_type(struct f2fs_io_info *fio)
 {
+       int type = 0;
+
        switch (fio->sbi->active_logs) {
        case 2:
-               return __get_segment_type_2(fio);
+               type = __get_segment_type_2(fio);
+               break;
        case 4:
-               return __get_segment_type_4(fio);
+               type = __get_segment_type_4(fio);
+               break;
+       case 6:
+               type = __get_segment_type_6(fio);
+               break;
+       default:
+               f2fs_bug_on(fio->sbi, true);
        }
 
-       /* NR_CURSEG_TYPE(6) logs by default */
-       f2fs_bug_on(fio->sbi, fio->sbi->active_logs != NR_CURSEG_TYPE);
-
-       return __get_segment_type_6(fio);
+       if (IS_HOT(type))
+               fio->temp = HOT;
+       else if (IS_WARM(type))
+               fio->temp = WARM;
+       else
+               fio->temp = COLD;
+       return type;
 }
 
 void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
index 10bf05d4cff49be8152a8e6aa434ec0ccb0827d4..e9ba1f1d9723284b4c3f7b8f0f0518f7c1d5b706 100644 (file)
 #define IS_DATASEG(t)  ((t) <= CURSEG_COLD_DATA)
 #define IS_NODESEG(t)  ((t) >= CURSEG_HOT_NODE)
 
+#define IS_HOT(t)      ((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
+#define IS_WARM(t)     ((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
+#define IS_COLD(t)     ((t) == CURSEG_COLD_NODE || (t) == CURSEG_COLD_DATA)
+
 #define IS_CURSEG(sbi, seg)                                            \
        (((seg) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) ||    \
         ((seg) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) ||   \
index 90599397425ac940024fc628077712dacf126176..aa451ec9fb809699c9adb7d91e48a35d9bbd48a8 100644 (file)
@@ -768,6 +768,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
 static void f2fs_put_super(struct super_block *sb)
 {
        struct f2fs_sb_info *sbi = F2FS_SB(sb);
+       int i;
 
        if (sbi->s_proc) {
                remove_proc_entry("segment_info", sbi->s_proc);
@@ -838,6 +839,8 @@ static void f2fs_put_super(struct super_block *sb)
        destroy_device_list(sbi);
        mempool_destroy(sbi->write_io_dummy);
        destroy_percpu_info(sbi);
+       for (i = 0; i < NR_PAGE_TYPE; i++)
+               kfree(sbi->write_io[i]);
        kfree(sbi);
 }
 
@@ -1967,9 +1970,19 @@ try_onemore:
        spin_lock_init(&sbi->stat_lock);
 
        for (i = 0; i < NR_PAGE_TYPE; i++) {
-               init_rwsem(&sbi->write_io[i].io_rwsem);
-               sbi->write_io[i].sbi = sbi;
-               sbi->write_io[i].bio = NULL;
+               int n = (i == META) ? 1: NR_TEMP_TYPE;
+               int j;
+
+               sbi->write_io[i] = kmalloc(n * sizeof(struct f2fs_bio_info),
+                                                               GFP_KERNEL);
+               if (!sbi->write_io[i])
+                       goto free_options;
+
+               for (j = HOT; j < n; j++) {
+                       init_rwsem(&sbi->write_io[i][j].io_rwsem);
+                       sbi->write_io[i][j].sbi = sbi;
+                       sbi->write_io[i][j].bio = NULL;
+               }
        }
 
        init_rwsem(&sbi->cp_rwsem);
@@ -2215,6 +2228,8 @@ free_meta_inode:
 free_io_dummy:
        mempool_destroy(sbi->write_io_dummy);
 free_options:
+       for (i = 0; i < NR_PAGE_TYPE; i++)
+               kfree(sbi->write_io[i]);
        destroy_percpu_info(sbi);
        kfree(options);
 free_sb_buf:
index 5805d92893a8deda0faf8037f20d1356428a6c92..6f77a2755abbfa89b4ab93467d69d36364650693 100644 (file)
@@ -19,6 +19,9 @@ TRACE_DEFINE_ENUM(INMEM_INVALIDATE);
 TRACE_DEFINE_ENUM(INMEM_REVOKE);
 TRACE_DEFINE_ENUM(IPU);
 TRACE_DEFINE_ENUM(OPU);
+TRACE_DEFINE_ENUM(HOT);
+TRACE_DEFINE_ENUM(WARM);
+TRACE_DEFINE_ENUM(COLD);
 TRACE_DEFINE_ENUM(CURSEG_HOT_DATA);
 TRACE_DEFINE_ENUM(CURSEG_WARM_DATA);
 TRACE_DEFINE_ENUM(CURSEG_COLD_DATA);
@@ -59,6 +62,12 @@ TRACE_DEFINE_ENUM(CP_TRIMMED);
                { IPU,          "IN-PLACE" },                           \
                { OPU,          "OUT-OF-PLACE" })
 
+#define show_block_temp(temp)                                          \
+       __print_symbolic(temp,                                          \
+               { HOT,          "HOT" },                                \
+               { WARM,         "WARM" },                               \
+               { COLD,         "COLD" })
+
 #define F2FS_OP_FLAGS (REQ_RAHEAD | REQ_SYNC | REQ_META | REQ_PRIO |   \
                        REQ_PREFLUSH | REQ_FUA)
 #define F2FS_BIO_FLAG_MASK(t)  (t & F2FS_OP_FLAGS)
@@ -757,6 +766,7 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio,
                __field(block_t, new_blkaddr)
                __field(int, op)
                __field(int, op_flags)
+               __field(int, temp)
                __field(int, type)
        ),
 
@@ -768,16 +778,18 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio,
                __entry->new_blkaddr    = fio->new_blkaddr;
                __entry->op             = fio->op;
                __entry->op_flags       = fio->op_flags;
+               __entry->temp           = fio->temp;
                __entry->type           = fio->type;
        ),
 
        TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, "
-               "oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s(%s), type = %s",
+               "oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s(%s), type = %s_%s",
                show_dev_ino(__entry),
                (unsigned long)__entry->index,
                (unsigned long long)__entry->old_blkaddr,
                (unsigned long long)__entry->new_blkaddr,
                show_bio_type(__entry->op, __entry->op_flags),
+               show_block_temp(__entry->temp),
                show_block_type(__entry->type))
 );