f2fs: use rb-tree to track pending discard commands
authorChao Yu <yuchao0@huawei.com>
Fri, 14 Apr 2017 15:24:55 +0000 (23:24 +0800)
committerJaegeuk Kim <jaegeuk@kernel.org>
Wed, 19 Apr 2017 18:00:40 +0000 (11:00 -0700)
Introduce rb-tree based discard cache infrastructure to speed up lookup and
merge operation of discard entry.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: initialize dc to avoid build warning]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
fs/f2fs/extent_cache.c
fs/f2fs/f2fs.h
fs/f2fs/segment.c

index 68e649a31c7d328540ab0abb4ee3a2a10cd972c3..221ad086ee000a075733201c6881cf0b69854d9e 100644 (file)
@@ -49,7 +49,7 @@ static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root,
        return NULL;
 }
 
-static struct rb_entry *__lookup_rb_tree(struct rb_root *root,
+struct rb_entry *__lookup_rb_tree(struct rb_root *root,
                                struct rb_entry *cached_re, unsigned int ofs)
 {
        struct rb_entry *re;
@@ -61,7 +61,7 @@ static struct rb_entry *__lookup_rb_tree(struct rb_root *root,
        return re;
 }
 
-static struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
+struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
                                struct rb_root *root, struct rb_node **parent,
                                unsigned int ofs)
 {
@@ -92,13 +92,14 @@ static struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
  * in order to simpfy the insertion after.
  * tree must stay unchanged between lookup and insertion.
  */
-static struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root,
+struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root,
                                struct rb_entry *cached_re,
                                unsigned int ofs,
                                struct rb_entry **prev_entry,
                                struct rb_entry **next_entry,
                                struct rb_node ***insert_p,
-                               struct rb_node **insert_parent)
+                               struct rb_node **insert_parent,
+                               bool force)
 {
        struct rb_node **pnode = &root->rb_node;
        struct rb_node *parent = NULL, *tmp_node;
@@ -145,12 +146,12 @@ static struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root,
        return NULL;
 
 lookup_neighbors:
-       if (ofs == re->ofs) {
+       if (ofs == re->ofs || force) {
                /* lookup prev node for merging backward later */
                tmp_node = rb_prev(&re->rb_node);
                *prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
        }
-       if (ofs == re->ofs + re->len - 1) {
+       if (ofs == re->ofs + re->len - 1 || force) {
                /* lookup next node for merging frontward later */
                tmp_node = rb_next(&re->rb_node);
                *next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
@@ -481,7 +482,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
                                        (struct rb_entry *)et->cached_en, fofs,
                                        (struct rb_entry **)&prev_en,
                                        (struct rb_entry **)&next_en,
-                                       &insert_p, &insert_parent);
+                                       &insert_p, &insert_parent, false);
        if (!en)
                en = next_en;
 
index 562db8989a4e4fc60dcb32d38c345828551d286a..ee7d6105a7a5f2c072f5044ac4128d157fd2c3b8 100644 (file)
@@ -194,13 +194,26 @@ enum {
        D_DONE,
 };
 
+struct discard_info {
+       block_t lstart;                 /* logical start address */
+       block_t len;                    /* length */
+       block_t start;                  /* actual start address in dev */
+};
+
 struct discard_cmd {
+       struct rb_node rb_node;         /* rb node located in rb-tree */
+       union {
+               struct {
+                       block_t lstart; /* logical start address */
+                       block_t len;    /* length */
+                       block_t start;  /* actual start address in dev */
+               };
+               struct discard_info di; /* discard info */
+
+       };
        struct list_head list;          /* command list */
        struct completion wait;         /* compleation */
        struct block_device *bdev;      /* bdev */
-       block_t lstart;                 /* logical start address */
-       block_t start;                  /* actual start address in dev */
-       block_t len;                    /* length */
        int state;                      /* state */
        int error;                      /* bio error */
 };
@@ -217,6 +230,7 @@ struct discard_cmd_control {
        atomic_t issued_discard;                /* # of issued discard */
        atomic_t issing_discard;                /* # of issing discard */
        atomic_t discard_cmd_cnt;               /* # of cached cmd count */
+       struct rb_root root;                    /* root of discard rb-tree */
 };
 
 /* for the list of fsync inodes, used only during recovery */
@@ -517,6 +531,24 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
        ei->len = len;
 }
 
+static inline bool __is_discard_mergeable(struct discard_info *back,
+                                               struct discard_info *front)
+{
+       return back->lstart + back->len == front->lstart;
+}
+
+static inline bool __is_discard_back_mergeable(struct discard_info *cur,
+                                               struct discard_info *back)
+{
+       return __is_discard_mergeable(back, cur);
+}
+
+static inline bool __is_discard_front_mergeable(struct discard_info *cur,
+                                               struct discard_info *front)
+{
+       return __is_discard_mergeable(cur, front);
+}
+
 static inline bool __is_extent_mergeable(struct extent_info *back,
                                                struct extent_info *front)
 {
@@ -2573,6 +2605,16 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
 /*
  * extent_cache.c
  */
+struct rb_entry *__lookup_rb_tree(struct rb_root *root,
+                               struct rb_entry *cached_re, unsigned int ofs);
+struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
+                               struct rb_root *root, struct rb_node **parent,
+                               unsigned int ofs);
+struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root,
+               struct rb_entry *cached_re, unsigned int ofs,
+               struct rb_entry **prev_entry, struct rb_entry **next_entry,
+               struct rb_node ***insert_p, struct rb_node **insert_parent,
+               bool force);
 unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
 bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext);
 void f2fs_drop_extent_tree(struct inode *inode);
index 58cfbe3d4dc7d6679537c66dde924bf2f9f8df9f..d137a08ec3a055a98b58a0a8dbd8ca7c290a75cc 100644 (file)
@@ -672,7 +672,7 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
        mutex_unlock(&dirty_i->seglist_lock);
 }
 
-static void __add_discard_cmd(struct f2fs_sb_info *sbi,
+static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
                struct block_device *bdev, block_t lstart,
                block_t start, block_t len)
 {
@@ -689,18 +689,46 @@ static void __add_discard_cmd(struct f2fs_sb_info *sbi,
        dc->state = D_PREP;
        dc->error = 0;
        init_completion(&dc->wait);
-
-       mutex_lock(&dcc->cmd_lock);
        list_add_tail(&dc->list, pend_list);
-       mutex_unlock(&dcc->cmd_lock);
-
        atomic_inc(&dcc->discard_cmd_cnt);
+
+       return dc;
+}
+
+static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
+                               struct block_device *bdev, block_t lstart,
+                               block_t start, block_t len,
+                               struct rb_node *parent, struct rb_node **p)
+{
+       struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+       struct discard_cmd *dc;
+
+       dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
+
+       rb_link_node(&dc->rb_node, parent, p);
+       rb_insert_color(&dc->rb_node, &dcc->root);
+
+       return dc;
 }
 
-static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc)
+static void __detach_discard_cmd(struct discard_cmd_control *dcc,
+                                                       struct discard_cmd *dc)
 {
        if (dc->state == D_DONE)
-               atomic_dec(&(SM_I(sbi)->dcc_info->issing_discard));
+               atomic_dec(&dcc->issing_discard);
+
+       list_del(&dc->list);
+       rb_erase(&dc->rb_node, &dcc->root);
+
+       kmem_cache_free(discard_cmd_slab, dc);
+
+       atomic_dec(&dcc->discard_cmd_cnt);
+}
+
+static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
+                                                       struct discard_cmd *dc)
+{
+       struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
 
        if (dc->error == -EOPNOTSUPP)
                dc->error = 0;
@@ -708,9 +736,7 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *d
        if (dc->error)
                f2fs_msg(sbi->sb, KERN_INFO,
                                "Issue discard failed, ret: %d", dc->error);
-       list_del(&dc->list);
-       kmem_cache_free(discard_cmd_slab, dc);
-       atomic_dec(&SM_I(sbi)->dcc_info->discard_cmd_cnt);
+       __detach_discard_cmd(dcc, dc);
 }
 
 static void f2fs_submit_discard_endio(struct bio *bio)
@@ -754,62 +780,178 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
        }
 }
 
-static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
-               struct block_device *bdev, block_t blkstart, block_t blklen)
+static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
+                               struct block_device *bdev, block_t lstart,
+                               block_t start, block_t len,
+                               struct rb_node **insert_p,
+                               struct rb_node *insert_parent)
 {
-       block_t lblkstart = blkstart;
+       struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+       struct rb_node **p = &dcc->root.rb_node;
+       struct rb_node *parent = NULL;
+       struct discard_cmd *dc = NULL;
 
-       trace_f2fs_issue_discard(bdev, blkstart, blklen);
+       if (insert_p && insert_parent) {
+               parent = insert_parent;
+               p = insert_p;
+               goto do_insert;
+       }
 
-       if (sbi->s_ndevs) {
-               int devi = f2fs_target_device_index(sbi, blkstart);
+       p = __lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart);
+do_insert:
+       dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p);
+       if (!dc)
+               return NULL;
 
-               blkstart -= FDEV(devi).start_blk;
-       }
-       __add_discard_cmd(sbi, bdev, lblkstart, blkstart, blklen);
-       wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue);
-       return 0;
+       return dc;
 }
 
 static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
                                struct discard_cmd *dc, block_t blkaddr)
 {
-       block_t end_block = START_BLOCK(sbi, GET_SEGNO(sbi, blkaddr) + 1);
+       struct discard_info di = dc->di;
+       bool modified = false;
 
-       if (dc->state == D_DONE || dc->lstart + dc->len <= end_block) {
+       if (dc->state == D_DONE || dc->len == 1) {
                __remove_discard_cmd(sbi, dc);
                return;
        }
 
-       if (blkaddr - dc->lstart < dc->lstart + dc->len - end_block) {
-               dc->start += (end_block - dc->lstart);
-               dc->len -= (end_block - dc->lstart);
-               dc->lstart = end_block;
-       } else {
+       if (blkaddr > di.lstart) {
                dc->len = blkaddr - dc->lstart;
+               modified = true;
+       }
+
+       if (blkaddr < di.lstart + di.len - 1) {
+               if (modified) {
+                       __insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
+                                       di.start + blkaddr + 1 - di.lstart,
+                                       di.lstart + di.len - 1 - blkaddr,
+                                       NULL, NULL);
+               } else {
+                       dc->lstart++;
+                       dc->len--;
+                       dc->start++;
+               }
        }
 }
 
-/* This should be covered by global mutex, &sit_i->sentry_lock */
-void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
+static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
+                               struct block_device *bdev, block_t lstart,
+                               block_t start, block_t len)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
-       struct list_head *pend_list = &(dcc->discard_pend_list);
-       struct list_head *wait_list = &(dcc->discard_wait_list);
-       struct discard_cmd *dc, *tmp;
+       struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
+       struct discard_cmd *dc;
+       struct discard_info di = {0};
+       struct rb_node **insert_p = NULL, *insert_parent = NULL;
+       block_t end = lstart + len;
 
        mutex_lock(&dcc->cmd_lock);
 
-       list_for_each_entry_safe(dc, tmp, pend_list, list) {
-               if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len)
-                       __punch_discard_cmd(sbi, dc, blkaddr);
+       dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
+                                       NULL, lstart,
+                                       (struct rb_entry **)&prev_dc,
+                                       (struct rb_entry **)&next_dc,
+                                       &insert_p, &insert_parent, true);
+       if (dc)
+               prev_dc = dc;
+
+       if (!prev_dc) {
+               di.lstart = lstart;
+               di.len = next_dc ? next_dc->lstart - lstart : len;
+               di.len = min(di.len, len);
+               di.start = start;
        }
 
-       list_for_each_entry_safe(dc, tmp, wait_list, list) {
-               if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) {
-                       wait_for_completion_io(&dc->wait);
-                       __punch_discard_cmd(sbi, dc, blkaddr);
+       while (1) {
+               struct rb_node *node;
+               bool merged = false;
+               struct discard_cmd *tdc = NULL;
+
+               if (prev_dc) {
+                       di.lstart = prev_dc->lstart + prev_dc->len;
+                       if (di.lstart < lstart)
+                               di.lstart = lstart;
+                       if (di.lstart >= end)
+                               break;
+
+                       if (!next_dc || next_dc->lstart > end)
+                               di.len = end - di.lstart;
+                       else
+                               di.len = next_dc->lstart - di.lstart;
+                       di.start = start + di.lstart - lstart;
+               }
+
+               if (!di.len)
+                       goto next;
+
+               if (prev_dc && prev_dc->state == D_PREP &&
+                       prev_dc->bdev == bdev &&
+                       __is_discard_back_mergeable(&di, &prev_dc->di)) {
+                       prev_dc->di.len += di.len;
+                       di = prev_dc->di;
+                       tdc = prev_dc;
+                       merged = true;
+               }
+
+               if (next_dc && next_dc->state == D_PREP &&
+                       next_dc->bdev == bdev &&
+                       __is_discard_front_mergeable(&di, &next_dc->di)) {
+                       next_dc->di.lstart = di.lstart;
+                       next_dc->di.len += di.len;
+                       next_dc->di.start = di.start;
+                       if (tdc)
+                               __remove_discard_cmd(sbi, tdc);
+
+                       merged = true;
                }
+
+               if (!merged)
+                       __insert_discard_tree(sbi, bdev, di.lstart, di.start,
+                                                       di.len, NULL, NULL);
+ next:
+               prev_dc = next_dc;
+               if (!prev_dc)
+                       break;
+
+               node = rb_next(&prev_dc->rb_node);
+               next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
+       }
+
+       mutex_unlock(&dcc->cmd_lock);
+}
+
+static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
+               struct block_device *bdev, block_t blkstart, block_t blklen)
+{
+       block_t lblkstart = blkstart;
+
+       trace_f2fs_issue_discard(bdev, blkstart, blklen);
+
+       if (sbi->s_ndevs) {
+               int devi = f2fs_target_device_index(sbi, blkstart);
+
+               blkstart -= FDEV(devi).start_blk;
+       }
+       __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
+       wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue);
+       return 0;
+}
+
+/* This should be covered by global mutex, &sit_i->sentry_lock */
+void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
+{
+       struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+       struct discard_cmd *dc;
+
+       mutex_lock(&dcc->cmd_lock);
+
+       dc = (struct discard_cmd *)__lookup_rb_tree(&dcc->root, NULL, blkaddr);
+       if (dc) {
+               if (dc->state != D_PREP)
+                       wait_for_completion_io(&dc->wait);
+               __punch_discard_cmd(sbi, dc, blkaddr);
        }
 
        mutex_unlock(&dcc->cmd_lock);
@@ -1178,6 +1320,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
        atomic_set(&dcc->discard_cmd_cnt, 0);
        dcc->nr_discards = 0;
        dcc->max_discards = 0;
+       dcc->root = RB_ROOT;
 
        init_waitqueue_head(&dcc->discard_wait_queue);
        SM_I(sbi)->dcc_info = dcc;