ext4: cache all of an extent tree's leaf block upon reading
authorTheodore Ts'o <tytso@mit.edu>
Sat, 17 Aug 2013 01:23:41 +0000 (21:23 -0400)
committerTheodore Ts'o <tytso@mit.edu>
Sat, 17 Aug 2013 01:23:41 +0000 (21:23 -0400)
When we read in an extent tree leaf block from disk, arrange to have
all of its entries cached.  In nearly all cases the in-memory
representation will be more compact than the on-disk representation in
the buffer cache, and it allows us to get the information without
having to traverse the extent tree for successive extents.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/extents_status.c
fs/ext4/extents_status.h
fs/ext4/migrate.c
fs/ext4/move_extent.c
include/trace/events/ext4.h

index 0ab26fbf33808c565ae9ef4449d6836e4cb01fae..c74b1948feb01de99f27e7b0bc84f53fa4fcdc5c 100644 (file)
@@ -560,6 +560,17 @@ enum {
        /* Do not put hole in extent cache */
 #define EXT4_GET_BLOCKS_NO_PUT_HOLE            0x0200
 
+/*
+ * The bit position of this flag must not overlap with any of the
+ * EXT4_GET_BLOCKS_*.  It is used by ext4_ext_find_extent(),
+ * read_extent_tree_block(), ext4_split_extent_at(),
+ * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf() to
+ * indicate that the we shouldn't be caching the extents when reading
+ * from the extent tree while a truncate or punch hole operation
+ * is in progress.
+ */
+#define EXT4_EX_NOCACHE                                0x0400
+
 /*
  * Flags used by ext4_free_blocks
  */
@@ -2684,7 +2695,8 @@ extern int ext4_ext_insert_extent(handle_t *, struct inode *,
                                  struct ext4_ext_path *,
                                  struct ext4_extent *, int);
 extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
-                                                 struct ext4_ext_path *);
+                                                 struct ext4_ext_path *,
+                                                 int flags);
 extern void ext4_ext_drop_refs(struct ext4_ext_path *);
 extern int ext4_ext_check_inode(struct inode *inode);
 extern int ext4_find_delalloc_range(struct inode *inode,
index 6e7b7d928f4afaf872bd4d89373e5cbb5d4d55db..08c1ac976479a57cc9c89194a08c2b1ca9bd1967 100644 (file)
@@ -466,7 +466,8 @@ int ext4_ext_check_inode(struct inode *inode)
 
 static struct buffer_head *
 __read_extent_tree_block(const char *function, unsigned int line,
-                        struct inode *inode, ext4_fsblk_t pblk, int depth)
+                        struct inode *inode, ext4_fsblk_t pblk, int depth,
+                        int flags)
 {
        struct buffer_head              *bh;
        int                             err;
@@ -488,6 +489,32 @@ __read_extent_tree_block(const char *function, unsigned int line,
        if (err)
                goto errout;
        set_buffer_verified(bh);
+       /*
+        * If this is a leaf block, cache all of its entries
+        */
+       if (!(flags & EXT4_EX_NOCACHE) && depth == 0) {
+               struct ext4_extent_header *eh = ext_block_hdr(bh);
+               struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
+               ext4_lblk_t prev = 0;
+               int i;
+
+               for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
+                       unsigned int status = EXTENT_STATUS_WRITTEN;
+                       ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
+                       int len = ext4_ext_get_actual_len(ex);
+
+                       if (prev && (prev != lblk))
+                               ext4_es_cache_extent(inode, prev,
+                                                    lblk - prev, ~0,
+                                                    EXTENT_STATUS_HOLE);
+
+                       if (ext4_ext_is_uninitialized(ex))
+                               status = EXTENT_STATUS_UNWRITTEN;
+                       ext4_es_cache_extent(inode, lblk, len,
+                                            ext4_ext_pblock(ex), status);
+                       prev = lblk + len;
+               }
+       }
        return bh;
 errout:
        put_bh(bh);
@@ -495,8 +522,9 @@ errout:
 
 }
 
-#define read_extent_tree_block(inode, pblk, depth)             \
-       __read_extent_tree_block(__func__, __LINE__, (inode), (pblk), (depth))
+#define read_extent_tree_block(inode, pblk, depth, flags)              \
+       __read_extent_tree_block(__func__, __LINE__, (inode), (pblk),   \
+                                (depth), (flags))
 
 #ifdef EXT_DEBUG
 static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
@@ -730,7 +758,7 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
 
 struct ext4_ext_path *
 ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
-                                       struct ext4_ext_path *path)
+                    struct ext4_ext_path *path, int flags)
 {
        struct ext4_extent_header *eh;
        struct buffer_head *bh;
@@ -762,7 +790,8 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
                path[ppos].p_depth = i;
                path[ppos].p_ext = NULL;
 
-               bh = read_extent_tree_block(inode, path[ppos].p_block, --i);
+               bh = read_extent_tree_block(inode, path[ppos].p_block, --i,
+                                           flags);
                if (IS_ERR(bh)) {
                        ret = PTR_ERR(bh);
                        goto err;
@@ -1199,7 +1228,8 @@ out:
  * if no free index is found, then it requests in-depth growing.
  */
 static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
-                                   unsigned int flags,
+                                   unsigned int mb_flags,
+                                   unsigned int gb_flags,
                                    struct ext4_ext_path *path,
                                    struct ext4_extent *newext)
 {
@@ -1221,7 +1251,7 @@ repeat:
        if (EXT_HAS_FREE_INDEX(curp)) {
                /* if we found index with free entry, then use that
                 * entry: create all needed subtree and add new leaf */
-               err = ext4_ext_split(handle, inode, flags, path, newext, i);
+               err = ext4_ext_split(handle, inode, mb_flags, path, newext, i);
                if (err)
                        goto out;
 
@@ -1229,12 +1259,12 @@ repeat:
                ext4_ext_drop_refs(path);
                path = ext4_ext_find_extent(inode,
                                    (ext4_lblk_t)le32_to_cpu(newext->ee_block),
-                                   path);
+                                   path, gb_flags);
                if (IS_ERR(path))
                        err = PTR_ERR(path);
        } else {
                /* tree is full, time to grow in depth */
-               err = ext4_ext_grow_indepth(handle, inode, flags, newext);
+               err = ext4_ext_grow_indepth(handle, inode, mb_flags, newext);
                if (err)
                        goto out;
 
@@ -1242,7 +1272,7 @@ repeat:
                ext4_ext_drop_refs(path);
                path = ext4_ext_find_extent(inode,
                                   (ext4_lblk_t)le32_to_cpu(newext->ee_block),
-                                   path);
+                                   path, gb_flags);
                if (IS_ERR(path)) {
                        err = PTR_ERR(path);
                        goto out;
@@ -1415,7 +1445,7 @@ got_index:
        while (++depth < path->p_depth) {
                /* subtract from p_depth to get proper eh_depth */
                bh = read_extent_tree_block(inode, block,
-                                           path->p_depth - depth);
+                                           path->p_depth - depth, 0);
                if (IS_ERR(bh))
                        return PTR_ERR(bh);
                eh = ext_block_hdr(bh);
@@ -1424,7 +1454,7 @@ got_index:
                put_bh(bh);
        }
 
-       bh = read_extent_tree_block(inode, block, path->p_depth - depth);
+       bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0);
        if (IS_ERR(bh))
                return PTR_ERR(bh);
        eh = ext_block_hdr(bh);
@@ -1786,7 +1816,7 @@ out:
  */
 int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
                                struct ext4_ext_path *path,
-                               struct ext4_extent *newext, int flag)
+                               struct ext4_extent *newext, int gb_flags)
 {
        struct ext4_extent_header *eh;
        struct ext4_extent *ex, *fex;
@@ -1795,7 +1825,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
        int depth, len, err;
        ext4_lblk_t next;
        unsigned uninitialized = 0;
-       int flags = 0;
+       int mb_flags = 0;
 
        if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
                EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
@@ -1810,7 +1840,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
        }
 
        /* try to insert block into found extent and return */
-       if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)) {
+       if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) {
 
                /*
                 * Try to see whether we should rather test the extent on
@@ -1913,7 +1943,7 @@ prepend:
        if (next != EXT_MAX_BLOCKS) {
                ext_debug("next leaf block - %u\n", next);
                BUG_ON(npath != NULL);
-               npath = ext4_ext_find_extent(inode, next, NULL);
+               npath = ext4_ext_find_extent(inode, next, NULL, 0);
                if (IS_ERR(npath))
                        return PTR_ERR(npath);
                BUG_ON(npath->p_depth != path->p_depth);
@@ -1932,9 +1962,10 @@ prepend:
         * There is no free space in the found leaf.
         * We're gonna add a new leaf in the tree.
         */
-       if (flag & EXT4_GET_BLOCKS_METADATA_NOFAIL)
-               flags = EXT4_MB_USE_RESERVED;
-       err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext);
+       if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
+               mb_flags = EXT4_MB_USE_RESERVED;
+       err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
+                                      path, newext);
        if (err)
                goto cleanup;
        depth = ext_depth(inode);
@@ -2000,7 +2031,7 @@ has_space:
 
 merge:
        /* try to merge extents */
-       if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
+       if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO))
                ext4_ext_try_to_merge(handle, inode, path, nearex);
 
 
@@ -2043,7 +2074,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
                        path = NULL;
                }
 
-               path = ext4_ext_find_extent(inode, block, path);
+               path = ext4_ext_find_extent(inode, block, path, 0);
                if (IS_ERR(path)) {
                        up_read(&EXT4_I(inode)->i_data_sem);
                        err = PTR_ERR(path);
@@ -2705,7 +2736,7 @@ again:
                ext4_lblk_t ee_block;
 
                /* find extent for this block */
-               path = ext4_ext_find_extent(inode, end, NULL);
+               path = ext4_ext_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
                if (IS_ERR(path)) {
                        ext4_journal_stop(handle);
                        return PTR_ERR(path);
@@ -2747,6 +2778,7 @@ again:
                         */
                        err = ext4_split_extent_at(handle, inode, path,
                                        end + 1, split_flag,
+                                       EXT4_EX_NOCACHE |
                                        EXT4_GET_BLOCKS_PRE_IO |
                                        EXT4_GET_BLOCKS_METADATA_NOFAIL);
 
@@ -2823,7 +2855,8 @@ again:
                                  i + 1, ext4_idx_pblock(path[i].p_idx));
                        memset(path + i + 1, 0, sizeof(*path));
                        bh = read_extent_tree_block(inode,
-                               ext4_idx_pblock(path[i].p_idx), depth - i - 1);
+                               ext4_idx_pblock(path[i].p_idx), depth - i - 1,
+                               EXT4_EX_NOCACHE);
                        if (IS_ERR(bh)) {
                                /* should we reset i_size? */
                                err = PTR_ERR(bh);
@@ -3170,7 +3203,7 @@ static int ext4_split_extent(handle_t *handle,
         * result in split of original leaf or extent zeroout.
         */
        ext4_ext_drop_refs(path);
-       path = ext4_ext_find_extent(inode, map->m_lblk, path);
+       path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
        if (IS_ERR(path))
                return PTR_ERR(path);
        depth = ext_depth(inode);
@@ -3554,7 +3587,7 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
                if (err < 0)
                        goto out;
                ext4_ext_drop_refs(path);
-               path = ext4_ext_find_extent(inode, map->m_lblk, path);
+               path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
                if (IS_ERR(path)) {
                        err = PTR_ERR(path);
                        goto out;
@@ -4041,7 +4074,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
        trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
 
        /* find extent for this block */
-       path = ext4_ext_find_extent(inode, map->m_lblk, NULL);
+       path = ext4_ext_find_extent(inode, map->m_lblk, NULL, 0);
        if (IS_ERR(path)) {
                err = PTR_ERR(path);
                path = NULL;
@@ -4760,6 +4793,6 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                error = ext4_fill_fiemap_extents(inode, start_blk,
                                                 len_blks, fieinfo);
        }
-
+       ext4_es_lru_add(inode);
        return error;
 }
index ded2615b63e0edfa2f641e51190e461d28402111..1dc5df016e25215c57e471f265784fbf8497009f 100644 (file)
@@ -419,7 +419,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
        unsigned short ee_len;
        int depth, ee_status, es_status;
 
-       path = ext4_ext_find_extent(inode, es->es_lblk, NULL);
+       path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
        if (IS_ERR(path))
                return;
 
@@ -683,6 +683,41 @@ error:
        return err;
 }
 
+/*
+ * ext4_es_cache_extent() inserts information into the extent status
+ * tree if and only if there isn't information about the range in
+ * question already.
+ */
+void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
+                         ext4_lblk_t len, ext4_fsblk_t pblk,
+                         unsigned int status)
+{
+       struct extent_status *es;
+       struct extent_status newes;
+       ext4_lblk_t end = lblk + len - 1;
+
+       newes.es_lblk = lblk;
+       newes.es_len = len;
+       ext4_es_store_pblock(&newes, pblk);
+       ext4_es_store_status(&newes, status);
+       trace_ext4_es_cache_extent(inode, &newes);
+
+       if (!len)
+               return;
+
+       BUG_ON(end < lblk);
+
+       write_lock(&EXT4_I(inode)->i_es_lock);
+
+       es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
+       if (es && ((es->es_lblk <= lblk) || (es->es_lblk <= end)))
+               goto out;
+
+       __es_insert_extent(inode, &newes);
+out:
+       write_unlock(&EXT4_I(inode)->i_es_lock);
+}
+
 /*
  * ext4_es_lookup_extent() looks up an extent in extent status tree.
  *
index d72af848f98910b037a477ad0a589a091fc89cf8..3e83aef3653a03938a0b9f30533ffbd1566b2fe1 100644 (file)
@@ -71,6 +71,9 @@ extern void ext4_es_init_tree(struct ext4_es_tree *tree);
 extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
                                 ext4_lblk_t len, ext4_fsblk_t pblk,
                                 unsigned int status);
+extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
+                                ext4_lblk_t len, ext4_fsblk_t pblk,
+                                unsigned int status);
 extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
                                 ext4_lblk_t len);
 extern void ext4_es_find_delayed_extent_range(struct inode *inode,
index 49e8bdff9163e830931570f4c2660cea201ab3fd..f99bdb8548b2bd91e9fec87b2955ef3457af0202 100644 (file)
@@ -39,7 +39,7 @@ static int finish_range(handle_t *handle, struct inode *inode,
        newext.ee_block = cpu_to_le32(lb->first_block);
        newext.ee_len   = cpu_to_le16(lb->last_block - lb->first_block + 1);
        ext4_ext_store_pblock(&newext, lb->first_pblock);
-       path = ext4_ext_find_extent(inode, lb->first_block, NULL);
+       path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0);
 
        if (IS_ERR(path)) {
                retval = PTR_ERR(path);
index e86dddbd8296c138347f9ba1c5f6e52d51a4c91c..7fa4d855dbd5e65e373b8222800f2a5d6bc5520c 100644 (file)
@@ -37,7 +37,7 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock,
        int ret = 0;
        struct ext4_ext_path *path;
 
-       path = ext4_ext_find_extent(inode, lblock, *orig_path);
+       path = ext4_ext_find_extent(inode, lblock, *orig_path, EXT4_EX_NOCACHE);
        if (IS_ERR(path))
                ret = PTR_ERR(path);
        else if (path[ext_depth(inode)].p_ext == NULL)
index 47a355b251e4d8f72945c60b1c1abec18a1cf413..d892b55d91ab84fd00539f54677dae9ce7a064cc 100644 (file)
@@ -2192,7 +2192,7 @@ TRACE_EVENT(ext4_ext_remove_space_done,
                  (unsigned short) __entry->eh_entries)
 );
 
-TRACE_EVENT(ext4_es_insert_extent,
+DECLARE_EVENT_CLASS(ext4__es_extent,
        TP_PROTO(struct inode *inode, struct extent_status *es),
 
        TP_ARGS(inode, es),
@@ -2222,6 +2222,18 @@ TRACE_EVENT(ext4_es_insert_extent,
                  __entry->pblk, show_extent_status(__entry->status))
 );
 
+DEFINE_EVENT(ext4__es_extent, ext4_es_insert_extent,
+       TP_PROTO(struct inode *inode, struct extent_status *es),
+
+       TP_ARGS(inode, es)
+);
+
+DEFINE_EVENT(ext4__es_extent, ext4_es_cache_extent,
+       TP_PROTO(struct inode *inode, struct extent_status *es),
+
+       TP_ARGS(inode, es)
+);
+
 TRACE_EVENT(ext4_es_remove_extent,
        TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len),