ext4: more efficient SEEK_DATA implementation
authorJan Kara <jack@suse.cz>
Thu, 10 Mar 2016 04:11:13 +0000 (23:11 -0500)
committerTheodore Ts'o <tytso@mit.edu>
Thu, 10 Mar 2016 04:11:13 +0000 (23:11 -0500)
Using SEEK_DATA in a huge sparse file can easily lead to sotflockups as
ext4_seek_data() iterates hole block-by-block. Fix the problem by using
returned hole size from ext4_map_blocks() and thus skip the hole in one
go.

Update also SEEK_HOLE implementation to follow the same pattern as
SEEK_DATA to make future maintenance easier.

Furthermore we add cond_resched() to both ext4_seek_data() and
ext4_seek_hole() to avoid softlockups in case evil user creates huge
fragmented file and we have to go through lots of extents.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
fs/ext4/ext4.h
fs/ext4/file.c
fs/ext4/inode.c

index 70b8e04095661484882615f00915f7b17664a279..5623eec7fd229c6b6196af3eb24275912cb5e978 100644 (file)
@@ -2546,6 +2546,9 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
                                        int used, int quota_claim);
 extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
                              ext4_fsblk_t pblk, ext4_lblk_t len);
+extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
+                               unsigned int map_len,
+                               struct extent_status *result);
 
 /* indirect.c */
 extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
index 4a1153561580a86e20e6206d3be1b15a6f2ac095..e93a7efaf78fd3bbaab81fe0895e282edcfb8f41 100644 (file)
@@ -426,7 +426,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
  */
 static int ext4_find_unwritten_pgoff(struct inode *inode,
                                     int whence,
-                                    struct ext4_map_blocks *map,
+                                    ext4_lblk_t end_blk,
                                     loff_t *offset)
 {
        struct pagevec pvec;
@@ -441,7 +441,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
        blkbits = inode->i_sb->s_blocksize_bits;
        startoff = *offset;
        lastoff = startoff;
-       endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits;
+       endoff = (loff_t)end_blk << blkbits;
 
        index = startoff >> PAGE_CACHE_SHIFT;
        end = endoff >> PAGE_CACHE_SHIFT;
@@ -559,12 +559,11 @@ out:
 static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
 {
        struct inode *inode = file->f_mapping->host;
-       struct ext4_map_blocks map;
        struct extent_status es;
        ext4_lblk_t start, last, end;
        loff_t dataoff, isize;
        int blkbits;
-       int ret = 0;
+       int ret;
 
        inode_lock(inode);
 
@@ -581,41 +580,32 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
        dataoff = offset;
 
        do {
-               map.m_lblk = last;
-               map.m_len = end - last + 1;
-               ret = ext4_map_blocks(NULL, inode, &map, 0);
-               if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
-                       if (last != start)
-                               dataoff = (loff_t)last << blkbits;
-                       break;
+               ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
+               if (ret <= 0) {
+                       /* No extent found -> no data */
+                       if (ret == 0)
+                               ret = -ENXIO;
+                       inode_unlock(inode);
+                       return ret;
                }
 
-               /*
-                * If there is a delay extent at this offset,
-                * it will be as a data.
-                */
-               ext4_es_find_delayed_extent_range(inode, last, last, &es);
-               if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
-                       if (last != start)
-                               dataoff = (loff_t)last << blkbits;
+               last = es.es_lblk;
+               if (last != start)
+                       dataoff = (loff_t)last << blkbits;
+               if (!ext4_es_is_unwritten(&es))
                        break;
-               }
 
                /*
                 * If there is a unwritten extent at this offset,
                 * it will be as a data or a hole according to page
                 * cache that has data or not.
                 */
-               if (map.m_flags & EXT4_MAP_UNWRITTEN) {
-                       int unwritten;
-                       unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA,
-                                                             &map, &dataoff);
-                       if (unwritten)
-                               break;
-               }
-
-               last++;
+               if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
+                                             es.es_lblk + es.es_len, &dataoff))
+                       break;
+               last += es.es_len;
                dataoff = (loff_t)last << blkbits;
+               cond_resched();
        } while (last <= end);
 
        inode_unlock(inode);
@@ -632,12 +622,11 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
 static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
 {
        struct inode *inode = file->f_mapping->host;
-       struct ext4_map_blocks map;
        struct extent_status es;
        ext4_lblk_t start, last, end;
        loff_t holeoff, isize;
        int blkbits;
-       int ret = 0;
+       int ret;
 
        inode_lock(inode);
 
@@ -654,44 +643,30 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
        holeoff = offset;
 
        do {
-               map.m_lblk = last;
-               map.m_len = end - last + 1;
-               ret = ext4_map_blocks(NULL, inode, &map, 0);
-               if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
-                       last += ret;
-                       holeoff = (loff_t)last << blkbits;
-                       continue;
+               ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
+               if (ret < 0) {
+                       inode_unlock(inode);
+                       return ret;
                }
-
-               /*
-                * If there is a delay extent at this offset,
-                * we will skip this extent.
-                */
-               ext4_es_find_delayed_extent_range(inode, last, last, &es);
-               if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
-                       last = es.es_lblk + es.es_len;
-                       holeoff = (loff_t)last << blkbits;
-                       continue;
+               /* Found a hole? */
+               if (ret == 0 || es.es_lblk > last) {
+                       if (last != start)
+                               holeoff = (loff_t)last << blkbits;
+                       break;
                }
-
                /*
                 * If there is a unwritten extent at this offset,
                 * it will be as a data or a hole according to page
                 * cache that has data or not.
                 */
-               if (map.m_flags & EXT4_MAP_UNWRITTEN) {
-                       int unwritten;
-                       unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
-                                                             &map, &holeoff);
-                       if (!unwritten) {
-                               last += ret;
-                               holeoff = (loff_t)last << blkbits;
-                               continue;
-                       }
-               }
+               if (ext4_es_is_unwritten(&es) &&
+                   ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
+                                             last + es.es_len, &holeoff))
+                       break;
 
-               /* find a hole */
-               break;
+               last += es.es_len;
+               holeoff = (loff_t)last << blkbits;
+               cond_resched();
        } while (last <= end);
 
        inode_unlock(inode);
index fddc6ddc53a889806338601a8dce0a9923ce3b83..ce2c4c62386f4f28243d89328949d1871a1329c6 100644 (file)
@@ -5596,3 +5596,70 @@ int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
        return err;
 }
+
+/*
+ * Find the first extent at or after @lblk in an inode that is not a hole.
+ * Search for @map_len blocks at most. The extent is returned in @result.
+ *
+ * The function returns 1 if we found an extent. The function returns 0 in
+ * case there is no extent at or after @lblk and in that case also sets
+ * @result->es_len to 0. In case of error, the error code is returned.
+ */
+int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
+                        unsigned int map_len, struct extent_status *result)
+{
+       struct ext4_map_blocks map;
+       struct extent_status es = {};
+       int ret;
+
+       map.m_lblk = lblk;
+       map.m_len = map_len;
+
+       /*
+        * For non-extent based files this loop may iterate several times since
+        * we do not determine full hole size.
+        */
+       while (map.m_len > 0) {
+               ret = ext4_map_blocks(NULL, inode, &map, 0);
+               if (ret < 0)
+                       return ret;
+               /* There's extent covering m_lblk? Just return it. */
+               if (ret > 0) {
+                       int status;
+
+                       ext4_es_store_pblock(result, map.m_pblk);
+                       result->es_lblk = map.m_lblk;
+                       result->es_len = map.m_len;
+                       if (map.m_flags & EXT4_MAP_UNWRITTEN)
+                               status = EXTENT_STATUS_UNWRITTEN;
+                       else
+                               status = EXTENT_STATUS_WRITTEN;
+                       ext4_es_store_status(result, status);
+                       return 1;
+               }
+               ext4_es_find_delayed_extent_range(inode, map.m_lblk,
+                                                 map.m_lblk + map.m_len - 1,
+                                                 &es);
+               /* Is delalloc data before next block in extent tree? */
+               if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) {
+                       ext4_lblk_t offset = 0;
+
+                       if (es.es_lblk < lblk)
+                               offset = lblk - es.es_lblk;
+                       result->es_lblk = es.es_lblk + offset;
+                       ext4_es_store_pblock(result,
+                                            ext4_es_pblock(&es) + offset);
+                       result->es_len = es.es_len - offset;
+                       ext4_es_store_status(result, ext4_es_status(&es));
+
+                       return 1;
+               }
+               /* There's a hole at m_lblk, advance us after it */
+               map.m_lblk += map.m_len;
+               map_len -= map.m_len;
+               map.m_len = map_len;
+               cond_resched();
+       }
+       result->es_len = 0;
+       return 0;
+}