Btrfs: don't wait for unrelated IO to finish before relocation
author Filipe Manana <fdmanana@suse.com>
Tue, 26 Apr 2016 14:36:38 +0000 (15:36 +0100)
committer Filipe Manana <fdmanana@suse.com>
Fri, 13 May 2016 00:59:14 +0000 (01:59 +0100)
Before the relocation process of a block group starts, it sets the block
group to readonly mode, then flushes all delalloc writes and then finally
it waits for all ordered extents to complete. This last step includes
waiting for ordered extents destined for extents allocated in other block
groups, making us waste time unnecessarily.

So improve this by waiting only for ordered extents that fall into the
block group's range.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
fs/btrfs/dev-replace.c
fs/btrfs/extent-tree.c
fs/btrfs/ioctl.c
fs/btrfs/ordered-data.c
fs/btrfs/ordered-data.h
fs/btrfs/relocation.c
fs/btrfs/super.c
fs/btrfs/transaction.c

index 26bcb487f95885295ad3e24d2a8063cbf6355351..3371f9e546d96885e8f52dd629d7337b422300f7 100644 (file)
@@ -403,7 +403,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
        if (ret)
                btrfs_err(root->fs_info, "kobj add dev failed %d\n", ret);
 
-       btrfs_wait_ordered_roots(root->fs_info, -1);
+       btrfs_wait_ordered_roots(root->fs_info, -1, 0, (u64)-1);
 
        /* force writing the updated state information to disk */
        trans = btrfs_start_transaction(root, 0);
@@ -495,7 +495,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
                mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
                return ret;
        }
-       btrfs_wait_ordered_roots(root->fs_info, -1);
+       btrfs_wait_ordered_roots(root->fs_info, -1, 0, (u64)-1);
 
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
index 84e060eb0de8c6aca562bcfb537f5236a71e9e66..251452a2b72c2172dac0e73e9b3b47ffcd421886 100644 (file)
@@ -4141,7 +4141,7 @@ commit_trans:
 
                        if (need_commit > 0) {
                                btrfs_start_delalloc_roots(fs_info, 0, -1);
-                               btrfs_wait_ordered_roots(fs_info, -1);
+                               btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
                        }
 
                        trans = btrfs_join_transaction(root);
@@ -4583,7 +4583,8 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
                 */
                btrfs_start_delalloc_roots(root->fs_info, 0, nr_items);
                if (!current->journal_info)
-                       btrfs_wait_ordered_roots(root->fs_info, nr_items);
+                       btrfs_wait_ordered_roots(root->fs_info, nr_items,
+                                                0, (u64)-1);
        }
 }
 
@@ -4632,7 +4633,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
                if (trans)
                        return;
                if (wait_ordered)
-                       btrfs_wait_ordered_roots(root->fs_info, items);
+                       btrfs_wait_ordered_roots(root->fs_info, items,
+                                                0, (u64)-1);
                return;
        }
 
@@ -4671,7 +4673,8 @@ skip_async:
 
                loops++;
                if (wait_ordered && !trans) {
-                       btrfs_wait_ordered_roots(root->fs_info, items);
+                       btrfs_wait_ordered_roots(root->fs_info, items,
+                                                0, (u64)-1);
                } else {
                        time_left = schedule_timeout_killable(1);
                        if (time_left)
index 5a23806ae418af8e3952b4cbf65df06aae384b57..697cc336bd1ce19fad52743964eec4f2d4073f50 100644 (file)
@@ -681,7 +681,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
        if (ret)
                goto dec_and_free;
 
-       btrfs_wait_ordered_extents(root, -1);
+       btrfs_wait_ordered_extents(root, -1, 0, (u64)-1);
 
        btrfs_init_block_rsv(&pending_snapshot->block_rsv,
                             BTRFS_BLOCK_RSV_TEMP);
index 0de7da5a610d7a2175c6264c240b2696897ee2f6..559170464d7c50f43d6804d59dfd9f73e57a707f 100644 (file)
@@ -661,14 +661,15 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
  * wait for all the ordered extents in a root.  This is done when balancing
  * space between drives.
  */
-int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
+int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
+                              const u64 range_start, const u64 range_len)
 {
-       struct list_head splice, works;
+       LIST_HEAD(splice);
+       LIST_HEAD(skipped);
+       LIST_HEAD(works);
        struct btrfs_ordered_extent *ordered, *next;
        int count = 0;
-
-       INIT_LIST_HEAD(&splice);
-       INIT_LIST_HEAD(&works);
+       const u64 range_end = range_start + range_len;
 
        mutex_lock(&root->ordered_extent_mutex);
        spin_lock(&root->ordered_extent_lock);
@@ -676,6 +677,14 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
        while (!list_empty(&splice) && nr) {
                ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
                                           root_extent_list);
+
+               if (range_end <= ordered->start ||
+                   ordered->start + ordered->disk_len <= range_start) {
+                       list_move_tail(&ordered->root_extent_list, &skipped);
+                       cond_resched_lock(&root->ordered_extent_lock);
+                       continue;
+               }
+
                list_move_tail(&ordered->root_extent_list,
                               &root->ordered_extents);
                atomic_inc(&ordered->refs);
@@ -694,6 +703,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
                        nr--;
                count++;
        }
+       list_splice_tail(&skipped, &root->ordered_extents);
        list_splice_tail(&splice, &root->ordered_extents);
        spin_unlock(&root->ordered_extent_lock);
 
@@ -708,7 +718,8 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
        return count;
 }
 
-void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
+void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
+                             const u64 range_start, const u64 range_len)
 {
        struct btrfs_root *root;
        struct list_head splice;
@@ -728,7 +739,8 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
                               &fs_info->ordered_roots);
                spin_unlock(&fs_info->ordered_root_lock);
 
-               done = btrfs_wait_ordered_extents(root, nr);
+               done = btrfs_wait_ordered_extents(root, nr,
+                                                 range_start, range_len);
                btrfs_put_fs_root(root);
 
                spin_lock(&fs_info->ordered_root_lock);
index 23c96059cef26a6292847a051b893f254cc472b3..8ef12623d65c14b5eafb4c8c3f1171f7b2b56caf 100644 (file)
@@ -197,8 +197,10 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
                                struct btrfs_ordered_extent *ordered);
 int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
                           u32 *sum, int len);
-int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr);
-void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr);
+int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
+                              const u64 range_start, const u64 range_len);
+void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
+                             const u64 range_start, const u64 range_len);
 void btrfs_get_logged_extents(struct inode *inode,
                              struct list_head *logged_list,
                              const loff_t start,
index 08ef890deca69fdce7a35d26bcf5ae45d9f67d8b..30f77ed60133955e8ae0b27b05c022c03945c902 100644 (file)
@@ -4259,7 +4259,9 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
                err = ret;
                goto out;
        }
-       btrfs_wait_ordered_roots(fs_info, -1);
+       btrfs_wait_ordered_roots(fs_info, -1,
+                                rc->block_group->key.objectid,
+                                rc->block_group->key.offset);
 
        while (1) {
                mutex_lock(&fs_info->cleaner_mutex);
index 00b8f37cc306df540989a9a888f0fb6cf0eb8a0e..89d134794d47d65b5e841e5b078210c0f3870849 100644 (file)
@@ -1160,7 +1160,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
                return 0;
        }
 
-       btrfs_wait_ordered_roots(fs_info, -1);
+       btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
 
        trans = btrfs_attach_transaction_barrier(root);
        if (IS_ERR(trans)) {
index 43885e51b88299d1015d75c3eb2d138b67d2b6d8..f0bb54a773148c5acfeb2c235444da0854bcf866 100644 (file)
@@ -1821,7 +1821,7 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
 static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
 {
        if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
-               btrfs_wait_ordered_roots(fs_info, -1);
+               btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
 }
 
 static inline void