Steps to reproduce:
while true; do
dd if=/dev/zero of=/btrfs_dir/file count=[fs_size * 75%]
rm /btrfs_dir/file
sync
done
And we'll see dd failed because btrfs return NO_SPACE.
Reason:
Normally, btrfs_commit_transaction() call btrfs_run_delayed_iputs()
in end to free fs space for next write, but sometimes it hadn't
done work on time, because btrfs-cleaner thread get delayed-iputs
from list before, but do iput() after next write.
This is log:
[ 2569.050776] comm=btrfs-cleaner func=btrfs_evict_inode() begin
[ 2569.084280] comm=sync func=btrfs_commit_transaction() call btrfs_run_delayed_iputs()
[ 2569.085418] comm=sync func=btrfs_commit_transaction() done btrfs_run_delayed_iputs()
[ 2569.087554] comm=sync func=btrfs_commit_transaction() end
[ 2569.191081] comm=dd begin
[ 2569.790112] comm=dd func=__btrfs_buffered_write() ret=-28
[ 2569.847479] comm=btrfs-cleaner func=add_pinned_bytes() 0 +
32677888 =
32677888
[ 2569.849530] comm=btrfs-cleaner func=add_pinned_bytes()
32677888 +
23834624 =
56512512
...
[ 2569.903893] comm=btrfs-cleaner func=add_pinned_bytes()
943976448 +
21762048 =
965738496
[ 2569.908270] comm=btrfs-cleaner func=btrfs_evict_inode() end
Fix:
Make btrfs_commit_transaction() wait current running btrfs-cleaner's
delayed-iputs() done in end.
Test:
Use script similar to above(more complex),
before patch:
7 failed in 100 * 20 loop.
after patch:
0 failed in 100 * 20 loop.
Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>
spinlock_t delayed_iput_lock;
struct list_head delayed_iputs;
+ struct rw_semaphore delayed_iput_sem;
/* this protects tree_mod_seq_list */
spinlock_t tree_mod_seq_lock;
spin_lock_init(&fs_info->qgroup_op_lock);
spin_lock_init(&fs_info->buffer_lock);
spin_lock_init(&fs_info->unused_bgs_lock);
- mutex_init(&fs_info->unused_bg_unpin_mutex);
rwlock_init(&fs_info->tree_mod_log_lock);
+ mutex_init(&fs_info->unused_bg_unpin_mutex);
mutex_init(&fs_info->reloc_mutex);
mutex_init(&fs_info->delalloc_root_mutex);
seqlock_init(&fs_info->profiles_lock);
+ init_rwsem(&fs_info->delayed_iput_sem);
init_completion(&fs_info->kobj_unregister);
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
ret = btrfs_commit_transaction(trans, root);
if (ret)
return ret;
+ /*
+ * make sure that all running delayed iput are
+ * done
+ */
+ down_write(&root->fs_info->delayed_iput_sem);
+ up_write(&root->fs_info->delayed_iput_sem);
goto again;
} else {
btrfs_end_transaction(trans, root);
if (empty)
return;
+ down_read(&fs_info->delayed_iput_sem);
+
spin_lock(&fs_info->delayed_iput_lock);
list_splice_init(&fs_info->delayed_iputs, &list);
spin_unlock(&fs_info->delayed_iput_lock);
iput(delayed->inode);
kfree(delayed);
}
+
+ up_read(&root->fs_info->delayed_iput_sem);
}
/*