unsigned int update_uuid_tree_gen:1;
};
+struct btrfs_subvolume_writers {
+ struct percpu_counter counter;
+ wait_queue_head_t wait;
+};
+
/*
* in ram representation of the tree. extent_root is used for all allocations
* and for the extent tree extent_root root.
* manipulation with the read-only status via SUBVOL_SETFLAGS
*/
int send_in_progress;
+ struct btrfs_subvolume_writers *subv_writers;
+ atomic_t will_be_snapshoted;
};
struct btrfs_ioctl_defrag_range_args {
int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int __get_raid_index(u64 flags);
+
+int btrfs_start_nocow_write(struct btrfs_root *root);
+void btrfs_end_nocow_write(struct btrfs_root *root);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
}
}
+static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void)
+{
+ struct btrfs_subvolume_writers *writers;
+ int ret;
+
+ writers = kmalloc(sizeof(*writers), GFP_NOFS);
+ if (!writers)
+ return ERR_PTR(-ENOMEM);
+
+ ret = percpu_counter_init(&writers->counter, 0);
+ if (ret < 0) {
+ kfree(writers);
+ return ERR_PTR(ret);
+ }
+
+ init_waitqueue_head(&writers->wait);
+ return writers;
+}
+
+static void
+btrfs_free_subvolume_writers(struct btrfs_subvolume_writers *writers)
+{
+ percpu_counter_destroy(&writers->counter);
+ kfree(writers);
+}
+
static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
u32 stripesize, struct btrfs_root *root,
struct btrfs_fs_info *fs_info,
atomic_set(&root->log_batch, 0);
atomic_set(&root->orphan_inodes, 0);
atomic_set(&root->refs, 1);
+ atomic_set(&root->will_be_snapshoted, 0);
root->log_transid = 0;
root->log_transid_committed = -1;
root->last_log_commit = 0;
int btrfs_init_fs_root(struct btrfs_root *root)
{
int ret;
+ struct btrfs_subvolume_writers *writers;
root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
goto fail;
}
+ writers = btrfs_alloc_subvolume_writers();
+ if (IS_ERR(writers)) {
+ ret = PTR_ERR(writers);
+ goto fail;
+ }
+ root->subv_writers = writers;
+
btrfs_init_free_ino_ctl(root);
mutex_init(&root->fs_commit_mutex);
spin_lock_init(&root->cache_lock);
ret = get_anon_bdev(&root->anon_dev);
if (ret)
- goto fail;
+ goto free_writers;
return 0;
+
+free_writers:
+ btrfs_free_subvolume_writers(root->subv_writers);
fail:
kfree(root->free_ino_ctl);
kfree(root->free_ino_pinned);
root->orphan_block_rsv = NULL;
if (root->anon_dev)
free_anon_bdev(root->anon_dev);
+ if (root->subv_writers)
+ btrfs_free_subvolume_writers(root->subv_writers);
free_extent_buffer(root->node);
free_extent_buffer(root->commit_root);
kfree(root->free_ino_ctl);
range->len = trimmed;
return ret;
}
+
+/*
+ * btrfs_{start,end}_write() is similar to mnt_{want, drop}_write(),
+ * they are used to prevent the some tasks writing data into the page cache
+ * by nocow before the subvolume is snapshoted, but flush the data into
+ * the disk after the snapshot creation.
+ */
+void btrfs_end_nocow_write(struct btrfs_root *root)
+{
+ percpu_counter_dec(&root->subv_writers->counter);
+ /*
+ * Make sure counter is updated before we wake up
+ * waiters.
+ */
+ smp_mb();
+ if (waitqueue_active(&root->subv_writers->wait))
+ wake_up(&root->subv_writers->wait);
+}
+
+int btrfs_start_nocow_write(struct btrfs_root *root)
+{
+ if (unlikely(atomic_read(&root->will_be_snapshoted)))
+ return 0;
+
+ percpu_counter_inc(&root->subv_writers->counter);
+ /*
+ * Make sure counter is updated before we check for snapshot creation.
+ */
+ smp_mb();
+ if (unlikely(atomic_read(&root->will_be_snapshoted))) {
+ btrfs_end_nocow_write(root);
+ return 0;
+ }
+ return 1;
+}
u64 num_bytes;
int ret;
+ ret = btrfs_start_nocow_write(root);
+ if (!ret)
+ return -ENOSPC;
+
lockstart = round_down(pos, root->sectorsize);
lockend = round_up(pos + *write_bytes, root->sectorsize) - 1;
num_bytes = lockend - lockstart + 1;
ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL);
- if (ret <= 0)
+ if (ret <= 0) {
ret = 0;
- else
+ btrfs_end_nocow_write(root);
+ } else {
*write_bytes = min_t(size_t, *write_bytes ,
num_bytes - pos + lockstart);
+ }
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
if (!only_release_metadata)
btrfs_free_reserved_data_space(inode,
reserve_bytes);
+ else
+ btrfs_end_nocow_write(root);
break;
}
}
release_bytes = 0;
+ if (only_release_metadata)
+ btrfs_end_nocow_write(root);
+
if (only_release_metadata && copied > 0) {
u64 lockstart = round_down(pos, root->sectorsize);
u64 lockend = lockstart +
kfree(pages);
if (release_bytes) {
- if (only_release_metadata)
+ if (only_release_metadata) {
+ btrfs_end_nocow_write(root);
btrfs_delalloc_release_metadata(inode, release_bytes);
- else
+ } else {
btrfs_delalloc_release_space(inode, release_bytes);
+ }
}
return num_written ? num_written : ret;
return ret;
}
+static void btrfs_wait_nocow_write(struct btrfs_root *root)
+{
+ s64 writers;
+ DEFINE_WAIT(wait);
+
+ do {
+ prepare_to_wait(&root->subv_writers->wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+
+ writers = percpu_counter_sum(&root->subv_writers->counter);
+ if (writers)
+ schedule();
+
+ finish_wait(&root->subv_writers->wait, &wait);
+ } while (writers);
+}
+
static int create_snapshot(struct btrfs_root *root, struct inode *dir,
struct dentry *dentry, char *name, int namelen,
u64 *async_transid, bool readonly,
if (!root->ref_cows)
return -EINVAL;
+ atomic_inc(&root->will_be_snapshoted);
+ smp_mb__after_atomic_inc();
+ btrfs_wait_nocow_write(root);
+
ret = btrfs_start_delalloc_inodes(root, 0);
if (ret)
- return ret;
+ goto out;
btrfs_wait_ordered_extents(root, -1);
pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
- if (!pending_snapshot)
- return -ENOMEM;
+ if (!pending_snapshot) {
+ ret = -ENOMEM;
+ goto out;
+ }
btrfs_init_block_rsv(&pending_snapshot->block_rsv,
BTRFS_BLOCK_RSV_TEMP);
&pending_snapshot->qgroup_reserved,
false);
if (ret)
- goto out;
+ goto free;
pending_snapshot->dentry = dentry;
pending_snapshot->root = root;
btrfs_subvolume_release_metadata(BTRFS_I(dir)->root,
&pending_snapshot->block_rsv,
pending_snapshot->qgroup_reserved);
-out:
+free:
kfree(pending_snapshot);
+out:
+ atomic_dec(&root->will_be_snapshoted);
return ret;
}