Currently, for prealloc or delalloc extents, the bytes are accounted
in space_info by three counters:
space_info->bytes_may_use --- space_info->reserved --- space_info->used.
In qgroup, however, there are only two counters accounting these
bytes: qgroup->reserved and qgroup->excl. qg->reserved accounts the
bytes in space_info->bytes_may_use, and qg->excl accounts the bytes in
space_info->used, so the bytes in space_info->reserved are not
accounted in qgroup at all. This leaves a window in which the quota
limit can be exceeded while bytes sit in space_info->reserved.
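For illustration only, here is a minimal, self-contained userspace
sketch of that window. It is not btrfs code and all struct/function
names are hypothetical; it merely models the three space_info stages
and the two pre-patch qgroup counters:

#include <stdio.h>

struct space_info_model {
	long long bytes_may_use;	/* reservation made, extent not yet allocated */
	long long reserved;		/* extent allocated, not yet accounted */
	long long used;			/* extent accounted as used */
};

struct qgroup_model {
	long long reserved;	/* mirrors space_info->bytes_may_use */
	long long excl;		/* mirrors space_info->used */
	long long max_excl;
};

/* Quota check before the patch: only reserved + excl are examined. */
static int quota_check(const struct qgroup_model *qg, long long num_bytes)
{
	return qg->reserved + qg->excl + num_bytes > qg->max_excl ? -1 : 0;
}

int main(void)
{
	struct space_info_model si = { 0, 0, 0 };
	struct qgroup_model qg = { 0, 0, 10 * 1024 * 1024 };
	long long chunk = 1024 * 1024;
	int i, ok = 0;

	for (i = 0; i < 20; i++) {
		if (quota_check(&qg, chunk))
			break;
		/*
		 * fallocate-style path in this model: the bytes move into
		 * the space_info->reserved stage, which neither qgroup
		 * counter tracks, so the next check still sees zero usage.
		 */
		si.reserved += chunk;
		ok++;
	}
	printf("allocated %d MiB against a 10 MiB limit\n", ok);
	return 0;
}

Run as a normal userspace program, it reports 20 MiB allocated against
a 10 MiB limit, mirroring the behaviour shown in the example below.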
Example:
# btrfs quota enable /mnt
# btrfs qgroup limit -e 10M /mnt
# for((i=0;i<20;i++));do fallocate -l 1M /mnt/data$i; done
# sync
# btrfs qgroup show -pcre /mnt
qgroupid     rfer         excl         max_rfer     max_excl     parent  child
--------     ----         ----         --------     --------     ------  -----
0/5          20987904     20987904     0            10485760     ---     ---

Here qg->excl (20987904) is larger than max_excl (10485760).
This patch introduces a new counter named may_use in qgroup, so that
there are three counters in qgroup accounting the bytes in space_info,
as shown below:
space_info->bytes_may_use --- space_info->reserved --- space_info->used.
qgroup->may_use --- qgroup->reserved --- qgroup->excl
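Under the same simplified, hypothetical userspace model as above (again
not btrfs code), the limit check now sums all three counters, so bytes
sitting in the reserved stage count against the limit:

#include <stdio.h>

struct qgroup_model {
	long long may_use;	/* mirrors space_info->bytes_may_use */
	long long reserved;	/* mirrors space_info->reserved */
	long long excl;		/* mirrors space_info->used */
	long long max_excl;
};

/* Limit check with the new counter: all three stages are summed. */
static int quota_check(const struct qgroup_model *qg, long long num_bytes)
{
	return qg->may_use + qg->reserved + qg->excl + num_bytes >
	       qg->max_excl ? -1 : 0;
}

int main(void)
{
	struct qgroup_model qg = { 0, 0, 0, 10 * 1024 * 1024 };
	long long chunk = 1024 * 1024;
	int i, ok = 0;

	for (i = 0; i < 20; i++) {
		if (quota_check(&qg, chunk))
			break;	/* -EDQUOT in the real code */
		/* Bytes in the reserved stage are now tracked, so later
		 * checks see them and the loop stops at the limit. */
		qg.reserved += chunk;
		ok++;
	}
	printf("allocated %d MiB against a 10 MiB limit\n", ok);
	return 0;
}

This model stops at exactly 10 MiB; the real run below stops slightly
earlier because metadata also consumes quota.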
With this patch applied:
# btrfs quota enable /mnt
# btrfs qgroup limit -e 10M /mnt
# for((i=0;i<20;i++));do fallocate -l 1M /mnt/data$i; done
fallocate: /mnt/data9: fallocate failed: Disk quota exceeded
fallocate: /mnt/data10: fallocate failed: Disk quota exceeded
fallocate: /mnt/data11: fallocate failed: Disk quota exceeded
fallocate: /mnt/data12: fallocate failed: Disk quota exceeded
fallocate: /mnt/data13: fallocate failed: Disk quota exceeded
fallocate: /mnt/data14: fallocate failed: Disk quota exceeded
fallocate: /mnt/data15: fallocate failed: Disk quota exceeded
fallocate: /mnt/data16: fallocate failed: Disk quota exceeded
fallocate: /mnt/data17: fallocate failed: Disk quota exceeded
fallocate: /mnt/data18: fallocate failed: Disk quota exceeded
fallocate: /mnt/data19: fallocate failed: Disk quota exceeded
# sync
# btrfs qgroup show -pcre /mnt
qgroupid     rfer         excl         max_rfer     max_excl     parent  child
--------     ----         ----         --------     --------     ------  -----
0/5          9453568      9453568      0            10485760     ---     ---
Reported-by: Cyril SCETBON <cyril.scetbon@free.fr>
Signed-off-by: Dongsheng Yang <yangds.fnst@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>
set_extent_dirty(root->fs_info->pinned_extents, bytenr,
bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
- if (reserved)
+ if (reserved) {
+ btrfs_qgroup_update_reserved_bytes(root->fs_info,
+ root->root_key.objectid,
+ num_bytes, -1);
trace_btrfs_reserved_extent_free(root, bytenr, num_bytes);
+ }
return 0;
}
btrfs_put_block_group(cache);
trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
pin = 0;
+ btrfs_qgroup_update_reserved_bytes(root->fs_info,
+ root->root_key.objectid,
+ buf->len, -1);
}
out:
if (pin)
ret = btrfs_discard_extent(root, start, len, NULL);
btrfs_add_free_space(cache, start, len);
btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
+ btrfs_qgroup_update_reserved_bytes(root->fs_info,
+ root->root_key.objectid,
+ len, -1);
}
+
btrfs_put_block_group(cache);
trace_btrfs_reserved_extent_free(root, start, len);
BUG_ON(ret); /* logic error */
ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
0, owner, offset, ins, 1);
+ btrfs_qgroup_update_reserved_bytes(root->fs_info,
+ root->root_key.objectid,
+ ins->offset, 1);
btrfs_put_block_group(block_group);
return ret;
}
return ERR_PTR(ret);
}
+ btrfs_qgroup_update_reserved_bytes(root->fs_info,
+ root_objectid,
+ ins.offset, 1);
+
buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
BUG_ON(IS_ERR(buf)); /* -ENOMEM */
#include "backref.h"
#include "hash.h"
#include "props.h"
+#include "qgroup.h"
struct btrfs_iget_args {
struct btrfs_key *location;
}
goto out_free;
}
-
+ btrfs_qgroup_update_reserved_bytes(root->fs_info,
+ root->root_key.objectid,
+ ins.offset, 1);
/*
* here we're doing allocation and writeback of the
* compressed pages
if (ret < 0)
goto out_unlock;
+ btrfs_qgroup_update_reserved_bytes(root->fs_info,
+ root->root_key.objectid,
+ ins.offset, 1);
+
em = alloc_extent_map();
if (!em) {
ret = -ENOMEM;
return ERR_PTR(ret);
}
+ btrfs_qgroup_update_reserved_bytes(root->fs_info,
+ root->root_key.objectid,
+ ins.offset, 1);
+
return em;
}
btrfs_end_transaction(trans, root);
break;
}
+
+ btrfs_qgroup_update_reserved_bytes(root->fs_info,
+ root->root_key.objectid,
+ ins.offset, 1);
+
btrfs_drop_extent_cache(inode, cur_offset,
cur_offset + ins.offset -1, 0);
/*
* reservation tracking
*/
+ u64 may_use;
u64 reserved;
/*
WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
qgroup->excl += sign * oper->num_bytes;
qgroup->excl_cmpr += sign * oper->num_bytes;
+ if (sign > 0)
+ qgroup->reserved -= oper->num_bytes;
qgroup_dirty(fs_info, qgroup);
qgroup->rfer_cmpr += sign * oper->num_bytes;
WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
qgroup->excl += sign * oper->num_bytes;
+ if (sign > 0)
+ qgroup->reserved -= oper->num_bytes;
qgroup->excl_cmpr += sign * oper->num_bytes;
qgroup_dirty(fs_info, qgroup);
return ret;
}
+int btrfs_qgroup_update_reserved_bytes(struct btrfs_fs_info *fs_info,
+ u64 ref_root,
+ u64 num_bytes,
+ int sign)
+{
+ struct btrfs_root *quota_root;
+ struct btrfs_qgroup *qgroup;
+ int ret = 0;
+ struct ulist_node *unode;
+ struct ulist_iterator uiter;
+
+ if (!is_fstree(ref_root) || !fs_info->quota_enabled)
+ return 0;
+
+ if (num_bytes == 0)
+ return 0;
+
+ spin_lock(&fs_info->qgroup_lock);
+ quota_root = fs_info->quota_root;
+ if (!quota_root)
+ goto out;
+
+ qgroup = find_qgroup_rb(fs_info, ref_root);
+ if (!qgroup)
+ goto out;
+
+ ulist_reinit(fs_info->qgroup_ulist);
+ ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
+ (uintptr_t)qgroup, GFP_ATOMIC);
+ if (ret < 0)
+ goto out;
+
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
+ struct btrfs_qgroup *qg;
+ struct btrfs_qgroup_list *glist;
+
+ qg = u64_to_ptr(unode->aux);
+
+ qg->reserved += sign * num_bytes;
+
+ list_for_each_entry(glist, &qg->groups, next_group) {
+ ret = ulist_add(fs_info->qgroup_ulist,
+ glist->group->qgroupid,
+ (uintptr_t)glist->group, GFP_ATOMIC);
+ if (ret < 0)
+ goto out;
+ }
+ }
+
+out:
+ spin_unlock(&fs_info->qgroup_lock);
+ return ret;
+}
+
/*
* reserve some space for a qgroup and all its parents. The reservation takes
* place with start_transaction or dealloc_reserve, similar to ENOSPC
qg = u64_to_ptr(unode->aux);
if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
- qg->reserved + (s64)qg->rfer + num_bytes >
+ qg->reserved + qg->may_use + (s64)qg->rfer + num_bytes >
qg->max_rfer) {
ret = -EDQUOT;
goto out;
}
if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
- qg->reserved + (s64)qg->excl + num_bytes >
+ qg->reserved + qg->may_use + (s64)qg->excl + num_bytes >
qg->max_excl) {
ret = -EDQUOT;
goto out;
qg = u64_to_ptr(unode->aux);
- qg->reserved += num_bytes;
+ qg->may_use += num_bytes;
}
out:
qg = u64_to_ptr(unode->aux);
- qg->reserved -= num_bytes;
+ qg->may_use -= num_bytes;
list_for_each_entry(glist, &qg->groups, next_group) {
ret = ulist_add(fs_info->qgroup_ulist,
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
struct btrfs_qgroup_inherit *inherit);
+int btrfs_qgroup_update_reserved_bytes(struct btrfs_fs_info *fs_info,
+ u64 ref_root,
+ u64 num_bytes,
+ int sign);
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);