If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
to the given cgroup the descendent cgroup will be able to override
effective bpf program that was inherited from this cgroup.
By default it's not passed, therefore override is disallowed.
Examples:
1.
prog X attached to /A with default
prog Y fails to attach to /A/B and /A/B/C
Everything under /A runs prog X
2.
prog X attached to /A with allow_override.
prog Y fails to attach to /A/B with default (non-override)
prog M attached to /A/B with allow_override.
Everything under /A/B runs prog M only.
3.
prog X attached to /A with allow_override.
prog Y fails to attach to /A with default.
The user has to detach first to switch the mode.
In the future this behavior may be extended with a chain of
non-overridable programs.
Also fix the bug where detach from cgroup where nothing is attached
was not throwing error. Return ENOENT in such case.
Add several testcases and adjust libbpf.
Fixes:
3007098494be ("cgroup: add support for eBPF programs")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Daniel Mack <daniel@zonque.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
*/
struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE];
struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE];
+ bool disallow_override[MAX_BPF_ATTACH_TYPE];
};
void cgroup_bpf_put(struct cgroup *cgrp);
void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent);
-void __cgroup_bpf_update(struct cgroup *cgrp,
- struct cgroup *parent,
- struct bpf_prog *prog,
- enum bpf_attach_type type);
+int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
+ struct bpf_prog *prog, enum bpf_attach_type type,
+ bool overridable);
/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */
-void cgroup_bpf_update(struct cgroup *cgrp,
- struct bpf_prog *prog,
- enum bpf_attach_type type);
+int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
+ enum bpf_attach_type type, bool overridable);
int __cgroup_bpf_run_filter_skb(struct sock *sk,
struct sk_buff *skb,
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
+ * to the given target_fd cgroup the descendent cgroup will be able to
+ * override effective bpf program that was inherited from this cgroup
+ */
+#define BPF_F_ALLOW_OVERRIDE (1U << 0)
+
#define BPF_PSEUDO_MAP_FD 1
/* flags for BPF_MAP_UPDATE_ELEM command */
__u32 target_fd; /* container object to attach to */
__u32 attach_bpf_fd; /* eBPF program to attach */
__u32 attach_type;
+ __u32 attach_flags;
};
} __attribute__((aligned(8)));
e = rcu_dereference_protected(parent->bpf.effective[type],
lockdep_is_held(&cgroup_mutex));
rcu_assign_pointer(cgrp->bpf.effective[type], e);
+ cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type];
}
}
*
* Must be called with cgroup_mutex held.
*/
-void __cgroup_bpf_update(struct cgroup *cgrp,
- struct cgroup *parent,
- struct bpf_prog *prog,
- enum bpf_attach_type type)
+int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
+ struct bpf_prog *prog, enum bpf_attach_type type,
+ bool new_overridable)
{
- struct bpf_prog *old_prog, *effective;
+ struct bpf_prog *old_prog, *effective = NULL;
struct cgroup_subsys_state *pos;
+ bool overridable = true;
- old_prog = xchg(cgrp->bpf.prog + type, prog);
+ if (parent) {
+ overridable = !parent->bpf.disallow_override[type];
+ effective = rcu_dereference_protected(parent->bpf.effective[type],
+ lockdep_is_held(&cgroup_mutex));
+ }
+
+ if (prog && effective && !overridable)
+ /* if parent has non-overridable prog attached, disallow
+ * attaching new programs to descendent cgroup
+ */
+ return -EPERM;
+
+ if (prog && effective && overridable != new_overridable)
+ /* if parent has overridable prog attached, only
+ * allow overridable programs in descendent cgroup
+ */
+ return -EPERM;
- effective = (!prog && parent) ?
- rcu_dereference_protected(parent->bpf.effective[type],
- lockdep_is_held(&cgroup_mutex)) :
- prog;
+ old_prog = cgrp->bpf.prog[type];
+
+ if (prog) {
+ overridable = new_overridable;
+ effective = prog;
+ if (old_prog &&
+ cgrp->bpf.disallow_override[type] == new_overridable)
+ /* disallow attaching non-overridable on top
+ * of existing overridable in this cgroup
+ * and vice versa
+ */
+ return -EPERM;
+ }
+
+ if (!prog && !old_prog)
+ /* report error when trying to detach and nothing is attached */
+ return -ENOENT;
+
+ cgrp->bpf.prog[type] = prog;
css_for_each_descendant_pre(pos, &cgrp->self) {
struct cgroup *desc = container_of(pos, struct cgroup, self);
/* skip the subtree if the descendant has its own program */
- if (desc->bpf.prog[type] && desc != cgrp)
+ if (desc->bpf.prog[type] && desc != cgrp) {
pos = css_rightmost_descendant(pos);
- else
+ } else {
rcu_assign_pointer(desc->bpf.effective[type],
effective);
+ desc->bpf.disallow_override[type] = !overridable;
+ }
}
if (prog)
bpf_prog_put(old_prog);
static_branch_dec(&cgroup_bpf_enabled_key);
}
+ return 0;
}
/**
#ifdef CONFIG_CGROUP_BPF
-#define BPF_PROG_ATTACH_LAST_FIELD attach_type
+#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
static int bpf_prog_attach(const union bpf_attr *attr)
{
+ enum bpf_prog_type ptype;
struct bpf_prog *prog;
struct cgroup *cgrp;
- enum bpf_prog_type ptype;
+ int ret;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
if (CHECK_ATTR(BPF_PROG_ATTACH))
return -EINVAL;
+ if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
+ return -EINVAL;
+
switch (attr->attach_type) {
case BPF_CGROUP_INET_INGRESS:
case BPF_CGROUP_INET_EGRESS:
return PTR_ERR(cgrp);
}
- cgroup_bpf_update(cgrp, prog, attr->attach_type);
+ ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
+ attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
+ if (ret)
+ bpf_prog_put(prog);
cgroup_put(cgrp);
- return 0;
+ return ret;
}
#define BPF_PROG_DETACH_LAST_FIELD attach_type
static int bpf_prog_detach(const union bpf_attr *attr)
{
struct cgroup *cgrp;
+ int ret;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
if (IS_ERR(cgrp))
return PTR_ERR(cgrp);
- cgroup_bpf_update(cgrp, NULL, attr->attach_type);
+ ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
cgroup_put(cgrp);
break;
return -EINVAL;
}
- return 0;
+ return ret;
}
#endif /* CONFIG_CGROUP_BPF */
subsys_initcall(cgroup_namespaces_init);
#ifdef CONFIG_CGROUP_BPF
-void cgroup_bpf_update(struct cgroup *cgrp,
- struct bpf_prog *prog,
- enum bpf_attach_type type)
+int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
+ enum bpf_attach_type type, bool overridable)
{
struct cgroup *parent = cgroup_parent(cgrp);
+ int ret;
mutex_lock(&cgroup_mutex);
- __cgroup_bpf_update(cgrp, parent, prog, type);
+ ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable);
mutex_unlock(&cgroup_mutex);
+ return ret;
}
#endif /* CONFIG_CGROUP_BPF */
return EXIT_FAILURE;
}
- ret = bpf_prog_attach(prog_fd, cg_fd, type);
+ ret = bpf_prog_attach(prog_fd, cg_fd, type, 0);
if (ret < 0) {
printf("Failed to attach prog to cgroup: '%s'\n",
strerror(errno));
if (join_cgroup(FOO))
goto err;
- if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS)) {
+ if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) {
log_err("Attaching prog to /foo");
goto err;
}
+ printf("Attached DROP prog. This ping in cgroup /foo should fail...\n");
assert(system(PING_CMD) != 0);
/* Create cgroup /foo/bar, get fd, and join it */
if (join_cgroup(BAR))
goto err;
+ printf("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n");
assert(system(PING_CMD) != 0);
- if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS)) {
+ if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
log_err("Attaching prog to /foo/bar");
goto err;
}
+ printf("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n");
assert(system(PING_CMD) == 0);
-
if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
log_err("Detaching program from /foo/bar");
goto err;
}
+ printf("Detached PASS from /foo/bar while DROP is attached to /foo.\n"
+ "This ping in cgroup /foo/bar should fail...\n");
assert(system(PING_CMD) != 0);
- if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS)) {
+ if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
log_err("Attaching prog to /foo/bar");
goto err;
}
goto err;
}
+ printf("Attached PASS from /foo/bar and detached DROP from /foo.\n"
+ "This ping in cgroup /foo/bar should pass...\n");
assert(system(PING_CMD) == 0);
+ if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
+ log_err("Attaching prog to /foo/bar");
+ goto err;
+ }
+
+ if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
+ errno = 0;
+ log_err("Unexpected success attaching prog to /foo/bar");
+ goto err;
+ }
+
+ if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
+ log_err("Detaching program from /foo/bar");
+ goto err;
+ }
+
+ if (!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) {
+ errno = 0;
+ log_err("Unexpected success in double detach from /foo");
+ goto err;
+ }
+
+ if (bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
+ log_err("Attaching non-overridable prog to /foo");
+ goto err;
+ }
+
+ if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
+ errno = 0;
+ log_err("Unexpected success attaching non-overridable prog to /foo/bar");
+ goto err;
+ }
+
+ if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) {
+ errno = 0;
+ log_err("Unexpected success attaching overridable prog to /foo/bar");
+ goto err;
+ }
+
+ if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) {
+ errno = 0;
+ log_err("Unexpected success attaching overridable prog to /foo");
+ goto err;
+ }
+
+ if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
+ log_err("Attaching different non-overridable prog to /foo");
+ goto err;
+ }
+
goto out;
err:
close(foo);
close(bar);
cleanup_cgroup_environment();
+ if (!rc)
+ printf("PASS\n");
+ else
+ printf("FAIL\n");
return rc;
}
return EXIT_FAILURE;
}
- ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE);
+ ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE, 0);
if (ret < 0) {
printf("Failed to attach prog to cgroup: '%s'\n",
strerror(errno));
}
ret = bpf_prog_attach(prog_fd[filter_id], cg_fd,
- BPF_CGROUP_INET_SOCK_CREATE);
+ BPF_CGROUP_INET_SOCK_CREATE, 0);
if (ret < 0) {
printf("Failed to attach prog to cgroup: '%s'\n",
strerror(errno));
return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
}
-int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type)
+int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
+ unsigned int flags)
{
union bpf_attr attr;
attr.target_fd = target_fd;
attr.attach_bpf_fd = prog_fd;
attr.attach_type = type;
+ attr.attach_flags = flags;
return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
}
int bpf_map_get_next_key(int fd, void *key, void *next_key);
int bpf_obj_pin(int fd, const char *pathname);
int bpf_obj_get(const char *pathname);
-int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type);
+int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type,
+ unsigned int flags);
int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);