bpf: introduce bpf_perf_event_output() helper
author Alexei Starovoitov <ast@plumgrid.com>
Wed, 21 Oct 2015 03:02:34 +0000 (20:02 -0700)
committer David S. Miller <davem@davemloft.net>
Thu, 22 Oct 2015 13:42:15 +0000 (06:42 -0700)
This helper is used to send raw data from an eBPF program into a special
PERF_TYPE_SOFTWARE/PERF_COUNT_SW_BPF_OUTPUT perf_event. User space needs
to perf_event_open() it (either for one CPU or for all CPUs) and store the
FD into a perf_event_array map (similar to the bpf_perf_event_read() helper)
before the eBPF program can send data into it.
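
A minimal user space sketch of that setup, using raw syscalls with one event
per CPU (the function names and the map FD plumbing are assumptions for
illustration, not part of this patch); per-CPU events match the helper, which
only writes into an event bound to the current CPU:

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include <linux/bpf.h>

/* hypothetical setup helpers; error handling omitted for brevity */
static int open_bpf_output_event(int cpu)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_SOFTWARE,
		.config		= PERF_COUNT_SW_BPF_OUTPUT,
		.sample_type	= PERF_SAMPLE_RAW,	/* deliver the raw bytes */
		.size		= sizeof(attr),
		.wakeup_events	= 1,
	};

	/* pid = -1, group_fd = -1, flags = 0: this cpu, any task */
	return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
}

/* store the event FD at slot 'cpu' of a BPF_MAP_TYPE_PERF_EVENT_ARRAY map */
static int store_event_fd(int map_fd, int cpu, int event_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key    = (__u64)(unsigned long)&cpu;
	attr.value  = (__u64)(unsigned long)&event_fd;
	attr.flags  = BPF_ANY;

	return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}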

Today, programs triggered by kprobes collect data and either store it in
maps or print it via bpf_trace_printk(); the latter is a debug facility and
is not suitable for streaming data. This new helper replaces such
bpf_trace_printk() usage and gives programs a dedicated channel into user
space for post-processing of the collected raw data.
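
Sketch of the program side using the new helper, in the style of the existing
samples (the SEC() macro, struct bpf_map_def and the loader that consumes the
"maps" and "kprobe/" sections are conventions assumed from samples/bpf, not
something this patch adds):

#include <uapi/linux/bpf.h>
#include <uapi/linux/ptrace.h>

#define SEC(name) __attribute__((section(name), used))

/* helper prototypes resolved by helper id, samples/bpf style */
static int (*bpf_perf_event_output)(void *ctx, void *map, int index,
				    void *data, int size) =
	(void *) BPF_FUNC_perf_event_output;
static int (*bpf_get_smp_processor_id)(void) =
	(void *) BPF_FUNC_get_smp_processor_id;

struct bpf_map_def {
	unsigned int type;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;
};

struct bpf_map_def SEC("maps") events = {
	.type		= BPF_MAP_TYPE_PERF_EVENT_ARRAY,
	.key_size	= sizeof(int),
	.value_size	= sizeof(__u32),	/* holds a perf event FD */
	.max_entries	= 64,			/* one slot per possible CPU */
};

SEC("kprobe/sys_write")
int trace_sys_write(struct pt_regs *ctx)
{
	struct {
		__u64 cookie;
	} data = { .cookie = 0x12345678 };

	/* the raw bytes of 'data' appear as a PERF_RECORD_SAMPLE in the
	 * event stored at the current CPU's slot of the 'events' map
	 */
	bpf_perf_event_output(ctx, &events, bpf_get_smp_processor_id(),
			      &data, sizeof(data));
	return 0;
}

char _license[] SEC("license") = "GPL";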

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/uapi/linux/bpf.h
include/uapi/linux/perf_event.h
kernel/bpf/arraymap.c
kernel/bpf/verifier.c
kernel/trace/bpf_trace.c

include/uapi/linux/bpf.h
index 564f1f091991b7f1f7b40f06c433b7a663787103..2e032426cfb78c34c3e795d230e9120e1c6a168b 100644
@@ -287,6 +287,17 @@ enum bpf_func_id {
         * Return: realm if != 0
         */
        BPF_FUNC_get_route_realm,
+
+       /**
+        * bpf_perf_event_output(ctx, map, index, data, size) - output perf raw sample
+        * @ctx: struct pt_regs*
+        * @map: pointer to perf_event_array map
+        * @index: index of event in the map
+        * @data: data on stack to be output as raw data
+        * @size: size of data
+        * Return: 0 on success
+        */
+       BPF_FUNC_perf_event_output,
        __BPF_FUNC_MAX_ID,
 };
 
include/uapi/linux/perf_event.h
index 2881145cda86cda91621da082ebec83ae490f3c5..d3c4176153611c25a3f3baf091fde5738b7078ee 100644
@@ -110,6 +110,7 @@ enum perf_sw_ids {
        PERF_COUNT_SW_ALIGNMENT_FAULTS          = 7,
        PERF_COUNT_SW_EMULATION_FAULTS          = 8,
        PERF_COUNT_SW_DUMMY                     = 9,
+       PERF_COUNT_SW_BPF_OUTPUT                = 10,
 
        PERF_COUNT_SW_MAX,                      /* non-ABI */
 };
kernel/bpf/arraymap.c
index f2d9e698c7538e61fa8dec26465608643770e06a..e3cfe46b074f38379136c1cf147a648959af12da 100644
@@ -295,6 +295,8 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
                return (void *)attr;
 
        if (attr->type != PERF_TYPE_RAW &&
+           !(attr->type == PERF_TYPE_SOFTWARE &&
+             attr->config == PERF_COUNT_SW_BPF_OUTPUT) &&
            attr->type != PERF_TYPE_HARDWARE) {
                perf_event_release_kernel(event);
                return ERR_PTR(-EINVAL);
kernel/bpf/verifier.c
index 1d6b97be79e1dd6000e626a74bf001e6cab5835d..b56cf51f8d426ceec23ef5e03f24fd19e58814c7 100644
@@ -245,6 +245,7 @@ static const struct {
 } func_limit[] = {
        {BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call},
        {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read},
+       {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output},
 };
 
 static void print_verifier_state(struct verifier_env *env)
@@ -910,7 +911,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
                 * don't allow any other map type to be passed into
                 * the special func;
                 */
-               if (bool_map != bool_func)
+               if (bool_func && bool_map != bool_func)
                        return -EINVAL;
        }
 
kernel/trace/bpf_trace.c
index 0fe96c7c8803c759b963411198fa18153a539f46..47febbe7998e42a64616d47a52eeeca98edac8e9 100644
@@ -215,6 +215,50 @@ const struct bpf_func_proto bpf_perf_event_read_proto = {
        .arg2_type      = ARG_ANYTHING,
 };
 
+static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
+{
+       struct pt_regs *regs = (struct pt_regs *) (long) r1;
+       struct bpf_map *map = (struct bpf_map *) (long) r2;
+       struct bpf_array *array = container_of(map, struct bpf_array, map);
+       void *data = (void *) (long) r4;
+       struct perf_sample_data sample_data;
+       struct perf_event *event;
+       struct perf_raw_record raw = {
+               .size = size,
+               .data = data,
+       };
+
+       if (unlikely(index >= array->map.max_entries))
+               return -E2BIG;
+
+       event = (struct perf_event *)array->ptrs[index];
+       if (unlikely(!event))
+               return -ENOENT;
+
+       if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
+                    event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
+               return -EINVAL;
+
+       if (unlikely(event->oncpu != smp_processor_id()))
+               return -EOPNOTSUPP;
+
+       perf_sample_data_init(&sample_data, 0, 0);
+       sample_data.raw = &raw;
+       perf_event_output(event, &sample_data, regs);
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_perf_event_output_proto = {
+       .func           = bpf_perf_event_output,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_CONST_MAP_PTR,
+       .arg3_type      = ARG_ANYTHING,
+       .arg4_type      = ARG_PTR_TO_STACK,
+       .arg5_type      = ARG_CONST_STACK_SIZE,
+};
+
 static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
 {
        switch (func_id) {
@@ -242,6 +286,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
                return &bpf_get_smp_processor_id_proto;
        case BPF_FUNC_perf_event_read:
                return &bpf_perf_event_read_proto;
+       case BPF_FUNC_perf_event_output:
+               return &bpf_perf_event_output_proto;
        default:
                return NULL;
        }
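
For completeness, a rough sketch of the receive side, assuming the event set
up earlier (sample_type = PERF_SAMPLE_RAW) was mmap()ed with one metadata page
plus a power-of-two number of data pages. The record layout should match what
perf_event_output() emits for a raw-only sample; the struct name and the
no-wrap-around simplification are assumptions for illustration:

#include <stdio.h>
#include <unistd.h>
#include <linux/perf_event.h>

/* one PERF_RECORD_SAMPLE as emitted for a raw-only sample_type */
struct bpf_output_sample {
	struct perf_event_header header;
	__u32 size;
	char data[];
};

/* drain one CPU's ring buffer; 'base' is the mmap()ed area,
 * 'data_size' is the power-of-two size of the data pages
 */
static void drain_ring(void *base, unsigned long data_size)
{
	struct perf_event_mmap_page *meta = base;
	void *data = base + sysconf(_SC_PAGESIZE);
	__u64 head = meta->data_head;

	__sync_synchronize();		/* pair with the kernel's write barrier */

	while (meta->data_tail != head) {
		/* simplified: assumes a record never wraps past the end of
		 * the data area; a real reader copies wrapped records out
		 * in two pieces
		 */
		struct bpf_output_sample *s =
			data + (meta->data_tail & (data_size - 1));

		if (s->header.type == PERF_RECORD_SAMPLE)
			printf("raw sample, %u bytes\n", s->size);

		__sync_synchronize();	/* done reading before freeing space */
		meta->data_tail += s->header.size;
	}
}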