net: bpf: make eBPF interpreter images read-only
author Daniel Borkmann <dborkman@redhat.com>
Tue, 2 Sep 2014 20:53:44 +0000 (22:53 +0200)
committer David S. Miller <davem@davemloft.net>
Fri, 5 Sep 2014 19:02:48 +0000 (12:02 -0700)
With eBPF getting more extended and exposure to user space on its way,
hardening the memory range the interpreter uses to steer its command flow
seems appropriate.  This patch moves the to-be-interpreted bytecode to
read-only pages.

In case we execute a corrupted BPF interpreter image for some reason, e.g.
caused by an attacker who got past a verifier stage, it would not only
provide arbitrary read/write memory access but arbitrary function calls
as well. After setting up the BPF interpreter image, its contents do not
change until destruction time, thus we can set up the image on pages
made immutable in order to mitigate modifications to that code. The idea
is derived from commit 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit
against spraying attacks").

This is possible because bpf_prog is not part of sk_filter anymore.
After setup bpf_prog cannot be altered during its life-time. This prevents
any modifications to the entire bpf_prog structure (incl. function/JIT
image pointer).

Every eBPF program (including classic BPF programs that are migrated) has
to call bpf_prog_select_runtime() to select either interpreter or a JIT
image as a last setup step, and they all are being freed via
bpf_prog_free(), including non-JIT. Therefore, we can easily integrate
this into the eBPF life-time, plus since we directly allocate a bpf_prog,
we have no performance penalty.

Tested with seccomp and test_bpf testsuite in JIT/non-JIT mode and manual
inspection of kernel_page_tables.  Brad Spengler proposed the same idea
via Twitter during development of this patch.

Joint work with Hannes Frederic Sowa.

Suggested-by: Brad Spengler <spender@grsecurity.net>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Kees Cook <keescook@chromium.org>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
arch/arm/net/bpf_jit_32.c
arch/mips/net/bpf_jit.c
arch/powerpc/net/bpf_jit_comp.c
arch/s390/net/bpf_jit_comp.c
arch/sparc/net/bpf_jit_comp.c
arch/x86/net/bpf_jit_comp.c
include/linux/filter.h
kernel/bpf/core.c
kernel/seccomp.c
lib/test_bpf.c
net/core/filter.c

index a37b989a2f91e302be654a7633082321e8f0c5bb..a76623bcf722b2a576469e755ce2c5f301532916 100644 (file)
@@ -930,5 +930,6 @@ void bpf_jit_free(struct bpf_prog *fp)
 {
        if (fp->jited)
                module_free(NULL, fp->bpf_func);
-       kfree(fp);
+
+       bpf_prog_unlock_free(fp);
 }
index 05a56619ece2f72044641c6aa1fd2d99f558bb69..cfa83cf2447dc6a8831c1bdf394c1a3fd8b203be 100644 (file)
@@ -1427,5 +1427,6 @@ void bpf_jit_free(struct bpf_prog *fp)
 {
        if (fp->jited)
                module_free(NULL, fp->bpf_func);
-       kfree(fp);
+
+       bpf_prog_unlock_free(fp);
 }
index 3afa6f4c195705569726ef927ddc8f392af097ac..40c53ff59124d306f111ed65ceede655ca7f6682 100644 (file)
@@ -697,5 +697,6 @@ void bpf_jit_free(struct bpf_prog *fp)
 {
        if (fp->jited)
                module_free(NULL, fp->bpf_func);
-       kfree(fp);
+
+       bpf_prog_unlock_free(fp);
 }
index 61e45b7c04d7bdf122a195903face00cfb6c2ce3..f2833c5b218a798260b89f4d2216da3569b04749 100644 (file)
@@ -887,5 +887,5 @@ void bpf_jit_free(struct bpf_prog *fp)
        module_free(NULL, header);
 
 free_filter:
-       kfree(fp);
+       bpf_prog_unlock_free(fp);
 }
index 1f76c22a6a75d64a5dafd6f24da19eec6c816e96..f7a736b645e843da002c02338aa0892965a44d42 100644 (file)
@@ -812,5 +812,6 @@ void bpf_jit_free(struct bpf_prog *fp)
 {
        if (fp->jited)
                module_free(NULL, fp->bpf_func);
-       kfree(fp);
+
+       bpf_prog_unlock_free(fp);
 }
index b08a98c59530c55d929560fd4bd9c65162e7a35c..39ccfbb4a72311e95f1f09b52a62b1fba2ab65e0 100644 (file)
@@ -972,23 +972,17 @@ out:
        kfree(addrs);
 }
 
-static void bpf_jit_free_deferred(struct work_struct *work)
+void bpf_jit_free(struct bpf_prog *fp)
 {
-       struct bpf_prog *fp = container_of(work, struct bpf_prog, work);
        unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
        struct bpf_binary_header *header = (void *)addr;
 
+       if (!fp->jited)
+               goto free_filter;
+
        set_memory_rw(addr, header->pages);
        module_free(NULL, header);
-       kfree(fp);
-}
 
-void bpf_jit_free(struct bpf_prog *fp)
-{
-       if (fp->jited) {
-               INIT_WORK(&fp->work, bpf_jit_free_deferred);
-               schedule_work(&fp->work);
-       } else {
-               kfree(fp);
-       }
+free_filter:
+       bpf_prog_unlock_free(fp);
 }
index a5227ab8ccb17ebd4cf9ed7a55f80c352a2d6fb3..c78994593355979ad4ce9fa2273e32c3b31a8065 100644 (file)
@@ -9,6 +9,11 @@
 #include <linux/skbuff.h>
 #include <linux/workqueue.h>
 #include <uapi/linux/filter.h>
+#include <asm/cacheflush.h>
+
+struct sk_buff;
+struct sock;
+struct seccomp_data;
 
 /* Internally used and optimized filter representation with extended
  * instruction set based on top of classic BPF.
@@ -320,20 +325,23 @@ struct sock_fprog_kern {
        struct sock_filter      *filter;
 };
 
-struct sk_buff;
-struct sock;
-struct seccomp_data;
+struct bpf_work_struct {
+       struct bpf_prog *prog;
+       struct work_struct work;
+};
 
 struct bpf_prog {
+       u32                     pages;          /* Number of allocated pages */
        u32                     jited:1,        /* Is our filter JIT'ed? */
                                len:31;         /* Number of filter blocks */
        struct sock_fprog_kern  *orig_prog;     /* Original BPF program */
+       struct bpf_work_struct  *work;          /* Deferred free work struct */
        unsigned int            (*bpf_func)(const struct sk_buff *skb,
                                            const struct bpf_insn *filter);
+       /* Instructions for interpreter */
        union {
                struct sock_filter      insns[0];
                struct bpf_insn         insnsi[0];
-               struct work_struct      work;
        };
 };
 
@@ -353,6 +361,26 @@ static inline unsigned int bpf_prog_size(unsigned int proglen)
 
 #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
 
+#ifdef CONFIG_DEBUG_SET_MODULE_RONX
+static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
+{
+       set_memory_ro((unsigned long)fp, fp->pages);
+}
+
+static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
+{
+       set_memory_rw((unsigned long)fp, fp->pages);
+}
+#else
+static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
+{
+}
+
+static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
+{
+}
+#endif /* CONFIG_DEBUG_SET_MODULE_RONX */
+
 int sk_filter(struct sock *sk, struct sk_buff *skb);
 
 void bpf_prog_select_runtime(struct bpf_prog *fp);
@@ -361,6 +389,17 @@ void bpf_prog_free(struct bpf_prog *fp);
 int bpf_convert_filter(struct sock_filter *prog, int len,
                       struct bpf_insn *new_prog, int *new_len);
 
+struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
+struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
+                                 gfp_t gfp_extra_flags);
+void __bpf_prog_free(struct bpf_prog *fp);
+
+static inline void bpf_prog_unlock_free(struct bpf_prog *fp)
+{
+       bpf_prog_unlock_ro(fp);
+       __bpf_prog_free(fp);
+}
+
 int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog);
 void bpf_prog_destroy(struct bpf_prog *fp);
 
@@ -450,7 +489,7 @@ static inline void bpf_jit_compile(struct bpf_prog *fp)
 
 static inline void bpf_jit_free(struct bpf_prog *fp)
 {
-       kfree(fp);
+       bpf_prog_unlock_free(fp);
 }
 #endif /* CONFIG_BPF_JIT */
 
index 7f0dbcbb34af1559657adea42b7c726f145a3986..b54bb2c2e494e086ee1e34ee39b8972fd9d6f344 100644 (file)
@@ -22,6 +22,7 @@
  */
 #include <linux/filter.h>
 #include <linux/skbuff.h>
+#include <linux/vmalloc.h>
 #include <asm/unaligned.h>
 
 /* Registers */
@@ -63,6 +64,67 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
        return NULL;
 }
 
+struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
+{
+       gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
+                         gfp_extra_flags;
+       struct bpf_work_struct *ws;
+       struct bpf_prog *fp;
+
+       size = round_up(size, PAGE_SIZE);
+       fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
+       if (fp == NULL)
+               return NULL;
+
+       ws = kmalloc(sizeof(*ws), GFP_KERNEL | gfp_extra_flags);
+       if (ws == NULL) {
+               vfree(fp);
+               return NULL;
+       }
+
+       fp->pages = size / PAGE_SIZE;
+       fp->work = ws;
+
+       return fp;
+}
+EXPORT_SYMBOL_GPL(bpf_prog_alloc);
+
+struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
+                                 gfp_t gfp_extra_flags)
+{
+       gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
+                         gfp_extra_flags;
+       struct bpf_prog *fp;
+
+       BUG_ON(fp_old == NULL);
+
+       size = round_up(size, PAGE_SIZE);
+       if (size <= fp_old->pages * PAGE_SIZE)
+               return fp_old;
+
+       fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
+       if (fp != NULL) {
+               memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
+               fp->pages = size / PAGE_SIZE;
+
+               /* We keep fp->work from fp_old around in the new
+                * reallocated structure.
+                */
+               fp_old->work = NULL;
+               __bpf_prog_free(fp_old);
+       }
+
+       return fp;
+}
+EXPORT_SYMBOL_GPL(bpf_prog_realloc);
+
+void __bpf_prog_free(struct bpf_prog *fp)
+{
+       kfree(fp->work);
+       vfree(fp);
+}
+EXPORT_SYMBOL_GPL(__bpf_prog_free);
+
 /* Base function for offset calculation. Needs to go into .text section,
  * therefore keeping it non-static as well; will also be used by JITs
  * anyway later on, so do not let the compiler omit it.
@@ -523,12 +585,26 @@ void bpf_prog_select_runtime(struct bpf_prog *fp)
 
        /* Probe if internal BPF can be JITed */
        bpf_int_jit_compile(fp);
+       /* Lock whole bpf_prog as read-only */
+       bpf_prog_lock_ro(fp);
 }
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
 
-/* free internal BPF program */
+static void bpf_prog_free_deferred(struct work_struct *work)
+{
+       struct bpf_work_struct *ws;
+
+       ws = container_of(work, struct bpf_work_struct, work);
+       bpf_jit_free(ws->prog);
+}
+
+/* Free internal BPF program */
 void bpf_prog_free(struct bpf_prog *fp)
 {
-       bpf_jit_free(fp);
+       struct bpf_work_struct *ws = fp->work;
+
+       INIT_WORK(&ws->work, bpf_prog_free_deferred);
+       ws->prog = fp;
+       schedule_work(&ws->work);
 }
 EXPORT_SYMBOL_GPL(bpf_prog_free);
index 44eb005c6695010e79a5041b837914a12c69aa45..84922befea8414468eafe1330ffc372ec9bdb8da 100644 (file)
@@ -395,16 +395,15 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
        if (!filter)
                goto free_prog;
 
-       filter->prog = kzalloc(bpf_prog_size(new_len),
-                              GFP_KERNEL|__GFP_NOWARN);
+       filter->prog = bpf_prog_alloc(bpf_prog_size(new_len), __GFP_NOWARN);
        if (!filter->prog)
                goto free_filter;
 
        ret = bpf_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len);
        if (ret)
                goto free_filter_prog;
-       kfree(fp);
 
+       kfree(fp);
        atomic_set(&filter->usage, 1);
        filter->prog->len = new_len;
 
@@ -413,7 +412,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
        return filter;
 
 free_filter_prog:
-       kfree(filter->prog);
+       __bpf_prog_free(filter->prog);
 free_filter:
        kfree(filter);
 free_prog:
index 8c66c6aace040721a74d30fdbef1504b9da71ee5..9a67456ba29a2ee24b63e3367dd7f58b3d2e922b 100644 (file)
@@ -1836,7 +1836,7 @@ static struct bpf_prog *generate_filter(int which, int *err)
                break;
 
        case INTERNAL:
-               fp = kzalloc(bpf_prog_size(flen), GFP_KERNEL);
+               fp = bpf_prog_alloc(bpf_prog_size(flen), 0);
                if (fp == NULL) {
                        pr_cont("UNEXPECTED_FAIL no memory left\n");
                        *err = -ENOMEM;
index d814b8a89d0f2f65efb95858f2e4fe26bf5a4ca9..37f8eb06fdeea4944765a14f8f922864e18d5a2b 100644 (file)
@@ -933,7 +933,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
 
        /* Expand fp for appending the new filter representation. */
        old_fp = fp;
-       fp = krealloc(old_fp, bpf_prog_size(new_len), GFP_KERNEL);
+       fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
        if (!fp) {
                /* The old_fp is still around in case we couldn't
                 * allocate new memory, so uncharge on that one.
@@ -1013,7 +1013,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
        if (fprog->filter == NULL)
                return -EINVAL;
 
-       fp = kmalloc(bpf_prog_size(fprog->len), GFP_KERNEL);
+       fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
        if (!fp)
                return -ENOMEM;
 
@@ -1069,7 +1069,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
        if (fprog->filter == NULL)
                return -EINVAL;
 
-       prog = kmalloc(bpf_fsize, GFP_KERNEL);
+       prog = bpf_prog_alloc(bpf_fsize, 0);
        if (!prog)
                return -ENOMEM;