#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <asm/cacheflush.h>
+#include <linux/bpf.h>
int bpf_jit_enable __read_mostly;
return ptr + len;
}
-#define EMIT(bytes, len) do { prog = emit_code(prog, bytes, len); } while (0)
+#define EMIT(bytes, len) \
+ do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)
#define EMIT1(b1) EMIT(b1, 1)
#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2)
#define BPF_MAX_INSN_SIZE 128
#define BPF_INSN_SAFETY 64
-static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
- int oldproglen, struct jit_context *ctx)
+#define STACKSIZE \
+ (MAX_BPF_STACK + \
+ 32 /* space for rbx, r13, r14, r15 */ + \
+ 8 /* space for skb_copy_bits() buffer */)
+
+#define PROLOGUE_SIZE 51
+
+/* emit x64 prologue code for BPF program and check it's size.
+ * bpf_tail_call helper will skip it while jumping into another program
+ */
+static void emit_prologue(u8 **pprog)
{
- struct bpf_insn *insn = bpf_prog->insnsi;
- int insn_cnt = bpf_prog->len;
- bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0);
- bool seen_exit = false;
- u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
- int i;
- int proglen = 0;
- u8 *prog = temp;
- int stacksize = MAX_BPF_STACK +
- 32 /* space for rbx, r13, r14, r15 */ +
- 8 /* space for skb_copy_bits() buffer */;
+ u8 *prog = *pprog;
+ int cnt = 0;
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */
- /* sub rsp, stacksize */
- EMIT3_off32(0x48, 0x81, 0xEC, stacksize);
+ /* sub rsp, STACKSIZE */
+ EMIT3_off32(0x48, 0x81, 0xEC, STACKSIZE);
/* all classic BPF filters use R6(rbx) save it */
/* mov qword ptr [rbp-X],rbx */
- EMIT3_off32(0x48, 0x89, 0x9D, -stacksize);
+ EMIT3_off32(0x48, 0x89, 0x9D, -STACKSIZE);
/* bpf_convert_filter() maps classic BPF register X to R7 and uses R8
* as temporary, so all tcpdump filters need to spill/fill R7(r13) and
*/
/* mov qword ptr [rbp-X],r13 */
- EMIT3_off32(0x4C, 0x89, 0xAD, -stacksize + 8);
+ EMIT3_off32(0x4C, 0x89, 0xAD, -STACKSIZE + 8);
/* mov qword ptr [rbp-X],r14 */
- EMIT3_off32(0x4C, 0x89, 0xB5, -stacksize + 16);
+ EMIT3_off32(0x4C, 0x89, 0xB5, -STACKSIZE + 16);
/* mov qword ptr [rbp-X],r15 */
- EMIT3_off32(0x4C, 0x89, 0xBD, -stacksize + 24);
+ EMIT3_off32(0x4C, 0x89, 0xBD, -STACKSIZE + 24);
/* clear A and X registers */
EMIT2(0x31, 0xc0); /* xor eax, eax */
EMIT3(0x4D, 0x31, 0xED); /* xor r13, r13 */
+ /* clear tail_cnt: mov qword ptr [rbp-X], rax */
+ EMIT3_off32(0x48, 0x89, 0x85, -STACKSIZE + 32);
+
+ BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
+ *pprog = prog;
+}
+
+/* generate the following code:
+ * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
+ * if (index >= array->map.max_entries)
+ * goto out;
+ * if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
+ * goto out;
+ * prog = array->prog[index];
+ * if (prog == NULL)
+ * goto out;
+ * goto *(prog->bpf_func + prologue_size);
+ * out:
+ */
+static void emit_bpf_tail_call(u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int label1, label2, label3;
+ int cnt = 0;
+
+ /* rdi - pointer to ctx
+ * rsi - pointer to bpf_array
+ * rdx - index in bpf_array
+ */
+
+ /* if (index >= array->map.max_entries)
+ * goto out;
+ */
+ EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */
+ offsetof(struct bpf_array, map.max_entries));
+ EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */
+#define OFFSET1 44 /* number of bytes to jump */
+ EMIT2(X86_JBE, OFFSET1); /* jbe out */
+ label1 = cnt;
+
+ /* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+ * goto out;
+ */
+ EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */
+ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
+#define OFFSET2 33
+ EMIT2(X86_JA, OFFSET2); /* ja out */
+ label2 = cnt;
+ EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
+ EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */
+
+ /* prog = array->prog[index]; */
+ EMIT4(0x48, 0x8D, 0x44, 0xD6); /* lea rax, [rsi + rdx * 8 + 0x50] */
+ EMIT1(offsetof(struct bpf_array, prog));
+ EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */
+
+ /* if (prog == NULL)
+ * goto out;
+ */
+ EMIT4(0x48, 0x83, 0xF8, 0x00); /* cmp rax, 0 */
+#define OFFSET3 10
+ EMIT2(X86_JE, OFFSET3); /* je out */
+ label3 = cnt;
+
+ /* goto *(prog->bpf_func + prologue_size); */
+ EMIT4(0x48, 0x8B, 0x40, /* mov rax, qword ptr [rax + 32] */
+ offsetof(struct bpf_prog, bpf_func));
+ EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE); /* add rax, prologue_size */
+
+ /* now we're ready to jump into next BPF program
+ * rdi == ctx (1st arg)
+ * rax == prog->bpf_func + prologue_size
+ */
+ EMIT2(0xFF, 0xE0); /* jmp rax */
+
+ /* out: */
+ BUILD_BUG_ON(cnt - label1 != OFFSET1);
+ BUILD_BUG_ON(cnt - label2 != OFFSET2);
+ BUILD_BUG_ON(cnt - label3 != OFFSET3);
+ *pprog = prog;
+}
+
+static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
+ int oldproglen, struct jit_context *ctx)
+{
+ struct bpf_insn *insn = bpf_prog->insnsi;
+ int insn_cnt = bpf_prog->len;
+ bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0);
+ bool seen_exit = false;
+ u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
+ int i, cnt = 0;
+ int proglen = 0;
+ u8 *prog = temp;
+
+ emit_prologue(&prog);
+
if (seen_ld_abs) {
/* r9d : skb->len - skb->data_len (headlen)
* r10 : skb->data
}
break;
+ case BPF_JMP | BPF_CALL | BPF_X:
+ emit_bpf_tail_call(&prog);
+ break;
+
/* cond jump */
case BPF_JMP | BPF_JEQ | BPF_X:
case BPF_JMP | BPF_JNE | BPF_X:
/* update cleanup_addr */
ctx->cleanup_addr = proglen;
/* mov rbx, qword ptr [rbp-X] */
- EMIT3_off32(0x48, 0x8B, 0x9D, -stacksize);
+ EMIT3_off32(0x48, 0x8B, 0x9D, -STACKSIZE);
/* mov r13, qword ptr [rbp-X] */
- EMIT3_off32(0x4C, 0x8B, 0xAD, -stacksize + 8);
+ EMIT3_off32(0x4C, 0x8B, 0xAD, -STACKSIZE + 8);
/* mov r14, qword ptr [rbp-X] */
- EMIT3_off32(0x4C, 0x8B, 0xB5, -stacksize + 16);
+ EMIT3_off32(0x4C, 0x8B, 0xB5, -STACKSIZE + 16);
/* mov r15, qword ptr [rbp-X] */
- EMIT3_off32(0x4C, 0x8B, 0xBD, -stacksize + 24);
+ EMIT3_off32(0x4C, 0x8B, 0xBD, -STACKSIZE + 24);
EMIT1(0xC9); /* leave */
EMIT1(0xC3); /* ret */