/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 *	Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/sock_diag.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/if_arp.h>
#include <linux/gfp.h>
#include <net/protocol.h>
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <net/flow_dissector.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
#include <net/sch_generic.h>
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
#include <net/sock_reuseport.h>
#include <net/busy_poll.h>
#include <linux/bpf_trace.h>
/**
 *	sk_filter_trim_cap - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *	@cap: limit on how short the eBPF program may trim the packet
 *
 * Run the eBPF program and then cut skb->data to correct size returned by
 * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
 * than pkt_len we keep whole skb->data. This is the socket level
 * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 */
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
{
	int err;
	struct sk_filter *filter;

	/*
	 * If the skb was allocated from pfmemalloc reserves, only
	 * allow SOCK_MEMALLOC sockets to use it as this socket is
	 * helping free memory
	 */
	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
		return -ENOMEM;
	}
	err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
	if (err)
		return err;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		struct sock *save_sk = skb->sk;
		unsigned int pkt_len;

		skb->sk = sk;
		pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
		skb->sk = save_sk;
		err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
	}
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL(sk_filter_trim_cap);
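/* Usage note (illustrative sketch, not part of the kernel sources): the
 * trimming semantics above are what a classic filter attached from user
 * space relies on. Assuming a userspace program with an open socket 'sock',
 * a filter whose return value is 96 makes the kernel trim every accepted
 * packet to at most 96 bytes:
 *
 *	#include <linux/filter.h>
 *	#include <sys/socket.h>
 *
 *	static struct sock_filter code[] = {
 *		BPF_STMT(BPF_RET | BPF_K, 96),
 *	};
 *	static struct sock_fprog fprog = {
 *		.len	= sizeof(code) / sizeof(code[0]),
 *		.filter	= code,
 *	};
 *
 *	setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog));
 *
 * A filter return value of 0 would instead make sk_filter_trim_cap()
 * return -EPERM and the packet would be dropped.
 */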
BPF_CALL_1(__skb_get_pay_offset, struct sk_buff *, skb)
{
	return skb_get_poff(skb);
}

BPF_CALL_3(__skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}
BPF_CALL_3(__skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = (struct nlattr *) &skb->data[a];
	if (nla->nla_len > skb->len - a)
		return 0;

	nla = nla_find_nested(nla, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

BPF_CALL_0(__get_raw_cpu_id)
{
	return raw_smp_processor_id();
}
static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
	.func		= __get_raw_cpu_id,
	.ret_type	= RET_INTEGER,
};
static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
			      struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;

	switch (skb_field) {
	case SKF_AD_MARK:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);

		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
				      offsetof(struct sk_buff, mark));
		break;

	case SKF_AD_PKTTYPE:
		*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET());
		*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
#ifdef __BIG_ENDIAN_BITFIELD
		*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
#endif
		break;

	case SKF_AD_QUEUE:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);

		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				      offsetof(struct sk_buff, queue_mapping));
		break;

	case SKF_AD_VLAN_TAG:
	case SKF_AD_VLAN_TAG_PRESENT:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);

		/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				      offsetof(struct sk_buff, vlan_tci));
		if (skb_field == SKF_AD_VLAN_TAG) {
			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg,
						~VLAN_TAG_PRESENT);
		} else {
			/* dst_reg >>= 12 */
			*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 12);
			/* dst_reg &= 1 */
			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
		}
		break;
	}

	return insn - insn_buf;
}
static bool convert_bpf_extensions(struct sock_filter *fp,
				   struct bpf_insn **insnp)
{
	struct bpf_insn *insn = *insnp;
	u32 cnt;

	switch (fp->k) {
	case SKF_AD_OFF + SKF_AD_PROTOCOL:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);

		/* A = *(u16 *) (CTX + offsetof(protocol)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, protocol));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PKTTYPE:
		cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_IFINDEX:
	case SKF_AD_OFF + SKF_AD_HATYPE:
		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
				      BPF_REG_TMP, BPF_REG_CTX,
				      offsetof(struct sk_buff, dev));
		/* if (tmp != 0) goto pc + 1 */
		*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
		*insn++ = BPF_EXIT_INSN();
		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, ifindex));
		else
			*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, type));
		break;

	case SKF_AD_OFF + SKF_AD_MARK:
		cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_RXHASH:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);

		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
				    offsetof(struct sk_buff, hash));
		break;

	case SKF_AD_OFF + SKF_AD_QUEUE:
		cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
		cnt = convert_skb_access(SKF_AD_VLAN_TAG,
					 BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
		cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
					 BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TPID:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);

		/* A = *(u16 *) (CTX + offsetof(vlan_proto)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, vlan_proto));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
	case SKF_AD_OFF + SKF_AD_NLATTR:
	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
	case SKF_AD_OFF + SKF_AD_CPU:
	case SKF_AD_OFF + SKF_AD_RANDOM:
		/* arg1 = CTX */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
		/* arg2 = A */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
		/* arg3 = X */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
		/* Emit call(arg1=CTX, arg2=A, arg3=X) */
		switch (fp->k) {
		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
			*insn = BPF_EMIT_CALL(__skb_get_pay_offset);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR:
			*insn = BPF_EMIT_CALL(__skb_get_nlattr);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
			*insn = BPF_EMIT_CALL(__skb_get_nlattr_nest);
			break;
		case SKF_AD_OFF + SKF_AD_CPU:
			*insn = BPF_EMIT_CALL(__get_raw_cpu_id);
			break;
		case SKF_AD_OFF + SKF_AD_RANDOM:
			*insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
			bpf_user_rnd_init_once();
			break;
		}
		break;

	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
		/* A ^= X */
		*insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
		break;

	default:
		/* This is just a dummy call to avoid letting the compiler
		 * evict __bpf_call_base() as an optimization. Placed here
		 * where no-one bothers.
		 */
		BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
		return false;
	}

	*insnp = insn;
	return true;
}
/**
 *	bpf_convert_filter - convert filter program
 *	@prog: the user passed filter program
 *	@len: the length of the user passed filter program
 *	@new_prog: allocated 'struct bpf_prog' or NULL
 *	@new_len: pointer to store length of converted program
 *
 * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
 * style extended BPF (eBPF).
 * Conversion workflow:
 *
 * 1) First pass for calculating the new program length:
 *	bpf_convert_filter(old_prog, old_len, NULL, &new_len)
 *
 * 2) 2nd pass to remap in two passes: 1st pass finds new
 *    jump offsets, 2nd pass remapping:
 *	bpf_convert_filter(old_prog, old_len, new_prog, &new_len);
 */
static int bpf_convert_filter(struct sock_filter *prog, int len,
			      struct bpf_prog *new_prog, int *new_len)
{
	int new_flen = 0, pass = 0, target, i, stack_off;
	struct bpf_insn *new_insn, *first_insn = NULL;
	struct sock_filter *fp;
	int *addrs = NULL;
	u8 bpf_src;

	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);

	if (len <= 0 || len > BPF_MAXINSNS)
		return -EINVAL;

	if (new_prog) {
		first_insn = new_prog->insnsi;
		addrs = kcalloc(len, sizeof(*addrs),
				GFP_KERNEL | __GFP_NOWARN);
		if (!addrs)
			return -ENOMEM;
	}

do_pass:
	new_insn = first_insn;
	fp = prog;

	/* Classic BPF related prologue emission. */
	if (new_prog) {
		/* Classic BPF expects A and X to be reset first. These need
		 * to be guaranteed to be the first two instructions.
		 */
		*new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
		*new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);

		/* All programs must keep CTX in callee saved BPF_REG_CTX.
		 * In eBPF case it's done by the compiler, here we need to
		 * do this ourself. Initial CTX is present in BPF_REG_ARG1.
		 */
		*new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
	} else {
		new_insn += 3;
	}

	for (i = 0; i < len; fp++, i++) {
		struct bpf_insn tmp_insns[6] = { };
		struct bpf_insn *insn = tmp_insns;

		if (addrs)
			addrs[i] = new_insn - first_insn;

		switch (fp->code) {
		/* All arithmetic insns and skb loads map as-is. */
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_ABS | BPF_W:
		case BPF_LD | BPF_ABS | BPF_H:
		case BPF_LD | BPF_ABS | BPF_B:
		case BPF_LD | BPF_IND | BPF_W:
		case BPF_LD | BPF_IND | BPF_H:
		case BPF_LD | BPF_IND | BPF_B:
			/* Check for overloaded BPF extension and
			 * directly convert it if found, otherwise
			 * just move on with mapping.
			 */
			if (BPF_CLASS(fp->code) == BPF_LD &&
			    BPF_MODE(fp->code) == BPF_ABS &&
			    convert_bpf_extensions(fp, &insn))
				break;

			*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
			break;

		/* Jump transformation cannot use BPF block macros
		 * everywhere as offset calculation and target updates
		 * require a bit more work than the rest, i.e. jump
		 * opcodes map as-is, but offsets need adjustment.
		 */

#define BPF_EMIT_JMP							\
	do {								\
		if (target >= len || target < 0)			\
			goto err;					\
		insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0;	\
		/* Adjust pc relative offset for 2nd or 3rd insn. */	\
		insn->off -= insn - tmp_insns;				\
	} while (0)

		case BPF_JMP | BPF_JA:
			target = i + fp->k + 1;
			insn->code = fp->code;
			BPF_EMIT_JMP;
			break;

		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
			if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
				/* BPF immediates are signed, zero extend
				 * immediate into tmp register and use it
				 * in compare insn.
				 */
				*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);

				insn->dst_reg = BPF_REG_A;
				insn->src_reg = BPF_REG_TMP;
				bpf_src = BPF_X;
			} else {
				insn->dst_reg = BPF_REG_A;
				insn->imm = fp->k;
				bpf_src = BPF_SRC(fp->code);
				insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
			}

			/* Common case where 'jump_false' is next insn. */
			if (fp->jf == 0) {
				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
				target = i + fp->jt + 1;
				BPF_EMIT_JMP;
				break;
			}

			/* Convert some jumps when 'jump_true' is next insn. */
			if (fp->jt == 0) {
				switch (BPF_OP(fp->code)) {
				case BPF_JEQ:
					insn->code = BPF_JMP | BPF_JNE | bpf_src;
					break;
				case BPF_JGT:
					insn->code = BPF_JMP | BPF_JLE | bpf_src;
					break;
				case BPF_JGE:
					insn->code = BPF_JMP | BPF_JLT | bpf_src;
					break;
				default:
					goto jmp_rest;
				}

				target = i + fp->jf + 1;
				BPF_EMIT_JMP;
				break;
			}
jmp_rest:
			/* Other jumps are mapped into two insns: Jxx and JA. */
			target = i + fp->jt + 1;
			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
			BPF_EMIT_JMP;
			insn++;

			insn->code = BPF_JMP | BPF_JA;
			target = i + fp->jf + 1;
			BPF_EMIT_JMP;
			break;

		/* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */
		case BPF_LDX | BPF_MSH | BPF_B:
			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A);
			/* A = BPF_R0 = *(u8 *) (skb->data + K) */
			*insn++ = BPF_LD_ABS(BPF_B, fp->k);
			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
			*insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
			break;

		/* RET_K is remaped into 2 insns. RET_A case doesn't need an
		 * extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
		 */
		case BPF_RET | BPF_A:
		case BPF_RET | BPF_K:
			if (BPF_RVAL(fp->code) == BPF_K)
				*insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
							0, fp->k);
			*insn = BPF_EXIT_INSN();
			break;

		/* Store to stack. */
		case BPF_ST:
		case BPF_STX:
			stack_off = fp->k * 4 + 4;
			*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
					    BPF_ST ? BPF_REG_A : BPF_REG_X,
					    -stack_off);
			/* check_load_and_stores() verifies that classic BPF can
			 * load from stack only after write, so tracking
			 * stack_depth for ST|STX insns is enough
			 */
			if (new_prog && new_prog->aux->stack_depth < stack_off)
				new_prog->aux->stack_depth = stack_off;
			break;

		/* Load from stack. */
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			stack_off = fp->k * 4 + 4;
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_FP,
					    -stack_off);
			break;

		/* A = K or X = K */
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
			*insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
					      BPF_REG_A : BPF_REG_X, fp->k);
			break;

		/* X = A */
		case BPF_MISC | BPF_TAX:
			*insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			break;

		/* A = X */
		case BPF_MISC | BPF_TXA:
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
			break;

		/* A = skb->len or X = skb->len */
		case BPF_LD | BPF_W | BPF_LEN:
		case BPF_LDX | BPF_W | BPF_LEN:
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
					    offsetof(struct sk_buff, len));
			break;

		/* Access seccomp_data fields. */
		case BPF_LDX | BPF_ABS | BPF_W:
			/* A = *(u32 *) (ctx + K) */
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
			break;

		/* Unknown instruction. */
		default:
			goto err;
		}

		insn++;
		if (new_prog)
			memcpy(new_insn, tmp_insns,
			       sizeof(*insn) * (insn - tmp_insns));
		new_insn += insn - tmp_insns;
	}

	if (!new_prog) {
		/* Only calculating new length. */
		*new_len = new_insn - first_insn;
		return 0;
	}

	pass++;
	if (new_flen != new_insn - first_insn) {
		new_flen = new_insn - first_insn;
		if (pass > 2)
			goto err;
		goto do_pass;
	}

	kfree(addrs);
	BUG_ON(*new_len != new_flen);
	return 0;
err:
	kfree(addrs);
	if (new_prog)
		memset(new_prog, 0, sizeof(*new_prog));
	return -EINVAL;
}
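/* Illustrative sketch (not from this file) of the two-pass workflow that
 * the comment above describes; bpf_migrate_filter() below is the in-tree
 * user. 'old_prog'/'old_len' stand for an already validated classic
 * program:
 *
 *	int new_len, err;
 *
 *	// 1st call: only compute the length of the converted program.
 *	err = bpf_convert_filter(old_prog, old_len, NULL, &new_len);
 *
 *	// Resize the bpf_prog so it has room for new_len insns, then let
 *	// the 2nd call emit the eBPF instructions into it.
 *	fp = bpf_prog_realloc(fp, bpf_prog_size(new_len), 0);
 *	fp->len = new_len;
 *	err = bpf_convert_filter(old_prog, old_len, fp, &new_len);
 */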
/*
 * As we dont want to clear mem[] array for each packet going through
 * __bpf_prog_run(), we check that filter loaded by user never try to read
 * a cell if not previously written, and we check all branches to be sure
 * a malicious user doesn't try to abuse us.
 */
static int check_load_and_stores(const struct sock_filter *filter, int flen)
{
	u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
	int pc, ret = 0;

	BUILD_BUG_ON(BPF_MEMWORDS > 16);

	masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return -ENOMEM;

	memset(masks, 0xff, flen * sizeof(*masks));

	for (pc = 0; pc < flen; pc++) {
		memvalid &= masks[pc];

		switch (filter[pc].code) {
		case BPF_ST:
		case BPF_STX:
			memvalid |= (1 << filter[pc].k);
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			if (!(memvalid & (1 << filter[pc].k))) {
				ret = -EINVAL;
				goto error;
			}
			break;
		case BPF_JMP | BPF_JA:
			/* A jump must set masks on target */
			masks[pc + 1 + filter[pc].k] &= memvalid;
			memvalid = ~0;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* A jump must set masks on targets */
			masks[pc + 1 + filter[pc].jt] &= memvalid;
			masks[pc + 1 + filter[pc].jf] &= memvalid;
			memvalid = ~0;
			break;
		}
	}
error:
	kfree(masks);
	return ret;
}
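/* Example (illustrative, not part of the kernel sources): the checker above
 * rejects a classic program that reads a scratch cell before writing it,
 * while the store-then-load variant is accepted:
 *
 *	// rejected: LD MEM[0] with no prior ST into cell 0
 *	struct sock_filter bad[] = {
 *		BPF_STMT(BPF_LD | BPF_MEM, 0),
 *		BPF_STMT(BPF_RET | BPF_K, 0xffff),
 *	};
 *
 *	// accepted: cell 0 is written before it is read
 *	struct sock_filter good[] = {
 *		BPF_STMT(BPF_LD | BPF_W | BPF_LEN, 0),
 *		BPF_STMT(BPF_ST, 0),
 *		BPF_STMT(BPF_LD | BPF_MEM, 0),
 *		BPF_STMT(BPF_RET | BPF_K, 0xffff),
 *	};
 */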
725 static bool chk_code_allowed(u16 code_to_probe
)
727 static const bool codes
[] = {
728 /* 32 bit ALU operations */
729 [BPF_ALU
| BPF_ADD
| BPF_K
] = true,
730 [BPF_ALU
| BPF_ADD
| BPF_X
] = true,
731 [BPF_ALU
| BPF_SUB
| BPF_K
] = true,
732 [BPF_ALU
| BPF_SUB
| BPF_X
] = true,
733 [BPF_ALU
| BPF_MUL
| BPF_K
] = true,
734 [BPF_ALU
| BPF_MUL
| BPF_X
] = true,
735 [BPF_ALU
| BPF_DIV
| BPF_K
] = true,
736 [BPF_ALU
| BPF_DIV
| BPF_X
] = true,
737 [BPF_ALU
| BPF_MOD
| BPF_K
] = true,
738 [BPF_ALU
| BPF_MOD
| BPF_X
] = true,
739 [BPF_ALU
| BPF_AND
| BPF_K
] = true,
740 [BPF_ALU
| BPF_AND
| BPF_X
] = true,
741 [BPF_ALU
| BPF_OR
| BPF_K
] = true,
742 [BPF_ALU
| BPF_OR
| BPF_X
] = true,
743 [BPF_ALU
| BPF_XOR
| BPF_K
] = true,
744 [BPF_ALU
| BPF_XOR
| BPF_X
] = true,
745 [BPF_ALU
| BPF_LSH
| BPF_K
] = true,
746 [BPF_ALU
| BPF_LSH
| BPF_X
] = true,
747 [BPF_ALU
| BPF_RSH
| BPF_K
] = true,
748 [BPF_ALU
| BPF_RSH
| BPF_X
] = true,
749 [BPF_ALU
| BPF_NEG
] = true,
750 /* Load instructions */
751 [BPF_LD
| BPF_W
| BPF_ABS
] = true,
752 [BPF_LD
| BPF_H
| BPF_ABS
] = true,
753 [BPF_LD
| BPF_B
| BPF_ABS
] = true,
754 [BPF_LD
| BPF_W
| BPF_LEN
] = true,
755 [BPF_LD
| BPF_W
| BPF_IND
] = true,
756 [BPF_LD
| BPF_H
| BPF_IND
] = true,
757 [BPF_LD
| BPF_B
| BPF_IND
] = true,
758 [BPF_LD
| BPF_IMM
] = true,
759 [BPF_LD
| BPF_MEM
] = true,
760 [BPF_LDX
| BPF_W
| BPF_LEN
] = true,
761 [BPF_LDX
| BPF_B
| BPF_MSH
] = true,
762 [BPF_LDX
| BPF_IMM
] = true,
763 [BPF_LDX
| BPF_MEM
] = true,
764 /* Store instructions */
767 /* Misc instructions */
768 [BPF_MISC
| BPF_TAX
] = true,
769 [BPF_MISC
| BPF_TXA
] = true,
770 /* Return instructions */
771 [BPF_RET
| BPF_K
] = true,
772 [BPF_RET
| BPF_A
] = true,
773 /* Jump instructions */
774 [BPF_JMP
| BPF_JA
] = true,
775 [BPF_JMP
| BPF_JEQ
| BPF_K
] = true,
776 [BPF_JMP
| BPF_JEQ
| BPF_X
] = true,
777 [BPF_JMP
| BPF_JGE
| BPF_K
] = true,
778 [BPF_JMP
| BPF_JGE
| BPF_X
] = true,
779 [BPF_JMP
| BPF_JGT
| BPF_K
] = true,
780 [BPF_JMP
| BPF_JGT
| BPF_X
] = true,
781 [BPF_JMP
| BPF_JSET
| BPF_K
] = true,
782 [BPF_JMP
| BPF_JSET
| BPF_X
] = true,
785 if (code_to_probe
>= ARRAY_SIZE(codes
))
788 return codes
[code_to_probe
];
791 static bool bpf_check_basics_ok(const struct sock_filter
*filter
,
796 if (flen
== 0 || flen
> BPF_MAXINSNS
)
/**
 *	bpf_check_classic - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
816 static int bpf_check_classic(const struct sock_filter
*filter
,
822 /* Check the filter code now */
823 for (pc
= 0; pc
< flen
; pc
++) {
824 const struct sock_filter
*ftest
= &filter
[pc
];
826 /* May we actually operate on this code? */
827 if (!chk_code_allowed(ftest
->code
))
830 /* Some instructions need special checks */
831 switch (ftest
->code
) {
832 case BPF_ALU
| BPF_DIV
| BPF_K
:
833 case BPF_ALU
| BPF_MOD
| BPF_K
:
834 /* Check for division by zero */
838 case BPF_ALU
| BPF_LSH
| BPF_K
:
839 case BPF_ALU
| BPF_RSH
| BPF_K
:
843 case BPF_LD
| BPF_MEM
:
844 case BPF_LDX
| BPF_MEM
:
847 /* Check for invalid memory addresses */
848 if (ftest
->k
>= BPF_MEMWORDS
)
851 case BPF_JMP
| BPF_JA
:
852 /* Note, the large ftest->k might cause loops.
853 * Compare this with conditional jumps below,
854 * where offsets are limited. --ANK (981016)
856 if (ftest
->k
>= (unsigned int)(flen
- pc
- 1))
859 case BPF_JMP
| BPF_JEQ
| BPF_K
:
860 case BPF_JMP
| BPF_JEQ
| BPF_X
:
861 case BPF_JMP
| BPF_JGE
| BPF_K
:
862 case BPF_JMP
| BPF_JGE
| BPF_X
:
863 case BPF_JMP
| BPF_JGT
| BPF_K
:
864 case BPF_JMP
| BPF_JGT
| BPF_X
:
865 case BPF_JMP
| BPF_JSET
| BPF_K
:
866 case BPF_JMP
| BPF_JSET
| BPF_X
:
867 /* Both conditionals must be safe */
868 if (pc
+ ftest
->jt
+ 1 >= flen
||
869 pc
+ ftest
->jf
+ 1 >= flen
)
872 case BPF_LD
| BPF_W
| BPF_ABS
:
873 case BPF_LD
| BPF_H
| BPF_ABS
:
874 case BPF_LD
| BPF_B
| BPF_ABS
:
876 if (bpf_anc_helper(ftest
) & BPF_ANC
)
878 /* Ancillary operation unknown or unsupported */
879 if (anc_found
== false && ftest
->k
>= SKF_AD_OFF
)
884 /* Last instruction must be a RET code */
885 switch (filter
[flen
- 1].code
) {
886 case BPF_RET
| BPF_K
:
887 case BPF_RET
| BPF_A
:
888 return check_load_and_stores(filter
, flen
);
894 static int bpf_prog_store_orig_filter(struct bpf_prog
*fp
,
895 const struct sock_fprog
*fprog
)
897 unsigned int fsize
= bpf_classic_proglen(fprog
);
898 struct sock_fprog_kern
*fkprog
;
900 fp
->orig_prog
= kmalloc(sizeof(*fkprog
), GFP_KERNEL
);
904 fkprog
= fp
->orig_prog
;
905 fkprog
->len
= fprog
->len
;
907 fkprog
->filter
= kmemdup(fp
->insns
, fsize
,
908 GFP_KERNEL
| __GFP_NOWARN
);
909 if (!fkprog
->filter
) {
910 kfree(fp
->orig_prog
);
917 static void bpf_release_orig_filter(struct bpf_prog
*fp
)
919 struct sock_fprog_kern
*fprog
= fp
->orig_prog
;
922 kfree(fprog
->filter
);
927 static void __bpf_prog_release(struct bpf_prog
*prog
)
929 if (prog
->type
== BPF_PROG_TYPE_SOCKET_FILTER
) {
932 bpf_release_orig_filter(prog
);
937 static void __sk_filter_release(struct sk_filter
*fp
)
939 __bpf_prog_release(fp
->prog
);
944 * sk_filter_release_rcu - Release a socket filter by rcu_head
945 * @rcu: rcu_head that contains the sk_filter to free
947 static void sk_filter_release_rcu(struct rcu_head
*rcu
)
949 struct sk_filter
*fp
= container_of(rcu
, struct sk_filter
, rcu
);
951 __sk_filter_release(fp
);
955 * sk_filter_release - release a socket filter
956 * @fp: filter to remove
958 * Remove a filter from a socket and release its resources.
960 static void sk_filter_release(struct sk_filter
*fp
)
962 if (refcount_dec_and_test(&fp
->refcnt
))
963 call_rcu(&fp
->rcu
, sk_filter_release_rcu
);
966 void sk_filter_uncharge(struct sock
*sk
, struct sk_filter
*fp
)
968 u32 filter_size
= bpf_prog_size(fp
->prog
->len
);
970 atomic_sub(filter_size
, &sk
->sk_omem_alloc
);
971 sk_filter_release(fp
);
974 /* try to charge the socket memory if there is space available
975 * return true on success
977 static bool __sk_filter_charge(struct sock
*sk
, struct sk_filter
*fp
)
979 u32 filter_size
= bpf_prog_size(fp
->prog
->len
);
981 /* same check as in sock_kmalloc() */
982 if (filter_size
<= sysctl_optmem_max
&&
983 atomic_read(&sk
->sk_omem_alloc
) + filter_size
< sysctl_optmem_max
) {
984 atomic_add(filter_size
, &sk
->sk_omem_alloc
);
990 bool sk_filter_charge(struct sock
*sk
, struct sk_filter
*fp
)
992 if (!refcount_inc_not_zero(&fp
->refcnt
))
995 if (!__sk_filter_charge(sk
, fp
)) {
996 sk_filter_release(fp
);
1002 static struct bpf_prog
*bpf_migrate_filter(struct bpf_prog
*fp
)
1004 struct sock_filter
*old_prog
;
1005 struct bpf_prog
*old_fp
;
1006 int err
, new_len
, old_len
= fp
->len
;
1008 /* We are free to overwrite insns et al right here as it
1009 * won't be used at this point in time anymore internally
1010 * after the migration to the internal BPF instruction
1013 BUILD_BUG_ON(sizeof(struct sock_filter
) !=
1014 sizeof(struct bpf_insn
));
1016 /* Conversion cannot happen on overlapping memory areas,
1017 * so we need to keep the user BPF around until the 2nd
1018 * pass. At this time, the user BPF is stored in fp->insns.
1020 old_prog
= kmemdup(fp
->insns
, old_len
* sizeof(struct sock_filter
),
1021 GFP_KERNEL
| __GFP_NOWARN
);
1027 /* 1st pass: calculate the new program length. */
1028 err
= bpf_convert_filter(old_prog
, old_len
, NULL
, &new_len
);
1032 /* Expand fp for appending the new filter representation. */
1034 fp
= bpf_prog_realloc(old_fp
, bpf_prog_size(new_len
), 0);
1036 /* The old_fp is still around in case we couldn't
1037 * allocate new memory, so uncharge on that one.
1046 /* 2nd pass: remap sock_filter insns into bpf_insn insns. */
1047 err
= bpf_convert_filter(old_prog
, old_len
, fp
, &new_len
);
1049 /* 2nd bpf_convert_filter() can fail only if it fails
1050 * to allocate memory, remapping must succeed. Note,
1051 * that at this time old_fp has already been released
1056 /* We are guaranteed to never error here with cBPF to eBPF
1057 * transitions, since there's no issue with type compatibility
1058 * checks on program arrays.
1060 fp
= bpf_prog_select_runtime(fp
, &err
);
1068 __bpf_prog_release(fp
);
1069 return ERR_PTR(err
);
1072 static struct bpf_prog
*bpf_prepare_filter(struct bpf_prog
*fp
,
1073 bpf_aux_classic_check_t trans
)
1077 fp
->bpf_func
= NULL
;
1080 err
= bpf_check_classic(fp
->insns
, fp
->len
);
1082 __bpf_prog_release(fp
);
1083 return ERR_PTR(err
);
1086 /* There might be additional checks and transformations
1087 * needed on classic filters, f.e. in case of seccomp.
1090 err
= trans(fp
->insns
, fp
->len
);
1092 __bpf_prog_release(fp
);
1093 return ERR_PTR(err
);
1097 /* Probe if we can JIT compile the filter and if so, do
1098 * the compilation of the filter.
1100 bpf_jit_compile(fp
);
1102 /* JIT compiler couldn't process this filter, so do the
1103 * internal BPF translation for the optimized interpreter.
1106 fp
= bpf_migrate_filter(fp
);
/**
 *	bpf_prog_create - create an unattached filter
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *
 * Create a filter independent of any socket. We first run some
 * sanity checks on it to make sure it does not explode on us later.
 * If an error occurs or there is insufficient memory for the filter
 * a negative errno code is returned. On success the return is zero.
 */
1121 int bpf_prog_create(struct bpf_prog
**pfp
, struct sock_fprog_kern
*fprog
)
1123 unsigned int fsize
= bpf_classic_proglen(fprog
);
1124 struct bpf_prog
*fp
;
1126 /* Make sure new filter is there and in the right amounts. */
1127 if (!bpf_check_basics_ok(fprog
->filter
, fprog
->len
))
1130 fp
= bpf_prog_alloc(bpf_prog_size(fprog
->len
), 0);
1134 memcpy(fp
->insns
, fprog
->filter
, fsize
);
1136 fp
->len
= fprog
->len
;
1137 /* Since unattached filters are not copied back to user
1138 * space through sk_get_filter(), we do not need to hold
1139 * a copy here, and can spare us the work.
1141 fp
->orig_prog
= NULL
;
1143 /* bpf_prepare_filter() already takes care of freeing
1144 * memory in case something goes wrong.
1146 fp
= bpf_prepare_filter(fp
, NULL
);
1153 EXPORT_SYMBOL_GPL(bpf_prog_create
);
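/* Illustrative sketch (not from this file): in-kernel users such as drivers
 * typically feed bpf_prog_create() a static classic program. A minimal
 * "accept everything" filter could be built as:
 *
 *	static struct sock_filter accept_all[] = {
 *		BPF_STMT(BPF_RET | BPF_K, 0xffff),
 *	};
 *	static struct sock_fprog_kern accept_all_fprog = {
 *		.len	= ARRAY_SIZE(accept_all),
 *		.filter	= accept_all,
 *	};
 *	struct bpf_prog *fp;
 *	int err = bpf_prog_create(&fp, &accept_all_fprog);
 *
 * On success the converted (and possibly JITed) program can be run with
 * BPF_PROG_RUN(fp, skb) and released again with bpf_prog_destroy(fp).
 */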
/**
 *	bpf_prog_create_from_user - create an unattached filter from user buffer
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *	@trans: post-classic verifier transformation handler
 *	@save_orig: save classic BPF program
 *
 * This function effectively does the same as bpf_prog_create(), only
 * that it builds up its insns buffer from user space provided buffer.
 * It also allows for passing a bpf_aux_classic_check_t handler.
 */
1166 int bpf_prog_create_from_user(struct bpf_prog
**pfp
, struct sock_fprog
*fprog
,
1167 bpf_aux_classic_check_t trans
, bool save_orig
)
1169 unsigned int fsize
= bpf_classic_proglen(fprog
);
1170 struct bpf_prog
*fp
;
1173 /* Make sure new filter is there and in the right amounts. */
1174 if (!bpf_check_basics_ok(fprog
->filter
, fprog
->len
))
1177 fp
= bpf_prog_alloc(bpf_prog_size(fprog
->len
), 0);
1181 if (copy_from_user(fp
->insns
, fprog
->filter
, fsize
)) {
1182 __bpf_prog_free(fp
);
1186 fp
->len
= fprog
->len
;
1187 fp
->orig_prog
= NULL
;
1190 err
= bpf_prog_store_orig_filter(fp
, fprog
);
1192 __bpf_prog_free(fp
);
1197 /* bpf_prepare_filter() already takes care of freeing
1198 * memory in case something goes wrong.
1200 fp
= bpf_prepare_filter(fp
, trans
);
1207 EXPORT_SYMBOL_GPL(bpf_prog_create_from_user
);
1209 void bpf_prog_destroy(struct bpf_prog
*fp
)
1211 __bpf_prog_release(fp
);
1213 EXPORT_SYMBOL_GPL(bpf_prog_destroy
);
1215 static int __sk_attach_prog(struct bpf_prog
*prog
, struct sock
*sk
)
1217 struct sk_filter
*fp
, *old_fp
;
1219 fp
= kmalloc(sizeof(*fp
), GFP_KERNEL
);
1225 if (!__sk_filter_charge(sk
, fp
)) {
1229 refcount_set(&fp
->refcnt
, 1);
1231 old_fp
= rcu_dereference_protected(sk
->sk_filter
,
1232 lockdep_sock_is_held(sk
));
1233 rcu_assign_pointer(sk
->sk_filter
, fp
);
1236 sk_filter_uncharge(sk
, old_fp
);
1241 static int __reuseport_attach_prog(struct bpf_prog
*prog
, struct sock
*sk
)
1243 struct bpf_prog
*old_prog
;
1246 if (bpf_prog_size(prog
->len
) > sysctl_optmem_max
)
1249 if (sk_unhashed(sk
) && sk
->sk_reuseport
) {
1250 err
= reuseport_alloc(sk
);
1253 } else if (!rcu_access_pointer(sk
->sk_reuseport_cb
)) {
1254 /* The socket wasn't bound with SO_REUSEPORT */
1258 old_prog
= reuseport_attach_prog(sk
, prog
);
1260 bpf_prog_destroy(old_prog
);
1266 struct bpf_prog
*__get_filter(struct sock_fprog
*fprog
, struct sock
*sk
)
1268 unsigned int fsize
= bpf_classic_proglen(fprog
);
1269 struct bpf_prog
*prog
;
1272 if (sock_flag(sk
, SOCK_FILTER_LOCKED
))
1273 return ERR_PTR(-EPERM
);
1275 /* Make sure new filter is there and in the right amounts. */
1276 if (!bpf_check_basics_ok(fprog
->filter
, fprog
->len
))
1277 return ERR_PTR(-EINVAL
);
1279 prog
= bpf_prog_alloc(bpf_prog_size(fprog
->len
), 0);
1281 return ERR_PTR(-ENOMEM
);
1283 if (copy_from_user(prog
->insns
, fprog
->filter
, fsize
)) {
1284 __bpf_prog_free(prog
);
1285 return ERR_PTR(-EFAULT
);
1288 prog
->len
= fprog
->len
;
1290 err
= bpf_prog_store_orig_filter(prog
, fprog
);
1292 __bpf_prog_free(prog
);
1293 return ERR_PTR(-ENOMEM
);
1296 /* bpf_prepare_filter() already takes care of freeing
1297 * memory in case something goes wrong.
1299 return bpf_prepare_filter(prog
, NULL
);
/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
1312 int sk_attach_filter(struct sock_fprog
*fprog
, struct sock
*sk
)
1314 struct bpf_prog
*prog
= __get_filter(fprog
, sk
);
1318 return PTR_ERR(prog
);
1320 err
= __sk_attach_prog(prog
, sk
);
1322 __bpf_prog_release(prog
);
1328 EXPORT_SYMBOL_GPL(sk_attach_filter
);
1330 int sk_reuseport_attach_filter(struct sock_fprog
*fprog
, struct sock
*sk
)
1332 struct bpf_prog
*prog
= __get_filter(fprog
, sk
);
1336 return PTR_ERR(prog
);
1338 err
= __reuseport_attach_prog(prog
, sk
);
1340 __bpf_prog_release(prog
);
1347 static struct bpf_prog
*__get_bpf(u32 ufd
, struct sock
*sk
)
1349 if (sock_flag(sk
, SOCK_FILTER_LOCKED
))
1350 return ERR_PTR(-EPERM
);
1352 return bpf_prog_get_type(ufd
, BPF_PROG_TYPE_SOCKET_FILTER
);
1355 int sk_attach_bpf(u32 ufd
, struct sock
*sk
)
1357 struct bpf_prog
*prog
= __get_bpf(ufd
, sk
);
1361 return PTR_ERR(prog
);
1363 err
= __sk_attach_prog(prog
, sk
);
1372 int sk_reuseport_attach_bpf(u32 ufd
, struct sock
*sk
)
1374 struct bpf_prog
*prog
= __get_bpf(ufd
, sk
);
1378 return PTR_ERR(prog
);
1380 err
= __reuseport_attach_prog(prog
, sk
);
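/* Illustrative sketch (not from this file): sk_attach_bpf() above backs the
 * SO_ATTACH_BPF socket option, which takes an eBPF program fd obtained from
 * the bpf(2) syscall instead of a classic insn array:
 *
 *	int prog_fd = ...;	// fd returned by a BPF_PROG_LOAD bpf(2) call
 *
 *	setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF,
 *		   &prog_fd, sizeof(prog_fd));
 *
 * SO_ATTACH_REUSEPORT_EBPF is wired up to sk_reuseport_attach_bpf() in the
 * same way, but requires the socket to have SO_REUSEPORT set.
 */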
1389 struct bpf_scratchpad
{
1391 __be32 diff
[MAX_BPF_STACK
/ sizeof(__be32
)];
1392 u8 buff
[MAX_BPF_STACK
];
1396 static DEFINE_PER_CPU(struct bpf_scratchpad
, bpf_sp
);
1398 static inline int __bpf_try_make_writable(struct sk_buff
*skb
,
1399 unsigned int write_len
)
1401 return skb_ensure_writable(skb
, write_len
);
1404 static inline int bpf_try_make_writable(struct sk_buff
*skb
,
1405 unsigned int write_len
)
1407 int err
= __bpf_try_make_writable(skb
, write_len
);
1409 bpf_compute_data_end(skb
);
1413 static int bpf_try_make_head_writable(struct sk_buff
*skb
)
1415 return bpf_try_make_writable(skb
, skb_headlen(skb
));
1418 static inline void bpf_push_mac_rcsum(struct sk_buff
*skb
)
1420 if (skb_at_tc_ingress(skb
))
1421 skb_postpush_rcsum(skb
, skb_mac_header(skb
), skb
->mac_len
);
1424 static inline void bpf_pull_mac_rcsum(struct sk_buff
*skb
)
1426 if (skb_at_tc_ingress(skb
))
1427 skb_postpull_rcsum(skb
, skb_mac_header(skb
), skb
->mac_len
);
1430 BPF_CALL_5(bpf_skb_store_bytes
, struct sk_buff
*, skb
, u32
, offset
,
1431 const void *, from
, u32
, len
, u64
, flags
)
1435 if (unlikely(flags
& ~(BPF_F_RECOMPUTE_CSUM
| BPF_F_INVALIDATE_HASH
)))
1437 if (unlikely(offset
> 0xffff))
1439 if (unlikely(bpf_try_make_writable(skb
, offset
+ len
)))
1442 ptr
= skb
->data
+ offset
;
1443 if (flags
& BPF_F_RECOMPUTE_CSUM
)
1444 __skb_postpull_rcsum(skb
, ptr
, len
, offset
);
1446 memcpy(ptr
, from
, len
);
1448 if (flags
& BPF_F_RECOMPUTE_CSUM
)
1449 __skb_postpush_rcsum(skb
, ptr
, len
, offset
);
1450 if (flags
& BPF_F_INVALIDATE_HASH
)
1451 skb_clear_hash(skb
);
1456 static const struct bpf_func_proto bpf_skb_store_bytes_proto
= {
1457 .func
= bpf_skb_store_bytes
,
1459 .ret_type
= RET_INTEGER
,
1460 .arg1_type
= ARG_PTR_TO_CTX
,
1461 .arg2_type
= ARG_ANYTHING
,
1462 .arg3_type
= ARG_PTR_TO_MEM
,
1463 .arg4_type
= ARG_CONST_SIZE
,
1464 .arg5_type
= ARG_ANYTHING
,
1467 BPF_CALL_4(bpf_skb_load_bytes
, const struct sk_buff
*, skb
, u32
, offset
,
1468 void *, to
, u32
, len
)
1472 if (unlikely(offset
> 0xffff))
1475 ptr
= skb_header_pointer(skb
, offset
, len
, to
);
1479 memcpy(to
, ptr
, len
);
1487 static const struct bpf_func_proto bpf_skb_load_bytes_proto
= {
1488 .func
= bpf_skb_load_bytes
,
1490 .ret_type
= RET_INTEGER
,
1491 .arg1_type
= ARG_PTR_TO_CTX
,
1492 .arg2_type
= ARG_ANYTHING
,
1493 .arg3_type
= ARG_PTR_TO_UNINIT_MEM
,
1494 .arg4_type
= ARG_CONST_SIZE
,
1497 BPF_CALL_2(bpf_skb_pull_data
, struct sk_buff
*, skb
, u32
, len
)
1499 /* Idea is the following: should the needed direct read/write
1500 * test fail during runtime, we can pull in more data and redo
1501 * again, since implicitly, we invalidate previous checks here.
1503 * Or, since we know how much we need to make read/writeable,
1504 * this can be done once at the program beginning for direct
1505 * access case. By this we overcome limitations of only current
1506 * headroom being accessible.
1508 return bpf_try_make_writable(skb
, len
? : skb_headlen(skb
));
1511 static const struct bpf_func_proto bpf_skb_pull_data_proto
= {
1512 .func
= bpf_skb_pull_data
,
1514 .ret_type
= RET_INTEGER
,
1515 .arg1_type
= ARG_PTR_TO_CTX
,
1516 .arg2_type
= ARG_ANYTHING
,
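/* Illustrative sketch (not from this file): a tc/eBPF program uses this
 * helper when a direct packet access test fails, then re-reads the data
 * pointers, which the helper invalidates:
 *
 *	void *data = (void *)(long)skb->data;
 *	void *data_end = (void *)(long)skb->data_end;
 *
 *	if (data + ETH_HLEN > data_end) {
 *		if (bpf_skb_pull_data(skb, ETH_HLEN))
 *			return TC_ACT_SHOT;
 *		data = (void *)(long)skb->data;
 *		data_end = (void *)(long)skb->data_end;
 *		if (data + ETH_HLEN > data_end)
 *			return TC_ACT_SHOT;
 *	}
 */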
1519 BPF_CALL_5(bpf_l3_csum_replace
, struct sk_buff
*, skb
, u32
, offset
,
1520 u64
, from
, u64
, to
, u64
, flags
)
1524 if (unlikely(flags
& ~(BPF_F_HDR_FIELD_MASK
)))
1526 if (unlikely(offset
> 0xffff || offset
& 1))
1528 if (unlikely(bpf_try_make_writable(skb
, offset
+ sizeof(*ptr
))))
1531 ptr
= (__sum16
*)(skb
->data
+ offset
);
1532 switch (flags
& BPF_F_HDR_FIELD_MASK
) {
1534 if (unlikely(from
!= 0))
1537 csum_replace_by_diff(ptr
, to
);
1540 csum_replace2(ptr
, from
, to
);
1543 csum_replace4(ptr
, from
, to
);
1552 static const struct bpf_func_proto bpf_l3_csum_replace_proto
= {
1553 .func
= bpf_l3_csum_replace
,
1555 .ret_type
= RET_INTEGER
,
1556 .arg1_type
= ARG_PTR_TO_CTX
,
1557 .arg2_type
= ARG_ANYTHING
,
1558 .arg3_type
= ARG_ANYTHING
,
1559 .arg4_type
= ARG_ANYTHING
,
1560 .arg5_type
= ARG_ANYTHING
,
1563 BPF_CALL_5(bpf_l4_csum_replace
, struct sk_buff
*, skb
, u32
, offset
,
1564 u64
, from
, u64
, to
, u64
, flags
)
1566 bool is_pseudo
= flags
& BPF_F_PSEUDO_HDR
;
1567 bool is_mmzero
= flags
& BPF_F_MARK_MANGLED_0
;
1568 bool do_mforce
= flags
& BPF_F_MARK_ENFORCE
;
1571 if (unlikely(flags
& ~(BPF_F_MARK_MANGLED_0
| BPF_F_MARK_ENFORCE
|
1572 BPF_F_PSEUDO_HDR
| BPF_F_HDR_FIELD_MASK
)))
1574 if (unlikely(offset
> 0xffff || offset
& 1))
1576 if (unlikely(bpf_try_make_writable(skb
, offset
+ sizeof(*ptr
))))
1579 ptr
= (__sum16
*)(skb
->data
+ offset
);
1580 if (is_mmzero
&& !do_mforce
&& !*ptr
)
1583 switch (flags
& BPF_F_HDR_FIELD_MASK
) {
1585 if (unlikely(from
!= 0))
1588 inet_proto_csum_replace_by_diff(ptr
, skb
, to
, is_pseudo
);
1591 inet_proto_csum_replace2(ptr
, skb
, from
, to
, is_pseudo
);
1594 inet_proto_csum_replace4(ptr
, skb
, from
, to
, is_pseudo
);
1600 if (is_mmzero
&& !*ptr
)
1601 *ptr
= CSUM_MANGLED_0
;
1605 static const struct bpf_func_proto bpf_l4_csum_replace_proto
= {
1606 .func
= bpf_l4_csum_replace
,
1608 .ret_type
= RET_INTEGER
,
1609 .arg1_type
= ARG_PTR_TO_CTX
,
1610 .arg2_type
= ARG_ANYTHING
,
1611 .arg3_type
= ARG_ANYTHING
,
1612 .arg4_type
= ARG_ANYTHING
,
1613 .arg5_type
= ARG_ANYTHING
,
1616 BPF_CALL_5(bpf_csum_diff
, __be32
*, from
, u32
, from_size
,
1617 __be32
*, to
, u32
, to_size
, __wsum
, seed
)
1619 struct bpf_scratchpad
*sp
= this_cpu_ptr(&bpf_sp
);
1620 u32 diff_size
= from_size
+ to_size
;
	/* This is quite flexible, some examples:
	 *
	 *   from_size == 0, to_size > 0,  seed := csum --> pushing data
	 *   from_size > 0,  to_size == 0, seed := csum --> pulling data
	 *   from_size > 0,  to_size > 0,  seed := 0    --> diffing data
	 *
	 * Even for diffing, from_size and to_size don't need to be equal.
	 */
1631 if (unlikely(((from_size
| to_size
) & (sizeof(__be32
) - 1)) ||
1632 diff_size
> sizeof(sp
->diff
)))
1635 for (i
= 0; i
< from_size
/ sizeof(__be32
); i
++, j
++)
1636 sp
->diff
[j
] = ~from
[i
];
1637 for (i
= 0; i
< to_size
/ sizeof(__be32
); i
++, j
++)
1638 sp
->diff
[j
] = to
[i
];
1640 return csum_partial(sp
->diff
, diff_size
, seed
);
1643 static const struct bpf_func_proto bpf_csum_diff_proto
= {
1644 .func
= bpf_csum_diff
,
1647 .ret_type
= RET_INTEGER
,
1648 .arg1_type
= ARG_PTR_TO_MEM
,
1649 .arg2_type
= ARG_CONST_SIZE_OR_ZERO
,
1650 .arg3_type
= ARG_PTR_TO_MEM
,
1651 .arg4_type
= ARG_CONST_SIZE_OR_ZERO
,
1652 .arg5_type
= ARG_ANYTHING
,
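/* Illustrative sketch (not from this file): when an eBPF program rewrites
 * an IPv4 address with bpf_skb_store_bytes(), the checksum delta computed
 * by this helper feeds bpf_l3_csum_replace()/bpf_l4_csum_replace(). The
 * offsets below are assumed to have been computed by the program:
 *
 *	__be32 old_ip, new_ip = ...;
 *	__s64 diff;
 *
 *	bpf_skb_load_bytes(skb, ip_off, &old_ip, 4);
 *	diff = bpf_csum_diff(&old_ip, 4, &new_ip, 4, 0);
 *	bpf_skb_store_bytes(skb, ip_off, &new_ip, 4, 0);
 *	bpf_l4_csum_replace(skb, l4_csum_off, 0, diff, BPF_F_PSEUDO_HDR);
 *	bpf_l3_csum_replace(skb, ip_csum_off, 0, diff, 0);
 */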
1655 BPF_CALL_2(bpf_csum_update
, struct sk_buff
*, skb
, __wsum
, csum
)
1657 /* The interface is to be used in combination with bpf_csum_diff()
1658 * for direct packet writes. csum rotation for alignment as well
1659 * as emulating csum_sub() can be done from the eBPF program.
1661 if (skb
->ip_summed
== CHECKSUM_COMPLETE
)
1662 return (skb
->csum
= csum_add(skb
->csum
, csum
));
1667 static const struct bpf_func_proto bpf_csum_update_proto
= {
1668 .func
= bpf_csum_update
,
1670 .ret_type
= RET_INTEGER
,
1671 .arg1_type
= ARG_PTR_TO_CTX
,
1672 .arg2_type
= ARG_ANYTHING
,
1675 static inline int __bpf_rx_skb(struct net_device
*dev
, struct sk_buff
*skb
)
1677 return dev_forward_skb(dev
, skb
);
1680 static inline int __bpf_rx_skb_no_mac(struct net_device
*dev
,
1681 struct sk_buff
*skb
)
1683 int ret
= ____dev_forward_skb(dev
, skb
);
1687 ret
= netif_rx(skb
);
1693 static inline int __bpf_tx_skb(struct net_device
*dev
, struct sk_buff
*skb
)
1697 if (unlikely(__this_cpu_read(xmit_recursion
) > XMIT_RECURSION_LIMIT
)) {
1698 net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
1705 __this_cpu_inc(xmit_recursion
);
1706 ret
= dev_queue_xmit(skb
);
1707 __this_cpu_dec(xmit_recursion
);
1712 static int __bpf_redirect_no_mac(struct sk_buff
*skb
, struct net_device
*dev
,
1715 /* skb->mac_len is not set on normal egress */
1716 unsigned int mlen
= skb
->network_header
- skb
->mac_header
;
1718 __skb_pull(skb
, mlen
);
1720 /* At ingress, the mac header has already been pulled once.
1721 * At egress, skb_pospull_rcsum has to be done in case that
1722 * the skb is originated from ingress (i.e. a forwarded skb)
1723 * to ensure that rcsum starts at net header.
1725 if (!skb_at_tc_ingress(skb
))
1726 skb_postpull_rcsum(skb
, skb_mac_header(skb
), mlen
);
1727 skb_pop_mac_header(skb
);
1728 skb_reset_mac_len(skb
);
1729 return flags
& BPF_F_INGRESS
?
1730 __bpf_rx_skb_no_mac(dev
, skb
) : __bpf_tx_skb(dev
, skb
);
1733 static int __bpf_redirect_common(struct sk_buff
*skb
, struct net_device
*dev
,
1736 /* Verify that a link layer header is carried */
1737 if (unlikely(skb
->mac_header
>= skb
->network_header
)) {
1742 bpf_push_mac_rcsum(skb
);
1743 return flags
& BPF_F_INGRESS
?
1744 __bpf_rx_skb(dev
, skb
) : __bpf_tx_skb(dev
, skb
);
1747 static int __bpf_redirect(struct sk_buff
*skb
, struct net_device
*dev
,
1750 if (dev_is_mac_header_xmit(dev
))
1751 return __bpf_redirect_common(skb
, dev
, flags
);
1753 return __bpf_redirect_no_mac(skb
, dev
, flags
);
1756 BPF_CALL_3(bpf_clone_redirect
, struct sk_buff
*, skb
, u32
, ifindex
, u64
, flags
)
1758 struct net_device
*dev
;
1759 struct sk_buff
*clone
;
1762 if (unlikely(flags
& ~(BPF_F_INGRESS
)))
1765 dev
= dev_get_by_index_rcu(dev_net(skb
->dev
), ifindex
);
1769 clone
= skb_clone(skb
, GFP_ATOMIC
);
1770 if (unlikely(!clone
))
1773 /* For direct write, we need to keep the invariant that the skbs
1774 * we're dealing with need to be uncloned. Should uncloning fail
1775 * here, we need to free the just generated clone to unclone once
1778 ret
= bpf_try_make_head_writable(skb
);
1779 if (unlikely(ret
)) {
1784 return __bpf_redirect(clone
, dev
, flags
);
1787 static const struct bpf_func_proto bpf_clone_redirect_proto
= {
1788 .func
= bpf_clone_redirect
,
1790 .ret_type
= RET_INTEGER
,
1791 .arg1_type
= ARG_PTR_TO_CTX
,
1792 .arg2_type
= ARG_ANYTHING
,
1793 .arg3_type
= ARG_ANYTHING
,
1796 struct redirect_info
{
1799 struct bpf_map
*map
;
1800 struct bpf_map
*map_to_flush
;
1801 unsigned long map_owner
;
1804 static DEFINE_PER_CPU(struct redirect_info
, redirect_info
);
1806 BPF_CALL_2(bpf_redirect
, u32
, ifindex
, u64
, flags
)
1808 struct redirect_info
*ri
= this_cpu_ptr(&redirect_info
);
1810 if (unlikely(flags
& ~(BPF_F_INGRESS
)))
1813 ri
->ifindex
= ifindex
;
1816 return TC_ACT_REDIRECT
;
1819 int skb_do_redirect(struct sk_buff
*skb
)
1821 struct redirect_info
*ri
= this_cpu_ptr(&redirect_info
);
1822 struct net_device
*dev
;
1824 dev
= dev_get_by_index_rcu(dev_net(skb
->dev
), ri
->ifindex
);
1826 if (unlikely(!dev
)) {
1831 return __bpf_redirect(skb
, dev
, ri
->flags
);
1834 static const struct bpf_func_proto bpf_redirect_proto
= {
1835 .func
= bpf_redirect
,
1837 .ret_type
= RET_INTEGER
,
1838 .arg1_type
= ARG_ANYTHING
,
1839 .arg2_type
= ARG_ANYTHING
,
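/* Illustrative sketch (not from this file): in a tc classifier program the
 * helper's return value is itself the verdict, so redirecting to the egress
 * side of ifindex 'target' is simply:
 *
 *	return bpf_redirect(target, 0);
 *
 * and redirecting to its ingress side:
 *
 *	return bpf_redirect(target, BPF_F_INGRESS);
 */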
1842 BPF_CALL_4(bpf_sk_redirect_map
, struct sk_buff
*, skb
,
1843 struct bpf_map
*, map
, u32
, key
, u64
, flags
)
1845 struct tcp_skb_cb
*tcb
= TCP_SKB_CB(skb
);
1847 /* If user passes invalid input drop the packet. */
1848 if (unlikely(flags
))
1852 tcb
->bpf
.flags
= flags
;
1858 struct sock
*do_sk_redirect_map(struct sk_buff
*skb
)
1860 struct tcp_skb_cb
*tcb
= TCP_SKB_CB(skb
);
1861 struct sock
*sk
= NULL
;
1864 sk
= __sock_map_lookup_elem(tcb
->bpf
.map
, tcb
->bpf
.key
);
1867 tcb
->bpf
.map
= NULL
;
1873 static const struct bpf_func_proto bpf_sk_redirect_map_proto
= {
1874 .func
= bpf_sk_redirect_map
,
1876 .ret_type
= RET_INTEGER
,
1877 .arg1_type
= ARG_PTR_TO_CTX
,
1878 .arg2_type
= ARG_CONST_MAP_PTR
,
1879 .arg3_type
= ARG_ANYTHING
,
1880 .arg4_type
= ARG_ANYTHING
,
1883 BPF_CALL_1(bpf_get_cgroup_classid
, const struct sk_buff
*, skb
)
1885 return task_get_classid(skb
);
1888 static const struct bpf_func_proto bpf_get_cgroup_classid_proto
= {
1889 .func
= bpf_get_cgroup_classid
,
1891 .ret_type
= RET_INTEGER
,
1892 .arg1_type
= ARG_PTR_TO_CTX
,
1895 BPF_CALL_1(bpf_get_route_realm
, const struct sk_buff
*, skb
)
1897 return dst_tclassid(skb
);
1900 static const struct bpf_func_proto bpf_get_route_realm_proto
= {
1901 .func
= bpf_get_route_realm
,
1903 .ret_type
= RET_INTEGER
,
1904 .arg1_type
= ARG_PTR_TO_CTX
,
1907 BPF_CALL_1(bpf_get_hash_recalc
, struct sk_buff
*, skb
)
1909 /* If skb_clear_hash() was called due to mangling, we can
1910 * trigger SW recalculation here. Later access to hash
1911 * can then use the inline skb->hash via context directly
1912 * instead of calling this helper again.
1914 return skb_get_hash(skb
);
1917 static const struct bpf_func_proto bpf_get_hash_recalc_proto
= {
1918 .func
= bpf_get_hash_recalc
,
1920 .ret_type
= RET_INTEGER
,
1921 .arg1_type
= ARG_PTR_TO_CTX
,
1924 BPF_CALL_1(bpf_set_hash_invalid
, struct sk_buff
*, skb
)
1926 /* After all direct packet write, this can be used once for
1927 * triggering a lazy recalc on next skb_get_hash() invocation.
1929 skb_clear_hash(skb
);
1933 static const struct bpf_func_proto bpf_set_hash_invalid_proto
= {
1934 .func
= bpf_set_hash_invalid
,
1936 .ret_type
= RET_INTEGER
,
1937 .arg1_type
= ARG_PTR_TO_CTX
,
1940 BPF_CALL_2(bpf_set_hash
, struct sk_buff
*, skb
, u32
, hash
)
1942 /* Set user specified hash as L4(+), so that it gets returned
1943 * on skb_get_hash() call unless BPF prog later on triggers a
1946 __skb_set_sw_hash(skb
, hash
, true);
1950 static const struct bpf_func_proto bpf_set_hash_proto
= {
1951 .func
= bpf_set_hash
,
1953 .ret_type
= RET_INTEGER
,
1954 .arg1_type
= ARG_PTR_TO_CTX
,
1955 .arg2_type
= ARG_ANYTHING
,
1958 BPF_CALL_3(bpf_skb_vlan_push
, struct sk_buff
*, skb
, __be16
, vlan_proto
,
1963 if (unlikely(vlan_proto
!= htons(ETH_P_8021Q
) &&
1964 vlan_proto
!= htons(ETH_P_8021AD
)))
1965 vlan_proto
= htons(ETH_P_8021Q
);
1967 bpf_push_mac_rcsum(skb
);
1968 ret
= skb_vlan_push(skb
, vlan_proto
, vlan_tci
);
1969 bpf_pull_mac_rcsum(skb
);
1971 bpf_compute_data_end(skb
);
1975 const struct bpf_func_proto bpf_skb_vlan_push_proto
= {
1976 .func
= bpf_skb_vlan_push
,
1978 .ret_type
= RET_INTEGER
,
1979 .arg1_type
= ARG_PTR_TO_CTX
,
1980 .arg2_type
= ARG_ANYTHING
,
1981 .arg3_type
= ARG_ANYTHING
,
1983 EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto
);
1985 BPF_CALL_1(bpf_skb_vlan_pop
, struct sk_buff
*, skb
)
1989 bpf_push_mac_rcsum(skb
);
1990 ret
= skb_vlan_pop(skb
);
1991 bpf_pull_mac_rcsum(skb
);
1993 bpf_compute_data_end(skb
);
1997 const struct bpf_func_proto bpf_skb_vlan_pop_proto
= {
1998 .func
= bpf_skb_vlan_pop
,
2000 .ret_type
= RET_INTEGER
,
2001 .arg1_type
= ARG_PTR_TO_CTX
,
2003 EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto
);
2005 static int bpf_skb_generic_push(struct sk_buff
*skb
, u32 off
, u32 len
)
2007 /* Caller already did skb_cow() with len as headroom,
2008 * so no need to do it here.
2011 memmove(skb
->data
, skb
->data
+ len
, off
);
2012 memset(skb
->data
+ off
, 0, len
);
2014 /* No skb_postpush_rcsum(skb, skb->data + off, len)
2015 * needed here as it does not change the skb->csum
2016 * result for checksum complete when summing over
2022 static int bpf_skb_generic_pop(struct sk_buff
*skb
, u32 off
, u32 len
)
2024 /* skb_ensure_writable() is not needed here, as we're
2025 * already working on an uncloned skb.
2027 if (unlikely(!pskb_may_pull(skb
, off
+ len
)))
2030 skb_postpull_rcsum(skb
, skb
->data
+ off
, len
);
2031 memmove(skb
->data
+ len
, skb
->data
, off
);
2032 __skb_pull(skb
, len
);
2037 static int bpf_skb_net_hdr_push(struct sk_buff
*skb
, u32 off
, u32 len
)
2039 bool trans_same
= skb
->transport_header
== skb
->network_header
;
2042 /* There's no need for __skb_push()/__skb_pull() pair to
2043 * get to the start of the mac header as we're guaranteed
2044 * to always start from here under eBPF.
2046 ret
= bpf_skb_generic_push(skb
, off
, len
);
2048 skb
->mac_header
-= len
;
2049 skb
->network_header
-= len
;
2051 skb
->transport_header
= skb
->network_header
;
2057 static int bpf_skb_net_hdr_pop(struct sk_buff
*skb
, u32 off
, u32 len
)
2059 bool trans_same
= skb
->transport_header
== skb
->network_header
;
2062 /* Same here, __skb_push()/__skb_pull() pair not needed. */
2063 ret
= bpf_skb_generic_pop(skb
, off
, len
);
2065 skb
->mac_header
+= len
;
2066 skb
->network_header
+= len
;
2068 skb
->transport_header
= skb
->network_header
;
2074 static int bpf_skb_proto_4_to_6(struct sk_buff
*skb
)
2076 const u32 len_diff
= sizeof(struct ipv6hdr
) - sizeof(struct iphdr
);
2077 u32 off
= skb_mac_header_len(skb
);
2080 ret
= skb_cow(skb
, len_diff
);
2081 if (unlikely(ret
< 0))
2084 ret
= bpf_skb_net_hdr_push(skb
, off
, len_diff
);
2085 if (unlikely(ret
< 0))
2088 if (skb_is_gso(skb
)) {
2089 /* SKB_GSO_TCPV4 needs to be changed into
2092 if (skb_shinfo(skb
)->gso_type
& SKB_GSO_TCPV4
) {
2093 skb_shinfo(skb
)->gso_type
&= ~SKB_GSO_TCPV4
;
2094 skb_shinfo(skb
)->gso_type
|= SKB_GSO_TCPV6
;
2097 /* Due to IPv6 header, MSS needs to be downgraded. */
2098 skb_shinfo(skb
)->gso_size
-= len_diff
;
2099 /* Header must be checked, and gso_segs recomputed. */
2100 skb_shinfo(skb
)->gso_type
|= SKB_GSO_DODGY
;
2101 skb_shinfo(skb
)->gso_segs
= 0;
2104 skb
->protocol
= htons(ETH_P_IPV6
);
2105 skb_clear_hash(skb
);
2110 static int bpf_skb_proto_6_to_4(struct sk_buff
*skb
)
2112 const u32 len_diff
= sizeof(struct ipv6hdr
) - sizeof(struct iphdr
);
2113 u32 off
= skb_mac_header_len(skb
);
2116 ret
= skb_unclone(skb
, GFP_ATOMIC
);
2117 if (unlikely(ret
< 0))
2120 ret
= bpf_skb_net_hdr_pop(skb
, off
, len_diff
);
2121 if (unlikely(ret
< 0))
2124 if (skb_is_gso(skb
)) {
2125 /* SKB_GSO_TCPV6 needs to be changed into
2128 if (skb_shinfo(skb
)->gso_type
& SKB_GSO_TCPV6
) {
2129 skb_shinfo(skb
)->gso_type
&= ~SKB_GSO_TCPV6
;
2130 skb_shinfo(skb
)->gso_type
|= SKB_GSO_TCPV4
;
2133 /* Due to IPv4 header, MSS can be upgraded. */
2134 skb_shinfo(skb
)->gso_size
+= len_diff
;
2135 /* Header must be checked, and gso_segs recomputed. */
2136 skb_shinfo(skb
)->gso_type
|= SKB_GSO_DODGY
;
2137 skb_shinfo(skb
)->gso_segs
= 0;
2140 skb
->protocol
= htons(ETH_P_IP
);
2141 skb_clear_hash(skb
);
2146 static int bpf_skb_proto_xlat(struct sk_buff
*skb
, __be16 to_proto
)
2148 __be16 from_proto
= skb
->protocol
;
2150 if (from_proto
== htons(ETH_P_IP
) &&
2151 to_proto
== htons(ETH_P_IPV6
))
2152 return bpf_skb_proto_4_to_6(skb
);
2154 if (from_proto
== htons(ETH_P_IPV6
) &&
2155 to_proto
== htons(ETH_P_IP
))
2156 return bpf_skb_proto_6_to_4(skb
);
2161 BPF_CALL_3(bpf_skb_change_proto
, struct sk_buff
*, skb
, __be16
, proto
,
2166 if (unlikely(flags
))
2169 /* General idea is that this helper does the basic groundwork
2170 * needed for changing the protocol, and eBPF program fills the
2171 * rest through bpf_skb_store_bytes(), bpf_lX_csum_replace()
2172 * and other helpers, rather than passing a raw buffer here.
2174 * The rationale is to keep this minimal and without a need to
2175 * deal with raw packet data. F.e. even if we would pass buffers
2176 * here, the program still needs to call the bpf_lX_csum_replace()
2177 * helpers anyway. Plus, this way we keep also separation of
2178 * concerns, since f.e. bpf_skb_store_bytes() should only take
2181 * Currently, additional options and extension header space are
2182 * not supported, but flags register is reserved so we can adapt
2183 * that. For offloads, we mark packet as dodgy, so that headers
2184 * need to be verified first.
2186 ret
= bpf_skb_proto_xlat(skb
, proto
);
2187 bpf_compute_data_end(skb
);
2191 static const struct bpf_func_proto bpf_skb_change_proto_proto
= {
2192 .func
= bpf_skb_change_proto
,
2194 .ret_type
= RET_INTEGER
,
2195 .arg1_type
= ARG_PTR_TO_CTX
,
2196 .arg2_type
= ARG_ANYTHING
,
2197 .arg3_type
= ARG_ANYTHING
,
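/* Illustrative sketch (not from this file): a NAT64-style program would
 * combine this helper with the byte/csum helpers above, e.g. going from
 * IPv4 to IPv6 and then rewriting the network header the helper made room
 * for:
 *
 *	if (bpf_skb_change_proto(skb, htons(ETH_P_IPV6), 0))
 *		return TC_ACT_SHOT;
 *	bpf_skb_store_bytes(skb, ETH_HLEN, &ip6h, sizeof(ip6h), 0);
 */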
2200 BPF_CALL_2(bpf_skb_change_type
, struct sk_buff
*, skb
, u32
, pkt_type
)
2202 /* We only allow a restricted subset to be changed for now. */
2203 if (unlikely(!skb_pkt_type_ok(skb
->pkt_type
) ||
2204 !skb_pkt_type_ok(pkt_type
)))
2207 skb
->pkt_type
= pkt_type
;
2211 static const struct bpf_func_proto bpf_skb_change_type_proto
= {
2212 .func
= bpf_skb_change_type
,
2214 .ret_type
= RET_INTEGER
,
2215 .arg1_type
= ARG_PTR_TO_CTX
,
2216 .arg2_type
= ARG_ANYTHING
,
2219 static u32
bpf_skb_net_base_len(const struct sk_buff
*skb
)
2221 switch (skb
->protocol
) {
2222 case htons(ETH_P_IP
):
2223 return sizeof(struct iphdr
);
2224 case htons(ETH_P_IPV6
):
2225 return sizeof(struct ipv6hdr
);
2231 static int bpf_skb_net_grow(struct sk_buff
*skb
, u32 len_diff
)
2233 u32 off
= skb_mac_header_len(skb
) + bpf_skb_net_base_len(skb
);
2236 ret
= skb_cow(skb
, len_diff
);
2237 if (unlikely(ret
< 0))
2240 ret
= bpf_skb_net_hdr_push(skb
, off
, len_diff
);
2241 if (unlikely(ret
< 0))
2244 if (skb_is_gso(skb
)) {
2245 /* Due to header grow, MSS needs to be downgraded. */
2246 skb_shinfo(skb
)->gso_size
-= len_diff
;
2247 /* Header must be checked, and gso_segs recomputed. */
2248 skb_shinfo(skb
)->gso_type
|= SKB_GSO_DODGY
;
2249 skb_shinfo(skb
)->gso_segs
= 0;
2255 static int bpf_skb_net_shrink(struct sk_buff
*skb
, u32 len_diff
)
2257 u32 off
= skb_mac_header_len(skb
) + bpf_skb_net_base_len(skb
);
2260 ret
= skb_unclone(skb
, GFP_ATOMIC
);
2261 if (unlikely(ret
< 0))
2264 ret
= bpf_skb_net_hdr_pop(skb
, off
, len_diff
);
2265 if (unlikely(ret
< 0))
2268 if (skb_is_gso(skb
)) {
2269 /* Due to header shrink, MSS can be upgraded. */
2270 skb_shinfo(skb
)->gso_size
+= len_diff
;
2271 /* Header must be checked, and gso_segs recomputed. */
2272 skb_shinfo(skb
)->gso_type
|= SKB_GSO_DODGY
;
2273 skb_shinfo(skb
)->gso_segs
= 0;
2279 static u32
__bpf_skb_max_len(const struct sk_buff
*skb
)
2281 return skb
->dev
->mtu
+ skb
->dev
->hard_header_len
;
2284 static int bpf_skb_adjust_net(struct sk_buff
*skb
, s32 len_diff
)
2286 bool trans_same
= skb
->transport_header
== skb
->network_header
;
2287 u32 len_cur
, len_diff_abs
= abs(len_diff
);
2288 u32 len_min
= bpf_skb_net_base_len(skb
);
2289 u32 len_max
= __bpf_skb_max_len(skb
);
2290 __be16 proto
= skb
->protocol
;
2291 bool shrink
= len_diff
< 0;
2294 if (unlikely(len_diff_abs
> 0xfffU
))
2296 if (unlikely(proto
!= htons(ETH_P_IP
) &&
2297 proto
!= htons(ETH_P_IPV6
)))
2300 len_cur
= skb
->len
- skb_network_offset(skb
);
2301 if (skb_transport_header_was_set(skb
) && !trans_same
)
2302 len_cur
= skb_network_header_len(skb
);
2303 if ((shrink
&& (len_diff_abs
>= len_cur
||
2304 len_cur
- len_diff_abs
< len_min
)) ||
2305 (!shrink
&& (skb
->len
+ len_diff_abs
> len_max
&&
2309 ret
= shrink
? bpf_skb_net_shrink(skb
, len_diff_abs
) :
2310 bpf_skb_net_grow(skb
, len_diff_abs
);
2312 bpf_compute_data_end(skb
);
2316 BPF_CALL_4(bpf_skb_adjust_room
, struct sk_buff
*, skb
, s32
, len_diff
,
2317 u32
, mode
, u64
, flags
)
2319 if (unlikely(flags
))
2321 if (likely(mode
== BPF_ADJ_ROOM_NET
))
2322 return bpf_skb_adjust_net(skb
, len_diff
);
2327 static const struct bpf_func_proto bpf_skb_adjust_room_proto
= {
2328 .func
= bpf_skb_adjust_room
,
2330 .ret_type
= RET_INTEGER
,
2331 .arg1_type
= ARG_PTR_TO_CTX
,
2332 .arg2_type
= ARG_ANYTHING
,
2333 .arg3_type
= ARG_ANYTHING
,
2334 .arg4_type
= ARG_ANYTHING
,
2337 static u32
__bpf_skb_min_len(const struct sk_buff
*skb
)
2339 u32 min_len
= skb_network_offset(skb
);
2341 if (skb_transport_header_was_set(skb
))
2342 min_len
= skb_transport_offset(skb
);
2343 if (skb
->ip_summed
== CHECKSUM_PARTIAL
)
2344 min_len
= skb_checksum_start_offset(skb
) +
2345 skb
->csum_offset
+ sizeof(__sum16
);
2349 static int bpf_skb_grow_rcsum(struct sk_buff
*skb
, unsigned int new_len
)
2351 unsigned int old_len
= skb
->len
;
2354 ret
= __skb_grow_rcsum(skb
, new_len
);
2356 memset(skb
->data
+ old_len
, 0, new_len
- old_len
);
2360 static int bpf_skb_trim_rcsum(struct sk_buff
*skb
, unsigned int new_len
)
2362 return __skb_trim_rcsum(skb
, new_len
);
BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
	   u64, flags)
{
	u32 max_len = __bpf_skb_max_len(skb);
	u32 min_len = __bpf_skb_min_len(skb);
	int ret;

	if (unlikely(flags || new_len > max_len || new_len < min_len))
		return -EINVAL;
	if (skb->encapsulation)
		return -ENOTSUPP;

	/* The basic idea of this helper is that it's performing the
	 * needed work to either grow or trim an skb, and eBPF program
	 * rewrites the rest via helpers like bpf_skb_store_bytes(),
	 * bpf_lX_csum_replace() and others rather than passing a raw
	 * buffer here. This one is a slow path helper and intended
	 * for replies with control messages.
	 *
	 * Like in bpf_skb_change_proto(), we want to keep this rather
	 * minimal and without protocol specifics so that we are able
	 * to separate concerns as in bpf_skb_store_bytes() should only
	 * be the one responsible for writing buffers.
	 *
	 * It's really expected to be a slow path operation here for
	 * control message replies, so we're implicitly linearizing,
	 * uncloning and drop offloads from the skb by this.
	 */
	ret = __bpf_try_make_writable(skb, skb->len);
	if (!ret) {
		if (new_len > skb->len)
			ret = bpf_skb_grow_rcsum(skb, new_len);
		else if (new_len < skb->len)
			ret = bpf_skb_trim_rcsum(skb, new_len);
		if (!ret && skb_is_gso(skb))
			skb_gso_reset(skb);
	}

	bpf_compute_data_end(skb);
	return ret;
}

static const struct bpf_func_proto bpf_skb_change_tail_proto = {
	.func		= bpf_skb_change_tail,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
};
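/* Illustrative sketch (not part of this file): padding or trimming a small
 * control-message reply to a fixed length with the bpf_skb_change_tail()
 * slow-path helper above, e.g. from an lwt program. The section name and the
 * 128-byte target are placeholders for the example.
 *
 *	SEC("lwt_xmit")
 *	int pad_reply(struct __sk_buff *skb)
 *	{
 *		const __u32 target_len = 128;	// assumed fixed reply size
 *
 *		// Grown bytes are zeroed and rcsum is updated by the helper;
 *		// the program rewrites payload afterwards with
 *		// bpf_skb_store_bytes().
 *		if (bpf_skb_change_tail(skb, target_len, 0))
 *			return BPF_DROP;
 *		return BPF_OK;
 *	}
 */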
BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
	   u64, flags)
{
	u32 max_len = __bpf_skb_max_len(skb);
	u32 new_len = skb->len + head_room;
	int ret;

	if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) ||
		     new_len < skb->len))
		return -EINVAL;

	ret = skb_cow(skb, head_room);
	if (likely(!ret)) {
		/* Idea for this helper is that we currently only
		 * allow to expand on mac header. This means that
		 * skb->protocol network header, etc, stay as is.
		 * Compared to bpf_skb_change_tail(), we're more
		 * flexible due to not needing to linearize or
		 * reset GSO. Intention for this helper is to be
		 * used by an L3 skb that needs to push mac header
		 * for redirection into L2 device.
		 */
		__skb_push(skb, head_room);
		memset(skb->data, 0, head_room);
		skb_reset_mac_header(skb);
	}

	bpf_compute_data_end(skb);
	return ret;
}

static const struct bpf_func_proto bpf_skb_change_head_proto = {
	.func		= bpf_skb_change_head,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
};
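/* Illustrative sketch (not part of this file): using bpf_skb_change_head()
 * from an L3 context to push room for an Ethernet header before redirecting
 * into an L2 device, which is the use case the helper above targets. MAC
 * addresses, the ifindex constant and bpf_htons() (from the usual BPF endian
 * header) are assumptions for the example.
 *
 *	SEC("lwt_xmit")
 *	int push_eth_and_redirect(struct __sk_buff *skb)
 *	{
 *		struct ethhdr eth = {
 *			.h_dest   = { 0x02, 0, 0, 0, 0, 0x01 },  // example
 *			.h_source = { 0x02, 0, 0, 0, 0, 0x02 },  // example
 *			.h_proto  = bpf_htons(ETH_P_IP),
 *		};
 *
 *		if (bpf_skb_change_head(skb, sizeof(eth), 0))
 *			return BPF_DROP;
 *		if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0))
 *			return BPF_DROP;
 *		return bpf_redirect(TARGET_IFINDEX, 0);	// assumed ifindex
 *	}
 */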
BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
{
	void *data = xdp->data + offset;

	if (unlikely(data < xdp->data_hard_start ||
		     data > xdp->data_end - ETH_HLEN))
		return -EINVAL;

	xdp->data = data;

	return 0;
}

static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
	.func		= bpf_xdp_adjust_head,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};
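/* Illustrative sketch (not part of this file): an XDP program using the
 * bpf_xdp_adjust_head() helper above to pop a fixed-size outer header. The
 * 8-byte size and program name are made-up example values.
 *
 *	SEC("xdp")
 *	int pop_outer_hdr(struct xdp_md *ctx)
 *	{
 *		void *data, *data_end;
 *
 *		// A positive offset moves xdp->data forward; the helper
 *		// rejects moves that leave less than an Ethernet header.
 *		if (bpf_xdp_adjust_head(ctx, 8))
 *			return XDP_DROP;
 *		// data/data_end must be re-read after the call.
 *		data     = (void *)(long)ctx->data;
 *		data_end = (void *)(long)ctx->data_end;
 *		if (data + sizeof(struct ethhdr) > data_end)
 *			return XDP_DROP;
 *		return XDP_PASS;
 *	}
 */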
static int __bpf_tx_xdp(struct net_device *dev,
			struct bpf_map *map,
			struct xdp_buff *xdp,
			u32 index)
{
	int err;

	if (!dev->netdev_ops->ndo_xdp_xmit)
		return -EOPNOTSUPP;

	err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
	if (err)
		return err;
	if (map)
		__dev_map_insert_ctx(map, index);
	else
		dev->netdev_ops->ndo_xdp_flush(dev);
	return 0;
}

void xdp_do_flush_map(void)
{
	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
	struct bpf_map *map = ri->map_to_flush;

	ri->map_to_flush = NULL;
	if (map)
		__dev_map_flush(map);
}
EXPORT_SYMBOL_GPL(xdp_do_flush_map);

static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
				   unsigned long aux)
{
	return (unsigned long)xdp_prog->aux != aux;
}
2515 static int xdp_do_redirect_map(struct net_device
*dev
, struct xdp_buff
*xdp
,
2516 struct bpf_prog
*xdp_prog
)
2518 struct redirect_info
*ri
= this_cpu_ptr(&redirect_info
);
2519 unsigned long map_owner
= ri
->map_owner
;
2520 struct bpf_map
*map
= ri
->map
;
2521 struct net_device
*fwd
= NULL
;
2522 u32 index
= ri
->ifindex
;
2529 if (unlikely(xdp_map_invalid(xdp_prog
, map_owner
))) {
2535 fwd
= __dev_map_lookup_elem(map
, index
);
2540 if (ri
->map_to_flush
&& ri
->map_to_flush
!= map
)
2543 err
= __bpf_tx_xdp(fwd
, map
, xdp
, index
);
2547 ri
->map_to_flush
= map
;
2548 _trace_xdp_redirect_map(dev
, xdp_prog
, fwd
, map
, index
);
2551 _trace_xdp_redirect_map_err(dev
, xdp_prog
, fwd
, map
, index
, err
);
2555 int xdp_do_redirect(struct net_device
*dev
, struct xdp_buff
*xdp
,
2556 struct bpf_prog
*xdp_prog
)
2558 struct redirect_info
*ri
= this_cpu_ptr(&redirect_info
);
2559 struct net_device
*fwd
;
2560 u32 index
= ri
->ifindex
;
2564 return xdp_do_redirect_map(dev
, xdp
, xdp_prog
);
2566 fwd
= dev_get_by_index_rcu(dev_net(dev
), index
);
2568 if (unlikely(!fwd
)) {
2573 err
= __bpf_tx_xdp(fwd
, NULL
, xdp
, 0);
2577 _trace_xdp_redirect(dev
, xdp_prog
, index
);
2580 _trace_xdp_redirect_err(dev
, xdp_prog
, index
, err
);
2583 EXPORT_SYMBOL_GPL(xdp_do_redirect
);
2585 int xdp_do_generic_redirect(struct net_device
*dev
, struct sk_buff
*skb
,
2586 struct bpf_prog
*xdp_prog
)
2588 struct redirect_info
*ri
= this_cpu_ptr(&redirect_info
);
2589 unsigned long map_owner
= ri
->map_owner
;
2590 struct bpf_map
*map
= ri
->map
;
2591 struct net_device
*fwd
= NULL
;
2592 u32 index
= ri
->ifindex
;
2601 if (unlikely(xdp_map_invalid(xdp_prog
, map_owner
))) {
2606 fwd
= __dev_map_lookup_elem(map
, index
);
2608 fwd
= dev_get_by_index_rcu(dev_net(dev
), index
);
2610 if (unlikely(!fwd
)) {
2615 if (unlikely(!(fwd
->flags
& IFF_UP
))) {
2620 len
= fwd
->mtu
+ fwd
->hard_header_len
+ VLAN_HLEN
;
2621 if (skb
->len
> len
) {
2627 map
? _trace_xdp_redirect_map(dev
, xdp_prog
, fwd
, map
, index
)
2628 : _trace_xdp_redirect(dev
, xdp_prog
, index
);
2631 map
? _trace_xdp_redirect_map_err(dev
, xdp_prog
, fwd
, map
, index
, err
)
2632 : _trace_xdp_redirect_err(dev
, xdp_prog
, index
, err
);
2635 EXPORT_SYMBOL_GPL(xdp_do_generic_redirect
);
BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
{
	struct redirect_info *ri = this_cpu_ptr(&redirect_info);

	if (unlikely(flags))
		return XDP_ABORTED;

	ri->ifindex = ifindex;
	ri->map = NULL;
	ri->map_owner = 0;

	return XDP_REDIRECT;
}

static const struct bpf_func_proto bpf_xdp_redirect_proto = {
	.func		= bpf_xdp_redirect,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
	   unsigned long, map_owner)
{
	struct redirect_info *ri = this_cpu_ptr(&redirect_info);

	if (unlikely(flags))
		return XDP_ABORTED;

	ri->ifindex = ifindex;
	ri->map = map;
	ri->map_owner = map_owner;

	return XDP_REDIRECT;
}

/* Note, arg4 is hidden from users and populated by the verifier
 * with the right pointer.
 */
static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
	.func		= bpf_xdp_redirect_map,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
};
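/* Illustrative sketch (not part of this file): redirecting through a devmap
 * with the bpf_redirect_map() helper, which lands in bpf_xdp_redirect_map()
 * above and is later resolved and flushed by xdp_do_redirect() and
 * xdp_do_flush_map(). The map name, key and era-typical bpf_map_def style
 * are assumptions for the example.
 *
 *	struct bpf_map_def SEC("maps") tx_ports = {
 *		.type        = BPF_MAP_TYPE_DEVMAP,
 *		.key_size    = sizeof(__u32),
 *		.value_size  = sizeof(__u32),
 *		.max_entries = 64,
 *	};
 *
 *	SEC("xdp")
 *	int xdp_redirect_port(struct xdp_md *ctx)
 *	{
 *		__u32 key = 0;	// real code would parse/hash to pick a slot
 *
 *		// Returns XDP_REDIRECT on success, XDP_ABORTED on bad flags.
 *		return bpf_redirect_map(&tx_ports, key, 0);
 *	}
 */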
bool bpf_helper_changes_pkt_data(void *func)
{
	if (func == bpf_skb_vlan_push ||
	    func == bpf_skb_vlan_pop ||
	    func == bpf_skb_store_bytes ||
	    func == bpf_skb_change_proto ||
	    func == bpf_skb_change_head ||
	    func == bpf_skb_change_tail ||
	    func == bpf_skb_adjust_room ||
	    func == bpf_skb_pull_data ||
	    func == bpf_clone_redirect ||
	    func == bpf_l3_csum_replace ||
	    func == bpf_l4_csum_replace ||
	    func == bpf_xdp_adjust_head)
		return true;

	return false;
}
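/* Illustrative sketch (not part of this file): what the list above means for
 * program authors. After calling any helper named in
 * bpf_helper_changes_pkt_data(), previously loaded data/data_end pointers are
 * invalidated by the verifier and must be re-read before further direct
 * packet access. The program and section names are examples only.
 *
 *	SEC("classifier")
 *	int revalidate_after_pull(struct __sk_buff *skb)
 *	{
 *		void *data, *data_end;
 *
 *		// bpf_skb_pull_data() may relocate packet data ...
 *		if (bpf_skb_pull_data(skb, 0))
 *			return TC_ACT_SHOT;
 *		// ... so the context fields are re-read afterwards.
 *		data     = (void *)(long)skb->data;
 *		data_end = (void *)(long)skb->data_end;
 *		if (data + sizeof(struct ethhdr) > data_end)
 *			return TC_ACT_SHOT;
 *		return TC_ACT_OK;
 *	}
 */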
static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
				  unsigned long off, unsigned long len)
{
	void *ptr = skb_header_pointer(skb, off, len, dst_buff);

	if (unlikely(!ptr))
		return len;
	if (ptr != dst_buff)
		memcpy(dst_buff, ptr, len);

	return 0;
}

BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
	   u64, flags, void *, meta, u64, meta_size)
{
	u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;

	if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
		return -EINVAL;
	if (unlikely(skb_size > skb->len))
		return -EFAULT;

	return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
				bpf_skb_copy);
}

static const struct bpf_func_proto bpf_skb_event_output_proto = {
	.func		= bpf_skb_event_output,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM,
	.arg5_type	= ARG_CONST_SIZE,
};
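/* Illustrative sketch (not part of this file): emitting an event through the
 * bpf_skb_event_output() path above via bpf_perf_event_output(), including
 * the first bytes of the packet by encoding a length in the upper 32 bits of
 * the flags (BPF_F_CTXLEN_MASK). Map, struct and program names are made up.
 *
 *	struct event { __u32 len; };
 *
 *	struct bpf_map_def SEC("maps") events = {
 *		.type        = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
 *		.key_size    = sizeof(int),
 *		.value_size  = sizeof(__u32),
 *		.max_entries = 128,	// >= number of CPUs assumed
 *	};
 *
 *	SEC("classifier")
 *	int sample_skb(struct __sk_buff *skb)
 *	{
 *		struct event ev = { .len = skb->len };
 *		__u64 flags = BPF_F_CURRENT_CPU |
 *			      ((__u64)64 << 32);	// also copy 64 pkt bytes
 *
 *		bpf_perf_event_output(skb, &events, flags, &ev, sizeof(ev));
 *		return TC_ACT_OK;
 *	}
 */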
2745 static unsigned short bpf_tunnel_key_af(u64 flags
)
2747 return flags
& BPF_F_TUNINFO_IPV6
? AF_INET6
: AF_INET
;
2750 BPF_CALL_4(bpf_skb_get_tunnel_key
, struct sk_buff
*, skb
, struct bpf_tunnel_key
*, to
,
2751 u32
, size
, u64
, flags
)
2753 const struct ip_tunnel_info
*info
= skb_tunnel_info(skb
);
2754 u8 compat
[sizeof(struct bpf_tunnel_key
)];
2758 if (unlikely(!info
|| (flags
& ~(BPF_F_TUNINFO_IPV6
)))) {
2762 if (ip_tunnel_info_af(info
) != bpf_tunnel_key_af(flags
)) {
2766 if (unlikely(size
!= sizeof(struct bpf_tunnel_key
))) {
2769 case offsetof(struct bpf_tunnel_key
, tunnel_label
):
2770 case offsetof(struct bpf_tunnel_key
, tunnel_ext
):
2772 case offsetof(struct bpf_tunnel_key
, remote_ipv6
[1]):
2773 /* Fixup deprecated structure layouts here, so we have
2774 * a common path later on.
2776 if (ip_tunnel_info_af(info
) != AF_INET
)
2779 to
= (struct bpf_tunnel_key
*)compat
;
2786 to
->tunnel_id
= be64_to_cpu(info
->key
.tun_id
);
2787 to
->tunnel_tos
= info
->key
.tos
;
2788 to
->tunnel_ttl
= info
->key
.ttl
;
2790 if (flags
& BPF_F_TUNINFO_IPV6
) {
2791 memcpy(to
->remote_ipv6
, &info
->key
.u
.ipv6
.src
,
2792 sizeof(to
->remote_ipv6
));
2793 to
->tunnel_label
= be32_to_cpu(info
->key
.label
);
2795 to
->remote_ipv4
= be32_to_cpu(info
->key
.u
.ipv4
.src
);
2798 if (unlikely(size
!= sizeof(struct bpf_tunnel_key
)))
2799 memcpy(to_orig
, to
, size
);
2803 memset(to_orig
, 0, size
);
2807 static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto
= {
2808 .func
= bpf_skb_get_tunnel_key
,
2810 .ret_type
= RET_INTEGER
,
2811 .arg1_type
= ARG_PTR_TO_CTX
,
2812 .arg2_type
= ARG_PTR_TO_UNINIT_MEM
,
2813 .arg3_type
= ARG_CONST_SIZE
,
2814 .arg4_type
= ARG_ANYTHING
,
2817 BPF_CALL_3(bpf_skb_get_tunnel_opt
, struct sk_buff
*, skb
, u8
*, to
, u32
, size
)
2819 const struct ip_tunnel_info
*info
= skb_tunnel_info(skb
);
2822 if (unlikely(!info
||
2823 !(info
->key
.tun_flags
& TUNNEL_OPTIONS_PRESENT
))) {
2827 if (unlikely(size
< info
->options_len
)) {
2832 ip_tunnel_info_opts_get(to
, info
);
2833 if (size
> info
->options_len
)
2834 memset(to
+ info
->options_len
, 0, size
- info
->options_len
);
2836 return info
->options_len
;
2838 memset(to
, 0, size
);
2842 static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto
= {
2843 .func
= bpf_skb_get_tunnel_opt
,
2845 .ret_type
= RET_INTEGER
,
2846 .arg1_type
= ARG_PTR_TO_CTX
,
2847 .arg2_type
= ARG_PTR_TO_UNINIT_MEM
,
2848 .arg3_type
= ARG_CONST_SIZE
,
2851 static struct metadata_dst __percpu
*md_dst
;
2853 BPF_CALL_4(bpf_skb_set_tunnel_key
, struct sk_buff
*, skb
,
2854 const struct bpf_tunnel_key
*, from
, u32
, size
, u64
, flags
)
2856 struct metadata_dst
*md
= this_cpu_ptr(md_dst
);
2857 u8 compat
[sizeof(struct bpf_tunnel_key
)];
2858 struct ip_tunnel_info
*info
;
2860 if (unlikely(flags
& ~(BPF_F_TUNINFO_IPV6
| BPF_F_ZERO_CSUM_TX
|
2861 BPF_F_DONT_FRAGMENT
)))
2863 if (unlikely(size
!= sizeof(struct bpf_tunnel_key
))) {
2865 case offsetof(struct bpf_tunnel_key
, tunnel_label
):
2866 case offsetof(struct bpf_tunnel_key
, tunnel_ext
):
2867 case offsetof(struct bpf_tunnel_key
, remote_ipv6
[1]):
2868 /* Fixup deprecated structure layouts here, so we have
2869 * a common path later on.
2871 memcpy(compat
, from
, size
);
2872 memset(compat
+ size
, 0, sizeof(compat
) - size
);
2873 from
= (const struct bpf_tunnel_key
*) compat
;
2879 if (unlikely((!(flags
& BPF_F_TUNINFO_IPV6
) && from
->tunnel_label
) ||
2884 dst_hold((struct dst_entry
*) md
);
2885 skb_dst_set(skb
, (struct dst_entry
*) md
);
2887 info
= &md
->u
.tun_info
;
2888 info
->mode
= IP_TUNNEL_INFO_TX
;
2890 info
->key
.tun_flags
= TUNNEL_KEY
| TUNNEL_CSUM
| TUNNEL_NOCACHE
;
2891 if (flags
& BPF_F_DONT_FRAGMENT
)
2892 info
->key
.tun_flags
|= TUNNEL_DONT_FRAGMENT
;
2894 info
->key
.tun_id
= cpu_to_be64(from
->tunnel_id
);
2895 info
->key
.tos
= from
->tunnel_tos
;
2896 info
->key
.ttl
= from
->tunnel_ttl
;
2898 if (flags
& BPF_F_TUNINFO_IPV6
) {
2899 info
->mode
|= IP_TUNNEL_INFO_IPV6
;
2900 memcpy(&info
->key
.u
.ipv6
.dst
, from
->remote_ipv6
,
2901 sizeof(from
->remote_ipv6
));
2902 info
->key
.label
= cpu_to_be32(from
->tunnel_label
) &
2903 IPV6_FLOWLABEL_MASK
;
2905 info
->key
.u
.ipv4
.dst
= cpu_to_be32(from
->remote_ipv4
);
2906 if (flags
& BPF_F_ZERO_CSUM_TX
)
2907 info
->key
.tun_flags
&= ~TUNNEL_CSUM
;
2913 static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto
= {
2914 .func
= bpf_skb_set_tunnel_key
,
2916 .ret_type
= RET_INTEGER
,
2917 .arg1_type
= ARG_PTR_TO_CTX
,
2918 .arg2_type
= ARG_PTR_TO_MEM
,
2919 .arg3_type
= ARG_CONST_SIZE
,
2920 .arg4_type
= ARG_ANYTHING
,
2923 BPF_CALL_3(bpf_skb_set_tunnel_opt
, struct sk_buff
*, skb
,
2924 const u8
*, from
, u32
, size
)
2926 struct ip_tunnel_info
*info
= skb_tunnel_info(skb
);
2927 const struct metadata_dst
*md
= this_cpu_ptr(md_dst
);
2929 if (unlikely(info
!= &md
->u
.tun_info
|| (size
& (sizeof(u32
) - 1))))
2931 if (unlikely(size
> IP_TUNNEL_OPTS_MAX
))
2934 ip_tunnel_info_opts_set(info
, from
, size
);
2939 static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto
= {
2940 .func
= bpf_skb_set_tunnel_opt
,
2942 .ret_type
= RET_INTEGER
,
2943 .arg1_type
= ARG_PTR_TO_CTX
,
2944 .arg2_type
= ARG_PTR_TO_MEM
,
2945 .arg3_type
= ARG_CONST_SIZE
,
static const struct bpf_func_proto *
bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
{
	if (!md_dst) {
		/* Race is not possible, since it's called from verifier
		 * that is holding verifier mutex.
		 */
		md_dst = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
						   METADATA_IP_TUNNEL,
						   GFP_KERNEL);
		if (!md_dst)
			return NULL;
	}

	switch (which) {
	case BPF_FUNC_skb_set_tunnel_key:
		return &bpf_skb_set_tunnel_key_proto;
	case BPF_FUNC_skb_set_tunnel_opt:
		return &bpf_skb_set_tunnel_opt_proto;
	default:
		return NULL;
	}
}
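/* Illustrative sketch (not part of this file): setting tunnel metadata from a
 * tc program attached to a collect_md tunnel device (e.g. a vxlan device
 * created with "external"), which is what bpf_skb_set_tunnel_key() above
 * serves. The tunnel id and remote address are made up for the example; note
 * remote_ipv4 is supplied in host byte order, as the helper converts it.
 *
 *	SEC("classifier")
 *	int set_tunnel(struct __sk_buff *skb)
 *	{
 *		struct bpf_tunnel_key key = {};
 *
 *		key.tunnel_id   = 42;		// example VNI
 *		key.remote_ipv4 = 0x0a000002;	// 10.0.0.2
 *		key.tunnel_ttl  = 64;
 *
 *		if (bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 *					   BPF_F_ZERO_CSUM_TX))
 *			return TC_ACT_SHOT;
 *		return TC_ACT_OK;
 *	}
 */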
2972 BPF_CALL_3(bpf_skb_under_cgroup
, struct sk_buff
*, skb
, struct bpf_map
*, map
,
2975 struct bpf_array
*array
= container_of(map
, struct bpf_array
, map
);
2976 struct cgroup
*cgrp
;
2979 sk
= skb_to_full_sk(skb
);
2980 if (!sk
|| !sk_fullsock(sk
))
2982 if (unlikely(idx
>= array
->map
.max_entries
))
2985 cgrp
= READ_ONCE(array
->ptrs
[idx
]);
2986 if (unlikely(!cgrp
))
2989 return sk_under_cgroup_hierarchy(sk
, cgrp
);
2992 static const struct bpf_func_proto bpf_skb_under_cgroup_proto
= {
2993 .func
= bpf_skb_under_cgroup
,
2995 .ret_type
= RET_INTEGER
,
2996 .arg1_type
= ARG_PTR_TO_CTX
,
2997 .arg2_type
= ARG_CONST_MAP_PTR
,
2998 .arg3_type
= ARG_ANYTHING
,
3001 static unsigned long bpf_xdp_copy(void *dst_buff
, const void *src_buff
,
3002 unsigned long off
, unsigned long len
)
3004 memcpy(dst_buff
, src_buff
+ off
, len
);
3008 BPF_CALL_5(bpf_xdp_event_output
, struct xdp_buff
*, xdp
, struct bpf_map
*, map
,
3009 u64
, flags
, void *, meta
, u64
, meta_size
)
3011 u64 xdp_size
= (flags
& BPF_F_CTXLEN_MASK
) >> 32;
3013 if (unlikely(flags
& ~(BPF_F_CTXLEN_MASK
| BPF_F_INDEX_MASK
)))
3015 if (unlikely(xdp_size
> (unsigned long)(xdp
->data_end
- xdp
->data
)))
3018 return bpf_event_output(map
, flags
, meta
, meta_size
, xdp
->data
,
3019 xdp_size
, bpf_xdp_copy
);
3022 static const struct bpf_func_proto bpf_xdp_event_output_proto
= {
3023 .func
= bpf_xdp_event_output
,
3025 .ret_type
= RET_INTEGER
,
3026 .arg1_type
= ARG_PTR_TO_CTX
,
3027 .arg2_type
= ARG_CONST_MAP_PTR
,
3028 .arg3_type
= ARG_ANYTHING
,
3029 .arg4_type
= ARG_PTR_TO_MEM
,
3030 .arg5_type
= ARG_CONST_SIZE
,
BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
{
	return skb->sk ? sock_gen_cookie(skb->sk) : 0;
}

static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
	.func		= bpf_get_socket_cookie,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
{
	struct sock *sk = sk_to_full_sk(skb->sk);
	kuid_t kuid;

	if (!sk || !sk_fullsock(sk))
		return overflowuid;
	kuid = sock_net_uid(sock_net(sk), sk);
	return from_kuid_munged(sock_net(sk)->user_ns, kuid);
}

static const struct bpf_func_proto bpf_get_socket_uid_proto = {
	.func		= bpf_get_socket_uid,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
};
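/* Illustrative sketch (not part of this file): a cgroup/skb program using the
 * two helpers above to account traffic per socket owner. The map name and
 * layout are assumptions; a real program would also insert missing keys.
 *
 *	struct bpf_map_def SEC("maps") bytes_per_uid = {
 *		.type        = BPF_MAP_TYPE_HASH,
 *		.key_size    = sizeof(__u32),
 *		.value_size  = sizeof(__u64),
 *		.max_entries = 1024,
 *	};
 *
 *	SEC("cgroup/skb")
 *	int count_egress(struct __sk_buff *skb)
 *	{
 *		__u32 uid = bpf_get_socket_uid(skb);
 *		__u64 *val = bpf_map_lookup_elem(&bytes_per_uid, &uid);
 *
 *		if (val)
 *			__sync_fetch_and_add(val, skb->len);
 *		return 1;	// allow the packet
 *	}
 */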
BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
	   int, level, int, optname, char *, optval, int, optlen)
{
	struct sock *sk = bpf_sock->sk;
	int ret = 0;
	int val;

	if (!sk_fullsock(sk))
		return -EINVAL;

	if (level == SOL_SOCKET) {
		if (optlen != sizeof(int))
			return -EINVAL;
		val = *((int *)optval);

		/* Only some socketops are supported */
		switch (optname) {
		case SO_RCVBUF:
			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
			sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
			break;
		case SO_SNDBUF:
			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
			sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
			break;
		case SO_MAX_PACING_RATE:
			sk->sk_max_pacing_rate = val;
			sk->sk_pacing_rate = min(sk->sk_pacing_rate,
						 sk->sk_max_pacing_rate);
			break;
		case SO_PRIORITY:
			sk->sk_priority = val;
			break;
		case SO_RCVLOWAT:
			sk->sk_rcvlowat = val ? : 1;
			break;
		default:
			ret = -EINVAL;
		}
	} else if (level == SOL_TCP &&
		   sk->sk_prot->setsockopt == tcp_setsockopt) {
		if (optname == TCP_CONGESTION) {
			char name[TCP_CA_NAME_MAX];
			bool reinit = bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN;

			strncpy(name, optval, min_t(long, optlen,
						    TCP_CA_NAME_MAX-1));
			name[TCP_CA_NAME_MAX-1] = 0;
			ret = tcp_set_congestion_control(sk, name, false, reinit);
		} else {
			struct tcp_sock *tp = tcp_sk(sk);

			if (optlen != sizeof(int))
				return -EINVAL;

			val = *((int *)optval);
			/* Only some options are supported */
			switch (optname) {
			case TCP_BPF_IW:
				if (val <= 0 || tp->data_segs_out > 0)
					ret = -EINVAL;
				else
					tp->snd_cwnd = val;
				break;
			case TCP_BPF_SNDCWND_CLAMP:
				if (val <= 0) {
					ret = -EINVAL;
				} else {
					tp->snd_cwnd_clamp = val;
					tp->snd_ssthresh = val;
				}
				break;
			default:
				ret = -EINVAL;
			}
		}
	} else {
		ret = -EINVAL;
	}
	return ret;
}

static const struct bpf_func_proto bpf_setsockopt_proto = {
	.func		= bpf_setsockopt,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM,
	.arg5_type	= ARG_CONST_SIZE,
};
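/* Illustrative sketch (not part of this file): a sock_ops program driving the
 * bpf_setsockopt() helper above, e.g. to pick a congestion control algorithm
 * once a connection is established. The section and program names are
 * examples, and "cubic" is assumed to be available on the system.
 *
 *	SEC("sockops")
 *	int set_cc(struct bpf_sock_ops *skops)
 *	{
 *		char cc[] = "cubic";
 *
 *		switch (skops->op) {
 *		case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
 *		case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
 *			bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION,
 *				       cc, sizeof(cc));
 *			break;
 *		}
 *		return 1;
 *	}
 */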
3163 static const struct bpf_func_proto
*
3164 bpf_base_func_proto(enum bpf_func_id func_id
)
3167 case BPF_FUNC_map_lookup_elem
:
3168 return &bpf_map_lookup_elem_proto
;
3169 case BPF_FUNC_map_update_elem
:
3170 return &bpf_map_update_elem_proto
;
3171 case BPF_FUNC_map_delete_elem
:
3172 return &bpf_map_delete_elem_proto
;
3173 case BPF_FUNC_get_prandom_u32
:
3174 return &bpf_get_prandom_u32_proto
;
3175 case BPF_FUNC_get_smp_processor_id
:
3176 return &bpf_get_raw_smp_processor_id_proto
;
3177 case BPF_FUNC_get_numa_node_id
:
3178 return &bpf_get_numa_node_id_proto
;
3179 case BPF_FUNC_tail_call
:
3180 return &bpf_tail_call_proto
;
3181 case BPF_FUNC_ktime_get_ns
:
3182 return &bpf_ktime_get_ns_proto
;
3183 case BPF_FUNC_trace_printk
:
3184 if (capable(CAP_SYS_ADMIN
))
3185 return bpf_get_trace_printk_proto();
3191 static const struct bpf_func_proto
*
3192 sock_filter_func_proto(enum bpf_func_id func_id
)
3195 /* inet and inet6 sockets are created in a process
3196 * context so there is always a valid uid/gid
3198 case BPF_FUNC_get_current_uid_gid
:
3199 return &bpf_get_current_uid_gid_proto
;
3201 return bpf_base_func_proto(func_id
);
3205 static const struct bpf_func_proto
*
3206 sk_filter_func_proto(enum bpf_func_id func_id
)
3209 case BPF_FUNC_skb_load_bytes
:
3210 return &bpf_skb_load_bytes_proto
;
3211 case BPF_FUNC_get_socket_cookie
:
3212 return &bpf_get_socket_cookie_proto
;
3213 case BPF_FUNC_get_socket_uid
:
3214 return &bpf_get_socket_uid_proto
;
3216 return bpf_base_func_proto(func_id
);
3220 static const struct bpf_func_proto
*
3221 tc_cls_act_func_proto(enum bpf_func_id func_id
)
3224 case BPF_FUNC_skb_store_bytes
:
3225 return &bpf_skb_store_bytes_proto
;
3226 case BPF_FUNC_skb_load_bytes
:
3227 return &bpf_skb_load_bytes_proto
;
3228 case BPF_FUNC_skb_pull_data
:
3229 return &bpf_skb_pull_data_proto
;
3230 case BPF_FUNC_csum_diff
:
3231 return &bpf_csum_diff_proto
;
3232 case BPF_FUNC_csum_update
:
3233 return &bpf_csum_update_proto
;
3234 case BPF_FUNC_l3_csum_replace
:
3235 return &bpf_l3_csum_replace_proto
;
3236 case BPF_FUNC_l4_csum_replace
:
3237 return &bpf_l4_csum_replace_proto
;
3238 case BPF_FUNC_clone_redirect
:
3239 return &bpf_clone_redirect_proto
;
3240 case BPF_FUNC_get_cgroup_classid
:
3241 return &bpf_get_cgroup_classid_proto
;
3242 case BPF_FUNC_skb_vlan_push
:
3243 return &bpf_skb_vlan_push_proto
;
3244 case BPF_FUNC_skb_vlan_pop
:
3245 return &bpf_skb_vlan_pop_proto
;
3246 case BPF_FUNC_skb_change_proto
:
3247 return &bpf_skb_change_proto_proto
;
3248 case BPF_FUNC_skb_change_type
:
3249 return &bpf_skb_change_type_proto
;
3250 case BPF_FUNC_skb_adjust_room
:
3251 return &bpf_skb_adjust_room_proto
;
3252 case BPF_FUNC_skb_change_tail
:
3253 return &bpf_skb_change_tail_proto
;
3254 case BPF_FUNC_skb_get_tunnel_key
:
3255 return &bpf_skb_get_tunnel_key_proto
;
3256 case BPF_FUNC_skb_set_tunnel_key
:
3257 return bpf_get_skb_set_tunnel_proto(func_id
);
3258 case BPF_FUNC_skb_get_tunnel_opt
:
3259 return &bpf_skb_get_tunnel_opt_proto
;
3260 case BPF_FUNC_skb_set_tunnel_opt
:
3261 return bpf_get_skb_set_tunnel_proto(func_id
);
3262 case BPF_FUNC_redirect
:
3263 return &bpf_redirect_proto
;
3264 case BPF_FUNC_get_route_realm
:
3265 return &bpf_get_route_realm_proto
;
3266 case BPF_FUNC_get_hash_recalc
:
3267 return &bpf_get_hash_recalc_proto
;
3268 case BPF_FUNC_set_hash_invalid
:
3269 return &bpf_set_hash_invalid_proto
;
3270 case BPF_FUNC_set_hash
:
3271 return &bpf_set_hash_proto
;
3272 case BPF_FUNC_perf_event_output
:
3273 return &bpf_skb_event_output_proto
;
3274 case BPF_FUNC_get_smp_processor_id
:
3275 return &bpf_get_smp_processor_id_proto
;
3276 case BPF_FUNC_skb_under_cgroup
:
3277 return &bpf_skb_under_cgroup_proto
;
3278 case BPF_FUNC_get_socket_cookie
:
3279 return &bpf_get_socket_cookie_proto
;
3280 case BPF_FUNC_get_socket_uid
:
3281 return &bpf_get_socket_uid_proto
;
3283 return bpf_base_func_proto(func_id
);
3287 static const struct bpf_func_proto
*
3288 xdp_func_proto(enum bpf_func_id func_id
)
3291 case BPF_FUNC_perf_event_output
:
3292 return &bpf_xdp_event_output_proto
;
3293 case BPF_FUNC_get_smp_processor_id
:
3294 return &bpf_get_smp_processor_id_proto
;
3295 case BPF_FUNC_xdp_adjust_head
:
3296 return &bpf_xdp_adjust_head_proto
;
3297 case BPF_FUNC_redirect
:
3298 return &bpf_xdp_redirect_proto
;
3299 case BPF_FUNC_redirect_map
:
3300 return &bpf_xdp_redirect_map_proto
;
3302 return bpf_base_func_proto(func_id
);
3306 static const struct bpf_func_proto
*
3307 lwt_inout_func_proto(enum bpf_func_id func_id
)
3310 case BPF_FUNC_skb_load_bytes
:
3311 return &bpf_skb_load_bytes_proto
;
3312 case BPF_FUNC_skb_pull_data
:
3313 return &bpf_skb_pull_data_proto
;
3314 case BPF_FUNC_csum_diff
:
3315 return &bpf_csum_diff_proto
;
3316 case BPF_FUNC_get_cgroup_classid
:
3317 return &bpf_get_cgroup_classid_proto
;
3318 case BPF_FUNC_get_route_realm
:
3319 return &bpf_get_route_realm_proto
;
3320 case BPF_FUNC_get_hash_recalc
:
3321 return &bpf_get_hash_recalc_proto
;
3322 case BPF_FUNC_perf_event_output
:
3323 return &bpf_skb_event_output_proto
;
3324 case BPF_FUNC_get_smp_processor_id
:
3325 return &bpf_get_smp_processor_id_proto
;
3326 case BPF_FUNC_skb_under_cgroup
:
3327 return &bpf_skb_under_cgroup_proto
;
3329 return bpf_base_func_proto(func_id
);
3333 static const struct bpf_func_proto
*
3334 sock_ops_func_proto(enum bpf_func_id func_id
)
3337 case BPF_FUNC_setsockopt
:
3338 return &bpf_setsockopt_proto
;
3339 case BPF_FUNC_sock_map_update
:
3340 return &bpf_sock_map_update_proto
;
3342 return bpf_base_func_proto(func_id
);
3346 static const struct bpf_func_proto
*sk_skb_func_proto(enum bpf_func_id func_id
)
3349 case BPF_FUNC_skb_store_bytes
:
3350 return &bpf_skb_store_bytes_proto
;
3351 case BPF_FUNC_skb_load_bytes
:
3352 return &bpf_skb_load_bytes_proto
;
3353 case BPF_FUNC_skb_pull_data
:
3354 return &bpf_skb_pull_data_proto
;
3355 case BPF_FUNC_skb_change_tail
:
3356 return &bpf_skb_change_tail_proto
;
3357 case BPF_FUNC_skb_change_head
:
3358 return &bpf_skb_change_head_proto
;
3359 case BPF_FUNC_get_socket_cookie
:
3360 return &bpf_get_socket_cookie_proto
;
3361 case BPF_FUNC_get_socket_uid
:
3362 return &bpf_get_socket_uid_proto
;
3363 case BPF_FUNC_sk_redirect_map
:
3364 return &bpf_sk_redirect_map_proto
;
3366 return bpf_base_func_proto(func_id
);
3370 static const struct bpf_func_proto
*
3371 lwt_xmit_func_proto(enum bpf_func_id func_id
)
3374 case BPF_FUNC_skb_get_tunnel_key
:
3375 return &bpf_skb_get_tunnel_key_proto
;
3376 case BPF_FUNC_skb_set_tunnel_key
:
3377 return bpf_get_skb_set_tunnel_proto(func_id
);
3378 case BPF_FUNC_skb_get_tunnel_opt
:
3379 return &bpf_skb_get_tunnel_opt_proto
;
3380 case BPF_FUNC_skb_set_tunnel_opt
:
3381 return bpf_get_skb_set_tunnel_proto(func_id
);
3382 case BPF_FUNC_redirect
:
3383 return &bpf_redirect_proto
;
3384 case BPF_FUNC_clone_redirect
:
3385 return &bpf_clone_redirect_proto
;
3386 case BPF_FUNC_skb_change_tail
:
3387 return &bpf_skb_change_tail_proto
;
3388 case BPF_FUNC_skb_change_head
:
3389 return &bpf_skb_change_head_proto
;
3390 case BPF_FUNC_skb_store_bytes
:
3391 return &bpf_skb_store_bytes_proto
;
3392 case BPF_FUNC_csum_update
:
3393 return &bpf_csum_update_proto
;
3394 case BPF_FUNC_l3_csum_replace
:
3395 return &bpf_l3_csum_replace_proto
;
3396 case BPF_FUNC_l4_csum_replace
:
3397 return &bpf_l4_csum_replace_proto
;
3398 case BPF_FUNC_set_hash_invalid
:
3399 return &bpf_set_hash_invalid_proto
;
3401 return lwt_inout_func_proto(func_id
);
3405 static bool bpf_skb_is_valid_access(int off
, int size
, enum bpf_access_type type
,
3406 struct bpf_insn_access_aux
*info
)
3408 const int size_default
= sizeof(__u32
);
3410 if (off
< 0 || off
>= sizeof(struct __sk_buff
))
3413 /* The verifier guarantees that size > 0. */
3414 if (off
% size
!= 0)
3418 case bpf_ctx_range_till(struct __sk_buff
, cb
[0], cb
[4]):
3419 if (off
+ size
> offsetofend(struct __sk_buff
, cb
[4]))
3422 case bpf_ctx_range_till(struct __sk_buff
, remote_ip6
[0], remote_ip6
[3]):
3423 case bpf_ctx_range_till(struct __sk_buff
, local_ip6
[0], local_ip6
[3]):
3424 case bpf_ctx_range_till(struct __sk_buff
, remote_ip4
, remote_ip4
):
3425 case bpf_ctx_range_till(struct __sk_buff
, local_ip4
, local_ip4
):
3426 case bpf_ctx_range(struct __sk_buff
, data
):
3427 case bpf_ctx_range(struct __sk_buff
, data_end
):
3428 if (size
!= size_default
)
3432 /* Only narrow read access allowed for now. */
3433 if (type
== BPF_WRITE
) {
3434 if (size
!= size_default
)
3437 bpf_ctx_record_field_size(info
, size_default
);
3438 if (!bpf_ctx_narrow_access_ok(off
, size
, size_default
))
3446 static bool sk_filter_is_valid_access(int off
, int size
,
3447 enum bpf_access_type type
,
3448 struct bpf_insn_access_aux
*info
)
3451 case bpf_ctx_range(struct __sk_buff
, tc_classid
):
3452 case bpf_ctx_range(struct __sk_buff
, data
):
3453 case bpf_ctx_range(struct __sk_buff
, data_end
):
3454 case bpf_ctx_range_till(struct __sk_buff
, family
, local_port
):
3458 if (type
== BPF_WRITE
) {
3460 case bpf_ctx_range_till(struct __sk_buff
, cb
[0], cb
[4]):
3467 return bpf_skb_is_valid_access(off
, size
, type
, info
);
3470 static bool lwt_is_valid_access(int off
, int size
,
3471 enum bpf_access_type type
,
3472 struct bpf_insn_access_aux
*info
)
3475 case bpf_ctx_range(struct __sk_buff
, tc_classid
):
3476 case bpf_ctx_range_till(struct __sk_buff
, family
, local_port
):
3480 if (type
== BPF_WRITE
) {
3482 case bpf_ctx_range(struct __sk_buff
, mark
):
3483 case bpf_ctx_range(struct __sk_buff
, priority
):
3484 case bpf_ctx_range_till(struct __sk_buff
, cb
[0], cb
[4]):
3492 case bpf_ctx_range(struct __sk_buff
, data
):
3493 info
->reg_type
= PTR_TO_PACKET
;
3495 case bpf_ctx_range(struct __sk_buff
, data_end
):
3496 info
->reg_type
= PTR_TO_PACKET_END
;
3500 return bpf_skb_is_valid_access(off
, size
, type
, info
);
3503 static bool sock_filter_is_valid_access(int off
, int size
,
3504 enum bpf_access_type type
,
3505 struct bpf_insn_access_aux
*info
)
3507 if (type
== BPF_WRITE
) {
3509 case offsetof(struct bpf_sock
, bound_dev_if
):
3510 case offsetof(struct bpf_sock
, mark
):
3511 case offsetof(struct bpf_sock
, priority
):
3518 if (off
< 0 || off
+ size
> sizeof(struct bpf_sock
))
3520 /* The verifier guarantees that size > 0. */
3521 if (off
% size
!= 0)
3523 if (size
!= sizeof(__u32
))
3529 static int bpf_unclone_prologue(struct bpf_insn
*insn_buf
, bool direct_write
,
3530 const struct bpf_prog
*prog
, int drop_verdict
)
3532 struct bpf_insn
*insn
= insn_buf
;
3537 /* if (!skb->cloned)
3540 * (Fast-path, otherwise approximation that we might be
3541 * a clone, do the rest in helper.)
3543 *insn
++ = BPF_LDX_MEM(BPF_B
, BPF_REG_6
, BPF_REG_1
, CLONED_OFFSET());
3544 *insn
++ = BPF_ALU32_IMM(BPF_AND
, BPF_REG_6
, CLONED_MASK
);
3545 *insn
++ = BPF_JMP_IMM(BPF_JEQ
, BPF_REG_6
, 0, 7);
3547 /* ret = bpf_skb_pull_data(skb, 0); */
3548 *insn
++ = BPF_MOV64_REG(BPF_REG_6
, BPF_REG_1
);
3549 *insn
++ = BPF_ALU64_REG(BPF_XOR
, BPF_REG_2
, BPF_REG_2
);
3550 *insn
++ = BPF_RAW_INSN(BPF_JMP
| BPF_CALL
, 0, 0, 0,
3551 BPF_FUNC_skb_pull_data
);
3554 * return TC_ACT_SHOT;
3556 *insn
++ = BPF_JMP_IMM(BPF_JEQ
, BPF_REG_0
, 0, 2);
3557 *insn
++ = BPF_ALU32_IMM(BPF_MOV
, BPF_REG_0
, drop_verdict
);
3558 *insn
++ = BPF_EXIT_INSN();
3561 *insn
++ = BPF_MOV64_REG(BPF_REG_1
, BPF_REG_6
);
3563 *insn
++ = prog
->insnsi
[0];
3565 return insn
- insn_buf
;
3568 static int tc_cls_act_prologue(struct bpf_insn
*insn_buf
, bool direct_write
,
3569 const struct bpf_prog
*prog
)
3571 return bpf_unclone_prologue(insn_buf
, direct_write
, prog
, TC_ACT_SHOT
);
3574 static bool tc_cls_act_is_valid_access(int off
, int size
,
3575 enum bpf_access_type type
,
3576 struct bpf_insn_access_aux
*info
)
3578 if (type
== BPF_WRITE
) {
3580 case bpf_ctx_range(struct __sk_buff
, mark
):
3581 case bpf_ctx_range(struct __sk_buff
, tc_index
):
3582 case bpf_ctx_range(struct __sk_buff
, priority
):
3583 case bpf_ctx_range(struct __sk_buff
, tc_classid
):
3584 case bpf_ctx_range_till(struct __sk_buff
, cb
[0], cb
[4]):
3592 case bpf_ctx_range(struct __sk_buff
, data
):
3593 info
->reg_type
= PTR_TO_PACKET
;
3595 case bpf_ctx_range(struct __sk_buff
, data_end
):
3596 info
->reg_type
= PTR_TO_PACKET_END
;
3598 case bpf_ctx_range_till(struct __sk_buff
, family
, local_port
):
3602 return bpf_skb_is_valid_access(off
, size
, type
, info
);
3605 static bool __is_valid_xdp_access(int off
, int size
)
3607 if (off
< 0 || off
>= sizeof(struct xdp_md
))
3609 if (off
% size
!= 0)
3611 if (size
!= sizeof(__u32
))
3617 static bool xdp_is_valid_access(int off
, int size
,
3618 enum bpf_access_type type
,
3619 struct bpf_insn_access_aux
*info
)
3621 if (type
== BPF_WRITE
)
3625 case offsetof(struct xdp_md
, data
):
3626 info
->reg_type
= PTR_TO_PACKET
;
3628 case offsetof(struct xdp_md
, data_end
):
3629 info
->reg_type
= PTR_TO_PACKET_END
;
3633 return __is_valid_xdp_access(off
, size
);
void bpf_warn_invalid_xdp_action(u32 act)
{
	const u32 act_max = XDP_REDIRECT;

	WARN_ONCE(1, "%s XDP return value %u, expect packet loss!\n",
		  act > act_max ? "Illegal" : "Driver unsupported",
		  act);
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
3646 static bool __is_valid_sock_ops_access(int off
, int size
)
3648 if (off
< 0 || off
>= sizeof(struct bpf_sock_ops
))
3650 /* The verifier guarantees that size > 0. */
3651 if (off
% size
!= 0)
3653 if (size
!= sizeof(__u32
))
3659 static bool sock_ops_is_valid_access(int off
, int size
,
3660 enum bpf_access_type type
,
3661 struct bpf_insn_access_aux
*info
)
3663 if (type
== BPF_WRITE
) {
3665 case offsetof(struct bpf_sock_ops
, op
) ...
3666 offsetof(struct bpf_sock_ops
, replylong
[3]):
3673 return __is_valid_sock_ops_access(off
, size
);
3676 static int sk_skb_prologue(struct bpf_insn
*insn_buf
, bool direct_write
,
3677 const struct bpf_prog
*prog
)
3679 return bpf_unclone_prologue(insn_buf
, direct_write
, prog
, SK_DROP
);
3682 static bool sk_skb_is_valid_access(int off
, int size
,
3683 enum bpf_access_type type
,
3684 struct bpf_insn_access_aux
*info
)
3686 if (type
== BPF_WRITE
) {
3688 case bpf_ctx_range(struct __sk_buff
, tc_index
):
3689 case bpf_ctx_range(struct __sk_buff
, priority
):
3697 case bpf_ctx_range(struct __sk_buff
, mark
):
3698 case bpf_ctx_range(struct __sk_buff
, tc_classid
):
3700 case bpf_ctx_range(struct __sk_buff
, data
):
3701 info
->reg_type
= PTR_TO_PACKET
;
3703 case bpf_ctx_range(struct __sk_buff
, data_end
):
3704 info
->reg_type
= PTR_TO_PACKET_END
;
3708 return bpf_skb_is_valid_access(off
, size
, type
, info
);
3711 static u32
bpf_convert_ctx_access(enum bpf_access_type type
,
3712 const struct bpf_insn
*si
,
3713 struct bpf_insn
*insn_buf
,
3714 struct bpf_prog
*prog
, u32
*target_size
)
3716 struct bpf_insn
*insn
= insn_buf
;
3720 case offsetof(struct __sk_buff
, len
):
3721 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->src_reg
,
3722 bpf_target_off(struct sk_buff
, len
, 4,
3726 case offsetof(struct __sk_buff
, protocol
):
3727 *insn
++ = BPF_LDX_MEM(BPF_H
, si
->dst_reg
, si
->src_reg
,
3728 bpf_target_off(struct sk_buff
, protocol
, 2,
3732 case offsetof(struct __sk_buff
, vlan_proto
):
3733 *insn
++ = BPF_LDX_MEM(BPF_H
, si
->dst_reg
, si
->src_reg
,
3734 bpf_target_off(struct sk_buff
, vlan_proto
, 2,
3738 case offsetof(struct __sk_buff
, priority
):
3739 if (type
== BPF_WRITE
)
3740 *insn
++ = BPF_STX_MEM(BPF_W
, si
->dst_reg
, si
->src_reg
,
3741 bpf_target_off(struct sk_buff
, priority
, 4,
3744 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->src_reg
,
3745 bpf_target_off(struct sk_buff
, priority
, 4,
3749 case offsetof(struct __sk_buff
, ingress_ifindex
):
3750 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->src_reg
,
3751 bpf_target_off(struct sk_buff
, skb_iif
, 4,
3755 case offsetof(struct __sk_buff
, ifindex
):
3756 *insn
++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff
, dev
),
3757 si
->dst_reg
, si
->src_reg
,
3758 offsetof(struct sk_buff
, dev
));
3759 *insn
++ = BPF_JMP_IMM(BPF_JEQ
, si
->dst_reg
, 0, 1);
3760 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->dst_reg
,
3761 bpf_target_off(struct net_device
, ifindex
, 4,
3765 case offsetof(struct __sk_buff
, hash
):
3766 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->src_reg
,
3767 bpf_target_off(struct sk_buff
, hash
, 4,
3771 case offsetof(struct __sk_buff
, mark
):
3772 if (type
== BPF_WRITE
)
3773 *insn
++ = BPF_STX_MEM(BPF_W
, si
->dst_reg
, si
->src_reg
,
3774 bpf_target_off(struct sk_buff
, mark
, 4,
3777 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->src_reg
,
3778 bpf_target_off(struct sk_buff
, mark
, 4,
3782 case offsetof(struct __sk_buff
, pkt_type
):
3784 *insn
++ = BPF_LDX_MEM(BPF_B
, si
->dst_reg
, si
->src_reg
,
3786 *insn
++ = BPF_ALU32_IMM(BPF_AND
, si
->dst_reg
, PKT_TYPE_MAX
);
3787 #ifdef __BIG_ENDIAN_BITFIELD
3788 *insn
++ = BPF_ALU32_IMM(BPF_RSH
, si
->dst_reg
, 5);
3792 case offsetof(struct __sk_buff
, queue_mapping
):
3793 *insn
++ = BPF_LDX_MEM(BPF_H
, si
->dst_reg
, si
->src_reg
,
3794 bpf_target_off(struct sk_buff
, queue_mapping
, 2,
3798 case offsetof(struct __sk_buff
, vlan_present
):
3799 case offsetof(struct __sk_buff
, vlan_tci
):
3800 BUILD_BUG_ON(VLAN_TAG_PRESENT
!= 0x1000);
3802 *insn
++ = BPF_LDX_MEM(BPF_H
, si
->dst_reg
, si
->src_reg
,
3803 bpf_target_off(struct sk_buff
, vlan_tci
, 2,
3805 if (si
->off
== offsetof(struct __sk_buff
, vlan_tci
)) {
3806 *insn
++ = BPF_ALU32_IMM(BPF_AND
, si
->dst_reg
,
3809 *insn
++ = BPF_ALU32_IMM(BPF_RSH
, si
->dst_reg
, 12);
3810 *insn
++ = BPF_ALU32_IMM(BPF_AND
, si
->dst_reg
, 1);
3814 case offsetof(struct __sk_buff
, cb
[0]) ...
3815 offsetofend(struct __sk_buff
, cb
[4]) - 1:
3816 BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb
, data
) < 20);
3817 BUILD_BUG_ON((offsetof(struct sk_buff
, cb
) +
3818 offsetof(struct qdisc_skb_cb
, data
)) %
3821 prog
->cb_access
= 1;
3823 off
-= offsetof(struct __sk_buff
, cb
[0]);
3824 off
+= offsetof(struct sk_buff
, cb
);
3825 off
+= offsetof(struct qdisc_skb_cb
, data
);
3826 if (type
== BPF_WRITE
)
3827 *insn
++ = BPF_STX_MEM(BPF_SIZE(si
->code
), si
->dst_reg
,
3830 *insn
++ = BPF_LDX_MEM(BPF_SIZE(si
->code
), si
->dst_reg
,
3834 case offsetof(struct __sk_buff
, tc_classid
):
3835 BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb
, tc_classid
) != 2);
3838 off
-= offsetof(struct __sk_buff
, tc_classid
);
3839 off
+= offsetof(struct sk_buff
, cb
);
3840 off
+= offsetof(struct qdisc_skb_cb
, tc_classid
);
3842 if (type
== BPF_WRITE
)
3843 *insn
++ = BPF_STX_MEM(BPF_H
, si
->dst_reg
,
3846 *insn
++ = BPF_LDX_MEM(BPF_H
, si
->dst_reg
,
3850 case offsetof(struct __sk_buff
, data
):
3851 *insn
++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff
, data
),
3852 si
->dst_reg
, si
->src_reg
,
3853 offsetof(struct sk_buff
, data
));
3856 case offsetof(struct __sk_buff
, data_end
):
3858 off
-= offsetof(struct __sk_buff
, data_end
);
3859 off
+= offsetof(struct sk_buff
, cb
);
3860 off
+= offsetof(struct bpf_skb_data_end
, data_end
);
3861 *insn
++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si
->dst_reg
,
3865 case offsetof(struct __sk_buff
, tc_index
):
3866 #ifdef CONFIG_NET_SCHED
3867 if (type
== BPF_WRITE
)
3868 *insn
++ = BPF_STX_MEM(BPF_H
, si
->dst_reg
, si
->src_reg
,
3869 bpf_target_off(struct sk_buff
, tc_index
, 2,
3872 *insn
++ = BPF_LDX_MEM(BPF_H
, si
->dst_reg
, si
->src_reg
,
3873 bpf_target_off(struct sk_buff
, tc_index
, 2,
3877 if (type
== BPF_WRITE
)
3878 *insn
++ = BPF_MOV64_REG(si
->dst_reg
, si
->dst_reg
);
3880 *insn
++ = BPF_MOV64_IMM(si
->dst_reg
, 0);
3884 case offsetof(struct __sk_buff
, napi_id
):
3885 #if defined(CONFIG_NET_RX_BUSY_POLL)
3886 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->src_reg
,
3887 bpf_target_off(struct sk_buff
, napi_id
, 4,
3889 *insn
++ = BPF_JMP_IMM(BPF_JGE
, si
->dst_reg
, MIN_NAPI_ID
, 1);
3890 *insn
++ = BPF_MOV64_IMM(si
->dst_reg
, 0);
3893 *insn
++ = BPF_MOV64_IMM(si
->dst_reg
, 0);
3896 case offsetof(struct __sk_buff
, family
):
3897 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common
, skc_family
) != 2);
3899 *insn
++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff
, sk
),
3900 si
->dst_reg
, si
->src_reg
,
3901 offsetof(struct sk_buff
, sk
));
3902 *insn
++ = BPF_LDX_MEM(BPF_H
, si
->dst_reg
, si
->dst_reg
,
3903 bpf_target_off(struct sock_common
,
3907 case offsetof(struct __sk_buff
, remote_ip4
):
3908 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common
, skc_daddr
) != 4);
3910 *insn
++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff
, sk
),
3911 si
->dst_reg
, si
->src_reg
,
3912 offsetof(struct sk_buff
, sk
));
3913 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->dst_reg
,
3914 bpf_target_off(struct sock_common
,
3918 case offsetof(struct __sk_buff
, local_ip4
):
3919 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common
,
3920 skc_rcv_saddr
) != 4);
3922 *insn
++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff
, sk
),
3923 si
->dst_reg
, si
->src_reg
,
3924 offsetof(struct sk_buff
, sk
));
3925 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->dst_reg
,
3926 bpf_target_off(struct sock_common
,
3930 case offsetof(struct __sk_buff
, remote_ip6
[0]) ...
3931 offsetof(struct __sk_buff
, remote_ip6
[3]):
3932 #if IS_ENABLED(CONFIG_IPV6)
3933 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common
,
3934 skc_v6_daddr
.s6_addr32
[0]) != 4);
3937 off
-= offsetof(struct __sk_buff
, remote_ip6
[0]);
3939 *insn
++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff
, sk
),
3940 si
->dst_reg
, si
->src_reg
,
3941 offsetof(struct sk_buff
, sk
));
3942 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->dst_reg
,
3943 offsetof(struct sock_common
,
3944 skc_v6_daddr
.s6_addr32
[0]) +
3947 *insn
++ = BPF_MOV32_IMM(si
->dst_reg
, 0);
3950 case offsetof(struct __sk_buff
, local_ip6
[0]) ...
3951 offsetof(struct __sk_buff
, local_ip6
[3]):
3952 #if IS_ENABLED(CONFIG_IPV6)
3953 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common
,
3954 skc_v6_rcv_saddr
.s6_addr32
[0]) != 4);
3957 off
-= offsetof(struct __sk_buff
, local_ip6
[0]);
3959 *insn
++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff
, sk
),
3960 si
->dst_reg
, si
->src_reg
,
3961 offsetof(struct sk_buff
, sk
));
3962 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->dst_reg
,
3963 offsetof(struct sock_common
,
3964 skc_v6_rcv_saddr
.s6_addr32
[0]) +
3967 *insn
++ = BPF_MOV32_IMM(si
->dst_reg
, 0);
3971 case offsetof(struct __sk_buff
, remote_port
):
3972 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common
, skc_dport
) != 2);
3974 *insn
++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff
, sk
),
3975 si
->dst_reg
, si
->src_reg
,
3976 offsetof(struct sk_buff
, sk
));
3977 *insn
++ = BPF_LDX_MEM(BPF_H
, si
->dst_reg
, si
->dst_reg
,
3978 bpf_target_off(struct sock_common
,
3981 #ifndef __BIG_ENDIAN_BITFIELD
3982 *insn
++ = BPF_ALU32_IMM(BPF_LSH
, si
->dst_reg
, 16);
3986 case offsetof(struct __sk_buff
, local_port
):
3987 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common
, skc_num
) != 2);
3989 *insn
++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff
, sk
),
3990 si
->dst_reg
, si
->src_reg
,
3991 offsetof(struct sk_buff
, sk
));
3992 *insn
++ = BPF_LDX_MEM(BPF_H
, si
->dst_reg
, si
->dst_reg
,
3993 bpf_target_off(struct sock_common
,
3994 skc_num
, 2, target_size
));
3998 return insn
- insn_buf
;
4001 static u32
sock_filter_convert_ctx_access(enum bpf_access_type type
,
4002 const struct bpf_insn
*si
,
4003 struct bpf_insn
*insn_buf
,
4004 struct bpf_prog
*prog
, u32
*target_size
)
4006 struct bpf_insn
*insn
= insn_buf
;
4009 case offsetof(struct bpf_sock
, bound_dev_if
):
4010 BUILD_BUG_ON(FIELD_SIZEOF(struct sock
, sk_bound_dev_if
) != 4);
4012 if (type
== BPF_WRITE
)
4013 *insn
++ = BPF_STX_MEM(BPF_W
, si
->dst_reg
, si
->src_reg
,
4014 offsetof(struct sock
, sk_bound_dev_if
));
4016 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->src_reg
,
4017 offsetof(struct sock
, sk_bound_dev_if
));
4020 case offsetof(struct bpf_sock
, mark
):
4021 BUILD_BUG_ON(FIELD_SIZEOF(struct sock
, sk_mark
) != 4);
4023 if (type
== BPF_WRITE
)
4024 *insn
++ = BPF_STX_MEM(BPF_W
, si
->dst_reg
, si
->src_reg
,
4025 offsetof(struct sock
, sk_mark
));
4027 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->src_reg
,
4028 offsetof(struct sock
, sk_mark
));
4031 case offsetof(struct bpf_sock
, priority
):
4032 BUILD_BUG_ON(FIELD_SIZEOF(struct sock
, sk_priority
) != 4);
4034 if (type
== BPF_WRITE
)
4035 *insn
++ = BPF_STX_MEM(BPF_W
, si
->dst_reg
, si
->src_reg
,
4036 offsetof(struct sock
, sk_priority
));
4038 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->src_reg
,
4039 offsetof(struct sock
, sk_priority
));
4042 case offsetof(struct bpf_sock
, family
):
4043 BUILD_BUG_ON(FIELD_SIZEOF(struct sock
, sk_family
) != 2);
4045 *insn
++ = BPF_LDX_MEM(BPF_H
, si
->dst_reg
, si
->src_reg
,
4046 offsetof(struct sock
, sk_family
));
4049 case offsetof(struct bpf_sock
, type
):
4050 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->src_reg
,
4051 offsetof(struct sock
, __sk_flags_offset
));
4052 *insn
++ = BPF_ALU32_IMM(BPF_AND
, si
->dst_reg
, SK_FL_TYPE_MASK
);
4053 *insn
++ = BPF_ALU32_IMM(BPF_RSH
, si
->dst_reg
, SK_FL_TYPE_SHIFT
);
4056 case offsetof(struct bpf_sock
, protocol
):
4057 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->src_reg
,
4058 offsetof(struct sock
, __sk_flags_offset
));
4059 *insn
++ = BPF_ALU32_IMM(BPF_AND
, si
->dst_reg
, SK_FL_PROTO_MASK
);
4060 *insn
++ = BPF_ALU32_IMM(BPF_RSH
, si
->dst_reg
, SK_FL_PROTO_SHIFT
);
4064 return insn
- insn_buf
;
4067 static u32
tc_cls_act_convert_ctx_access(enum bpf_access_type type
,
4068 const struct bpf_insn
*si
,
4069 struct bpf_insn
*insn_buf
,
4070 struct bpf_prog
*prog
, u32
*target_size
)
4072 struct bpf_insn
*insn
= insn_buf
;
4075 case offsetof(struct __sk_buff
, ifindex
):
4076 *insn
++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff
, dev
),
4077 si
->dst_reg
, si
->src_reg
,
4078 offsetof(struct sk_buff
, dev
));
4079 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->dst_reg
,
4080 bpf_target_off(struct net_device
, ifindex
, 4,
4084 return bpf_convert_ctx_access(type
, si
, insn_buf
, prog
,
4088 return insn
- insn_buf
;
4091 static u32
xdp_convert_ctx_access(enum bpf_access_type type
,
4092 const struct bpf_insn
*si
,
4093 struct bpf_insn
*insn_buf
,
4094 struct bpf_prog
*prog
, u32
*target_size
)
4096 struct bpf_insn
*insn
= insn_buf
;
4099 case offsetof(struct xdp_md
, data
):
4100 *insn
++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff
, data
),
4101 si
->dst_reg
, si
->src_reg
,
4102 offsetof(struct xdp_buff
, data
));
4104 case offsetof(struct xdp_md
, data_end
):
4105 *insn
++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff
, data_end
),
4106 si
->dst_reg
, si
->src_reg
,
4107 offsetof(struct xdp_buff
, data_end
));
4111 return insn
- insn_buf
;
4114 static u32
sock_ops_convert_ctx_access(enum bpf_access_type type
,
4115 const struct bpf_insn
*si
,
4116 struct bpf_insn
*insn_buf
,
4117 struct bpf_prog
*prog
,
4120 struct bpf_insn
*insn
= insn_buf
;
4124 case offsetof(struct bpf_sock_ops
, op
) ...
4125 offsetof(struct bpf_sock_ops
, replylong
[3]):
4126 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops
, op
) !=
4127 FIELD_SIZEOF(struct bpf_sock_ops_kern
, op
));
4128 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops
, reply
) !=
4129 FIELD_SIZEOF(struct bpf_sock_ops_kern
, reply
));
4130 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops
, replylong
) !=
4131 FIELD_SIZEOF(struct bpf_sock_ops_kern
, replylong
));
4133 off
-= offsetof(struct bpf_sock_ops
, op
);
4134 off
+= offsetof(struct bpf_sock_ops_kern
, op
);
4135 if (type
== BPF_WRITE
)
4136 *insn
++ = BPF_STX_MEM(BPF_W
, si
->dst_reg
, si
->src_reg
,
4139 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->src_reg
,
4143 case offsetof(struct bpf_sock_ops
, family
):
4144 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common
, skc_family
) != 2);
4146 *insn
++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
4147 struct bpf_sock_ops_kern
, sk
),
4148 si
->dst_reg
, si
->src_reg
,
4149 offsetof(struct bpf_sock_ops_kern
, sk
));
4150 *insn
++ = BPF_LDX_MEM(BPF_H
, si
->dst_reg
, si
->dst_reg
,
4151 offsetof(struct sock_common
, skc_family
));
4154 case offsetof(struct bpf_sock_ops
, remote_ip4
):
4155 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common
, skc_daddr
) != 4);
4157 *insn
++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
4158 struct bpf_sock_ops_kern
, sk
),
4159 si
->dst_reg
, si
->src_reg
,
4160 offsetof(struct bpf_sock_ops_kern
, sk
));
4161 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->dst_reg
,
4162 offsetof(struct sock_common
, skc_daddr
));
4165 case offsetof(struct bpf_sock_ops
, local_ip4
):
4166 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common
, skc_rcv_saddr
) != 4);
4168 *insn
++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
4169 struct bpf_sock_ops_kern
, sk
),
4170 si
->dst_reg
, si
->src_reg
,
4171 offsetof(struct bpf_sock_ops_kern
, sk
));
4172 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->dst_reg
,
4173 offsetof(struct sock_common
,
4177 case offsetof(struct bpf_sock_ops
, remote_ip6
[0]) ...
4178 offsetof(struct bpf_sock_ops
, remote_ip6
[3]):
4179 #if IS_ENABLED(CONFIG_IPV6)
4180 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common
,
4181 skc_v6_daddr
.s6_addr32
[0]) != 4);
4184 off
-= offsetof(struct bpf_sock_ops
, remote_ip6
[0]);
4185 *insn
++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
4186 struct bpf_sock_ops_kern
, sk
),
4187 si
->dst_reg
, si
->src_reg
,
4188 offsetof(struct bpf_sock_ops_kern
, sk
));
4189 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->dst_reg
,
4190 offsetof(struct sock_common
,
4191 skc_v6_daddr
.s6_addr32
[0]) +
4194 *insn
++ = BPF_MOV32_IMM(si
->dst_reg
, 0);
4198 case offsetof(struct bpf_sock_ops
, local_ip6
[0]) ...
4199 offsetof(struct bpf_sock_ops
, local_ip6
[3]):
4200 #if IS_ENABLED(CONFIG_IPV6)
4201 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common
,
4202 skc_v6_rcv_saddr
.s6_addr32
[0]) != 4);
4205 off
-= offsetof(struct bpf_sock_ops
, local_ip6
[0]);
4206 *insn
++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
4207 struct bpf_sock_ops_kern
, sk
),
4208 si
->dst_reg
, si
->src_reg
,
4209 offsetof(struct bpf_sock_ops_kern
, sk
));
4210 *insn
++ = BPF_LDX_MEM(BPF_W
, si
->dst_reg
, si
->dst_reg
,
4211 offsetof(struct sock_common
,
4212 skc_v6_rcv_saddr
.s6_addr32
[0]) +
4215 *insn
++ = BPF_MOV32_IMM(si
->dst_reg
, 0);
4219 case offsetof(struct bpf_sock_ops
, remote_port
):
4220 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common
, skc_dport
) != 2);
4222 *insn
++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
4223 struct bpf_sock_ops_kern
, sk
),
4224 si
->dst_reg
, si
->src_reg
,
4225 offsetof(struct bpf_sock_ops_kern
, sk
));
4226 *insn
++ = BPF_LDX_MEM(BPF_H
, si
->dst_reg
, si
->dst_reg
,
4227 offsetof(struct sock_common
, skc_dport
));
4228 #ifndef __BIG_ENDIAN_BITFIELD
4229 *insn
++ = BPF_ALU32_IMM(BPF_LSH
, si
->dst_reg
, 16);
4233 case offsetof(struct bpf_sock_ops
, local_port
):
4234 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common
, skc_num
) != 2);
4236 *insn
++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
4237 struct bpf_sock_ops_kern
, sk
),
4238 si
->dst_reg
, si
->src_reg
,
4239 offsetof(struct bpf_sock_ops_kern
, sk
));
4240 *insn
++ = BPF_LDX_MEM(BPF_H
, si
->dst_reg
, si
->dst_reg
,
4241 offsetof(struct sock_common
, skc_num
));
4244 return insn
- insn_buf
;
4247 static u32
sk_skb_convert_ctx_access(enum bpf_access_type type
,
4248 const struct bpf_insn
*si
,
4249 struct bpf_insn
*insn_buf
,
4250 struct bpf_prog
*prog
, u32
*target_size
)
4252 struct bpf_insn
*insn
= insn_buf
;
4256 case offsetof(struct __sk_buff
, data_end
):
4258 off
-= offsetof(struct __sk_buff
, data_end
);
4259 off
+= offsetof(struct sk_buff
, cb
);
4260 off
+= offsetof(struct tcp_skb_cb
, bpf
.data_end
);
4261 *insn
++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si
->dst_reg
,
4265 return bpf_convert_ctx_access(type
, si
, insn_buf
, prog
,
4269 return insn
- insn_buf
;
4272 const struct bpf_verifier_ops sk_filter_prog_ops
= {
4273 .get_func_proto
= sk_filter_func_proto
,
4274 .is_valid_access
= sk_filter_is_valid_access
,
4275 .convert_ctx_access
= bpf_convert_ctx_access
,
4278 const struct bpf_verifier_ops tc_cls_act_prog_ops
= {
4279 .get_func_proto
= tc_cls_act_func_proto
,
4280 .is_valid_access
= tc_cls_act_is_valid_access
,
4281 .convert_ctx_access
= tc_cls_act_convert_ctx_access
,
4282 .gen_prologue
= tc_cls_act_prologue
,
4283 .test_run
= bpf_prog_test_run_skb
,
4286 const struct bpf_verifier_ops xdp_prog_ops
= {
4287 .get_func_proto
= xdp_func_proto
,
4288 .is_valid_access
= xdp_is_valid_access
,
4289 .convert_ctx_access
= xdp_convert_ctx_access
,
4290 .test_run
= bpf_prog_test_run_xdp
,
4293 const struct bpf_verifier_ops cg_skb_prog_ops
= {
4294 .get_func_proto
= sk_filter_func_proto
,
4295 .is_valid_access
= sk_filter_is_valid_access
,
4296 .convert_ctx_access
= bpf_convert_ctx_access
,
4297 .test_run
= bpf_prog_test_run_skb
,
4300 const struct bpf_verifier_ops lwt_inout_prog_ops
= {
4301 .get_func_proto
= lwt_inout_func_proto
,
4302 .is_valid_access
= lwt_is_valid_access
,
4303 .convert_ctx_access
= bpf_convert_ctx_access
,
4304 .test_run
= bpf_prog_test_run_skb
,
4307 const struct bpf_verifier_ops lwt_xmit_prog_ops
= {
4308 .get_func_proto
= lwt_xmit_func_proto
,
4309 .is_valid_access
= lwt_is_valid_access
,
4310 .convert_ctx_access
= bpf_convert_ctx_access
,
4311 .gen_prologue
= tc_cls_act_prologue
,
4312 .test_run
= bpf_prog_test_run_skb
,
4315 const struct bpf_verifier_ops cg_sock_prog_ops
= {
4316 .get_func_proto
= sock_filter_func_proto
,
4317 .is_valid_access
= sock_filter_is_valid_access
,
4318 .convert_ctx_access
= sock_filter_convert_ctx_access
,
4321 const struct bpf_verifier_ops sock_ops_prog_ops
= {
4322 .get_func_proto
= sock_ops_func_proto
,
4323 .is_valid_access
= sock_ops_is_valid_access
,
4324 .convert_ctx_access
= sock_ops_convert_ctx_access
,
4327 const struct bpf_verifier_ops sk_skb_prog_ops
= {
4328 .get_func_proto
= sk_skb_func_proto
,
4329 .is_valid_access
= sk_skb_is_valid_access
,
4330 .convert_ctx_access
= sk_skb_convert_ctx_access
,
4331 .gen_prologue
= sk_skb_prologue
,
int sk_detach_filter(struct sock *sk)
{
	int ret = -ENOENT;
	struct sk_filter *filter;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	filter = rcu_dereference_protected(sk->sk_filter,
					   lockdep_sock_is_held(sk));
	if (filter) {
		RCU_INIT_POINTER(sk->sk_filter, NULL);
		sk_filter_uncharge(sk, filter);
		ret = 0;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(sk_detach_filter);
4354 int sk_get_filter(struct sock
*sk
, struct sock_filter __user
*ubuf
,
4357 struct sock_fprog_kern
*fprog
;
4358 struct sk_filter
*filter
;
4362 filter
= rcu_dereference_protected(sk
->sk_filter
,
4363 lockdep_sock_is_held(sk
));
4367 /* We're copying the filter that has been originally attached,
4368 * so no conversion/decode needed anymore. eBPF programs that
4369 * have no original program cannot be dumped through this.
4372 fprog
= filter
->prog
->orig_prog
;
4378 /* User space only enquires number of filter blocks. */
4382 if (len
< fprog
->len
)
4386 if (copy_to_user(ubuf
, fprog
->filter
, bpf_classic_proglen(fprog
)))
4389 /* Instead of bytes, the API requests to return the number