inet: fix possible memory corruption with UDP_CORK and UFO
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / include / linux / skbuff.h
CommitLineData
1da177e4
LT
1/*
2 * Definitions for the 'struct sk_buff' memory handlers.
3 *
4 * Authors:
5 * Alan Cox, <gw4pts@gw4pts.ampr.org>
6 * Florian La Roche, <rzsfl@rz.uni-sb.de>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#ifndef _LINUX_SKBUFF_H
15#define _LINUX_SKBUFF_H
16
1da177e4 17#include <linux/kernel.h>
fe55f6d5 18#include <linux/kmemcheck.h>
1da177e4
LT
19#include <linux/compiler.h>
20#include <linux/time.h>
187f1882 21#include <linux/bug.h>
1da177e4
LT
22#include <linux/cache.h>
23
60063497 24#include <linux/atomic.h>
1da177e4
LT
25#include <asm/types.h>
26#include <linux/spinlock.h>
1da177e4 27#include <linux/net.h>
3fc7e8a6 28#include <linux/textsearch.h>
1da177e4 29#include <net/checksum.h>
a80958f4 30#include <linux/rcupdate.h>
97fc2f08 31#include <linux/dmaengine.h>
b7aa0bf7 32#include <linux/hrtimer.h>
131ea667 33#include <linux/dma-mapping.h>
c8f44aff 34#include <linux/netdev_features.h>
5203cd28 35#include <net/flow_keys.h>
1da177e4 36
/*
 * Checksum status codes; their meaning on receive and on output is
 * described in the checksumming notes below.
 *
 * Don't change this without changing skb_csum_unnecessary!
 */
#define CHECKSUM_NONE		0
#define CHECKSUM_UNNECESSARY	1
#define CHECKSUM_COMPLETE	2
#define CHECKSUM_PARTIAL	3
1da177e4
LT
42
/*
 * Round X up to a multiple of SMP_CACHE_BYTES (the mask arithmetic assumes
 * SMP_CACHE_BYTES is a power of two).
 */
#define SKB_DATA_ALIGN(X)	(((X) + (SMP_CACHE_BYTES - 1)) & \
				 ~(SMP_CACHE_BYTES - 1))

/* X minus the cache-aligned size of struct skb_shared_info. */
#define SKB_WITH_OVERHEAD(X)	\
	((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))

/* What is left of (PAGE_SIZE << ORDER) after X and the shared-info overhead. */
#define SKB_MAX_ORDER(X, ORDER) \
	SKB_WITH_OVERHEAD((PAGE_SIZE << (ORDER)) - (X))

#define SKB_MAX_HEAD(X)		(SKB_MAX_ORDER((X), 0))
#define SKB_MAX_ALLOC		(SKB_MAX_ORDER(0, 2))

/* return minimum truesize of one skb containing X bytes of data */
#define SKB_TRUESIZE(X) ((X) +						\
			 SKB_DATA_ALIGN(sizeof(struct sk_buff)) +	\
			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
56
1da177e4
LT
57/* A. Checksumming of received packets by device.
58 *
59 * NONE: device failed to checksum this packet.
60 * skb->csum is undefined.
61 *
62 * UNNECESSARY: device parsed the packet and supposedly verified the checksum.
63 * skb->csum is undefined.
64 * It is a bad option, but, unfortunately, many vendors do this.
65 * Apparently with secret goal to sell you new device, when you
66 * will add new protocol to your host. F.e. IPv6. 8)
67 *
84fa7933 68 * COMPLETE: the most generic way. Device supplied checksum of _all_
1da177e4
LT
69 * the packet as seen by netif_rx in skb->csum.
70 * NOTE: Even if device supports only some protocols, but
84fa7933 71 * is able to produce some skb->csum, it MUST use COMPLETE,
1da177e4
LT
72 * not UNNECESSARY.
73 *
c6c6e3e0
HX
74 * PARTIAL: identical to the case for output below. This may occur
75 * on a packet received directly from another Linux OS, e.g.,
76 * a virtualised Linux kernel on the same host. The packet can
77 * be treated in the same way as UNNECESSARY except that on
78 * output (i.e., forwarding) the checksum must be filled in
79 * by the OS or the hardware.
80 *
1da177e4
LT
81 * B. Checksumming on output.
82 *
83 * NONE: skb is checksummed by protocol or csum is not required.
84 *
84fa7933 85 * PARTIAL: device is required to csum packet as seen by hard_start_xmit
c6c6e3e0
HX
86 * from skb->csum_start to the end and to record the checksum
87 * at skb->csum_start + skb->csum_offset.
1da177e4
LT
88 *
89 * Device must show its capabilities in dev->features, set
90 * at device setup time.
91 * NETIF_F_HW_CSUM - it is clever device, it is able to checksum
92 * everything.
1da177e4
LT
93 * NETIF_F_IP_CSUM - device is dumb. It is able to csum only
94 * TCP/UDP over IPv4. Sigh. Vendors like this
95 * way for an unknown reason. Though, see comment above
96 * about CHECKSUM_UNNECESSARY. 8)
c6c6e3e0 97 * NETIF_F_IPV6_CSUM about as dumb as the last one but does IPv6 instead.
1da177e4 98 *
3af79302
YZ
99 * UNNECESSARY: device will do per protocol specific csum. Protocol drivers
100 * that do not want net to perform the checksum calculation should use
101 * this flag in their outgoing skbs.
102 * NETIF_F_FCOE_CRC this indicates the device can do FCoE FC CRC
103 * offload. Correspondingly, the FCoE protocol driver
104 * stack should use CHECKSUM_UNNECESSARY.
105 *
1da177e4
LT
106 * Any questions? No questions, good. --ANK
107 */
108
1da177e4 109struct net_device;
716ea3a7 110struct scatterlist;
9c55e01c 111struct pipe_inode_info;
1da177e4 112
5f79e0f9 113#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1da177e4
LT
114struct nf_conntrack {
115 atomic_t use;
1da177e4 116};
5f79e0f9 117#endif
1da177e4
LT
118
119#ifdef CONFIG_BRIDGE_NETFILTER
120struct nf_bridge_info {
bf1ac5ca
ED
121 atomic_t use;
122 unsigned int mask;
123 struct net_device *physindev;
124 struct net_device *physoutdev;
125 unsigned long data[32 / sizeof(unsigned long)];
1da177e4
LT
126};
127#endif
128
1da177e4
LT
129struct sk_buff_head {
130 /* These two members must be first. */
131 struct sk_buff *next;
132 struct sk_buff *prev;
133
134 __u32 qlen;
135 spinlock_t lock;
136};
137
138struct sk_buff;
139
9d4dde52
IC
140/* To allow 64K frame to be packed as single skb without frag_list we
141 * require 64K/PAGE_SIZE pages plus 1 additional page to allow for
142 * buffers which do not start on a page boundary.
143 *
144 * Since GRO uses frags we allocate at least 16 regardless of page
145 * size.
a715dea3 146 */
9d4dde52 147#if (65536/PAGE_SIZE + 1) < 16
eec00954 148#define MAX_SKB_FRAGS 16UL
a715dea3 149#else
9d4dde52 150#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1)
a715dea3 151#endif
1da177e4
LT
152
153typedef struct skb_frag_struct skb_frag_t;
154
155struct skb_frag_struct {
a8605c60
IC
156 struct {
157 struct page *p;
158 } page;
cb4dfe56 159#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
a309bb07
DM
160 __u32 page_offset;
161 __u32 size;
cb4dfe56
ED
162#else
163 __u16 page_offset;
164 __u16 size;
165#endif
1da177e4
LT
166};
167
9e903e08
ED
168static inline unsigned int skb_frag_size(const skb_frag_t *frag)
169{
170 return frag->size;
171}
172
173static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
174{
175 frag->size = size;
176}
177
178static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
179{
180 frag->size += delta;
181}
182
183static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
184{
185 frag->size -= delta;
186}
187
ac45f602
PO
188#define HAVE_HW_TIME_STAMP
189
190/**
d3a21be8 191 * struct skb_shared_hwtstamps - hardware time stamps
ac45f602
PO
192 * @hwtstamp: hardware time stamp transformed into duration
193 * since arbitrary point in time
194 * @syststamp: hwtstamp transformed to system time base
195 *
196 * Software time stamps generated by ktime_get_real() are stored in
197 * skb->tstamp. The relation between the different kinds of time
198 * stamps is as follows:
199 *
200 * syststamp and tstamp can be compared against each other in
201 * arbitrary combinations. The accuracy of a
202 * syststamp/tstamp/"syststamp from other device" comparison is
203 * limited by the accuracy of the transformation into system time
204 * base. This depends on the device driver and its underlying
205 * hardware.
206 *
207 * hwtstamps can only be compared against other hwtstamps from
208 * the same device.
209 *
210 * This structure is attached to packets as part of the
211 * &skb_shared_info. Use skb_hwtstamps() to get a pointer.
212 */
213struct skb_shared_hwtstamps {
214 ktime_t hwtstamp;
215 ktime_t syststamp;
216};
217
2244d07b
OH
/* Bit flags kept in the tx_flags member of struct skb_shared_info */
enum {
	SKBTX_HW_TSTAMP		= (1 << 0),	/* generate hardware time stamp */
	SKBTX_SW_TSTAMP		= (1 << 1),	/* generate software time stamp */

	/* device driver is going to provide hardware time stamp */
	SKBTX_IN_PROGRESS	= (1 << 2),

	/* device driver supports TX zero-copy buffers */
	SKBTX_DEV_ZEROCOPY	= (1 << 3),

	/* generate wifi status information (where possible) */
	SKBTX_WIFI_STATUS	= (1 << 4),

	/*
	 * At least one fragment might be overwritten (as in vmsplice(),
	 * sendfile() ...).  If a TX checksum has to be computed, all frags
	 * need to be copied first to avoid a possible bad checksum.
	 */
	SKBTX_SHARED_FRAG	= (1 << 5),
};
242
/*
 * Zero-copy completion descriptor.
 *
 * @callback tells userspace to release its buffers once skb DMA has
 * finished in the lower device; the skb's last reference should be 0 when
 * it is called.  Its zerocopy_success argument is true if a zero copy
 * transmit occurred, and false on a data copy or on an out of memory
 * error caused by a data copy attempt.
 * @ctx is used to track device context.
 * @desc is used to track the userspace buffer index.
 */
struct ubuf_info {
	void		(*callback)(struct ubuf_info *, bool zerocopy_success);
	void		*ctx;
	unsigned long	desc;
};
256
1da177e4
LT
257/* This data is invariant across clones and lives at
258 * the end of the header data, ie. at skb->end.
259 */
260struct skb_shared_info {
9f42f126
IC
261 unsigned char nr_frags;
262 __u8 tx_flags;
7967168c
HX
263 unsigned short gso_size;
264 /* Warning: this field is not always filled in (UFO)! */
265 unsigned short gso_segs;
266 unsigned short gso_type;
1da177e4 267 struct sk_buff *frag_list;
ac45f602 268 struct skb_shared_hwtstamps hwtstamps;
9f42f126 269 __be32 ip6_frag_id;
ec7d2f2c
ED
270
271 /*
272 * Warning : all fields before dataref are cleared in __alloc_skb()
273 */
274 atomic_t dataref;
275
69e3c75f
JB
276 /* Intermediate layers must ensure that destructor_arg
277 * remains valid until skb destructor */
278 void * destructor_arg;
a6686f2f 279
fed66381
ED
280 /* must be last field, see pskb_expand_head() */
281 skb_frag_t frags[MAX_SKB_FRAGS];
1da177e4
LT
282};
283
/*
 * dataref is split into two halves: the upper 16 bits count references to
 * the payload part of skb->data, the lower 16 bits count references to
 * the entire skb->data.  A clone of a headerless skb holds the length of
 * the header in skb->hdr_len.
 *
 * Rule for all users: the skb->data reference count must be greater than
 * or equal to the payload reference count.
 *
 * A holder of a payload-only reference does not care about modifications
 * to the header part of skb->data.
 */
#define SKB_DATAREF_SHIFT	16
#define SKB_DATAREF_MASK	((1 << SKB_DATAREF_SHIFT) - 1)
297
d179cd12
DM
298
/* Values for the two-bit fclone member of struct sk_buff. */
enum {
	SKB_FCLONE_UNAVAILABLE	= 0,
	SKB_FCLONE_ORIG		= 1,
	SKB_FCLONE_CLONE	= 2,
};
304
7967168c
HX
/* SKB_GSO_* bit flags. */
enum {
	SKB_GSO_TCPV4		= (1 << 0),
	SKB_GSO_UDP		= (1 << 1),

	/* This indicates the skb is from an untrusted source. */
	SKB_GSO_DODGY		= (1 << 2),

	/* This indicates the tcp segment has CWR set. */
	SKB_GSO_TCP_ECN		= (1 << 3),

	SKB_GSO_TCPV6		= (1 << 4),

	SKB_GSO_FCOE		= (1 << 5),

	SKB_GSO_GRE		= (1 << 6),

	SKB_GSO_UDP_TUNNEL	= (1 << 7),
};
323
2e07fa9c
ACM
/*
 * When BITS_PER_LONG exceeds 32, sk_buff_data_t is an unsigned int;
 * otherwise it is an unsigned char pointer.
 */
#if BITS_PER_LONG > 32
#define NET_SKBUFF_DATA_USES_OFFSET 1
#endif

#if defined(NET_SKBUFF_DATA_USES_OFFSET)
typedef unsigned int sk_buff_data_t;
#else
typedef unsigned char *sk_buff_data_t;
#endif
333
2fc72c7b
KK
/* Set when IPv4 or IPv6 netfilter defrag is built in or built as a module. */
#if defined(CONFIG_NF_DEFRAG_IPV4) || \
    defined(CONFIG_NF_DEFRAG_IPV4_MODULE) || \
    defined(CONFIG_NF_DEFRAG_IPV6) || \
    defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
#define NET_SKBUFF_NF_DEFRAG_NEEDED 1
#endif
338
1da177e4
LT
339/**
340 * struct sk_buff - socket buffer
341 * @next: Next buffer in list
342 * @prev: Previous buffer in list
325ed823 343 * @tstamp: Time we arrived
d84e0bd7 344 * @sk: Socket we are owned by
1da177e4 345 * @dev: Device we arrived on/are leaving by
d84e0bd7 346 * @cb: Control buffer. Free for use by every layer. Put private vars here
7fee226a 347 * @_skb_refdst: destination entry (with norefcount bit)
67be2dd1 348 * @sp: the security path, used for xfrm
1da177e4
LT
349 * @len: Length of actual data
350 * @data_len: Data length
351 * @mac_len: Length of link layer header
334a8132 352 * @hdr_len: writable header length of cloned skb
663ead3b
HX
353 * @csum: Checksum (must include start/offset pair)
354 * @csum_start: Offset from skb->head where checksumming should start
355 * @csum_offset: Offset from csum_start where checksum should be stored
d84e0bd7 356 * @priority: Packet queueing priority
67be2dd1 357 * @local_df: allow local fragmentation
1da177e4 358 * @cloned: Head may be cloned (check refcnt to be sure)
d84e0bd7 359 * @ip_summed: Driver fed us an IP checksum
1da177e4 360 * @nohdr: Payload reference only, must not modify header
d84e0bd7 361 * @nfctinfo: Relationship of this skb to the connection
1da177e4 362 * @pkt_type: Packet class
c83c2486 363 * @fclone: skbuff clone status
c83c2486 364 * @ipvs_property: skbuff is owned by ipvs
31729363
RD
365 * @peeked: this packet has been seen already, so stats have been
366 * done for it, don't do them again
ba9dda3a 367 * @nf_trace: netfilter packet trace flag
d84e0bd7
DB
368 * @protocol: Packet protocol from driver
369 * @destructor: Destruct function
370 * @nfct: Associated connection, if any
461ddf3b 371 * @nfct_reasm: netfilter conntrack re-assembly pointer
1da177e4 372 * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
8964be4a 373 * @skb_iif: ifindex of device we arrived on
1da177e4
LT
374 * @tc_index: Traffic control index
375 * @tc_verd: traffic control verdict
d84e0bd7
DB
376 * @rxhash: the packet hash computed on receive
377 * @queue_mapping: Queue mapping for multiqueue devices
553a5672 378 * @ndisc_nodetype: router type (from link layer)
d84e0bd7 379 * @ooo_okay: allow the mapping of a socket to a queue to be changed
4ca2462e
CG
380 * @l4_rxhash: indicate rxhash is a canonical 4-tuple hash over transport
381 * ports.
6e3e939f
JB
382 * @wifi_acked_valid: wifi_acked was set
383 * @wifi_acked: whether frame was acked on wifi or not
3bdc0eba 384 * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS
f4b8ea78
RD
385 * @dma_cookie: a cookie to one of several possible DMA operations
386 * done by skb DMA functions
984bc16c 387 * @secmark: security marking
d84e0bd7
DB
388 * @mark: Generic packet mark
389 * @dropcount: total number of sk_receive_queue overflows
86a9bad3 390 * @vlan_proto: vlan encapsulation protocol
6aa895b0 391 * @vlan_tci: vlan tag control information
6a674e9c
JG
392 * @inner_transport_header: Inner transport layer header (encapsulation)
393 * @inner_network_header: Network layer header (encapsulation)
aefbd2b3 394 * @inner_mac_header: Link layer header (encapsulation)
d84e0bd7
DB
395 * @transport_header: Transport layer header
396 * @network_header: Network layer header
397 * @mac_header: Link layer header
398 * @tail: Tail pointer
399 * @end: End pointer
400 * @head: Head of buffer
401 * @data: Data head pointer
402 * @truesize: Buffer size
403 * @users: User count - see {datagram,tcp}.c
1da177e4
LT
404 */
405
406struct sk_buff {
407 /* These two members must be first. */
408 struct sk_buff *next;
409 struct sk_buff *prev;
410
b7aa0bf7 411 ktime_t tstamp;
da3f5cf1
FF
412
413 struct sock *sk;
1da177e4 414 struct net_device *dev;
1da177e4 415
1da177e4
LT
416 /*
417 * This is the control buffer. It is free to use for every
418 * layer. Please put your private variables there. If you
419 * want to keep them across layers you have to do a skb_clone()
420 * first. This is owned by whoever has the skb queued ATM.
421 */
da3f5cf1 422 char cb[48] __aligned(8);
1da177e4 423
7fee226a 424 unsigned long _skb_refdst;
da3f5cf1
FF
425#ifdef CONFIG_XFRM
426 struct sec_path *sp;
427#endif
1da177e4 428 unsigned int len,
334a8132
PM
429 data_len;
430 __u16 mac_len,
431 hdr_len;
ff1dcadb
AV
432 union {
433 __wsum csum;
663ead3b
HX
434 struct {
435 __u16 csum_start;
436 __u16 csum_offset;
437 };
ff1dcadb 438 };
1da177e4 439 __u32 priority;
fe55f6d5 440 kmemcheck_bitfield_begin(flags1);
1cbb3380
TG
441 __u8 local_df:1,
442 cloned:1,
443 ip_summed:2,
6869c4d8
HW
444 nohdr:1,
445 nfctinfo:3;
d179cd12 446 __u8 pkt_type:3,
b84f4cc9 447 fclone:2,
ba9dda3a 448 ipvs_property:1,
a59322be 449 peeked:1,
ba9dda3a 450 nf_trace:1;
fe55f6d5 451 kmemcheck_bitfield_end(flags1);
4ab408de 452 __be16 protocol;
1da177e4
LT
453
454 void (*destructor)(struct sk_buff *skb);
9fb9cbb1 455#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
5f79e0f9 456 struct nf_conntrack *nfct;
2fc72c7b
KK
457#endif
458#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
9fb9cbb1
YK
459 struct sk_buff *nfct_reasm;
460#endif
1da177e4
LT
461#ifdef CONFIG_BRIDGE_NETFILTER
462 struct nf_bridge_info *nf_bridge;
463#endif
f25f4e44 464
8964be4a 465 int skb_iif;
4031ae6e
AD
466
467 __u32 rxhash;
468
86a9bad3 469 __be16 vlan_proto;
4031ae6e
AD
470 __u16 vlan_tci;
471
1da177e4 472#ifdef CONFIG_NET_SCHED
b6b99eb5 473 __u16 tc_index; /* traffic control index */
1da177e4 474#ifdef CONFIG_NET_CLS_ACT
b6b99eb5 475 __u16 tc_verd; /* traffic control verdict */
1da177e4 476#endif
1da177e4 477#endif
fe55f6d5 478
0a14842f 479 __u16 queue_mapping;
fe55f6d5 480 kmemcheck_bitfield_begin(flags2);
de357cc0 481#ifdef CONFIG_IPV6_NDISC_NODETYPE
8a4eb573 482 __u8 ndisc_nodetype:2;
d0f09804 483#endif
c93bdd0e 484 __u8 pfmemalloc:1;
3853b584 485 __u8 ooo_okay:1;
bdeab991 486 __u8 l4_rxhash:1;
6e3e939f
JB
487 __u8 wifi_acked_valid:1;
488 __u8 wifi_acked:1;
3bdc0eba 489 __u8 no_fcs:1;
d3836f21 490 __u8 head_frag:1;
6a674e9c
JG
491 /* Encapsulation protocol and NIC drivers should use
492 * this flag to indicate to each other if the skb contains
493 * encapsulated packet or not and maybe use the inner packet
494 * headers if needed
495 */
496 __u8 encapsulation:1;
497 /* 7/9 bit hole (depending on ndisc_nodetype presence) */
fe55f6d5
VN
498 kmemcheck_bitfield_end(flags2);
499
97fc2f08
CL
500#ifdef CONFIG_NET_DMA
501 dma_cookie_t dma_cookie;
502#endif
984bc16c
JM
503#ifdef CONFIG_NETWORK_SECMARK
504 __u32 secmark;
505#endif
3b885787
NH
506 union {
507 __u32 mark;
508 __u32 dropcount;
16fad69c 509 __u32 reserved_tailroom;
3b885787 510 };
1da177e4 511
6a674e9c
JG
512 sk_buff_data_t inner_transport_header;
513 sk_buff_data_t inner_network_header;
aefbd2b3 514 sk_buff_data_t inner_mac_header;
27a884dc
ACM
515 sk_buff_data_t transport_header;
516 sk_buff_data_t network_header;
517 sk_buff_data_t mac_header;
1da177e4 518 /* These elements must be at the end, see alloc_skb() for details. */
27a884dc 519 sk_buff_data_t tail;
4305b541 520 sk_buff_data_t end;
1da177e4 521 unsigned char *head,
4305b541 522 *data;
27a884dc
ACM
523 unsigned int truesize;
524 atomic_t users;
1da177e4
LT
525};
526
527#ifdef __KERNEL__
528/*
529 * Handling routines are only of interest to the kernel
530 */
531#include <linux/slab.h>
532
1da177e4 533
c93bdd0e
MG
/* SKB_ALLOC_* flag bits */
#define SKB_ALLOC_FCLONE	0x01
#define SKB_ALLOC_RX		0x02
536
537/* Returns true if the skb was allocated from PFMEMALLOC reserves */
538static inline bool skb_pfmemalloc(const struct sk_buff *skb)
539{
540 return unlikely(skb->pfmemalloc);
541}
542
7fee226a
ED
/*
 * skb might have a dst pointer attached, refcounted or not.
 * _skb_refdst low order bit is set if refcount was _not_ taken
 *
 * SKB_DST_NOREF is that low order flag bit; SKB_DST_PTRMASK clears it to
 * recover the pointer value.  The mask expansion is fully parenthesized
 * so that it stays a single operand wherever the macro is used.
 */
#define SKB_DST_NOREF	1UL
#define SKB_DST_PTRMASK	(~(SKB_DST_NOREF))
549
550/**
551 * skb_dst - returns skb dst_entry
552 * @skb: buffer
553 *
554 * Returns skb dst_entry, regardless of reference taken or not.
555 */
adf30907
ED
556static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
557{
7fee226a
ED
558 /* If refdst was not refcounted, check we still are in a
559 * rcu_read_lock section
560 */
561 WARN_ON((skb->_skb_refdst & SKB_DST_NOREF) &&
562 !rcu_read_lock_held() &&
563 !rcu_read_lock_bh_held());
564 return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK);
adf30907
ED
565}
566
7fee226a
ED
567/**
568 * skb_dst_set - sets skb dst
569 * @skb: buffer
570 * @dst: dst entry
571 *
572 * Sets skb dst, assuming a reference was taken on dst and should
573 * be released by skb_dst_drop()
574 */
adf30907
ED
575static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
576{
7fee226a
ED
577 skb->_skb_refdst = (unsigned long)dst;
578}
579
932bc4d7
JA
580extern void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst,
581 bool force);
582
/**
 * skb_dst_set_noref - sets skb dst, hopefully, without taking reference
 * @skb: buffer
 * @dst: dst entry
 *
 * Sets skb dst on the assumption that no reference was taken on @dst.
 * For a cached dst entry no reference is taken and refdst_drop will avoid
 * dst_release.  For a dst entry that is not cached a reference is taken,
 * so that the last dst_release can destroy the dst immediately.
 */
static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
{
	const bool force = false;

	__skb_dst_set_noref(skb, dst, force);
}
597
/**
 * skb_dst_set_noref_force - sets skb dst, without taking reference
 * @skb: buffer
 * @dst: dst entry
 *
 * Sets skb dst on the assumption that no reference was taken on @dst.
 * No reference is taken here and no dst_release will be called.  Deferred
 * reclaim is a basic feature of cached dsts; for entries that are not
 * cached the caller has to guarantee that the last dst_release for @dst
 * happens when nobody uses it, eg. after a RCU grace period.
 */
static inline void skb_dst_set_noref_force(struct sk_buff *skb,
					   struct dst_entry *dst)
{
	const bool force = true;

	__skb_dst_set_noref(skb, dst, force);
}
7fee226a
ED
614
615/**
25985edc 616 * skb_dst_is_noref - Test if skb dst isn't refcounted
7fee226a
ED
617 * @skb: buffer
618 */
619static inline bool skb_dst_is_noref(const struct sk_buff *skb)
620{
621 return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb);
adf30907
ED
622}
623
511c3f92
ED
/* Return the skb's dst entry, as given by skb_dst(), cast to a struct rtable. */
static inline struct rtable *skb_rtable(const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	return (struct rtable *)dst;
}
628