net: fix comments for __skb_flow_get_ports()
net/core/flow_dissector.c

#include <linux/skbuff.h>
#include <linux/export.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <linux/igmp.h>
#include <linux/icmp.h>
#include <linux/sctp.h>
#include <linux/dccp.h>
#include <linux/if_tunnel.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
#include <net/flow_keys.h>

/* copy saddr & daddr, possibly using 64bit load/store
 * Equivalent to:	flow->src = iph->saddr;
 *			flow->dst = iph->daddr;
 */
static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph)
{
	BUILD_BUG_ON(offsetof(typeof(*flow), dst) !=
		     offsetof(typeof(*flow), src) + sizeof(flow->src));
	memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst));
}

/**
 * __skb_flow_get_ports - extract the upper layer ports and return them
 * @skb: sk_buff to extract the ports from
 * @thoff: transport header offset
 * @ip_proto: protocol for which to get port offset
 * @data: raw buffer pointer to the packet, if NULL use skb->data
 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
 *
 * The function will try to retrieve the ports at offset thoff + poff where
 * poff is the protocol port offset returned from proto_ports_offset().
 */
__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
			    void *data, int hlen)
{
	int poff = proto_ports_offset(ip_proto);

	if (!data) {
		data = skb->data;
		hlen = skb_headlen(skb);
	}

	if (poff >= 0) {
		__be32 *ports, _ports;

		ports = __skb_header_pointer(skb, thoff + poff,
					     sizeof(_ports), data, hlen, &_ports);
		if (ports)
			return *ports;
	}

	return 0;
}
EXPORT_SYMBOL(__skb_flow_get_ports);
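
/*
 * Illustrative sketch, not part of the original file: a hypothetical caller
 * that splits the packed result for a TCP packet.  For TCP/UDP,
 * proto_ports_offset() is 0, so the returned __be32 holds the source and
 * destination ports back to back in wire order, exactly as they sit in the
 * transport header.  On failure the helper returns 0, i.e. both ports zero.
 */
static inline void example_split_ports(const struct sk_buff *skb, int thoff,
				       __be16 *sport, __be16 *dport)
{
	/* data == NULL, hlen == 0: helper falls back to skb->data/skb_headlen() */
	__be32 ports = __skb_flow_get_ports(skb, thoff, IPPROTO_TCP, NULL, 0);
	__be16 p[2];

	memcpy(p, &ports, sizeof(p));	/* p[0] = source, p[1] = destination */
	*sport = p[0];
	*dport = p[1];
}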

bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, void *data, int hlen)
{
	int nhoff = skb_network_offset(skb);
	u8 ip_proto;
	__be16 proto = skb->protocol;

	if (!data) {
		data = skb->data;
		hlen = skb_headlen(skb);
	}

	memset(flow, 0, sizeof(*flow));

again:
	switch (proto) {
	case htons(ETH_P_IP): {
		const struct iphdr *iph;
		struct iphdr _iph;
ip:
		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
		if (!iph || iph->ihl < 5)
			return false;
		nhoff += iph->ihl * 4;

		ip_proto = iph->protocol;
		if (ip_is_fragment(iph))
			ip_proto = 0;

		iph_to_flow_copy_addrs(flow, iph);
		break;
	}
	case htons(ETH_P_IPV6): {
		const struct ipv6hdr *iph;
		struct ipv6hdr _iph;
		__be32 flow_label;

ipv6:
		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
		if (!iph)
			return false;

		ip_proto = iph->nexthdr;
		flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
		flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
		nhoff += sizeof(struct ipv6hdr);

		flow_label = ip6_flowlabel(iph);
		if (flow_label) {
			/* Awesome, IPv6 packet has a flow label so we can
			 * use that to represent the ports without any
			 * further dissection.
			 */
			flow->n_proto = proto;
			flow->ip_proto = ip_proto;
			flow->ports = flow_label;
			flow->thoff = (u16)nhoff;

			return true;
		}

		break;
	}
	case htons(ETH_P_8021AD):
	case htons(ETH_P_8021Q): {
		const struct vlan_hdr *vlan;
		struct vlan_hdr _vlan;

		vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan);
		if (!vlan)
			return false;

		proto = vlan->h_vlan_encapsulated_proto;
		nhoff += sizeof(*vlan);
		goto again;
	}
	case htons(ETH_P_PPP_SES): {
		struct {
			struct pppoe_hdr hdr;
			__be16 proto;
		} *hdr, _hdr;
		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
		if (!hdr)
			return false;
		proto = hdr->proto;
		nhoff += PPPOE_SES_HLEN;
		switch (proto) {
		case htons(PPP_IP):
			goto ip;
		case htons(PPP_IPV6):
			goto ipv6;
		default:
			return false;
		}
	}
	default:
		return false;
	}

	switch (ip_proto) {
	case IPPROTO_GRE: {
		struct gre_hdr {
			__be16 flags;
			__be16 proto;
		} *hdr, _hdr;

		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
		if (!hdr)
			return false;
		/*
		 * Only look inside GRE if version zero and no
		 * routing
		 */
		if (!(hdr->flags & (GRE_VERSION|GRE_ROUTING))) {
			proto = hdr->proto;
			nhoff += 4;
			if (hdr->flags & GRE_CSUM)
				nhoff += 4;
			if (hdr->flags & GRE_KEY)
				nhoff += 4;
			if (hdr->flags & GRE_SEQ)
				nhoff += 4;
			if (proto == htons(ETH_P_TEB)) {
				const struct ethhdr *eth;
				struct ethhdr _eth;

				eth = __skb_header_pointer(skb, nhoff,
							   sizeof(_eth),
							   data, hlen, &_eth);
				if (!eth)
					return false;
				proto = eth->h_proto;
				nhoff += sizeof(*eth);
			}
			goto again;
		}
		break;
	}
	case IPPROTO_IPIP:
		proto = htons(ETH_P_IP);
		goto ip;
	case IPPROTO_IPV6:
		proto = htons(ETH_P_IPV6);
		goto ipv6;
	default:
		break;
	}

	flow->n_proto = proto;
	flow->ip_proto = ip_proto;
	flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen);
	flow->thoff = (u16) nhoff;

	return true;
}
EXPORT_SYMBOL(__skb_flow_dissect);
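
/*
 * Illustrative sketch, not part of the original file: typical use of the
 * dissector via the skb_flow_dissect() wrapper (net/flow_keys.h), which
 * calls __skb_flow_dissect() with data == NULL so the skb's own linear
 * buffer is used.  Hypothetical helper name.
 */
static inline bool example_is_tcp_flow(const struct sk_buff *skb)
{
	struct flow_keys keys;

	/* false means the headers could not be dissected (unknown proto,
	 * truncated packet, non-zero GRE version, ...)
	 */
	if (!skb_flow_dissect(skb, &keys))
		return false;

	return keys.ip_proto == IPPROTO_TCP;
}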

static u32 hashrnd __read_mostly;
static __always_inline void __flow_hash_secret_init(void)
{
	net_get_random_once(&hashrnd, sizeof(hashrnd));
}

static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c)
{
	__flow_hash_secret_init();
	return jhash_3words(a, b, c, hashrnd);
}
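
/*
 * Illustrative sketch, not part of the original file: net_get_random_once()
 * fills hashrnd exactly once, on first use, so flow hashes are keyed with a
 * per-boot secret and cannot be predicted off-box.  Any subsystem-local
 * secret can follow the same lazy-init pattern (hypothetical names):
 */
static u32 example_secret __read_mostly;

static inline u32 example_keyed_hash(u32 a, u32 b)
{
	net_get_random_once(&example_secret, sizeof(example_secret));
	return jhash_2words(a, b, example_secret);
}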

static inline u32 __flow_hash_from_keys(struct flow_keys *keys)
{
	u32 hash;

	/* get a consistent hash (same value on both flow directions) */
	if (((__force u32)keys->dst < (__force u32)keys->src) ||
	    (((__force u32)keys->dst == (__force u32)keys->src) &&
	     ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) {
		swap(keys->dst, keys->src);
		swap(keys->port16[0], keys->port16[1]);
	}

	hash = __flow_hash_3words((__force u32)keys->dst,
				  (__force u32)keys->src,
				  (__force u32)keys->ports);
	if (!hash)
		hash = 1;

	return hash;
}

u32 flow_hash_from_keys(struct flow_keys *keys)
{
	return __flow_hash_from_keys(keys);
}
EXPORT_SYMBOL(flow_hash_from_keys);
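
/*
 * Illustrative sketch, not part of the original file: because the keys are
 * canonicalized before hashing, both directions of a flow produce the same
 * value.  A hypothetical self-check:
 */
static inline bool example_hash_is_symmetric(const struct flow_keys *keys)
{
	struct flow_keys fwd = *keys, rev = *keys;

	swap(rev.src, rev.dst);
	swap(rev.port16[0], rev.port16[1]);

	return flow_hash_from_keys(&fwd) == flow_hash_from_keys(&rev);
}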

/*
 * __skb_get_hash: calculate a flow hash based on src/dst addresses
 * and src/dst port numbers.  Sets hash in skb to non-zero hash value
 * on success, zero indicates no valid hash.  Also, sets l4_hash in skb
 * if hash is a canonical 4-tuple hash over transport ports.
 */
void __skb_get_hash(struct sk_buff *skb)
{
	struct flow_keys keys;

	if (!skb_flow_dissect(skb, &keys))
		return;

	if (keys.ports)
		skb->l4_hash = 1;

	skb->sw_hash = 1;

	skb->hash = __flow_hash_from_keys(&keys);
}
EXPORT_SYMBOL(__skb_get_hash);
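
/*
 * Illustrative sketch, not part of the original file: callers normally go
 * through skb_get_hash() (skbuff.h), which only invokes __skb_get_hash()
 * when neither a hardware (l4_hash) nor a software (sw_hash) hash is
 * already cached.  A hypothetical flow-bucketing helper:
 */
static inline u32 example_flow_bucket(struct sk_buff *skb, u32 nr_buckets)
{
	/* reciprocal_scale() maps the 32-bit hash onto [0, nr_buckets) */
	return reciprocal_scale(skb_get_hash(skb), nr_buckets);
}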

/*
 * Returns a Tx hash based on the given packet descriptor and a Tx queues'
 * number to be used as a distribution range.
 */
u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
		  unsigned int num_tx_queues)
{
	u32 hash;
	u16 qoffset = 0;
	u16 qcount = num_tx_queues;

	if (skb_rx_queue_recorded(skb)) {
		hash = skb_get_rx_queue(skb);
		while (unlikely(hash >= num_tx_queues))
			hash -= num_tx_queues;
		return hash;
	}

	if (dev->num_tc) {
		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
		qoffset = dev->tc_to_txq[tc].offset;
		qcount = dev->tc_to_txq[tc].count;
	}

	return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
}
EXPORT_SYMBOL(__skb_tx_hash);
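
/*
 * Illustrative sketch, not part of the original file: reciprocal_scale(val,
 * ep_ro) computes (u32)(((u64)val * ep_ro) >> 32), a division-free mapping
 * of a uniform 32-bit value onto [0, ep_ro).  Worked example for the return
 * statement above (hypothetical numbers):
 */
static inline u16 example_tx_queue_for_hash(void)
{
	u32 hash = 0x80000000;	/* midpoint of the 32-bit hash space */
	u16 qcount = 8, qoffset = 16;

	/* (0x80000000ULL * 8) >> 32 = 4, so queue 16 + 4 = 20 */
	return (u16)reciprocal_scale(hash, qcount) + qoffset;
}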

/* __skb_get_poff() returns the offset to the payload as far as it could
 * be dissected. The main user is currently BPF, so that we can dynamically
 * truncate packets without needing to push actual payload to the user
 * space and can analyze headers only, instead.
 */
u32 __skb_get_poff(const struct sk_buff *skb)
{
	struct flow_keys keys;
	u32 poff = 0;

	if (!skb_flow_dissect(skb, &keys))
		return 0;

	poff += keys.thoff;
	switch (keys.ip_proto) {
	case IPPROTO_TCP: {
		const struct tcphdr *tcph;
		struct tcphdr _tcph;

		tcph = skb_header_pointer(skb, poff, sizeof(_tcph), &_tcph);
		if (!tcph)
			return poff;

		poff += max_t(u32, sizeof(struct tcphdr), tcph->doff * 4);
		break;
	}
	case IPPROTO_UDP:
	case IPPROTO_UDPLITE:
		poff += sizeof(struct udphdr);
		break;
	/* For the rest, we do not really care about header
	 * extensions at this point for now.
	 */
	case IPPROTO_ICMP:
		poff += sizeof(struct icmphdr);
		break;
	case IPPROTO_ICMPV6:
		poff += sizeof(struct icmp6hdr);
		break;
	case IPPROTO_IGMP:
		poff += sizeof(struct igmphdr);
		break;
	case IPPROTO_DCCP:
		poff += sizeof(struct dccp_hdr);
		break;
	case IPPROTO_SCTP:
		poff += sizeof(struct sctphdr);
		break;
	}

	return poff;
}
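
/*
 * Illustrative sketch, not part of the original file: a BPF-style use of
 * __skb_get_poff() that caps a capture at the end of the headers.  A return
 * of 0 means the packet could not be dissected, so nothing is trimmed.
 * Hypothetical helper name.
 */
static inline u32 example_headers_snaplen(const struct sk_buff *skb)
{
	u32 poff = __skb_get_poff(skb);

	return poff ? min(poff, skb->len) : skb->len;
}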

static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_XPS
	struct xps_dev_maps *dev_maps;
	struct xps_map *map;
	int queue_index = -1;

	rcu_read_lock();
	dev_maps = rcu_dereference(dev->xps_maps);
	if (dev_maps) {
		map = rcu_dereference(
		    dev_maps->cpu_map[raw_smp_processor_id()]);
		if (map) {
			if (map->len == 1)
				queue_index = map->queues[0];
			else
				queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
									   map->len)];
			if (unlikely(queue_index >= dev->real_num_tx_queues))
				queue_index = -1;
		}
	}
	rcu_read_unlock();

	return queue_index;
#else
	return -1;
#endif
}

static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	int queue_index = sk_tx_queue_get(sk);

	if (queue_index < 0 || skb->ooo_okay ||
	    queue_index >= dev->real_num_tx_queues) {
		int new_index = get_xps_queue(dev, skb);
		if (new_index < 0)
			new_index = skb_tx_hash(dev, skb);

		if (queue_index != new_index && sk &&
		    rcu_access_pointer(sk->sk_dst_cache))
			sk_tx_queue_set(sk, new_index);

		queue_index = new_index;
	}

	return queue_index;
}

struct netdev_queue *netdev_pick_tx(struct net_device *dev,
				    struct sk_buff *skb,
				    void *accel_priv)
{
	int queue_index = 0;

	if (dev->real_num_tx_queues != 1) {
		const struct net_device_ops *ops = dev->netdev_ops;
		if (ops->ndo_select_queue)
			queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
							    __netdev_pick_tx);
		else
			queue_index = __netdev_pick_tx(dev, skb);

		if (!accel_priv)
			queue_index = netdev_cap_txqueue(dev, queue_index);
	}

	skb_set_queue_mapping(skb, queue_index);
	return netdev_get_tx_queue(dev, queue_index);
}
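
/*
 * Illustrative sketch, not part of the original file: a driver that
 * implements ndo_select_queue() receives the default policy as the
 * fallback argument (__netdev_pick_tx above) and can delegate to it.
 * Hypothetical driver callback matching this kernel's 4-argument
 * signature; it would be referenced from the driver's net_device_ops.
 */
static u16 example_select_queue(struct net_device *dev, struct sk_buff *skb,
				void *accel_priv,
				select_queue_fallback_t fallback)
{
	/* pin control-priority traffic to queue 0, default policy otherwise */
	return skb->priority == TC_PRIO_CONTROL ? 0 : fallback(dev, skb);
}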