/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>
#include <net/dst_metadata.h>
#include <net/erspan.h>

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   with infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is a good
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter, since when we enter the first ndo_xmit(), cpu migration is
   forbidden. We force an exit if this counter reaches RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion" in
   this case, if we copy it from the packet being encapsulated to the
   upper header. It is a very good solution, but it introduces two problems:

   - Routing protocols using packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all routers (at least, in my neighbourhood)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something reasonable,
   we should search for another solution.

   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially
   taking fragmentation into account. To be short, ttl is not a solution
   at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and tunnel mtu
   rapidly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when the encapsulated packets have DF set.
   But it is not our problem! Nobody could accuse us; we did
   all that we could. Even if it is your gated that injected
   the fatal route to the network, even if it was you who configured
   the fatal static route: you are innocent. :-)

   Alexey Kuznetsov.
 */

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
				__be32 id, u32 index, bool truncate);

static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;
static void ipgre_err(struct sk_buff *skb, u32 info,
		      const struct tnl_ptk_info *tpi)
{

	/* All the routers (except for Linux) return only
	   8 bytes of packet payload. It means that precise relaying of
	   ICMP in the real Internet is absolutely infeasible.

	   Moreover, Cisco "wise men" put the GRE key to the third word
	   in the GRE header. It makes it impossible to maintain even soft
	   state for keyed GRE tunnels with enabled checksum. Tell
	   them "thank you".

	   Well, I wonder, rfc1812 was written by a Cisco employee;
	   why the hell do these idiots break the standards established
	   by themselves???
	 */
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	unsigned int data_len = 0;
	struct ip_tunnel *t;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;

	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
		break;

	case ICMP_REDIRECT:
		break;
	}

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
			     iph->daddr, iph->saddr, tpi->key);

	if (!t)
		return;

#if IS_ENABLED(CONFIG_IPV6)
	if (tpi->proto == htons(ETH_P_IPV6) &&
	    !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
					type, data_len))
		return;
#endif

	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
}

static void gre_err(struct sk_buff *skb, u32 info)
{
	/* All the routers (except for Linux) return only
	 * 8 bytes of packet payload. It means that precise relaying of
	 * ICMP in the real Internet is absolutely infeasible.
	 *
	 * Moreover, Cisco "wise men" put the GRE key to the third word
	 * in the GRE header. It makes it impossible to maintain even soft
	 * state for keyed GRE tunnels with enabled checksum. Tell
	 * them "thank you".
	 *
	 * Well, I wonder, rfc1812 was written by a Cisco employee;
	 * why the hell do these idiots break the standards established
	 * by themselves???
	 */

	const struct iphdr *iph = (struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct tnl_ptk_info tpi;
	bool csum_err = false;

	if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP),
			     iph->ihl * 4) < 0) {
		if (!csum_err)		/* ignore csum errors. */
			return;
	}

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
		return;
	}
	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
			      IPPROTO_GRE, 0);
		return;
	}

	ipgre_err(skb, info, &tpi);
}

static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
		      int gre_hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct metadata_dst *tun_dst = NULL;
	struct ip_tunnel_net *itn;
	struct ip_tunnel *tunnel;
	struct erspanhdr *ershdr;
	const struct iphdr *iph;
	__be32 index;
	int len;

	itn = net_generic(net, erspan_net_id);
	len = gre_hdr_len + sizeof(*ershdr);

	if (unlikely(!pskb_may_pull(skb, len)))
		return -ENOMEM;

	iph = ip_hdr(skb);
	ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);

	/* The original GRE header does not have a key field,
	 * so use the ERSPAN 10-bit session ID as the key.
	 */
	tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
	index = ershdr->md.index;
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
				  tpi->flags | TUNNEL_KEY,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		if (__iptunnel_pull_header(skb,
					   gre_hdr_len + sizeof(*ershdr),
					   htons(ETH_P_TEB),
					   false, false) < 0)
			goto drop;

		if (tunnel->collect_md) {
			struct ip_tunnel_info *info;
			struct erspan_metadata *md;
			__be64 tun_id;
			__be16 flags;

			tpi->flags |= TUNNEL_KEY;
			flags = tpi->flags;
			tun_id = key32_to_tunnel_id(tpi->key);

			tun_dst = ip_tun_rx_dst(skb, flags,
						tun_id, sizeof(*md));
			if (!tun_dst)
				return PACKET_REJECT;

			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
			if (!md) {
				dst_release((struct dst_entry *)tun_dst);
				return PACKET_REJECT;
			}

			md->index = index;
			info = &tun_dst->u.tun_info;
			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
			info->options_len = sizeof(*md);
		} else {
			tunnel->index = ntohl(index);
		}

		skb_reset_mac_header(skb);
		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

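/* Try to match a parsed GRE packet against a tunnel in @itn. Returns
 * PACKET_RCVD if the packet was consumed (delivered or dropped),
 * PACKET_REJECT if metadata collection failed, and PACKET_NEXT if no
 * tunnel matched, so the caller may retry against another tunnel table.
 */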
static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
{
	struct metadata_dst *tun_dst = NULL;
	const struct iphdr *iph;
	struct ip_tunnel *tunnel;

	iph = ip_hdr(skb);
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
					   raw_proto, false) < 0)
			goto drop;

		if (tunnel->dev->type != ARPHRD_NONE)
			skb_pop_mac_header(skb);
		else
			skb_reset_mac_header(skb);
		if (tunnel->collect_md) {
			__be16 flags;
			__be64 tun_id;

			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
			tun_id = key32_to_tunnel_id(tpi->key);
			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
			if (!tun_dst)
				return PACKET_REJECT;
		}

		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_NEXT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		     int hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	int res;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
		/* ipgre tunnels in collect metadata mode should also
		 * receive ETH_P_TEB traffic.
		 */
		itn = net_generic(net, ipgre_net_id);
		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
	}
	return res;
}

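/* Receive handler registered below for GREPROTO_CISCO: parse the GRE
 * header and dispatch to the ERSPAN or plain GRE paths; if no tunnel
 * claims the packet, answer with ICMP port unreachable.
 */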
static int gre_rcv(struct sk_buff *skb)
{
	struct tnl_ptk_info tpi;
	bool csum_err = false;
	int hdr_len;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
	}
#endif

	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
	if (hdr_len < 0)
		goto drop;

	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) {
		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
			return 0;
		goto out;
	}

	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
		return 0;

out:
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
	kfree_skb(skb);
	return 0;
}

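/* Common transmit path for configured (non-collect_md) tunnels: bump the
 * output sequence number if TUNNEL_SEQ is set, push the GRE header and
 * hand the packet to the generic IP tunnel transmit code.
 */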
static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->parms.o_flags & TUNNEL_SEQ)
		tunnel->o_seqno++;

	/* Push GRE header. */
	gre_build_header(skb, tunnel->tun_hlen,
			 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
			 htonl(tunnel->o_seqno));

	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}

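/* Prepare the skb's GSO/checksum state for GRE encapsulation; selects the
 * SKB_GSO_GRE_CSUM variant when the tunnel also computes a GRE checksum.
 */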
static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
	return iptunnel_handle_offloads(skb,
					csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}

static struct rtable *gre_get_rt(struct sk_buff *skb,
				 struct net_device *dev,
				 struct flowi4 *fl,
				 const struct ip_tunnel_key *key)
{
	struct net *net = dev_net(dev);

	memset(fl, 0, sizeof(*fl));
	fl->daddr = key->u.ipv4.dst;
	fl->saddr = key->u.ipv4.src;
	fl->flowi4_tos = RT_TOS(key->tos);
	fl->flowi4_mark = skb->mark;
	fl->flowi4_proto = IPPROTO_GRE;

	return ip_route_output_key(net, fl);
}

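/* Resolve the route for a flow-based (collect_md) transmit and make sure
 * the skb has enough headroom for the outer headers. On failure the skb
 * is freed and NULL is returned, so callers must not touch it again.
 */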
static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
				      struct net_device *dev,
				      struct flowi4 *fl,
				      int tunnel_hlen)
{
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	int min_headroom;
	bool use_cache;
	int err;

	tun_info = skb_tunnel_info(skb);
	key = &tun_info->key;
	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);

	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
	if (!rt) {
		rt = gre_get_rt(skb, dev, fl, key);
		if (IS_ERR(rt))
			goto err_free_skb;
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl->saddr);
	}

	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
		       + tunnel_hlen + sizeof(struct iphdr);
	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -
						skb_headroom(skb) +
						16);

		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
				       0, GFP_ATOMIC);
		if (unlikely(err))
			goto err_free_rt;
	}
	return rt;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NULL;
}

static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			__be16 proto)
{
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	struct flowi4 fl;
	int tunnel_hlen;
	__be16 df, flags;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	tunnel_hlen = gre_calc_hlen(key->tun_flags);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;

	/* Push Tunnel header. */
	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
		goto err_free_rt;

	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
	gre_build_header(skb, tunnel_hlen, flags, proto,
			 tunnel_id_to_key32(tun_info->key.tun_id), 0);

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}

static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			   __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct erspan_metadata *md;
	struct rtable *rt = NULL;
	bool truncate = false;
	struct flowi4 fl;
	int tunnel_hlen;
	__be16 df;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;

	/* ERSPAN has a fixed 8-byte GRE header */
	tunnel_hlen = 8 + sizeof(struct erspanhdr);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;

	if (gre_handle_offloads(skb, false))
		goto err_free_rt;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
		truncate = true;
	}

	md = ip_tunnel_info_opts(tun_info);
	if (!md)
		goto err_free_rt;

	erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
			    ntohl(md->index), truncate);

	gre_build_header(skb, 8, TUNNEL_SEQ,
			 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}

static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	struct rtable *rt;
	struct flowi4 fl4;

	if (ip_tunnel_info_af(info) != AF_INET)
		return -EINVAL;

	rt = gre_get_rt(skb, dev, &fl4, &info->key);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	ip_rt_put(rt);
	info->key.u.ipv4.src = fl4.saddr;
	return 0;
}

static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (dev->header_ops) {
		/* Need space for new headers */
		if (skb_cow_head(skb, dev->needed_headroom -
				      (tunnel->hlen + sizeof(struct iphdr))))
			goto free_skb;

		tnl_params = (const struct iphdr *)skb->data;

		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
		 * to gre header.
		 */
		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
		skb_reset_mac_header(skb);
	} else {
		if (skb_cow_head(skb, dev->needed_headroom))
			goto free_skb;

		tnl_params = &tunnel->parms.iph;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	__gre_xmit(skb, dev, tnl_params, skb->protocol);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

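/* The DSCP field is the upper six bits of the IPv4 TOS byte (tos >> 2);
 * its top three bits (the class selector) are used as the ERSPAN CoS.
 */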
static inline u8 tos_to_cos(u8 tos)
{
	u8 dscp, cos;

	dscp = tos >> 2;
	cos = dscp >> 3;
	return cos;
}

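/* Build the 8-byte ERSPAN (type II) header in front of the mirrored frame:
 * a version/vlan word, a session-id word carrying the CoS, encapsulation
 * type and truncation bit, and the index word.
 */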
static void erspan_build_header(struct sk_buff *skb,
				__be32 id, u32 index, bool truncate)
{
	struct iphdr *iphdr = ip_hdr(skb);
	struct ethhdr *eth = eth_hdr(skb);
	enum erspan_encap_type enc_type;
	struct erspanhdr *ershdr;
	struct qtag_prefix {
		__be16 eth_type;
		__be16 tci;
	} *qp;
	u16 vlan_tci = 0;

	enc_type = ERSPAN_ENCAP_NOVLAN;

	/* If the mirrored packet has a vlan tag, extract the tci and
	 * preserve the vlan header in the mirrored frame.
	 */
	if (eth->h_proto == htons(ETH_P_8021Q)) {
		qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
		vlan_tci = ntohs(qp->tci);
		enc_type = ERSPAN_ENCAP_INFRAME;
	}

	skb_push(skb, sizeof(*ershdr));
	ershdr = (struct erspanhdr *)skb->data;
	memset(ershdr, 0, sizeof(*ershdr));

	ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
				 (ERSPAN_VERSION << VER_OFFSET));
	ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
			   ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) |
			   (enc_type << EN_OFFSET & EN_MASK) |
			   ((truncate << T_OFFSET) & T_MASK));
	ershdr->md.index = htonl(index & INDEX_MASK);
}

static netdev_tx_t erspan_xmit(struct sk_buff *skb,
			       struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	bool truncate = false;

	if (tunnel->collect_md) {
		erspan_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, false))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);
		truncate = true;
	}

	/* Push ERSPAN header */
	erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate);
	tunnel->parms.o_flags &= ~TUNNEL_KEY;
	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}
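
/* For illustration only (the exact syntax depends on the iproute2
 * version): an ERSPAN device driven by this code is typically created
 * with something like
 *
 *   ip link add dev erspan1 type erspan seq key 100 \
 *       local 10.0.0.1 remote 10.0.0.2 erspan 123
 *   ip link set erspan1 up
 *
 * erspan_validate() below requires exactly the GRE_SEQ and GRE_KEY flags
 * unless the device is created in collect_md mode.
 */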

static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}
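
/* For illustration only, assuming an iproute2 that supports the
 * "external" keyword: a classic gretap device carries Ethernet frames
 * over GRE, while "external" selects collect_md mode where each skb
 * supplies its own tunnel metadata:
 *
 *   ip link add gretap1 type gretap local 10.0.0.1 remote 10.0.0.2
 *   ip link add gretap2 type gretap external
 */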
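
/* Legacy SIOC{ADD,CHG,DEL,GET}TUNNEL interface: translate between the
 * on-the-wire GRE_* flag encoding and the internal TUNNEL_* flags around
 * the generic ip_tunnel_ioctl() helper.
 */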
static int ipgre_tunnel_ioctl(struct net_device *dev,
			      struct ifreq *ifr, int cmd)
{
	int err;
	struct ip_tunnel_parm p;

	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
		return -EFAULT;
	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
		    ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
			return -EINVAL;
	}
	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);

	err = ip_tunnel_ioctl(dev, &p, cmd);
	if (err)
		return err;

	p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
	p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);

	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
		return -EFAULT;
	return 0;
}

/* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so that I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could have made something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
 */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	iph = skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph + 1);
	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen + sizeof(*iph);

	return -(t->hlen + sizeof(*iph));
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *)skb_mac_header(skb);

	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(t->net, &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (!__in_dev_get_rtnl(dev))
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;

		in_dev = inetdev_by_index(t->net, t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}
#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
};

#define GRE_FEATURES	(NETIF_F_SG |		\
			 NETIF_F_FRAGLIST |	\
			 NETIF_F_HIGHDMA |	\
			 NETIF_F_HW_CSUM)

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops = &ipgre_netdev_ops;
	dev->type = ARPHRD_IPGRE;
	ip_tunnel_setup(dev, ipgre_net_id);
}

static void __gre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	int t_hlen;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;

	t_hlen = tunnel->hlen + sizeof(struct iphdr);

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		/* TCP offload with GRE SEQ is not supported, nor can we
		 * support 2 levels of outer headers requiring an update.
		 */
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
			dev->features    |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		}

		/* Can use a lockless transmit, unless we generate
		 * output sequences
		 */
		dev->features |= NETIF_F_LLTX;
	}
}

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	memcpy(dev->dev_addr, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->flags = IFF_NOARP;
	netif_keep_dst(dev);
	dev->addr_len = 4;

	if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else if (!tunnel->collect_md) {
		dev->header_ops = &ipgre_header_ops;
	}

	return ip_tunnel_init(dev);
}

static const struct gre_protocol ipgre_protocol = {
	.handler     = gre_rcv,
	.err_handler = gre_err,
};

static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);

	ip_tunnel_delete_net(itn, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
				 struct netlink_ext_ack *extack)
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION | GRE_ROUTING))
		return -EINVAL;

	if (data[IFLA_GRE_COLLECT_METADATA] &&
	    data[IFLA_GRE_ENCAP_TYPE] &&
	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
			      struct netlink_ext_ack *extack)
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data, extack);
}

static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
	__be16 flags = 0;
	int ret;

	if (!data)
		return 0;

	ret = ipgre_tap_validate(tb, data, extack);
	if (ret)
		return ret;

	/* ERSPAN should only have the GRE sequence and key flags */
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (!data[IFLA_GRE_COLLECT_METADATA] &&
	    flags != (GRE_SEQ | GRE_KEY))
		return -EINVAL;

	/* The ERSPAN session ID is only 10 bits wide. Since we reuse the
	 * 32-bit key field as the ID, check its range.
	 */
	if (data[IFLA_GRE_IKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
		return -EINVAL;

	if (data[IFLA_GRE_OKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
		return -EINVAL;

	return 0;
}

static int ipgre_netlink_parms(struct net_device *dev,
			       struct nlattr *data[],
			       struct nlattr *tb[],
			       struct ip_tunnel_parm *parms,
			       __u32 *fwmark)
{
	struct ip_tunnel *t = netdev_priv(dev);

	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return 0;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
		if (t->ignore_df)
			return -EINVAL;
		parms->iph.frag_off = htons(IP_DF);
	}

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		t->collect_md = true;
		if (dev->type == ARPHRD_IPGRE)
			dev->type = ARPHRD_NONE;
	}

	if (data[IFLA_GRE_IGNORE_DF]) {
		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
		    && (parms->iph.frag_off & htons(IP_DF)))
			return -EINVAL;
		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
	}

	if (data[IFLA_GRE_FWMARK])
		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);

	if (data[IFLA_GRE_ERSPAN_INDEX]) {
		t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);

		if (t->index & ~INDEX_MASK)
			return -EINVAL;
	}

	return 0;
}

/* This function returns true when ENCAP attributes are present in the nl msg */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
				      struct ip_tunnel_encap *ipencap)
{
	bool ret = false;

	memset(ipencap, 0, sizeof(*ipencap));

	if (!data)
		return ret;

	if (data[IFLA_GRE_ENCAP_TYPE]) {
		ret = true;
		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
	}

	if (data[IFLA_GRE_ENCAP_FLAGS]) {
		ret = true;
		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
	}

	if (data[IFLA_GRE_ENCAP_SPORT]) {
		ret = true;
		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
	}

	if (data[IFLA_GRE_ENCAP_DPORT]) {
		ret = true;
		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
	}

	return ret;
}

static int gre_tap_init(struct net_device *dev)
{
	__gre_tunnel_init(dev);
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static int erspan_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen;

	tunnel->tun_hlen = 8;
	tunnel->parms.iph.protocol = IPPROTO_GRE;
	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
		       sizeof(struct erspanhdr);
	t_hlen = tunnel->hlen + sizeof(struct iphdr);

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;
	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops erspan_netdev_ops = {
	.ndo_init		= erspan_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= erspan_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->max_mtu = 0;
	dev->netdev_ops	= &gre_tap_netdev_ops;
	dev->priv_flags	&= ~IFF_TX_SKB_SHARING;
	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, gre_tap_net_id);
}

static int ipgre_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)
{
	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;
	__u32 fwmark = 0;
	int err;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);

		err = ip_tunnel_encap_setup(t, &ipencap);
		if (err < 0)
			return err;
	}

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;
	return ip_tunnel_newlink(dev, tb, &p, fwmark);
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[],
			    struct netlink_ext_ack *extack)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;
	__u32 fwmark = t->fwmark;
	int err;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		err = ip_tunnel_encap_setup(t, &ipencap);
		if (err < 0)
			return err;
	}

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;
	return ip_tunnel_changelink(dev, tb, &p, fwmark);
}

static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		/* IFLA_GRE_ENCAP_TYPE */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_FLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_SPORT */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_DPORT */
		nla_total_size(2) +
		/* IFLA_GRE_COLLECT_METADATA */
		nla_total_size(0) +
		/* IFLA_GRE_IGNORE_DF */
		nla_total_size(1) +
		/* IFLA_GRE_FWMARK */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_INDEX */
		nla_total_size(4) +
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
			 gre_tnl_flags_to_gre_flags(p->o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))) ||
	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
			t->encap.type) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
			 t->encap.sport) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
			 t->encap.dport) ||
	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
			t->encap.flags))
		goto nla_put_failure;

	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
		goto nla_put_failure;

	if (t->collect_md) {
		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
			goto nla_put_failure;
	}

	if (t->index)
		if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
			goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static void erspan_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->netdev_ops = &erspan_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, erspan_net_id);
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops erspan_link_ops __read_mostly = {
	.kind		= "erspan",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= erspan_setup,
	.validate	= erspan_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

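/* Create a flow-based (collect_md) gretap device; used by openvswitch to
 * back its GRE vports with an ordinary net_device.
 */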
struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
					u8 name_assign_type)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	struct ip_tunnel *t;
	int err;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, name, name_assign_type,
			       &ipgre_tap_ops, tb);
	if (IS_ERR(dev))
		return dev;

	/* Configure flow based GRE device. */
	t = netdev_priv(dev);
	t->collect_md = true;

	err = ipgre_newlink(net, dev, tb, NULL, NULL);
	if (err < 0) {
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
	if (err)
		goto out;

	err = rtnl_configure_link(dev, NULL);
	if (err < 0)
		goto out;

	return dev;
out:
	ip_tunnel_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);

static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);

	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit = ipgre_tap_exit_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __net_init erspan_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, erspan_net_id,
				  &erspan_link_ops, "erspan0");
}

static void __net_exit erspan_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, erspan_net_id);

	ip_tunnel_delete_net(itn, &erspan_link_ops);
}

static struct pernet_operations erspan_net_ops = {
	.init = erspan_init_net,
	.exit = erspan_exit_net,
	.id   = &erspan_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = register_pernet_device(&ipgre_tap_net_ops);
	if (err < 0)
		goto pnet_tap_failed;

	err = register_pernet_device(&erspan_net_ops);
	if (err < 0)
		goto pnet_erspan_failed;

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

	err = rtnl_link_register(&erspan_link_ops);
	if (err < 0)
		goto erspan_link_failed;

	return 0;

erspan_link_failed:
	rtnl_link_unregister(&ipgre_tap_ops);
tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
	unregister_pernet_device(&erspan_net_ops);
pnet_erspan_failed:
	unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
	unregister_pernet_device(&ipgre_net_ops);
	return err;
}

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	rtnl_link_unregister(&erspan_link_ops);
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
	unregister_pernet_device(&erspan_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_RTNL_LINK("erspan");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
MODULE_ALIAS_NETDEV("erspan0");