Merge tag 'for-linus-v3.10-rc3' of git://oss.sgi.com/xfs/xfs
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv4 / ip_gre.c
1 /*
2 * Linux NET3: GRE over IP protocol decoder.
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
15 #include <linux/capability.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/slab.h>
20 #include <asm/uaccess.h>
21 #include <linux/skbuff.h>
22 #include <linux/netdevice.h>
23 #include <linux/in.h>
24 #include <linux/tcp.h>
25 #include <linux/udp.h>
26 #include <linux/if_arp.h>
27 #include <linux/mroute.h>
28 #include <linux/init.h>
29 #include <linux/in6.h>
30 #include <linux/inetdevice.h>
31 #include <linux/igmp.h>
32 #include <linux/netfilter_ipv4.h>
33 #include <linux/etherdevice.h>
34 #include <linux/if_ether.h>
35
36 #include <net/sock.h>
37 #include <net/ip.h>
38 #include <net/icmp.h>
39 #include <net/protocol.h>
40 #include <net/ip_tunnels.h>
41 #include <net/arp.h>
42 #include <net/checksum.h>
43 #include <net/dsfield.h>
44 #include <net/inet_ecn.h>
45 #include <net/xfrm.h>
46 #include <net/net_namespace.h>
47 #include <net/netns/generic.h>
48 #include <net/rtnetlink.h>
49 #include <net/gre.h>
50
51 #if IS_ENABLED(CONFIG_IPV6)
52 #include <net/ipv6.h>
53 #include <net/ip6_fib.h>
54 #include <net/ip6_route.h>
55 #endif
56
57 /*
58 Problems & solutions
59 --------------------
60
61 1. The most important issue is detecting local dead loops.
62 They would cause complete host lockup in transmit, which
63 would be "resolved" by stack overflow or, if queueing is enabled,
64 with infinite looping in net_bh.
65
66 We cannot track such dead loops during route installation,
67 it is infeasible task. The most general solutions would be
68 to keep skb->encapsulation counter (sort of local ttl),
69 and silently drop packet when it expires. It is a good
70 solution, but it supposes maintaining new variable in ALL
71 skb, even if no tunneling is used.
72
73 Current solution: xmit_recursion breaks dead loops. This is a percpu
74 counter, since when we enter the first ndo_xmit(), cpu migration is
75 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
76
77 2. Networking dead loops would not kill routers, but would really
78 kill network. IP hop limit plays role of "t->recursion" in this case,
79 if we copy it from packet being encapsulated to upper header.
80 It is very good solution, but it introduces two problems:
81
82 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
83 do not work over tunnels.
84 - traceroute does not work. I planned to relay ICMP from tunnel,
85 so that this problem would be solved and traceroute output
86 would be even more informative. This idea appeared to be wrong:
87 only Linux complies to rfc1812 now (yes, guys, Linux is the only
88 true router now :-)), all routers (at least, in neighbourhood of mine)
89 return only 8 bytes of payload. It is the end.
90
91 Hence, if we want that OSPF worked or traceroute said something reasonable,
92 we should search for another solution.
93
94 One of them is to parse packet trying to detect inner encapsulation
95 made by our node. It is difficult or even impossible, especially,
96 taking into account fragmentation. To be short, ttl is not a solution at all.
97
98 Current solution: The solution was UNEXPECTEDLY SIMPLE.
99 We force DF flag on tunnels with preconfigured hop limit,
100 that is ALL. :-) Well, it does not remove the problem completely,
101 but exponential growth of network traffic is changed to linear
102 (branches, that exceed pmtu are pruned) and tunnel mtu
103 rapidly degrades to value <68, where looping stops.
104 Yes, it is not good if there exists a router in the loop,
105 which does not force DF, even when encapsulating packets have DF set.
106 But it is not our problem! Nobody could accuse us, we made
107 all that we could make. Even if it is your gated who injected
108 fatal route to network, even if it were you who configured
109 fatal static route: you are innocent. :-)
110
111 Alexey Kuznetsov.
112 */
113
114 static bool log_ecn_error = true;
115 module_param(log_ecn_error, bool, 0644);
116 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
117
118 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
119 static int ipgre_tunnel_init(struct net_device *dev);
120
121 static int ipgre_net_id __read_mostly;
122 static int gre_tap_net_id __read_mostly;
123
124 static __sum16 check_checksum(struct sk_buff *skb)
125 {
126 __sum16 csum = 0;
127
128 switch (skb->ip_summed) {
129 case CHECKSUM_COMPLETE:
130 csum = csum_fold(skb->csum);
131
132 if (!csum)
133 break;
134 /* Fall through. */
135
136 case CHECKSUM_NONE:
137 skb->csum = 0;
138 csum = __skb_checksum_complete(skb);
139 skb->ip_summed = CHECKSUM_COMPLETE;
140 break;
141 }
142
143 return csum;
144 }
145
146 static int ip_gre_calc_hlen(__be16 o_flags)
147 {
148 int addend = 4;
149
150 if (o_flags&TUNNEL_CSUM)
151 addend += 4;
152 if (o_flags&TUNNEL_KEY)
153 addend += 4;
154 if (o_flags&TUNNEL_SEQ)
155 addend += 4;
156 return addend;
157 }
158
159 static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
160 bool *csum_err, int *hdr_len)
161 {
162 unsigned int ip_hlen = ip_hdrlen(skb);
163 const struct gre_base_hdr *greh;
164 __be32 *options;
165
166 if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
167 return -EINVAL;
168
169 greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
170 if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
171 return -EINVAL;
172
173 tpi->flags = gre_flags_to_tnl_flags(greh->flags);
174 *hdr_len = ip_gre_calc_hlen(tpi->flags);
175
176 if (!pskb_may_pull(skb, *hdr_len))
177 return -EINVAL;
178
179 greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
180
181 tpi->proto = greh->protocol;
182
183 options = (__be32 *)(greh + 1);
184 if (greh->flags & GRE_CSUM) {
185 if (check_checksum(skb)) {
186 *csum_err = true;
187 return -EINVAL;
188 }
189 options++;
190 }
191
192 if (greh->flags & GRE_KEY) {
193 tpi->key = *options;
194 options++;
195 } else
196 tpi->key = 0;
197
198 if (unlikely(greh->flags & GRE_SEQ)) {
199 tpi->seq = *options;
200 options++;
201 } else
202 tpi->seq = 0;
203
204 /* WCCP version 1 and 2 protocol decoding.
205 * - Change protocol to IP
206 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
207 */
208 if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
209 tpi->proto = htons(ETH_P_IP);
210 if ((*(u8 *)options & 0xF0) != 0x40) {
211 *hdr_len += 4;
212 if (!pskb_may_pull(skb, *hdr_len))
213 return -EINVAL;
214 }
215 }
216
217 return 0;
218 }
219
220 static void ipgre_err(struct sk_buff *skb, u32 info)
221 {
222
223 /* All the routers (except for Linux) return only
224 8 bytes of packet payload. It means, that precise relaying of
225 ICMP in the real Internet is absolutely infeasible.
226
227 Moreover, Cisco "wise men" put GRE key to the third word
228 in GRE header. It makes impossible maintaining even soft
229 state for keyed GRE tunnels with enabled checksum. Tell
230 them "thank you".
231
232 Well, I wonder, rfc1812 was written by Cisco employee,
233 what the hell these idiots break standards established
234 by themselves???
235 */
236 struct net *net = dev_net(skb->dev);
237 struct ip_tunnel_net *itn;
238 const struct iphdr *iph;
239 const int type = icmp_hdr(skb)->type;
240 const int code = icmp_hdr(skb)->code;
241 struct ip_tunnel *t;
242 struct tnl_ptk_info tpi;
243 int hdr_len;
244 bool csum_err = false;
245
246 if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) {
247 if (!csum_err) /* ignore csum errors. */
248 return;
249 }
250
251 switch (type) {
252 default:
253 case ICMP_PARAMETERPROB:
254 return;
255
256 case ICMP_DEST_UNREACH:
257 switch (code) {
258 case ICMP_SR_FAILED:
259 case ICMP_PORT_UNREACH:
260 /* Impossible event. */
261 return;
262 default:
263 /* All others are translated to HOST_UNREACH.
264 rfc2003 contains "deep thoughts" about NET_UNREACH,
265 I believe they are just ether pollution. --ANK
266 */
267 break;
268 }
269 break;
270 case ICMP_TIME_EXCEEDED:
271 if (code != ICMP_EXC_TTL)
272 return;
273 break;
274
275 case ICMP_REDIRECT:
276 break;
277 }
278
279 if (tpi.proto == htons(ETH_P_TEB))
280 itn = net_generic(net, gre_tap_net_id);
281 else
282 itn = net_generic(net, ipgre_net_id);
283
284 iph = (const struct iphdr *)skb->data;
285 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
286 iph->daddr, iph->saddr, tpi.key);
287
288 if (t == NULL)
289 return;
290
291 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
292 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
293 t->parms.link, 0, IPPROTO_GRE, 0);
294 return;
295 }
296 if (type == ICMP_REDIRECT) {
297 ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
298 IPPROTO_GRE, 0);
299 return;
300 }
301 if (t->parms.iph.daddr == 0 ||
302 ipv4_is_multicast(t->parms.iph.daddr))
303 return;
304
305 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
306 return;
307
308 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
309 t->err_count++;
310 else
311 t->err_count = 1;
312 t->err_time = jiffies;
313 }
314
315 static int ipgre_rcv(struct sk_buff *skb)
316 {
317 struct net *net = dev_net(skb->dev);
318 struct ip_tunnel_net *itn;
319 const struct iphdr *iph;
320 struct ip_tunnel *tunnel;
321 struct tnl_ptk_info tpi;
322 int hdr_len;
323 bool csum_err = false;
324
325 if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0)
326 goto drop;
327
328 if (tpi.proto == htons(ETH_P_TEB))
329 itn = net_generic(net, gre_tap_net_id);
330 else
331 itn = net_generic(net, ipgre_net_id);
332
333 iph = ip_hdr(skb);
334 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
335 iph->saddr, iph->daddr, tpi.key);
336
337 if (tunnel) {
338 ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
339 return 0;
340 }
341 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
342 drop:
343 kfree_skb(skb);
344 return 0;
345 }
346
347 static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff *skb)
348 {
349 int err;
350
351 if (skb_is_gso(skb)) {
352 err = skb_unclone(skb, GFP_ATOMIC);
353 if (unlikely(err))
354 goto error;
355 skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
356 return skb;
357 } else if (skb->ip_summed == CHECKSUM_PARTIAL &&
358 tunnel->parms.o_flags&TUNNEL_CSUM) {
359 err = skb_checksum_help(skb);
360 if (unlikely(err))
361 goto error;
362 } else if (skb->ip_summed != CHECKSUM_PARTIAL)
363 skb->ip_summed = CHECKSUM_NONE;
364
365 return skb;
366
367 error:
368 kfree_skb(skb);
369 return ERR_PTR(err);
370 }
371
372 static struct sk_buff *gre_build_header(struct sk_buff *skb,
373 const struct tnl_ptk_info *tpi,
374 int hdr_len)
375 {
376 struct gre_base_hdr *greh;
377
378 skb_push(skb, hdr_len);
379
380 greh = (struct gre_base_hdr *)skb->data;
381 greh->flags = tnl_flags_to_gre_flags(tpi->flags);
382 greh->protocol = tpi->proto;
383
384 if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
385 __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
386
387 if (tpi->flags&TUNNEL_SEQ) {
388 *ptr = tpi->seq;
389 ptr--;
390 }
391 if (tpi->flags&TUNNEL_KEY) {
392 *ptr = tpi->key;
393 ptr--;
394 }
395 if (tpi->flags&TUNNEL_CSUM &&
396 !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
397 *(__sum16 *)ptr = 0;
398 *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
399 skb->len, 0));
400 }
401 }
402
403 return skb;
404 }
405
406 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
407 const struct iphdr *tnl_params,
408 __be16 proto)
409 {
410 struct ip_tunnel *tunnel = netdev_priv(dev);
411 struct tnl_ptk_info tpi;
412
413 if (likely(!skb->encapsulation)) {
414 skb_reset_inner_headers(skb);
415 skb->encapsulation = 1;
416 }
417
418 tpi.flags = tunnel->parms.o_flags;
419 tpi.proto = proto;
420 tpi.key = tunnel->parms.o_key;
421 if (tunnel->parms.o_flags & TUNNEL_SEQ)
422 tunnel->o_seqno++;
423 tpi.seq = htonl(tunnel->o_seqno);
424
425 /* Push GRE header. */
426 skb = gre_build_header(skb, &tpi, tunnel->hlen);
427 if (unlikely(!skb)) {
428 dev->stats.tx_dropped++;
429 return;
430 }
431
432 ip_tunnel_xmit(skb, dev, tnl_params);
433 }
434
435 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
436 struct net_device *dev)
437 {
438 struct ip_tunnel *tunnel = netdev_priv(dev);
439 const struct iphdr *tnl_params;
440
441 skb = handle_offloads(tunnel, skb);
442 if (IS_ERR(skb))
443 goto out;
444
445 if (dev->header_ops) {
446 /* Need space for new headers */
447 if (skb_cow_head(skb, dev->needed_headroom -
448 (tunnel->hlen + sizeof(struct iphdr))))
449 goto free_skb;
450
451 tnl_params = (const struct iphdr *)skb->data;
452
453 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
454 * to gre header.
455 */
456 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
457 } else {
458 if (skb_cow_head(skb, dev->needed_headroom))
459 goto free_skb;
460
461 tnl_params = &tunnel->parms.iph;
462 }
463
464 __gre_xmit(skb, dev, tnl_params, skb->protocol);
465
466 return NETDEV_TX_OK;
467
468 free_skb:
469 dev_kfree_skb(skb);
470 out:
471 dev->stats.tx_dropped++;
472 return NETDEV_TX_OK;
473 }
474
475 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
476 struct net_device *dev)
477 {
478 struct ip_tunnel *tunnel = netdev_priv(dev);
479
480 skb = handle_offloads(tunnel, skb);
481 if (IS_ERR(skb))
482 goto out;
483
484 if (skb_cow_head(skb, dev->needed_headroom))
485 goto free_skb;
486
487 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
488
489 return NETDEV_TX_OK;
490
491 free_skb:
492 dev_kfree_skb(skb);
493 out:
494 dev->stats.tx_dropped++;
495 return NETDEV_TX_OK;
496 }
497
498 static int ipgre_tunnel_ioctl(struct net_device *dev,
499 struct ifreq *ifr, int cmd)
500 {
501 int err = 0;
502 struct ip_tunnel_parm p;
503
504 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
505 return -EFAULT;
506 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
507 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
508 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) {
509 return -EINVAL;
510 }
511 p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
512 p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
513
514 err = ip_tunnel_ioctl(dev, &p, cmd);
515 if (err)
516 return err;
517
518 p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
519 p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
520
521 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
522 return -EFAULT;
523 return 0;
524 }
525
526 /* Nice toy. Unfortunately, useless in real life :-)
527 It allows to construct virtual multiprotocol broadcast "LAN"
528 over the Internet, provided multicast routing is tuned.
529
530
531 I have no idea was this bicycle invented before me,
532 so that I had to set ARPHRD_IPGRE to a random value.
533 I have an impression, that Cisco could make something similar,
534 but this feature is apparently missing in IOS<=11.2(8).
535
536 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
537 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
538
539 ping -t 255 224.66.66.66
540
541 If nobody answers, mbone does not work.
542
543 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
544 ip addr add 10.66.66.<somewhat>/24 dev Universe
545 ifconfig Universe up
546 ifconfig Universe add fe80::<Your_real_addr>/10
547 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
548 ftp 10.66.66.66
549 ...
550 ftp fec0:6666:6666::193.233.7.65
551 ...
552 */
553 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
554 unsigned short type,
555 const void *daddr, const void *saddr, unsigned int len)
556 {
557 struct ip_tunnel *t = netdev_priv(dev);
558 struct iphdr *iph;
559 struct gre_base_hdr *greh;
560
561 iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
562 greh = (struct gre_base_hdr *)(iph+1);
563 greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
564 greh->protocol = htons(type);
565
566 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
567
568 /* Set the source hardware address. */
569 if (saddr)
570 memcpy(&iph->saddr, saddr, 4);
571 if (daddr)
572 memcpy(&iph->daddr, daddr, 4);
573 if (iph->daddr)
574 return t->hlen;
575
576 return -(t->hlen + sizeof(*iph));
577 }
578
579 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
580 {
581 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
582 memcpy(haddr, &iph->saddr, 4);
583 return 4;
584 }
585
586 static const struct header_ops ipgre_header_ops = {
587 .create = ipgre_header,
588 .parse = ipgre_header_parse,
589 };
590
591 #ifdef CONFIG_NET_IPGRE_BROADCAST
592 static int ipgre_open(struct net_device *dev)
593 {
594 struct ip_tunnel *t = netdev_priv(dev);
595
596 if (ipv4_is_multicast(t->parms.iph.daddr)) {
597 struct flowi4 fl4;
598 struct rtable *rt;
599
600 rt = ip_route_output_gre(dev_net(dev), &fl4,
601 t->parms.iph.daddr,
602 t->parms.iph.saddr,
603 t->parms.o_key,
604 RT_TOS(t->parms.iph.tos),
605 t->parms.link);
606 if (IS_ERR(rt))
607 return -EADDRNOTAVAIL;
608 dev = rt->dst.dev;
609 ip_rt_put(rt);
610 if (__in_dev_get_rtnl(dev) == NULL)
611 return -EADDRNOTAVAIL;
612 t->mlink = dev->ifindex;
613 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
614 }
615 return 0;
616 }
617
618 static int ipgre_close(struct net_device *dev)
619 {
620 struct ip_tunnel *t = netdev_priv(dev);
621
622 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
623 struct in_device *in_dev;
624 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
625 if (in_dev)
626 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
627 }
628 return 0;
629 }
630 #endif
631
632 static const struct net_device_ops ipgre_netdev_ops = {
633 .ndo_init = ipgre_tunnel_init,
634 .ndo_uninit = ip_tunnel_uninit,
635 #ifdef CONFIG_NET_IPGRE_BROADCAST
636 .ndo_open = ipgre_open,
637 .ndo_stop = ipgre_close,
638 #endif
639 .ndo_start_xmit = ipgre_xmit,
640 .ndo_do_ioctl = ipgre_tunnel_ioctl,
641 .ndo_change_mtu = ip_tunnel_change_mtu,
642 .ndo_get_stats64 = ip_tunnel_get_stats64,
643 };
644
/* Feature bits advertised by every GRE device created in this file. */
#define GRE_FEATURES	(NETIF_F_HW_CSUM |	\
			 NETIF_F_HIGHDMA |	\
			 NETIF_F_FRAGLIST |	\
			 NETIF_F_SG)
649
650 static void ipgre_tunnel_setup(struct net_device *dev)
651 {
652 dev->netdev_ops = &ipgre_netdev_ops;
653 ip_tunnel_setup(dev, ipgre_net_id);
654 }
655
656 static void __gre_tunnel_init(struct net_device *dev)
657 {
658 struct ip_tunnel *tunnel;
659
660 tunnel = netdev_priv(dev);
661 tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
662 tunnel->parms.iph.protocol = IPPROTO_GRE;
663
664 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
665 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
666
667 dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES;
668 dev->hw_features |= GRE_FEATURES;
669
670 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
671 /* TCP offload with GRE SEQ is not supported. */
672 dev->features |= NETIF_F_GSO_SOFTWARE;
673 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
674 /* Can use a lockless transmit, unless we generate
675 * output sequences
676 */
677 dev->features |= NETIF_F_LLTX;
678 }
679 }
680
681 static int ipgre_tunnel_init(struct net_device *dev)
682 {
683 struct ip_tunnel *tunnel = netdev_priv(dev);
684 struct iphdr *iph = &tunnel->parms.iph;
685
686 __gre_tunnel_init(dev);
687
688 memcpy(dev->dev_addr, &iph->saddr, 4);
689 memcpy(dev->broadcast, &iph->daddr, 4);
690
691 dev->type = ARPHRD_IPGRE;
692 dev->flags = IFF_NOARP;
693 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
694 dev->addr_len = 4;
695
696 if (iph->daddr) {
697 #ifdef CONFIG_NET_IPGRE_BROADCAST
698 if (ipv4_is_multicast(iph->daddr)) {
699 if (!iph->saddr)
700 return -EINVAL;
701 dev->flags = IFF_BROADCAST;
702 dev->header_ops = &ipgre_header_ops;
703 }
704 #endif
705 } else
706 dev->header_ops = &ipgre_header_ops;
707
708 return ip_tunnel_init(dev);
709 }
710
711 static const struct gre_protocol ipgre_protocol = {
712 .handler = ipgre_rcv,
713 .err_handler = ipgre_err,
714 };
715
716 static int __net_init ipgre_init_net(struct net *net)
717 {
718 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
719 }
720
721 static void __net_exit ipgre_exit_net(struct net *net)
722 {
723 struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
724 ip_tunnel_delete_net(itn);
725 }
726
727 static struct pernet_operations ipgre_net_ops = {
728 .init = ipgre_init_net,
729 .exit = ipgre_exit_net,
730 .id = &ipgre_net_id,
731 .size = sizeof(struct ip_tunnel_net),
732 };
733
734 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
735 {
736 __be16 flags;
737
738 if (!data)
739 return 0;
740
741 flags = 0;
742 if (data[IFLA_GRE_IFLAGS])
743 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
744 if (data[IFLA_GRE_OFLAGS])
745 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
746 if (flags & (GRE_VERSION|GRE_ROUTING))
747 return -EINVAL;
748
749 return 0;
750 }
751
752 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
753 {
754 __be32 daddr;
755
756 if (tb[IFLA_ADDRESS]) {
757 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
758 return -EINVAL;
759 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
760 return -EADDRNOTAVAIL;
761 }
762
763 if (!data)
764 goto out;
765
766 if (data[IFLA_GRE_REMOTE]) {
767 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
768 if (!daddr)
769 return -EINVAL;
770 }
771
772 out:
773 return ipgre_tunnel_validate(tb, data);
774 }
775
776 static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
777 struct ip_tunnel_parm *parms)
778 {
779 memset(parms, 0, sizeof(*parms));
780
781 parms->iph.protocol = IPPROTO_GRE;
782
783 if (!data)
784 return;
785
786 if (data[IFLA_GRE_LINK])
787 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
788
789 if (data[IFLA_GRE_IFLAGS])
790 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
791
792 if (data[IFLA_GRE_OFLAGS])
793 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
794
795 if (data[IFLA_GRE_IKEY])
796 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
797
798 if (data[IFLA_GRE_OKEY])
799 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
800
801 if (data[IFLA_GRE_LOCAL])
802 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
803
804 if (data[IFLA_GRE_REMOTE])
805 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
806
807 if (data[IFLA_GRE_TTL])
808 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
809
810 if (data[IFLA_GRE_TOS])
811 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
812
813 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
814 parms->iph.frag_off = htons(IP_DF);
815 }
816
/* ndo_init for "gretap": common GRE setup, then generic tunnel init. */
static int gre_tap_init(struct net_device *dev)
{
	int err;

	__gre_tunnel_init(dev);
	err = ip_tunnel_init(dev);

	return err;
}
823
824 static const struct net_device_ops gre_tap_netdev_ops = {
825 .ndo_init = gre_tap_init,
826 .ndo_uninit = ip_tunnel_uninit,
827 .ndo_start_xmit = gre_tap_xmit,
828 .ndo_set_mac_address = eth_mac_addr,
829 .ndo_validate_addr = eth_validate_addr,
830 .ndo_change_mtu = ip_tunnel_change_mtu,
831 .ndo_get_stats64 = ip_tunnel_get_stats64,
832 };
833
834 static void ipgre_tap_setup(struct net_device *dev)
835 {
836 ether_setup(dev);
837 dev->netdev_ops = &gre_tap_netdev_ops;
838 ip_tunnel_setup(dev, gre_tap_net_id);
839 }
840
841 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
842 struct nlattr *tb[], struct nlattr *data[])
843 {
844 struct ip_tunnel_parm p;
845
846 ipgre_netlink_parms(data, tb, &p);
847 return ip_tunnel_newlink(dev, tb, &p);
848 }
849
850 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
851 struct nlattr *data[])
852 {
853 struct ip_tunnel_parm p;
854
855 ipgre_netlink_parms(data, tb, &p);
856 return ip_tunnel_changelink(dev, tb, &p);
857 }
858
859 static size_t ipgre_get_size(const struct net_device *dev)
860 {
861 return
862 /* IFLA_GRE_LINK */
863 nla_total_size(4) +
864 /* IFLA_GRE_IFLAGS */
865 nla_total_size(2) +
866 /* IFLA_GRE_OFLAGS */
867 nla_total_size(2) +
868 /* IFLA_GRE_IKEY */
869 nla_total_size(4) +
870 /* IFLA_GRE_OKEY */
871 nla_total_size(4) +
872 /* IFLA_GRE_LOCAL */
873 nla_total_size(4) +
874 /* IFLA_GRE_REMOTE */
875 nla_total_size(4) +
876 /* IFLA_GRE_TTL */
877 nla_total_size(1) +
878 /* IFLA_GRE_TOS */
879 nla_total_size(1) +
880 /* IFLA_GRE_PMTUDISC */
881 nla_total_size(1) +
882 0;
883 }
884
885 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
886 {
887 struct ip_tunnel *t = netdev_priv(dev);
888 struct ip_tunnel_parm *p = &t->parms;
889
890 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
891 nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
892 nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
893 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
894 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
895 nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
896 nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
897 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
898 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
899 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
900 !!(p->iph.frag_off & htons(IP_DF))))
901 goto nla_put_failure;
902 return 0;
903
904 nla_put_failure:
905 return -EMSGSIZE;
906 }
907
908 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
909 [IFLA_GRE_LINK] = { .type = NLA_U32 },
910 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
911 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
912 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
913 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
914 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
915 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
916 [IFLA_GRE_TTL] = { .type = NLA_U8 },
917 [IFLA_GRE_TOS] = { .type = NLA_U8 },
918 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
919 };
920
921 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
922 .kind = "gre",
923 .maxtype = IFLA_GRE_MAX,
924 .policy = ipgre_policy,
925 .priv_size = sizeof(struct ip_tunnel),
926 .setup = ipgre_tunnel_setup,
927 .validate = ipgre_tunnel_validate,
928 .newlink = ipgre_newlink,
929 .changelink = ipgre_changelink,
930 .dellink = ip_tunnel_dellink,
931 .get_size = ipgre_get_size,
932 .fill_info = ipgre_fill_info,
933 };
934
935 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
936 .kind = "gretap",
937 .maxtype = IFLA_GRE_MAX,
938 .policy = ipgre_policy,
939 .priv_size = sizeof(struct ip_tunnel),
940 .setup = ipgre_tap_setup,
941 .validate = ipgre_tap_validate,
942 .newlink = ipgre_newlink,
943 .changelink = ipgre_changelink,
944 .dellink = ip_tunnel_dellink,
945 .get_size = ipgre_get_size,
946 .fill_info = ipgre_fill_info,
947 };
948
949 static int __net_init ipgre_tap_init_net(struct net *net)
950 {
951 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL);
952 }
953
954 static void __net_exit ipgre_tap_exit_net(struct net *net)
955 {
956 struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
957 ip_tunnel_delete_net(itn);
958 }
959
960 static struct pernet_operations ipgre_tap_net_ops = {
961 .init = ipgre_tap_init_net,
962 .exit = ipgre_tap_exit_net,
963 .id = &gre_tap_net_id,
964 .size = sizeof(struct ip_tunnel_net),
965 };
966
967 static int __init ipgre_init(void)
968 {
969 int err;
970
971 pr_info("GRE over IPv4 tunneling driver\n");
972
973 err = register_pernet_device(&ipgre_net_ops);
974 if (err < 0)
975 return err;
976
977 err = register_pernet_device(&ipgre_tap_net_ops);
978 if (err < 0)
979 goto pnet_tap_faied;
980
981 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
982 if (err < 0) {
983 pr_info("%s: can't add protocol\n", __func__);
984 goto add_proto_failed;
985 }
986
987 err = rtnl_link_register(&ipgre_link_ops);
988 if (err < 0)
989 goto rtnl_link_failed;
990
991 err = rtnl_link_register(&ipgre_tap_ops);
992 if (err < 0)
993 goto tap_ops_failed;
994
995 return 0;
996
997 tap_ops_failed:
998 rtnl_link_unregister(&ipgre_link_ops);
999 rtnl_link_failed:
1000 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1001 add_proto_failed:
1002 unregister_pernet_device(&ipgre_tap_net_ops);
1003 pnet_tap_faied:
1004 unregister_pernet_device(&ipgre_net_ops);
1005 return err;
1006 }
1007
1008 static void __exit ipgre_fini(void)
1009 {
1010 rtnl_link_unregister(&ipgre_tap_ops);
1011 rtnl_link_unregister(&ipgre_link_ops);
1012 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
1013 pr_info("%s: can't remove protocol\n", __func__);
1014 unregister_pernet_device(&ipgre_tap_net_ops);
1015 unregister_pernet_device(&ipgre_net_ops);
1016 }
1017
1018 module_init(ipgre_init);
1019 module_exit(ipgre_fini);
1020 MODULE_LICENSE("GPL");
1021 MODULE_ALIAS_RTNL_LINK("gre");
1022 MODULE_ALIAS_RTNL_LINK("gretap");
1023 MODULE_ALIAS_NETDEV("gre0");
1024 MODULE_ALIAS_NETDEV("gretap0");