fae5a8459538b720715249ad9900eed40aa916db
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv4 / ip_gre.c
1 /*
2 * Linux NET3: GRE over IP protocol decoder.
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
15 #include <linux/capability.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/slab.h>
20 #include <asm/uaccess.h>
21 #include <linux/skbuff.h>
22 #include <linux/netdevice.h>
23 #include <linux/in.h>
24 #include <linux/tcp.h>
25 #include <linux/udp.h>
26 #include <linux/if_arp.h>
27 #include <linux/mroute.h>
28 #include <linux/init.h>
29 #include <linux/in6.h>
30 #include <linux/inetdevice.h>
31 #include <linux/igmp.h>
32 #include <linux/netfilter_ipv4.h>
33 #include <linux/etherdevice.h>
34 #include <linux/if_ether.h>
35
36 #include <net/sock.h>
37 #include <net/ip.h>
38 #include <net/icmp.h>
39 #include <net/protocol.h>
40 #include <net/ip_tunnels.h>
41 #include <net/arp.h>
42 #include <net/checksum.h>
43 #include <net/dsfield.h>
44 #include <net/inet_ecn.h>
45 #include <net/xfrm.h>
46 #include <net/net_namespace.h>
47 #include <net/netns/generic.h>
48 #include <net/rtnetlink.h>
49 #include <net/gre.h>
50
51 #if IS_ENABLED(CONFIG_IPV6)
52 #include <net/ipv6.h>
53 #include <net/ip6_fib.h>
54 #include <net/ip6_route.h>
55 #endif
56
57 /*
58 Problems & solutions
59 --------------------
60
61 1. The most important issue is detecting local dead loops.
62 They would cause complete host lockup in transmit, which
63 would be "resolved" by stack overflow or, if queueing is enabled,
64 with infinite looping in net_bh.
65
66 We cannot track such dead loops during route installation,
67 it is an infeasible task. The most general solution would be
68 to keep skb->encapsulation counter (sort of local ttl),
69 and silently drop packet when it expires. It is a good
70 solution, but it supposes maintaining new variable in ALL
71 skb, even if no tunneling is used.
72
73 Current solution: xmit_recursion breaks dead loops. This is a percpu
74 counter, since when we enter the first ndo_xmit(), cpu migration is
75 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
76
77 2. Networking dead loops would not kill routers, but would really
78 kill network. IP hop limit plays role of "t->recursion" in this case,
79 if we copy it from packet being encapsulated to upper header.
80 It is very good solution, but it introduces two problems:
81
82 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
83 do not work over tunnels.
84 - traceroute does not work. I planned to relay ICMP from tunnel,
85 so that this problem would be solved and traceroute output
86 would even more informative. This idea appeared to be wrong:
87 only Linux complies to rfc1812 now (yes, guys, Linux is the only
88 true router now :-)), all routers (at least, in neighbourhood of mine)
89 return only 8 bytes of payload. It is the end.
90
91 Hence, if we want that OSPF worked or traceroute said something reasonable,
92 we should search for another solution.
93
94 One of them is to parse packet trying to detect inner encapsulation
95 made by our node. It is difficult or even impossible, especially,
96 taking into account fragmentation. To be short, ttl is not a solution at all.
97
98 Current solution: The solution was UNEXPECTEDLY SIMPLE.
99 We force DF flag on tunnels with preconfigured hop limit,
100 that is ALL. :-) Well, it does not remove the problem completely,
101 but exponential growth of network traffic is changed to linear
102 (branches, that exceed pmtu are pruned) and tunnel mtu
103 rapidly degrades to value <68, where looping stops.
104 Yes, it is not good if there exists a router in the loop,
105 which does not force DF, even when encapsulating packets have DF set.
106 But it is not our problem! Nobody could accuse us, we made
107 all that we could make. Even if it is your gated who injected
108 fatal route to network, even if it were you who configured
109 fatal static route: you are innocent. :-)
110
111 Alexey Kuznetsov.
112 */
113
114 static bool log_ecn_error = true;
115 module_param(log_ecn_error, bool, 0644);
116 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
117
118 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
119 static int ipgre_tunnel_init(struct net_device *dev);
120
121 static int ipgre_net_id __read_mostly;
122 static int gre_tap_net_id __read_mostly;
123
124 static __sum16 check_checksum(struct sk_buff *skb)
125 {
126 __sum16 csum = 0;
127
128 switch (skb->ip_summed) {
129 case CHECKSUM_COMPLETE:
130 csum = csum_fold(skb->csum);
131
132 if (!csum)
133 break;
134 /* Fall through. */
135
136 case CHECKSUM_NONE:
137 skb->csum = 0;
138 csum = __skb_checksum_complete(skb);
139 skb->ip_summed = CHECKSUM_COMPLETE;
140 break;
141 }
142
143 return csum;
144 }
145
146 static int ip_gre_calc_hlen(__be16 o_flags)
147 {
148 int addend = 4;
149
150 if (o_flags&TUNNEL_CSUM)
151 addend += 4;
152 if (o_flags&TUNNEL_KEY)
153 addend += 4;
154 if (o_flags&TUNNEL_SEQ)
155 addend += 4;
156 return addend;
157 }
158
159 static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
160 bool *csum_err, int *hdr_len)
161 {
162 unsigned int ip_hlen = ip_hdrlen(skb);
163 const struct gre_base_hdr *greh;
164 __be32 *options;
165
166 if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
167 return -EINVAL;
168
169 greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
170 if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
171 return -EINVAL;
172
173 tpi->flags = gre_flags_to_tnl_flags(greh->flags);
174 *hdr_len = ip_gre_calc_hlen(tpi->flags);
175
176 if (!pskb_may_pull(skb, *hdr_len))
177 return -EINVAL;
178
179 greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
180
181 tpi->proto = greh->protocol;
182
183 options = (__be32 *)(greh + 1);
184 if (greh->flags & GRE_CSUM) {
185 if (check_checksum(skb)) {
186 *csum_err = true;
187 return -EINVAL;
188 }
189 options++;
190 }
191
192 if (greh->flags & GRE_KEY) {
193 tpi->key = *options;
194 options++;
195 } else
196 tpi->key = 0;
197
198 if (unlikely(greh->flags & GRE_SEQ)) {
199 tpi->seq = *options;
200 options++;
201 } else
202 tpi->seq = 0;
203
204 /* WCCP version 1 and 2 protocol decoding.
205 * - Change protocol to IP
206 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
207 */
208 if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
209 tpi->proto = htons(ETH_P_IP);
210 if ((*(u8 *)options & 0xF0) != 0x40) {
211 *hdr_len += 4;
212 if (!pskb_may_pull(skb, *hdr_len))
213 return -EINVAL;
214 }
215 }
216
217 return 0;
218 }
219
220 static void ipgre_err(struct sk_buff *skb, u32 info)
221 {
222
223 /* All the routers (except for Linux) return only
224 8 bytes of packet payload. It means, that precise relaying of
225 ICMP in the real Internet is absolutely infeasible.
226
227 Moreover, Cisco "wise men" put GRE key to the third word
228 in GRE header. It makes impossible maintaining even soft
229 state for keyed GRE tunnels with enabled checksum. Tell
230 them "thank you".
231
232 Well, I wonder, rfc1812 was written by Cisco employee,
233 what the hell these idiots break standards established
234 by themselves???
235 */
236 struct net *net = dev_net(skb->dev);
237 struct ip_tunnel_net *itn;
238 const struct iphdr *iph;
239 const int type = icmp_hdr(skb)->type;
240 const int code = icmp_hdr(skb)->code;
241 struct ip_tunnel *t;
242 struct tnl_ptk_info tpi;
243 int hdr_len;
244 bool csum_err = false;
245
246 if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) {
247 if (!csum_err) /* ignore csum errors. */
248 return;
249 }
250
251 switch (type) {
252 default:
253 case ICMP_PARAMETERPROB:
254 return;
255
256 case ICMP_DEST_UNREACH:
257 switch (code) {
258 case ICMP_SR_FAILED:
259 case ICMP_PORT_UNREACH:
260 /* Impossible event. */
261 return;
262 default:
263 /* All others are translated to HOST_UNREACH.
264 rfc2003 contains "deep thoughts" about NET_UNREACH,
265 I believe they are just ether pollution. --ANK
266 */
267 break;
268 }
269 break;
270 case ICMP_TIME_EXCEEDED:
271 if (code != ICMP_EXC_TTL)
272 return;
273 break;
274
275 case ICMP_REDIRECT:
276 break;
277 }
278
279 if (tpi.proto == htons(ETH_P_TEB))
280 itn = net_generic(net, gre_tap_net_id);
281 else
282 itn = net_generic(net, ipgre_net_id);
283
284 iph = (const struct iphdr *)skb->data;
285 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
286 iph->daddr, iph->saddr, tpi.key);
287
288 if (t == NULL)
289 return;
290
291 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
292 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
293 t->parms.link, 0, IPPROTO_GRE, 0);
294 return;
295 }
296 if (type == ICMP_REDIRECT) {
297 ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
298 IPPROTO_GRE, 0);
299 return;
300 }
301 if (t->parms.iph.daddr == 0 ||
302 ipv4_is_multicast(t->parms.iph.daddr))
303 return;
304
305 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
306 return;
307
308 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
309 t->err_count++;
310 else
311 t->err_count = 1;
312 t->err_time = jiffies;
313 }
314
315 static int ipgre_rcv(struct sk_buff *skb)
316 {
317 struct net *net = dev_net(skb->dev);
318 struct ip_tunnel_net *itn;
319 const struct iphdr *iph;
320 struct ip_tunnel *tunnel;
321 struct tnl_ptk_info tpi;
322 int hdr_len;
323 bool csum_err = false;
324
325 if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0)
326 goto drop;
327
328 if (tpi.proto == htons(ETH_P_TEB))
329 itn = net_generic(net, gre_tap_net_id);
330 else
331 itn = net_generic(net, ipgre_net_id);
332
333 iph = ip_hdr(skb);
334 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
335 iph->saddr, iph->daddr, tpi.key);
336
337 if (tunnel) {
338 skb_pop_mac_header(skb);
339 ip_tunnel_rcv(tunnel, skb, &tpi, hdr_len, log_ecn_error);
340 return 0;
341 }
342 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
343 drop:
344 kfree_skb(skb);
345 return 0;
346 }
347
348 static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff *skb)
349 {
350 int err;
351
352 if (skb_is_gso(skb)) {
353 err = skb_unclone(skb, GFP_ATOMIC);
354 if (unlikely(err))
355 goto error;
356 skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
357 return skb;
358 } else if (skb->ip_summed == CHECKSUM_PARTIAL &&
359 tunnel->parms.o_flags&TUNNEL_CSUM) {
360 err = skb_checksum_help(skb);
361 if (unlikely(err))
362 goto error;
363 } else if (skb->ip_summed != CHECKSUM_PARTIAL)
364 skb->ip_summed = CHECKSUM_NONE;
365
366 return skb;
367
368 error:
369 kfree_skb(skb);
370 return ERR_PTR(err);
371 }
372
373 static struct sk_buff *gre_build_header(struct sk_buff *skb,
374 const struct tnl_ptk_info *tpi,
375 int hdr_len)
376 {
377 struct gre_base_hdr *greh;
378
379 skb_push(skb, hdr_len);
380
381 greh = (struct gre_base_hdr *)skb->data;
382 greh->flags = tnl_flags_to_gre_flags(tpi->flags);
383 greh->protocol = tpi->proto;
384
385 if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
386 __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
387
388 if (tpi->flags&TUNNEL_SEQ) {
389 *ptr = tpi->seq;
390 ptr--;
391 }
392 if (tpi->flags&TUNNEL_KEY) {
393 *ptr = tpi->key;
394 ptr--;
395 }
396 if (tpi->flags&TUNNEL_CSUM &&
397 !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
398 *(__sum16 *)ptr = 0;
399 *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
400 skb->len, 0));
401 }
402 }
403
404 return skb;
405 }
406
407 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
408 const struct iphdr *tnl_params,
409 __be16 proto)
410 {
411 struct ip_tunnel *tunnel = netdev_priv(dev);
412 struct tnl_ptk_info tpi;
413
414 if (likely(!skb->encapsulation)) {
415 skb_reset_inner_headers(skb);
416 skb->encapsulation = 1;
417 }
418
419 tpi.flags = tunnel->parms.o_flags;
420 tpi.proto = proto;
421 tpi.key = tunnel->parms.o_key;
422 if (tunnel->parms.o_flags & TUNNEL_SEQ)
423 tunnel->o_seqno++;
424 tpi.seq = htonl(tunnel->o_seqno);
425
426 /* Push GRE header. */
427 skb = gre_build_header(skb, &tpi, tunnel->hlen);
428 if (unlikely(!skb)) {
429 dev->stats.tx_dropped++;
430 return;
431 }
432
433 ip_tunnel_xmit(skb, dev, tnl_params);
434 }
435
436 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
437 struct net_device *dev)
438 {
439 struct ip_tunnel *tunnel = netdev_priv(dev);
440 const struct iphdr *tnl_params;
441
442 skb = handle_offloads(tunnel, skb);
443 if (IS_ERR(skb))
444 goto out;
445
446 if (dev->header_ops) {
447 /* Need space for new headers */
448 if (skb_cow_head(skb, dev->needed_headroom -
449 (tunnel->hlen + sizeof(struct iphdr))))
450 goto free_skb;
451
452 tnl_params = (const struct iphdr *)skb->data;
453
454 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
455 * to gre header.
456 */
457 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
458 } else {
459 if (skb_cow_head(skb, dev->needed_headroom))
460 goto free_skb;
461
462 tnl_params = &tunnel->parms.iph;
463 }
464
465 __gre_xmit(skb, dev, tnl_params, skb->protocol);
466
467 return NETDEV_TX_OK;
468
469 free_skb:
470 dev_kfree_skb(skb);
471 out:
472 dev->stats.tx_dropped++;
473 return NETDEV_TX_OK;
474 }
475
476 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
477 struct net_device *dev)
478 {
479 struct ip_tunnel *tunnel = netdev_priv(dev);
480
481 skb = handle_offloads(tunnel, skb);
482 if (IS_ERR(skb))
483 goto out;
484
485 if (skb_cow_head(skb, dev->needed_headroom))
486 goto free_skb;
487
488 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
489
490 return NETDEV_TX_OK;
491
492 free_skb:
493 dev_kfree_skb(skb);
494 out:
495 dev->stats.tx_dropped++;
496 return NETDEV_TX_OK;
497 }
498
499 static int ipgre_tunnel_ioctl(struct net_device *dev,
500 struct ifreq *ifr, int cmd)
501 {
502 int err = 0;
503 struct ip_tunnel_parm p;
504
505 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
506 return -EFAULT;
507 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
508 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
509 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
510 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
511 return -EINVAL;
512 }
513 p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
514 p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
515
516 err = ip_tunnel_ioctl(dev, &p, cmd);
517 if (err)
518 return err;
519
520 p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
521 p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
522
523 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
524 return -EFAULT;
525 return 0;
526 }
527
528 /* Nice toy. Unfortunately, useless in real life :-)
529 It allows to construct virtual multiprotocol broadcast "LAN"
530 over the Internet, provided multicast routing is tuned.
531
532
533 I have no idea whether this bicycle was invented before me,
534 so that I had to set ARPHRD_IPGRE to a random value.
535 I have an impression, that Cisco could make something similar,
536 but this feature is apparently missing in IOS<=11.2(8).
537
538 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
539 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
540
541 ping -t 255 224.66.66.66
542
543 If nobody answers, mbone does not work.
544
545 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
546 ip addr add 10.66.66.<somewhat>/24 dev Universe
547 ifconfig Universe up
548 ifconfig Universe add fe80::<Your_real_addr>/10
549 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
550 ftp 10.66.66.66
551 ...
552 ftp fec0:6666:6666::193.233.7.65
553 ...
554 */
555 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
556 unsigned short type,
557 const void *daddr, const void *saddr, unsigned int len)
558 {
559 struct ip_tunnel *t = netdev_priv(dev);
560 struct iphdr *iph;
561 struct gre_base_hdr *greh;
562
563 iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
564 greh = (struct gre_base_hdr *)(iph+1);
565 greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
566 greh->protocol = htons(type);
567
568 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
569
570 /* Set the source hardware address. */
571 if (saddr)
572 memcpy(&iph->saddr, saddr, 4);
573 if (daddr)
574 memcpy(&iph->daddr, daddr, 4);
575 if (iph->daddr)
576 return t->hlen + sizeof(*iph);
577
578 return -(t->hlen + sizeof(*iph));
579 }
580
581 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
582 {
583 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
584 memcpy(haddr, &iph->saddr, 4);
585 return 4;
586 }
587
588 static const struct header_ops ipgre_header_ops = {
589 .create = ipgre_header,
590 .parse = ipgre_header_parse,
591 };
592
593 #ifdef CONFIG_NET_IPGRE_BROADCAST
594 static int ipgre_open(struct net_device *dev)
595 {
596 struct ip_tunnel *t = netdev_priv(dev);
597
598 if (ipv4_is_multicast(t->parms.iph.daddr)) {
599 struct flowi4 fl4;
600 struct rtable *rt;
601
602 rt = ip_route_output_gre(dev_net(dev), &fl4,
603 t->parms.iph.daddr,
604 t->parms.iph.saddr,
605 t->parms.o_key,
606 RT_TOS(t->parms.iph.tos),
607 t->parms.link);
608 if (IS_ERR(rt))
609 return -EADDRNOTAVAIL;
610 dev = rt->dst.dev;
611 ip_rt_put(rt);
612 if (__in_dev_get_rtnl(dev) == NULL)
613 return -EADDRNOTAVAIL;
614 t->mlink = dev->ifindex;
615 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
616 }
617 return 0;
618 }
619
620 static int ipgre_close(struct net_device *dev)
621 {
622 struct ip_tunnel *t = netdev_priv(dev);
623
624 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
625 struct in_device *in_dev;
626 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
627 if (in_dev)
628 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
629 }
630 return 0;
631 }
632 #endif
633
634 static const struct net_device_ops ipgre_netdev_ops = {
635 .ndo_init = ipgre_tunnel_init,
636 .ndo_uninit = ip_tunnel_uninit,
637 #ifdef CONFIG_NET_IPGRE_BROADCAST
638 .ndo_open = ipgre_open,
639 .ndo_stop = ipgre_close,
640 #endif
641 .ndo_start_xmit = ipgre_xmit,
642 .ndo_do_ioctl = ipgre_tunnel_ioctl,
643 .ndo_change_mtu = ip_tunnel_change_mtu,
644 .ndo_get_stats64 = ip_tunnel_get_stats64,
645 };
646
/* Feature bits set on every GRE device by __gre_tunnel_init(). */
#define GRE_FEATURES (NETIF_F_HW_CSUM	| \
		      NETIF_F_HIGHDMA	| \
		      NETIF_F_FRAGLIST	| \
		      NETIF_F_SG)
651
652 static void ipgre_tunnel_setup(struct net_device *dev)
653 {
654 dev->netdev_ops = &ipgre_netdev_ops;
655 dev->type = ARPHRD_IPGRE;
656 ip_tunnel_setup(dev, ipgre_net_id);
657 }
658
659 static void __gre_tunnel_init(struct net_device *dev)
660 {
661 struct ip_tunnel *tunnel;
662
663 tunnel = netdev_priv(dev);
664 tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
665 tunnel->parms.iph.protocol = IPPROTO_GRE;
666
667 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
668 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
669
670 dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES;
671 dev->hw_features |= GRE_FEATURES;
672
673 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
674 /* TCP offload with GRE SEQ is not supported. */
675 dev->features |= NETIF_F_GSO_SOFTWARE;
676 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
677 /* Can use a lockless transmit, unless we generate
678 * output sequences
679 */
680 dev->features |= NETIF_F_LLTX;
681 }
682 }
683
684 static int ipgre_tunnel_init(struct net_device *dev)
685 {
686 struct ip_tunnel *tunnel = netdev_priv(dev);
687 struct iphdr *iph = &tunnel->parms.iph;
688
689 __gre_tunnel_init(dev);
690
691 memcpy(dev->dev_addr, &iph->saddr, 4);
692 memcpy(dev->broadcast, &iph->daddr, 4);
693
694 dev->flags = IFF_NOARP;
695 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
696 dev->addr_len = 4;
697
698 if (iph->daddr) {
699 #ifdef CONFIG_NET_IPGRE_BROADCAST
700 if (ipv4_is_multicast(iph->daddr)) {
701 if (!iph->saddr)
702 return -EINVAL;
703 dev->flags = IFF_BROADCAST;
704 dev->header_ops = &ipgre_header_ops;
705 }
706 #endif
707 } else
708 dev->header_ops = &ipgre_header_ops;
709
710 return ip_tunnel_init(dev);
711 }
712
713 static const struct gre_protocol ipgre_protocol = {
714 .handler = ipgre_rcv,
715 .err_handler = ipgre_err,
716 };
717
718 static int __net_init ipgre_init_net(struct net *net)
719 {
720 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
721 }
722
723 static void __net_exit ipgre_exit_net(struct net *net)
724 {
725 struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
726 ip_tunnel_delete_net(itn);
727 }
728
729 static struct pernet_operations ipgre_net_ops = {
730 .init = ipgre_init_net,
731 .exit = ipgre_exit_net,
732 .id = &ipgre_net_id,
733 .size = sizeof(struct ip_tunnel_net),
734 };
735
736 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
737 {
738 __be16 flags;
739
740 if (!data)
741 return 0;
742
743 flags = 0;
744 if (data[IFLA_GRE_IFLAGS])
745 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
746 if (data[IFLA_GRE_OFLAGS])
747 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
748 if (flags & (GRE_VERSION|GRE_ROUTING))
749 return -EINVAL;
750
751 return 0;
752 }
753
754 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
755 {
756 __be32 daddr;
757
758 if (tb[IFLA_ADDRESS]) {
759 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
760 return -EINVAL;
761 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
762 return -EADDRNOTAVAIL;
763 }
764
765 if (!data)
766 goto out;
767
768 if (data[IFLA_GRE_REMOTE]) {
769 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
770 if (!daddr)
771 return -EINVAL;
772 }
773
774 out:
775 return ipgre_tunnel_validate(tb, data);
776 }
777
778 static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
779 struct ip_tunnel_parm *parms)
780 {
781 memset(parms, 0, sizeof(*parms));
782
783 parms->iph.protocol = IPPROTO_GRE;
784
785 if (!data)
786 return;
787
788 if (data[IFLA_GRE_LINK])
789 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
790
791 if (data[IFLA_GRE_IFLAGS])
792 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
793
794 if (data[IFLA_GRE_OFLAGS])
795 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
796
797 if (data[IFLA_GRE_IKEY])
798 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
799
800 if (data[IFLA_GRE_OKEY])
801 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
802
803 if (data[IFLA_GRE_LOCAL])
804 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
805
806 if (data[IFLA_GRE_REMOTE])
807 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
808
809 if (data[IFLA_GRE_TTL])
810 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
811
812 if (data[IFLA_GRE_TOS])
813 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
814
815 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
816 parms->iph.frag_off = htons(IP_DF);
817 }
818
/*
 * ndo_init for "gretap" devices: common GRE initialisation followed by
 * the generic tunnel init, whose result is returned.
 */
static int gre_tap_init(struct net_device *dev)
{
	int err;

	__gre_tunnel_init(dev);
	err = ip_tunnel_init(dev);

	return err;
}
825
826 static const struct net_device_ops gre_tap_netdev_ops = {
827 .ndo_init = gre_tap_init,
828 .ndo_uninit = ip_tunnel_uninit,
829 .ndo_start_xmit = gre_tap_xmit,
830 .ndo_set_mac_address = eth_mac_addr,
831 .ndo_validate_addr = eth_validate_addr,
832 .ndo_change_mtu = ip_tunnel_change_mtu,
833 .ndo_get_stats64 = ip_tunnel_get_stats64,
834 };
835
836 static void ipgre_tap_setup(struct net_device *dev)
837 {
838 ether_setup(dev);
839 dev->netdev_ops = &gre_tap_netdev_ops;
840 ip_tunnel_setup(dev, gre_tap_net_id);
841 }
842
843 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
844 struct nlattr *tb[], struct nlattr *data[])
845 {
846 struct ip_tunnel_parm p;
847
848 ipgre_netlink_parms(data, tb, &p);
849 return ip_tunnel_newlink(dev, tb, &p);
850 }
851
852 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
853 struct nlattr *data[])
854 {
855 struct ip_tunnel_parm p;
856
857 ipgre_netlink_parms(data, tb, &p);
858 return ip_tunnel_changelink(dev, tb, &p);
859 }
860
861 static size_t ipgre_get_size(const struct net_device *dev)
862 {
863 return
864 /* IFLA_GRE_LINK */
865 nla_total_size(4) +
866 /* IFLA_GRE_IFLAGS */
867 nla_total_size(2) +
868 /* IFLA_GRE_OFLAGS */
869 nla_total_size(2) +
870 /* IFLA_GRE_IKEY */
871 nla_total_size(4) +
872 /* IFLA_GRE_OKEY */
873 nla_total_size(4) +
874 /* IFLA_GRE_LOCAL */
875 nla_total_size(4) +
876 /* IFLA_GRE_REMOTE */
877 nla_total_size(4) +
878 /* IFLA_GRE_TTL */
879 nla_total_size(1) +
880 /* IFLA_GRE_TOS */
881 nla_total_size(1) +
882 /* IFLA_GRE_PMTUDISC */
883 nla_total_size(1) +
884 0;
885 }
886
887 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
888 {
889 struct ip_tunnel *t = netdev_priv(dev);
890 struct ip_tunnel_parm *p = &t->parms;
891
892 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
893 nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
894 nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
895 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
896 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
897 nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
898 nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
899 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
900 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
901 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
902 !!(p->iph.frag_off & htons(IP_DF))))
903 goto nla_put_failure;
904 return 0;
905
906 nla_put_failure:
907 return -EMSGSIZE;
908 }
909
910 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
911 [IFLA_GRE_LINK] = { .type = NLA_U32 },
912 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
913 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
914 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
915 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
916 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
917 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
918 [IFLA_GRE_TTL] = { .type = NLA_U8 },
919 [IFLA_GRE_TOS] = { .type = NLA_U8 },
920 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
921 };
922
923 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
924 .kind = "gre",
925 .maxtype = IFLA_GRE_MAX,
926 .policy = ipgre_policy,
927 .priv_size = sizeof(struct ip_tunnel),
928 .setup = ipgre_tunnel_setup,
929 .validate = ipgre_tunnel_validate,
930 .newlink = ipgre_newlink,
931 .changelink = ipgre_changelink,
932 .dellink = ip_tunnel_dellink,
933 .get_size = ipgre_get_size,
934 .fill_info = ipgre_fill_info,
935 };
936
937 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
938 .kind = "gretap",
939 .maxtype = IFLA_GRE_MAX,
940 .policy = ipgre_policy,
941 .priv_size = sizeof(struct ip_tunnel),
942 .setup = ipgre_tap_setup,
943 .validate = ipgre_tap_validate,
944 .newlink = ipgre_newlink,
945 .changelink = ipgre_changelink,
946 .dellink = ip_tunnel_dellink,
947 .get_size = ipgre_get_size,
948 .fill_info = ipgre_fill_info,
949 };
950
951 static int __net_init ipgre_tap_init_net(struct net *net)
952 {
953 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL);
954 }
955
956 static void __net_exit ipgre_tap_exit_net(struct net *net)
957 {
958 struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
959 ip_tunnel_delete_net(itn);
960 }
961
962 static struct pernet_operations ipgre_tap_net_ops = {
963 .init = ipgre_tap_init_net,
964 .exit = ipgre_tap_exit_net,
965 .id = &gre_tap_net_id,
966 .size = sizeof(struct ip_tunnel_net),
967 };
968
969 static int __init ipgre_init(void)
970 {
971 int err;
972
973 pr_info("GRE over IPv4 tunneling driver\n");
974
975 err = register_pernet_device(&ipgre_net_ops);
976 if (err < 0)
977 return err;
978
979 err = register_pernet_device(&ipgre_tap_net_ops);
980 if (err < 0)
981 goto pnet_tap_faied;
982
983 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
984 if (err < 0) {
985 pr_info("%s: can't add protocol\n", __func__);
986 goto add_proto_failed;
987 }
988
989 err = rtnl_link_register(&ipgre_link_ops);
990 if (err < 0)
991 goto rtnl_link_failed;
992
993 err = rtnl_link_register(&ipgre_tap_ops);
994 if (err < 0)
995 goto tap_ops_failed;
996
997 return 0;
998
999 tap_ops_failed:
1000 rtnl_link_unregister(&ipgre_link_ops);
1001 rtnl_link_failed:
1002 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1003 add_proto_failed:
1004 unregister_pernet_device(&ipgre_tap_net_ops);
1005 pnet_tap_faied:
1006 unregister_pernet_device(&ipgre_net_ops);
1007 return err;
1008 }
1009
1010 static void __exit ipgre_fini(void)
1011 {
1012 rtnl_link_unregister(&ipgre_tap_ops);
1013 rtnl_link_unregister(&ipgre_link_ops);
1014 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
1015 pr_info("%s: can't remove protocol\n", __func__);
1016 unregister_pernet_device(&ipgre_tap_net_ops);
1017 unregister_pernet_device(&ipgre_net_ops);
1018 }
1019
1020 module_init(ipgre_init);
1021 module_exit(ipgre_fini);
1022 MODULE_LICENSE("GPL");
1023 MODULE_ALIAS_RTNL_LINK("gre");
1024 MODULE_ALIAS_RTNL_LINK("gretap");
1025 MODULE_ALIAS_NETDEV("gre0");
1026 MODULE_ALIAS_NETDEV("gretap0");