drivers: power: report battery voltage in AOSP compatible format
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv4 / ip_gre.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: GRE over IP protocol decoder.
1da177e4
LT
3 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
afd46503
JP
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
4fc268d2 15#include <linux/capability.h>
1da177e4
LT
16#include <linux/module.h>
17#include <linux/types.h>
1da177e4 18#include <linux/kernel.h>
5a0e3ad6 19#include <linux/slab.h>
1da177e4
LT
20#include <asm/uaccess.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/in.h>
24#include <linux/tcp.h>
25#include <linux/udp.h>
26#include <linux/if_arp.h>
27#include <linux/mroute.h>
28#include <linux/init.h>
29#include <linux/in6.h>
30#include <linux/inetdevice.h>
31#include <linux/igmp.h>
32#include <linux/netfilter_ipv4.h>
e1a80002 33#include <linux/etherdevice.h>
46f25dff 34#include <linux/if_ether.h>
1da177e4
LT
35
36#include <net/sock.h>
37#include <net/ip.h>
38#include <net/icmp.h>
39#include <net/protocol.h>
c5441932 40#include <net/ip_tunnels.h>
1da177e4
LT
41#include <net/arp.h>
42#include <net/checksum.h>
43#include <net/dsfield.h>
44#include <net/inet_ecn.h>
45#include <net/xfrm.h>
59a4c759
PE
46#include <net/net_namespace.h>
47#include <net/netns/generic.h>
c19e654d 48#include <net/rtnetlink.h>
00959ade 49#include <net/gre.h>
1da177e4 50
dfd56b8b 51#if IS_ENABLED(CONFIG_IPV6)
1da177e4
LT
52#include <net/ipv6.h>
53#include <net/ip6_fib.h>
54#include <net/ip6_route.h>
55#endif
56
57/*
58 Problems & solutions
59 --------------------
60
61 1. The most important issue is detecting local dead loops.
62 They would cause complete host lockup in transmit, which
63 would be "resolved" by stack overflow or, if queueing is enabled,
64 with infinite looping in net_bh.
65
66 We cannot track such dead loops during route installation,
67 it is infeasible task. The most general solutions would be
68 to keep skb->encapsulation counter (sort of local ttl),
6d0722a2 69 and silently drop packet when it expires. It is a good
bff52857 70 solution, but it supposes maintaining new variable in ALL
1da177e4
LT
71 skb, even if no tunneling is used.
72
6d0722a2
ED
73 Current solution: xmit_recursion breaks dead loops. This is a percpu
74 counter, since when we enter the first ndo_xmit(), cpu migration is
75 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
1da177e4
LT
76
77 2. Networking dead loops would not kill routers, but would really
78 kill network. IP hop limit plays role of "t->recursion" in this case,
79 if we copy it from packet being encapsulated to upper header.
80 It is very good solution, but it introduces two problems:
81
82 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
83 do not work over tunnels.
84 - traceroute does not work. I planned to relay ICMP from tunnel,
85 so that this problem would be solved and traceroute output
86 would be even more informative. This idea appeared to be wrong:
87 only Linux complies to rfc1812 now (yes, guys, Linux is the only
88 true router now :-)), all routers (at least, in neighbourhood of mine)
89 return only 8 bytes of payload. It is the end.
90
91 Hence, if we want that OSPF worked or traceroute said something reasonable,
92 we should search for another solution.
93
94 One of them is to parse packet trying to detect inner encapsulation
95 made by our node. It is difficult or even impossible, especially,
bff52857 96 taking into account fragmentation. To be short, ttl is not a solution at all.
1da177e4
LT
97
98 Current solution: The solution was UNEXPECTEDLY SIMPLE.
99 We force DF flag on tunnels with preconfigured hop limit,
100 that is ALL. :-) Well, it does not remove the problem completely,
101 but exponential growth of network traffic is changed to linear
102 (branches, that exceed pmtu are pruned) and tunnel mtu
bff52857 103 rapidly degrades to value <68, where looping stops.
1da177e4
LT
104 Yes, it is not good if there exists a router in the loop,
105 which does not force DF, even when encapsulating packets have DF set.
106 But it is not our problem! Nobody could accuse us, we made
107 all that we could make. Even if it is your gated who injected
108 fatal route to network, even if it were you who configured
109 fatal static route: you are innocent. :-)
110
1da177e4
LT
111 Alexey Kuznetsov.
112 */
113
eccc1bb8 114static bool log_ecn_error = true;
115module_param(log_ecn_error, bool, 0644);
116MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
117
c19e654d 118static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1da177e4 119static int ipgre_tunnel_init(struct net_device *dev);
eb8ce741 120
f99189b1 121static int ipgre_net_id __read_mostly;
c5441932 122static int gre_tap_net_id __read_mostly;
1da177e4 123
c5441932
PS
124static __sum16 check_checksum(struct sk_buff *skb)
125{
126 __sum16 csum = 0;
1da177e4 127
c5441932
PS
128 switch (skb->ip_summed) {
129 case CHECKSUM_COMPLETE:
130 csum = csum_fold(skb->csum);
1da177e4 131
c5441932
PS
132 if (!csum)
133 break;
134 /* Fall through. */
1da177e4 135
c5441932
PS
136 case CHECKSUM_NONE:
137 skb->csum = 0;
138 csum = __skb_checksum_complete(skb);
139 skb->ip_summed = CHECKSUM_COMPLETE;
140 break;
e985aad7 141 }
87b6d218 142
c5441932 143 return csum;
e985aad7
ED
144}
145
c5441932 146static int ip_gre_calc_hlen(__be16 o_flags)
d2083287 147{
c5441932 148 int addend = 4;
d2083287 149
c5441932
PS
150 if (o_flags&TUNNEL_CSUM)
151 addend += 4;
152 if (o_flags&TUNNEL_KEY)
153 addend += 4;
154 if (o_flags&TUNNEL_SEQ)
155 addend += 4;
156 return addend;
157}
1da177e4 158
c5441932
PS
159static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
160 bool *csum_err, int *hdr_len)
1da177e4 161{
22251c73
ED
162 unsigned int ip_hlen = ip_hdrlen(skb);
163 const struct gre_base_hdr *greh;
c5441932 164 __be32 *options;
e1a80002 165
c5441932
PS
166 if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
167 return -EINVAL;
e1a80002 168
22251c73 169 greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
c5441932
PS
170 if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
171 return -EINVAL;
1da177e4 172
c5441932
PS
173 tpi->flags = gre_flags_to_tnl_flags(greh->flags);
174 *hdr_len = ip_gre_calc_hlen(tpi->flags);
e1a80002 175
c5441932
PS
176 if (!pskb_may_pull(skb, *hdr_len))
177 return -EINVAL;
749c10f9 178
22251c73
ED
179 greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
180
c5441932 181 tpi->proto = greh->protocol;
1da177e4 182
c5441932
PS
183 options = (__be32 *)(greh + 1);
184 if (greh->flags & GRE_CSUM) {
185 if (check_checksum(skb)) {
186 *csum_err = true;
187 return -EINVAL;
188 }
189 options++;
1da177e4
LT
190 }
191
c5441932
PS
192 if (greh->flags & GRE_KEY) {
193 tpi->key = *options;
194 options++;
195 } else
196 tpi->key = 0;
1da177e4 197
c5441932
PS
198 if (unlikely(greh->flags & GRE_SEQ)) {
199 tpi->seq = *options;
200 options++;
201 } else
202 tpi->seq = 0;
1da177e4 203
c5441932
PS
204 /* WCCP version 1 and 2 protocol decoding.
205 * - Change protocol to IP
206 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
207 */
208 if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
209 tpi->proto = htons(ETH_P_IP);
210 if ((*(u8 *)options & 0xF0) != 0x40) {
211 *hdr_len += 4;
212 if (!pskb_may_pull(skb, *hdr_len))
213 return -EINVAL;
1da177e4
LT
214 }
215 }
1da177e4 216
c5441932 217 return 0;
1da177e4
LT
218}
219
1da177e4
LT
220static void ipgre_err(struct sk_buff *skb, u32 info)
221{
1da177e4 222
c5441932
PS
223 /* All the routers (except for Linux) return only
224 8 bytes of packet payload. It means, that precise relaying of
225 ICMP in the real Internet is absolutely infeasible.
1da177e4 226
c5441932
PS
227 Moreover, Cisco "wise men" put GRE key to the third word
228 in GRE header. It makes impossible maintaining even soft
229 state for keyed GRE tunnels with enabled checksum. Tell
230 them "thank you".
1da177e4 231
c5441932
PS
232 Well, I wonder, rfc1812 was written by Cisco employee,
233 what the hell these idiots break standards established
234 by themselves???
235 */
236 struct net *net = dev_net(skb->dev);
237 struct ip_tunnel_net *itn;
96f5a846 238 const struct iphdr *iph;
88c7664f
ACM
239 const int type = icmp_hdr(skb)->type;
240 const int code = icmp_hdr(skb)->code;
1da177e4 241 struct ip_tunnel *t;
c5441932
PS
242 struct tnl_ptk_info tpi;
243 int hdr_len;
244 bool csum_err = false;
1da177e4 245
c5441932
PS
246 if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) {
247 if (!csum_err) /* ignore csum errors. */
1da177e4 248 return;
1da177e4
LT
249 }
250
1da177e4
LT
251 switch (type) {
252 default:
253 case ICMP_PARAMETERPROB:
254 return;
255
256 case ICMP_DEST_UNREACH:
257 switch (code) {
258 case ICMP_SR_FAILED:
259 case ICMP_PORT_UNREACH:
260 /* Impossible event. */
261 return;
1da177e4
LT
262 default:
263 /* All others are translated to HOST_UNREACH.
264 rfc2003 contains "deep thoughts" about NET_UNREACH,
265 I believe they are just ether pollution. --ANK
266 */
267 break;
268 }
269 break;
270 case ICMP_TIME_EXCEEDED:
271 if (code != ICMP_EXC_TTL)
272 return;
273 break;
55be7a9c
DM
274
275 case ICMP_REDIRECT:
276 break;
1da177e4
LT
277 }
278
c5441932
PS
279 if (tpi.proto == htons(ETH_P_TEB))
280 itn = net_generic(net, gre_tap_net_id);
281 else
282 itn = net_generic(net, ipgre_net_id);
283
96f5a846 284 iph = (const struct iphdr *)skb->data;
c5441932
PS
285 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
286 iph->daddr, iph->saddr, tpi.key);
d2083287 287
36393395 288 if (t == NULL)
0c5794a6 289 return;
36393395
DM
290
291 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
292 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
293 t->parms.link, 0, IPPROTO_GRE, 0);
0c5794a6 294 return;
36393395 295 }
55be7a9c
DM
296 if (type == ICMP_REDIRECT) {
297 ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
298 IPPROTO_GRE, 0);
0c5794a6 299 return;
55be7a9c 300 }
36393395 301 if (t->parms.iph.daddr == 0 ||
f97c1e0c 302 ipv4_is_multicast(t->parms.iph.daddr))
0c5794a6 303 return;
1da177e4
LT
304
305 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
0c5794a6 306 return;
1da177e4 307
da6185d8 308 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
1da177e4
LT
309 t->err_count++;
310 else
311 t->err_count = 1;
312 t->err_time = jiffies;
1da177e4
LT
313}
314
1da177e4
LT
315static int ipgre_rcv(struct sk_buff *skb)
316{
c5441932
PS
317 struct net *net = dev_net(skb->dev);
318 struct ip_tunnel_net *itn;
b71d1d42 319 const struct iphdr *iph;
1da177e4 320 struct ip_tunnel *tunnel;
c5441932
PS
321 struct tnl_ptk_info tpi;
322 int hdr_len;
323 bool csum_err = false;
1da177e4 324
c5441932 325 if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0)
0c5794a6 326 goto drop;
1da177e4 327
c5441932
PS
328 if (tpi.proto == htons(ETH_P_TEB))
329 itn = net_generic(net, gre_tap_net_id);
330 else
331 itn = net_generic(net, ipgre_net_id);
1da177e4 332
c5441932
PS
333 iph = ip_hdr(skb);
334 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
335 iph->saddr, iph->daddr, tpi.key);
e1a80002 336
d2083287 337 if (tunnel) {
85e963b8 338 skb_pop_mac_header(skb);
b9099fea 339 ip_tunnel_rcv(tunnel, skb, &tpi, hdr_len, log_ecn_error);
8990f468 340 return 0;
1da177e4 341 }
45af08be 342 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1da177e4 343drop:
1da177e4 344 kfree_skb(skb);
a02cec21 345 return 0;
1da177e4
LT
346}
347
8f10098f 348static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff *skb)
68c33163
PS
349{
350 int err;
351
352 if (skb_is_gso(skb)) {
353 err = skb_unclone(skb, GFP_ATOMIC);
354 if (unlikely(err))
355 goto error;
356 skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
357 return skb;
8f10098f 358 } else if (skb->ip_summed == CHECKSUM_PARTIAL &&
c5441932 359 tunnel->parms.o_flags&TUNNEL_CSUM) {
8f10098f
PS
360 err = skb_checksum_help(skb);
361 if (unlikely(err))
362 goto error;
363 } else if (skb->ip_summed != CHECKSUM_PARTIAL)
aa0e51cd 364 skb->ip_summed = CHECKSUM_NONE;
68c33163
PS
365
366 return skb;
367
368error:
369 kfree_skb(skb);
370 return ERR_PTR(err);
371}
372
c5441932
PS
373static struct sk_buff *gre_build_header(struct sk_buff *skb,
374 const struct tnl_ptk_info *tpi,
375 int hdr_len)
1da177e4 376{
c5441932 377 struct gre_base_hdr *greh;
68c33163 378
c5441932 379 skb_push(skb, hdr_len);
6b78f16e 380
c5441932
PS
381 greh = (struct gre_base_hdr *)skb->data;
382 greh->flags = tnl_flags_to_gre_flags(tpi->flags);
383 greh->protocol = tpi->proto;
cef401de 384
c5441932
PS
385 if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
386 __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
e1a80002 387
c5441932
PS
388 if (tpi->flags&TUNNEL_SEQ) {
389 *ptr = tpi->seq;
390 ptr--;
1da177e4 391 }
c5441932
PS
392 if (tpi->flags&TUNNEL_KEY) {
393 *ptr = tpi->key;
394 ptr--;
61d57f87 395 }
c5441932
PS
396 if (tpi->flags&TUNNEL_CSUM &&
397 !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
398 *(__sum16 *)ptr = 0;
399 *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
400 skb->len, 0));
1da177e4 401 }
1da177e4
LT
402 }
403
c5441932
PS
404 return skb;
405}
1da177e4 406
c5441932
PS
407static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
408 const struct iphdr *tnl_params,
409 __be16 proto)
410{
411 struct ip_tunnel *tunnel = netdev_priv(dev);
412 struct tnl_ptk_info tpi;
1da177e4 413
c5441932
PS
414 if (likely(!skb->encapsulation)) {
415 skb_reset_inner_headers(skb);
416 skb->encapsulation = 1;
1da177e4
LT
417 }
418
c5441932
PS
419 tpi.flags = tunnel->parms.o_flags;
420 tpi.proto = proto;
421 tpi.key = tunnel->parms.o_key;
422 if (tunnel->parms.o_flags & TUNNEL_SEQ)
423 tunnel->o_seqno++;
424 tpi.seq = htonl(tunnel->o_seqno);
1da177e4 425
c5441932
PS
426 /* Push GRE header. */
427 skb = gre_build_header(skb, &tpi, tunnel->hlen);
428 if (unlikely(!skb)) {
429 dev->stats.tx_dropped++;
430 return;
1da177e4 431 }
1da177e4 432
c5441932
PS
433 ip_tunnel_xmit(skb, dev, tnl_params);
434}
1da177e4 435
c5441932
PS
436static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
437 struct net_device *dev)
438{
439 struct ip_tunnel *tunnel = netdev_priv(dev);
440 const struct iphdr *tnl_params;
1da177e4 441
c5441932
PS
442 skb = handle_offloads(tunnel, skb);
443 if (IS_ERR(skb))
444 goto out;
1da177e4 445
c5441932
PS
446 if (dev->header_ops) {
447 /* Need space for new headers */
448 if (skb_cow_head(skb, dev->needed_headroom -
2bac7cb3 449 (tunnel->hlen + sizeof(struct iphdr))))
c5441932 450 goto free_skb;
1da177e4 451
c5441932 452 tnl_params = (const struct iphdr *)skb->data;
1da177e4 453
c5441932
PS
454 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
455 * to gre header.
456 */
457 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
458 } else {
459 if (skb_cow_head(skb, dev->needed_headroom))
460 goto free_skb;
1da177e4 461
c5441932 462 tnl_params = &tunnel->parms.iph;
1da177e4
LT
463 }
464
c5441932
PS
465 __gre_xmit(skb, dev, tnl_params, skb->protocol);
466
6ed10654 467 return NETDEV_TX_OK;
1da177e4 468
c5441932 469free_skb:
1da177e4 470 dev_kfree_skb(skb);
c5441932
PS
471out:
472 dev->stats.tx_dropped++;
6ed10654 473 return NETDEV_TX_OK;
1da177e4
LT
474}
475
c5441932
PS
476static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
477 struct net_device *dev)
ee34c1eb 478{
c5441932 479 struct ip_tunnel *tunnel = netdev_priv(dev);
ee34c1eb 480
c5441932
PS
481 skb = handle_offloads(tunnel, skb);
482 if (IS_ERR(skb))
483 goto out;
ee34c1eb 484
c5441932
PS
485 if (skb_cow_head(skb, dev->needed_headroom))
486 goto free_skb;
42aa9162 487
c5441932 488 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
42aa9162 489
c5441932 490 return NETDEV_TX_OK;
ee34c1eb 491
c5441932
PS
492free_skb:
493 dev_kfree_skb(skb);
494out:
495 dev->stats.tx_dropped++;
496 return NETDEV_TX_OK;
ee34c1eb
MS
497}
498
c5441932
PS
499static int ipgre_tunnel_ioctl(struct net_device *dev,
500 struct ifreq *ifr, int cmd)
1da177e4
LT
501{
502 int err = 0;
503 struct ip_tunnel_parm p;
1da177e4 504
c5441932
PS
505 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
506 return -EFAULT;
3d3fa8bc
CW
507 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
508 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
509 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
510 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
511 return -EINVAL;
1da177e4 512 }
c5441932
PS
513 p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
514 p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
1da177e4 515
c5441932
PS
516 err = ip_tunnel_ioctl(dev, &p, cmd);
517 if (err)
518 return err;
1da177e4 519
c5441932
PS
520 p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
521 p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
522
523 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
524 return -EFAULT;
1da177e4
LT
525 return 0;
526}
527
1da177e4
LT
528/* Nice toy. Unfortunately, useless in real life :-)
529 It allows constructing a virtual multiprotocol broadcast "LAN"
530 over the Internet, provided multicast routing is tuned.
531
532
533 I have no idea whether this bicycle was invented before me,
534 so that I had to set ARPHRD_IPGRE to a random value.
535 I have an impression, that Cisco could make something similar,
536 but this feature is apparently missing in IOS<=11.2(8).
e905a9ed 537
1da177e4
LT
538 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
539 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
540
541 ping -t 255 224.66.66.66
542
543 If nobody answers, mbone does not work.
544
545 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
546 ip addr add 10.66.66.<somewhat>/24 dev Universe
547 ifconfig Universe up
548 ifconfig Universe add fe80::<Your_real_addr>/10
549 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
550 ftp 10.66.66.66
551 ...
552 ftp fec0:6666:6666::193.233.7.65
553 ...
1da177e4 554 */
3b04ddde
SH
555static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
556 unsigned short type,
1507850b 557 const void *daddr, const void *saddr, unsigned int len)
1da177e4 558{
2941a486 559 struct ip_tunnel *t = netdev_priv(dev);
c5441932
PS
560 struct iphdr *iph;
561 struct gre_base_hdr *greh;
1da177e4 562
c5441932
PS
563 iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
564 greh = (struct gre_base_hdr *)(iph+1);
565 greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
566 greh->protocol = htons(type);
1da177e4 567
c5441932 568 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
e905a9ed 569
c5441932 570 /* Set the source hardware address. */
1da177e4
LT
571 if (saddr)
572 memcpy(&iph->saddr, saddr, 4);
6d55cb91 573 if (daddr)
1da177e4 574 memcpy(&iph->daddr, daddr, 4);
6d55cb91 575 if (iph->daddr)
27c1c98b 576 return t->hlen + sizeof(*iph);
e905a9ed 577
c5441932 578 return -(t->hlen + sizeof(*iph));
1da177e4
LT
579}
580
6a5f44d7
TT
581static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
582{
b71d1d42 583 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
6a5f44d7
TT
584 memcpy(haddr, &iph->saddr, 4);
585 return 4;
586}
587
3b04ddde
SH
588static const struct header_ops ipgre_header_ops = {
589 .create = ipgre_header,
6a5f44d7 590 .parse = ipgre_header_parse,
3b04ddde
SH
591};
592
6a5f44d7 593#ifdef CONFIG_NET_IPGRE_BROADCAST
1da177e4
LT
594static int ipgre_open(struct net_device *dev)
595{
2941a486 596 struct ip_tunnel *t = netdev_priv(dev);
1da177e4 597
f97c1e0c 598 if (ipv4_is_multicast(t->parms.iph.daddr)) {
cbb1e85f
DM
599 struct flowi4 fl4;
600 struct rtable *rt;
601
602 rt = ip_route_output_gre(dev_net(dev), &fl4,
603 t->parms.iph.daddr,
604 t->parms.iph.saddr,
605 t->parms.o_key,
606 RT_TOS(t->parms.iph.tos),
607 t->parms.link);
b23dd4fe 608 if (IS_ERR(rt))
1da177e4 609 return -EADDRNOTAVAIL;
d8d1f30b 610 dev = rt->dst.dev;
1da177e4 611 ip_rt_put(rt);
e5ed6399 612 if (__in_dev_get_rtnl(dev) == NULL)
1da177e4
LT
613 return -EADDRNOTAVAIL;
614 t->mlink = dev->ifindex;
e5ed6399 615 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1da177e4
LT
616 }
617 return 0;
618}
619
620static int ipgre_close(struct net_device *dev)
621{
2941a486 622 struct ip_tunnel *t = netdev_priv(dev);
b8c26a33 623
f97c1e0c 624 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
7fee0ca2 625 struct in_device *in_dev;
c346dca1 626 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
8723e1b4 627 if (in_dev)
1da177e4 628 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1da177e4
LT
629 }
630 return 0;
631}
1da177e4
LT
632#endif
633
b8c26a33
SH
634static const struct net_device_ops ipgre_netdev_ops = {
635 .ndo_init = ipgre_tunnel_init,
c5441932 636 .ndo_uninit = ip_tunnel_uninit,
b8c26a33
SH
637#ifdef CONFIG_NET_IPGRE_BROADCAST
638 .ndo_open = ipgre_open,
639 .ndo_stop = ipgre_close,
640#endif
c5441932 641 .ndo_start_xmit = ipgre_xmit,
b8c26a33 642 .ndo_do_ioctl = ipgre_tunnel_ioctl,
c5441932
PS
643 .ndo_change_mtu = ip_tunnel_change_mtu,
644 .ndo_get_stats64 = ip_tunnel_get_stats64,
b8c26a33
SH
645};
646
6b78f16e
ED
/* Feature bits set on every GRE device by __gre_tunnel_init(). */
#define GRE_FEATURES \
	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM)
651
1da177e4
LT
652static void ipgre_tunnel_setup(struct net_device *dev)
653{
b8c26a33 654 dev->netdev_ops = &ipgre_netdev_ops;
5ef58c40 655 dev->type = ARPHRD_IPGRE;
c5441932
PS
656 ip_tunnel_setup(dev, ipgre_net_id);
657}
1da177e4 658
c5441932
PS
659static void __gre_tunnel_init(struct net_device *dev)
660{
661 struct ip_tunnel *tunnel;
662
663 tunnel = netdev_priv(dev);
664 tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
665 tunnel->parms.iph.protocol = IPPROTO_GRE;
666
667 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
46f25dff 668 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
6b78f16e 669
c5441932 670 dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES;
6b78f16e 671 dev->hw_features |= GRE_FEATURES;
c5441932
PS
672
673 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
674 /* TCP offload with GRE SEQ is not supported. */
675 dev->features |= NETIF_F_GSO_SOFTWARE;
676 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
677 /* Can use a lockless transmit, unless we generate
678 * output sequences
679 */
680 dev->features |= NETIF_F_LLTX;
681 }
1da177e4
LT
682}
683
684static int ipgre_tunnel_init(struct net_device *dev)
685{
c5441932
PS
686 struct ip_tunnel *tunnel = netdev_priv(dev);
687 struct iphdr *iph = &tunnel->parms.iph;
1da177e4 688
c5441932 689 __gre_tunnel_init(dev);
1da177e4 690
c5441932
PS
691 memcpy(dev->dev_addr, &iph->saddr, 4);
692 memcpy(dev->broadcast, &iph->daddr, 4);
1da177e4 693
c5441932
PS
694 dev->flags = IFF_NOARP;
695 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
696 dev->addr_len = 4;
1da177e4 697
1da177e4 698 if (iph->daddr) {
1da177e4 699#ifdef CONFIG_NET_IPGRE_BROADCAST
f97c1e0c 700 if (ipv4_is_multicast(iph->daddr)) {
1da177e4
LT
701 if (!iph->saddr)
702 return -EINVAL;
703 dev->flags = IFF_BROADCAST;
3b04ddde 704 dev->header_ops = &ipgre_header_ops;
1da177e4
LT
705 }
706#endif
ee34c1eb 707 } else
6a5f44d7 708 dev->header_ops = &ipgre_header_ops;
1da177e4 709
c5441932 710 return ip_tunnel_init(dev);
1da177e4
LT
711}
712
00959ade
DK
713static const struct gre_protocol ipgre_protocol = {
714 .handler = ipgre_rcv,
715 .err_handler = ipgre_err,
1da177e4
LT
716};
717
2c8c1e72 718static int __net_init ipgre_init_net(struct net *net)
59a4c759 719{
c5441932 720 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
59a4c759
PE
721}
722
2c8c1e72 723static void __net_exit ipgre_exit_net(struct net *net)
59a4c759 724{
c5441932
PS
725 struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
726 ip_tunnel_delete_net(itn);
59a4c759
PE
727}
728
729static struct pernet_operations ipgre_net_ops = {
730 .init = ipgre_init_net,
731 .exit = ipgre_exit_net,
cfb8fbf2 732 .id = &ipgre_net_id,
c5441932 733 .size = sizeof(struct ip_tunnel_net),
59a4c759 734};
1da177e4 735
c19e654d
HX
736static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
737{
738 __be16 flags;
739
740 if (!data)
741 return 0;
742
743 flags = 0;
744 if (data[IFLA_GRE_IFLAGS])
745 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
746 if (data[IFLA_GRE_OFLAGS])
747 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
748 if (flags & (GRE_VERSION|GRE_ROUTING))
749 return -EINVAL;
750
751 return 0;
752}
753
e1a80002
HX
754static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
755{
756 __be32 daddr;
757
758 if (tb[IFLA_ADDRESS]) {
759 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
760 return -EINVAL;
761 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
762 return -EADDRNOTAVAIL;
763 }
764
765 if (!data)
766 goto out;
767
768 if (data[IFLA_GRE_REMOTE]) {
769 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
770 if (!daddr)
771 return -EINVAL;
772 }
773
774out:
775 return ipgre_tunnel_validate(tb, data);
776}
777
c5441932
PS
778static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
779 struct ip_tunnel_parm *parms)
c19e654d 780{
7bb82d92 781 memset(parms, 0, sizeof(*parms));
c19e654d
HX
782
783 parms->iph.protocol = IPPROTO_GRE;
784
785 if (!data)
786 return;
787
788 if (data[IFLA_GRE_LINK])
789 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
790
791 if (data[IFLA_GRE_IFLAGS])
c5441932 792 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
c19e654d
HX
793
794 if (data[IFLA_GRE_OFLAGS])
c5441932 795 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
c19e654d
HX
796
797 if (data[IFLA_GRE_IKEY])
798 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
799
800 if (data[IFLA_GRE_OKEY])
801 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
802
803 if (data[IFLA_GRE_LOCAL])
4d74f8ba 804 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
c19e654d
HX
805
806 if (data[IFLA_GRE_REMOTE])
4d74f8ba 807 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
c19e654d
HX
808
809 if (data[IFLA_GRE_TTL])
810 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
811
812 if (data[IFLA_GRE_TOS])
813 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
814
815 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
816 parms->iph.frag_off = htons(IP_DF);
817}
818
/* ndo_init for "gretap": common GRE setup followed by ip_tunnel_init(). */
static int gre_tap_init(struct net_device *dev)
{
	int err;

	__gre_tunnel_init(dev);
	err = ip_tunnel_init(dev);

	return err;
}
825
c5441932
PS
826static const struct net_device_ops gre_tap_netdev_ops = {
827 .ndo_init = gre_tap_init,
828 .ndo_uninit = ip_tunnel_uninit,
829 .ndo_start_xmit = gre_tap_xmit,
b8c26a33
SH
830 .ndo_set_mac_address = eth_mac_addr,
831 .ndo_validate_addr = eth_validate_addr,
c5441932
PS
832 .ndo_change_mtu = ip_tunnel_change_mtu,
833 .ndo_get_stats64 = ip_tunnel_get_stats64,
b8c26a33
SH
834};
835
e1a80002
HX
836static void ipgre_tap_setup(struct net_device *dev)
837{
e1a80002 838 ether_setup(dev);
c5441932
PS
839 dev->netdev_ops = &gre_tap_netdev_ops;
840 ip_tunnel_setup(dev, gre_tap_net_id);
e1a80002
HX
841}
842
c5441932
PS
843static int ipgre_newlink(struct net *src_net, struct net_device *dev,
844 struct nlattr *tb[], struct nlattr *data[])
c19e654d 845{
c5441932 846 struct ip_tunnel_parm p;
c19e654d 847
c5441932
PS
848 ipgre_netlink_parms(data, tb, &p);
849 return ip_tunnel_newlink(dev, tb, &p);
c19e654d
HX
850}
851
852static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
853 struct nlattr *data[])
854{
c19e654d 855 struct ip_tunnel_parm p;
c19e654d 856
c5441932
PS
857 ipgre_netlink_parms(data, tb, &p);
858 return ip_tunnel_changelink(dev, tb, &p);
c19e654d
HX
859}
860
861static size_t ipgre_get_size(const struct net_device *dev)
862{
863 return
864 /* IFLA_GRE_LINK */
865 nla_total_size(4) +
866 /* IFLA_GRE_IFLAGS */
867 nla_total_size(2) +
868 /* IFLA_GRE_OFLAGS */
869 nla_total_size(2) +
870 /* IFLA_GRE_IKEY */
871 nla_total_size(4) +
872 /* IFLA_GRE_OKEY */
873 nla_total_size(4) +
874 /* IFLA_GRE_LOCAL */
875 nla_total_size(4) +
876 /* IFLA_GRE_REMOTE */
877 nla_total_size(4) +
878 /* IFLA_GRE_TTL */
879 nla_total_size(1) +
880 /* IFLA_GRE_TOS */
881 nla_total_size(1) +
882 /* IFLA_GRE_PMTUDISC */
883 nla_total_size(1) +
884 0;
885}
886
887static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
888{
889 struct ip_tunnel *t = netdev_priv(dev);
890 struct ip_tunnel_parm *p = &t->parms;
891
f3756b79 892 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
c5441932
PS
893 nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
894 nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
f3756b79
DM
895 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
896 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
897 nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
898 nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
899 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
900 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
901 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
902 !!(p->iph.frag_off & htons(IP_DF))))
903 goto nla_put_failure;
c19e654d
HX
904 return 0;
905
906nla_put_failure:
907 return -EMSGSIZE;
908}
909
910static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
911 [IFLA_GRE_LINK] = { .type = NLA_U32 },
912 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
913 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
914 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
915 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
4d74f8ba
PM
916 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
917 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
c19e654d
HX
918 [IFLA_GRE_TTL] = { .type = NLA_U8 },
919 [IFLA_GRE_TOS] = { .type = NLA_U8 },
920 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
921};
922
923static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
924 .kind = "gre",
925 .maxtype = IFLA_GRE_MAX,
926 .policy = ipgre_policy,
927 .priv_size = sizeof(struct ip_tunnel),
928 .setup = ipgre_tunnel_setup,
929 .validate = ipgre_tunnel_validate,
930 .newlink = ipgre_newlink,
931 .changelink = ipgre_changelink,
c5441932 932 .dellink = ip_tunnel_dellink,
c19e654d
HX
933 .get_size = ipgre_get_size,
934 .fill_info = ipgre_fill_info,
935};
936
e1a80002
HX
937static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
938 .kind = "gretap",
939 .maxtype = IFLA_GRE_MAX,
940 .policy = ipgre_policy,
941 .priv_size = sizeof(struct ip_tunnel),
942 .setup = ipgre_tap_setup,
943 .validate = ipgre_tap_validate,
944 .newlink = ipgre_newlink,
945 .changelink = ipgre_changelink,
c5441932 946 .dellink = ip_tunnel_dellink,
e1a80002
HX
947 .get_size = ipgre_get_size,
948 .fill_info = ipgre_fill_info,
949};
950
c5441932
PS
951static int __net_init ipgre_tap_init_net(struct net *net)
952{
953 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL);
954}
955
956static void __net_exit ipgre_tap_exit_net(struct net *net)
957{
958 struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
959 ip_tunnel_delete_net(itn);
960}
961
962static struct pernet_operations ipgre_tap_net_ops = {
963 .init = ipgre_tap_init_net,
964 .exit = ipgre_tap_exit_net,
965 .id = &gre_tap_net_id,
966 .size = sizeof(struct ip_tunnel_net),
967};
1da177e4
LT
968
969static int __init ipgre_init(void)
970{
971 int err;
972
058bd4d2 973 pr_info("GRE over IPv4 tunneling driver\n");
1da177e4 974
cfb8fbf2 975 err = register_pernet_device(&ipgre_net_ops);
59a4c759 976 if (err < 0)
c2892f02
AD
977 return err;
978
c5441932
PS
979 err = register_pernet_device(&ipgre_tap_net_ops);
980 if (err < 0)
981 goto pnet_tap_faied;
982
00959ade 983 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02 984 if (err < 0) {
058bd4d2 985 pr_info("%s: can't add protocol\n", __func__);
c2892f02
AD
986 goto add_proto_failed;
987 }
7daa0004 988
c19e654d
HX
989 err = rtnl_link_register(&ipgre_link_ops);
990 if (err < 0)
991 goto rtnl_link_failed;
992
e1a80002
HX
993 err = rtnl_link_register(&ipgre_tap_ops);
994 if (err < 0)
995 goto tap_ops_failed;
996
c5441932 997 return 0;
c19e654d 998
e1a80002
HX
999tap_ops_failed:
1000 rtnl_link_unregister(&ipgre_link_ops);
c19e654d 1001rtnl_link_failed:
00959ade 1002 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
c2892f02 1003add_proto_failed:
c5441932
PS
1004 unregister_pernet_device(&ipgre_tap_net_ops);
1005pnet_tap_faied:
c2892f02 1006 unregister_pernet_device(&ipgre_net_ops);
c5441932 1007 return err;
1da177e4
LT
1008}
1009
db44575f 1010static void __exit ipgre_fini(void)
1da177e4 1011{
e1a80002 1012 rtnl_link_unregister(&ipgre_tap_ops);
c19e654d 1013 rtnl_link_unregister(&ipgre_link_ops);
00959ade 1014 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
058bd4d2 1015 pr_info("%s: can't remove protocol\n", __func__);
c5441932 1016 unregister_pernet_device(&ipgre_tap_net_ops);
c2892f02 1017 unregister_pernet_device(&ipgre_net_ops);
1da177e4
LT
1018}
1019
1020module_init(ipgre_init);
1021module_exit(ipgre_fini);
1022MODULE_LICENSE("GPL");
4d74f8ba
PM
1023MODULE_ALIAS_RTNL_LINK("gre");
1024MODULE_ALIAS_RTNL_LINK("gretap");
8909c9ad 1025MODULE_ALIAS_NETDEV("gre0");
c5441932 1026MODULE_ALIAS_NETDEV("gretap0");