Merge tag 'v3.10.99' into update
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv4 / ip_tunnel.c
1 /*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43
44 #include <net/sock.h>
45 #include <net/ip.h>
46 #include <net/icmp.h>
47 #include <net/protocol.h>
48 #include <net/ip_tunnels.h>
49 #include <net/arp.h>
50 #include <net/checksum.h>
51 #include <net/dsfield.h>
52 #include <net/inet_ecn.h>
53 #include <net/xfrm.h>
54 #include <net/net_namespace.h>
55 #include <net/netns/generic.h>
56 #include <net/rtnetlink.h>
57
58 #if IS_ENABLED(CONFIG_IPV6)
59 #include <net/ipv6.h>
60 #include <net/ip6_fib.h>
61 #include <net/ip6_route.h>
62 #endif
63
64 static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
65 __be32 key, __be32 remote)
66 {
67 return hash_32((__force u32)key ^ (__force u32)remote,
68 IP_TNL_HASH_BITS);
69 }
70
71 /* Often modified stats are per cpu, other are shared (netdev->stats) */
/* Often modified stats are per cpu, other are shared (netdev->stats) */
struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
						struct rtnl_link_stats64 *tot)
{
	int i;

	/* Accumulate the per-cpu rx/tx counters.  NOTE(review): counters are
	 * added with +=, so *tot is assumed to be pre-zeroed by the caller
	 * (dev_get_stats() normally does this) — confirm for new callers. */
	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
		unsigned int start;

		/* u64_stats seqcount retry loop: guarantees a consistent
		 * 64-bit snapshot even on 32-bit SMP hosts where a writer
		 * may be updating the counters concurrently. */
		do {
			start = u64_stats_fetch_begin_bh(&tstats->syncp);
			rx_packets = tstats->rx_packets;
			tx_packets = tstats->tx_packets;
			rx_bytes = tstats->rx_bytes;
			tx_bytes = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));

		tot->rx_packets += rx_packets;
		tot->tx_packets += tx_packets;
		tot->rx_bytes += rx_bytes;
		tot->tx_bytes += tx_bytes;
	}

	/* Rarely-written counters live in the shared netdev->stats and are
	 * copied (not summed) into the 64-bit struct. */
	tot->multicast = dev->stats.multicast;

	tot->rx_crc_errors = dev->stats.rx_crc_errors;
	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
	tot->rx_length_errors = dev->stats.rx_length_errors;
	tot->rx_frame_errors = dev->stats.rx_frame_errors;
	tot->rx_errors = dev->stats.rx_errors;

	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
	tot->tx_dropped = dev->stats.tx_dropped;
	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
	tot->tx_errors = dev->stats.tx_errors;

	tot->collisions = dev->stats.collisions;

	return tot;
}
EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
115
116 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
117 __be16 flags, __be32 key)
118 {
119 if (p->i_flags & TUNNEL_KEY) {
120 if (flags & TUNNEL_KEY)
121 return key == p->i_key;
122 else
123 /* key expected, none present */
124 return false;
125 } else
126 return !(flags & TUNNEL_KEY);
127 }
128
129 /* Fallback tunnel: no source, no destination, no key, no options
130
131 Tunnel hash table:
132 We require exact key match i.e. if a key is present in packet
133 it will match only tunnel with the same key; if it is not present,
134 it will match only keyless tunnel.
135
136 All keysless packets, if not matched configured keyless tunnels
137 will match fallback tunnel.
138 Given src, dst and key, find appropriate for input tunnel.
139 */
140 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
141 int link, __be16 flags,
142 __be32 remote, __be32 local,
143 __be32 key)
144 {
145 unsigned int hash;
146 struct ip_tunnel *t, *cand = NULL;
147 struct hlist_head *head;
148
149 hash = ip_tunnel_hash(itn, key, remote);
150 head = &itn->tunnels[hash];
151
152 hlist_for_each_entry_rcu(t, head, hash_node) {
153 if (local != t->parms.iph.saddr ||
154 remote != t->parms.iph.daddr ||
155 !(t->dev->flags & IFF_UP))
156 continue;
157
158 if (!ip_tunnel_key_match(&t->parms, flags, key))
159 continue;
160
161 if (t->parms.link == link)
162 return t;
163 else
164 cand = t;
165 }
166
167 hlist_for_each_entry_rcu(t, head, hash_node) {
168 if (remote != t->parms.iph.daddr ||
169 t->parms.iph.saddr != 0 ||
170 !(t->dev->flags & IFF_UP))
171 continue;
172
173 if (!ip_tunnel_key_match(&t->parms, flags, key))
174 continue;
175
176 if (t->parms.link == link)
177 return t;
178 else if (!cand)
179 cand = t;
180 }
181
182 hash = ip_tunnel_hash(itn, key, 0);
183 head = &itn->tunnels[hash];
184
185 hlist_for_each_entry_rcu(t, head, hash_node) {
186 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
187 (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
188 continue;
189
190 if (!(t->dev->flags & IFF_UP))
191 continue;
192
193 if (!ip_tunnel_key_match(&t->parms, flags, key))
194 continue;
195
196 if (t->parms.link == link)
197 return t;
198 else if (!cand)
199 cand = t;
200 }
201
202 if (flags & TUNNEL_NO_KEY)
203 goto skip_key_lookup;
204
205 hlist_for_each_entry_rcu(t, head, hash_node) {
206 if (t->parms.i_key != key ||
207 t->parms.iph.saddr != 0 ||
208 t->parms.iph.daddr != 0 ||
209 !(t->dev->flags & IFF_UP))
210 continue;
211
212 if (t->parms.link == link)
213 return t;
214 else if (!cand)
215 cand = t;
216 }
217
218 skip_key_lookup:
219 if (cand)
220 return cand;
221
222 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
223 return netdev_priv(itn->fb_tunnel_dev);
224
225
226 return NULL;
227 }
228 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
229
230 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
231 struct ip_tunnel_parm *parms)
232 {
233 unsigned int h;
234 __be32 remote;
235
236 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
237 remote = parms->iph.daddr;
238 else
239 remote = 0;
240
241 h = ip_tunnel_hash(itn, parms->i_key, remote);
242 return &itn->tunnels[h];
243 }
244
/* Link a tunnel into its hash bucket.  Caller holds RTNL; readers walk
 * the list under RCU, hence the _rcu insertion primitive. */
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	hlist_add_head_rcu(&t->hash_node, head);
}
251
/* Unlink a tunnel from its hash bucket (RCU-safe); hlist_del_init_rcu
 * leaves the node reinitialized so it can be re-added later. */
static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
256
/* Find a configured tunnel that exactly matches @parms (addresses, key,
 * link) and whose device is of @type.  Unlike ip_tunnel_lookup() this is
 * an exact configuration match used by ioctl/netlink paths, not a
 * best-effort receive lookup.  Returns NULL when nothing matches. */
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    key == t->parms.i_key &&
		    link == t->parms.link &&
		    type == t->dev->type)
			break;
	}
	return t;
}
278
/* Allocate and register a tunnel net_device in @net with the given link
 * ops and parameters.  Must be called under RTNL.  If @parms has no
 * name, one is derived from ops->kind plus a "%d" template that
 * register_netdevice() expands to a unique index.  Returns the new
 * device or an ERR_PTR() on failure. */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		/* Need room for kind + "%d" + NUL in IFNAMSIZ bytes. */
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	/* Seed the device's private area with the requested parameters
	 * before registration so ndo callbacks see consistent state. */
	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
323
324 static inline struct rtable *ip_route_output_tunnel(struct net *net,
325 struct flowi4 *fl4,
326 int proto,
327 __be32 daddr, __be32 saddr,
328 __be32 key, __u8 tos, int oif)
329 {
330 memset(fl4, 0, sizeof(*fl4));
331 fl4->flowi4_oif = oif;
332 fl4->daddr = daddr;
333 fl4->saddr = saddr;
334 fl4->flowi4_tos = tos;
335 fl4->flowi4_proto = proto;
336 fl4->fl4_gre_key = key;
337 return ip_route_output_key(net, fl4);
338 }
339
/* Bind the tunnel device to an underlying output device and compute a
 * reasonable MTU for it.  The lower device is guessed by routing toward
 * the configured remote endpoint (or taken from parms.link); its MTU and
 * header sizes determine our needed_headroom and the value returned,
 * which the caller typically assigns to dev->mtu. */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_tunnel(dev_net(dev), &fl4,
					    tunnel->parms.iph.protocol,
					    iph->daddr, iph->saddr,
					    tunnel->parms.o_key,
					    RT_TOS(iph->tos),
					    tunnel->parms.link);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		/* A fixed remote makes this a point-to-point link, except
		 * for Ethernet-style (gretap-like) tunnels. */
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	/* No route: fall back to the explicitly configured link. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	/* Never report less than the IPv4 minimum reassembly size. */
	if (mtu < 68)
		mtu = 68;

	return mtu;
}
387
388 static struct ip_tunnel *ip_tunnel_create(struct net *net,
389 struct ip_tunnel_net *itn,
390 struct ip_tunnel_parm *parms)
391 {
392 struct ip_tunnel *nt, *fbt;
393 struct net_device *dev;
394
395 BUG_ON(!itn->fb_tunnel_dev);
396 fbt = netdev_priv(itn->fb_tunnel_dev);
397 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
398 if (IS_ERR(dev))
399 return NULL;
400
401 dev->mtu = ip_tunnel_bind_dev(dev);
402
403 nt = netdev_priv(dev);
404 ip_tunnel_add(itn, nt);
405 return nt;
406 }
407
/* Generic tunnel receive path: strip @hdr_len bytes of tunnel header
 * from @skb, validate checksum/sequence flags against the tunnel's
 * configuration, decapsulate ECN and hand the inner packet to the
 * stack via GRO cells.  Consumes the skb in all cases; returns 0. */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, int hdr_len, bool log_ecn_error)
{
	struct pcpu_tstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

	/* Drop any IPsec state; the inner packet starts fresh. */
	secpath_reset(skb);

	skb->protocol = tpi->proto;

	/* Pull the tunnel header and keep the rx checksum consistent. */
	skb->mac_header = skb->network_header;
	__pskb_pull(skb, hdr_len);
	skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen);
#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* Checksum presence in the packet must match the tunnel's
	 * TUNNEL_CSUM configuration, in both directions. */
	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* In-order delivery enforcement for TUNNEL_SEQ tunnels; stale or
	 * missing sequence numbers are counted as fifo errors. */
	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	/* Warning: All skb pointers will be invalidated! */
	if (tunnel->dev->type == ARPHRD_ETHER) {
		if (!pskb_may_pull(skb, ETH_HLEN)) {
			tunnel->dev->stats.rx_length_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}

		/* Re-read iph: pskb_may_pull may have moved the data. */
		iph = ip_hdr(skb);
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	}

	skb->pkt_type = PACKET_HOST;
	__skb_tunnel_rx(skb, tunnel->dev);

	skb_reset_network_header(skb);
	/* Propagate ECN from the outer to the inner header; err > 1 means
	 * the packet must be dropped (CE on a not-ECT inner packet). */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
492
/* Path-MTU handling for the transmit path: compute the effective MTU
 * through the tunnel, update the cached route's PMTU, and if the inner
 * packet does not fit, emit the appropriate ICMP/ICMPv6 "too big"
 * error.  Returns 0 when the packet may proceed, -E2BIG otherwise. */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	/* With DF set the tunnel must fragment at the source: derive the
	 * inner MTU from the outer route minus all encapsulation. */
	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Pin the reduced MTU on host routes (or fixed-remote
		 * tunnels) so IPv6 sources learn the tunnel PMTU. */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
		    mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
539
/* Generic tunnel transmit path: resolve the destination (including the
 * NBMA case where the outer daddr comes from the inner packet or
 * neighbour entry), route the outer packet, enforce PMTU, then build
 * and push the outer IPv4 header and hand off to iptunnel_xmit().
 * Consumes @skb on every path. */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct iphdr *iph;
	struct flowi4 fl4;
	u8 tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	struct net_device *tdev;	/* Device to other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		/* Derive the outer destination from the inner packet's
		 * next hop (IPv4) or neighbour entry (IPv6-compat). */
		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only IPv4-compatible IPv6 addresses embed a
			 * usable IPv4 destination in their last word. */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;
	}

	/* tos bit 0 set means "inherit TOS/DSCP from the inner packet". */
	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}

	rt = ip_route_output_tunnel(dev_net(dev), &fl4,
				    tunnel->parms.iph.protocol,
				    dst, tnl_params->saddr,
				    tunnel->parms.o_key,
				    RT_TOS(tos),
				    tunnel->parms.link);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error;
	}
	/* Routing back to ourselves would recurse forever. */
	tdev = rt->dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}


	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* Rate-limited link-failure feedback after ICMP errors were
	 * reported for this tunnel (err_count set by the error handler). */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/* ttl == 0 means "inherit from the inner packet" (or the route
	 * default for non-IP payloads). */
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	/* Propagate the inner DF bit to the outer header. */
	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr)
					       + rt->dst.header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		dev->stats.tx_dropped++;
		dev_kfree_skb(skb);
		return;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Push down and install the IP header. */
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);

	/* Re-read both headers: skb_cow_head/skb_push may have moved data. */
	iph = ip_hdr(skb);
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	iph->version	=	4;
	iph->ihl	=	sizeof(struct iphdr) >> 2;
	iph->frag_off	=	df;
	iph->protocol	=	tnl_params->protocol;
	iph->tos	=	ip_tunnel_ecn_encap(tos, inner_iph, skb);
	iph->daddr	=	fl4.daddr;
	iph->saddr	=	fl4.saddr;
	iph->ttl	=	ttl;
	__ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1);

	iptunnel_xmit(skb, dev);
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
708
/* Apply new parameters @p to an existing tunnel @t.  The tunnel is
 * removed from and re-added to the hash table because addresses/keys
 * determine its bucket.  Caller holds RTNL.  If the underlying link
 * changed, the device is re-bound and (optionally) its MTU updated. */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* Non-Ethernet tunnels expose the endpoints as the
		 * device's hardware/broadcast addresses. */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	/* Notify userspace (rtnetlink) about the configuration change. */
	netdev_state_change(dev);
}
740
/* Legacy ioctl configuration interface shared by all IPv4 tunnel types:
 * SIOCGETTUNNEL reads parameters, SIOCADD/CHGTUNNEL create or update a
 * tunnel, SIOCDELTUNNEL removes one.  Privileged operations require
 * CAP_NET_ADMIN in the device's user namespace.  Returns 0 or a
 * negative errno; @p is updated in place for GET. */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		/* On the fallback device, GET looks up by parameters;
		 * on a specific device it reports that device. */
		if (dev == itn->fb_tunnel_dev)
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* A nonzero TTL forbids fragmentation of the outer packet. */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags&TUNNEL_KEY))
			p->i_key = 0;
		if (!(p->o_flags&TUNNEL_KEY))
			p->o_key = 0;

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL))
			t = ip_tunnel_create(net, itn, p);

		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Parameters already belong to another
				 * tunnel device: refuse the change. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				/* The new addressing must not change the
				 * device's broadcast/p-t-p nature. */
				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			/* The fallback device itself may not be deleted. */
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
834
835 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
836 {
837 struct ip_tunnel *tunnel = netdev_priv(dev);
838 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
839
840 if (new_mtu < 68 ||
841 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
842 return -EINVAL;
843 dev->mtu = new_mtu;
844 return 0;
845 }
846 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
847
/* net_device destructor: release the GRO cells and per-cpu stats that
 * ip_tunnel_init() allocated, then free the device itself. */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
856
/* rtnl_link_ops->dellink: unhash the tunnel and queue its device for
 * unregistration.  The per-netns fallback device is deliberately left
 * alone; it is torn down by the netns exit path instead. */
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
871
872 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
873 struct rtnl_link_ops *ops, char *devname)
874 {
875 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
876 struct ip_tunnel_parm parms;
877
878 itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL);
879 if (!itn->tunnels)
880 return -ENOMEM;
881
882 if (!ops) {
883 itn->fb_tunnel_dev = NULL;
884 return 0;
885 }
886 memset(&parms, 0, sizeof(parms));
887 if (devname)
888 strlcpy(parms.name, devname, IFNAMSIZ);
889
890 rtnl_lock();
891 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
892 rtnl_unlock();
893 if (IS_ERR(itn->fb_tunnel_dev)) {
894 kfree(itn->tunnels);
895 return PTR_ERR(itn->fb_tunnel_dev);
896 }
897
898 return 0;
899 }
900 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
901
/* Queue every tunnel device of this netns instance — hash-table entries
 * first, then the fallback device — onto @head for batched
 * unregistration.  Caller holds RTNL (hlist_for_each_entry_safe allows
 * the unregister path to unhash entries while we walk). */
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head)
{
	int h;

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			unregister_netdevice_queue(t->dev, head);
	}
	if (itn->fb_tunnel_dev)
		unregister_netdevice_queue(itn->fb_tunnel_dev, head);
}
917
/* Netns exit: unregister all tunnel devices of this instance in one
 * batch (cheaper than one-by-one), then free the bucket array. */
void ip_tunnel_delete_net(struct ip_tunnel_net *itn)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
	kfree(itn->tunnels);
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
929
/* rtnl_link_ops->newlink helper: register @dev as a new tunnel with
 * parameters @p, refusing duplicates of an existing configuration.
 * Assigns a random MAC for Ethernet-type tunnels and a computed MTU
 * unless the user supplied IFLA_ADDRESS / IFLA_MTU.  Returns 0 or a
 * negative errno. */
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (ip_tunnel_find(itn, p, dev->type))
		return -EEXIST;

	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	/* Only hash the tunnel once it is fully registered and bound. */
	ip_tunnel_add(itn, nt);

out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
963
/* rtnl_link_ops->changelink helper: apply new parameters @p to @dev.
 * Rejects changes on the fallback device, parameter sets that already
 * belong to a different tunnel, and addressing changes that would flip
 * the device between point-to-point and broadcast.  Returns 0 or a
 * negative errno. */
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t, *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	nt = netdev_priv(dev);

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = nt;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			/* The new addressing must preserve the device's
			 * broadcast/point-to-point nature. */
			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1003
/* ndo_init for tunnel devices: allocate per-cpu stats and GRO cells,
 * wire up the destructor that frees them, and seed the outer IPv4
 * header template.  Returns 0 or a negative errno (allocations are
 * rolled back on failure). */
int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->destructor	= ip_tunnel_dev_free;
	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	/* Both name buffers are IFNAMSIZ, so this copy cannot overflow. */
	strcpy(tunnel->parms.name, dev->name);
	iph->version		= 4;
	iph->ihl		= 5;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);
1029
/* ndo_uninit: remove the tunnel from the hash table when its device is
 * being unregistered.  Resource release happens in the destructor. */
void ip_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in the net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(netdev_priv(dev));
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1042
/* Do least required initialization, rest of init is done in tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	/* Record which per-netns instance this device belongs to. */
	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1050
1051 MODULE_LICENSE("GPL");