net: unify for_each_ip_tunnel_rcu()
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv4 / ipip.c
1 /*
2 * Linux NET3: IP/IP protocol decoder.
3 *
4 * Authors:
5 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
6 *
7 * Fixes:
8 * Alan Cox : Merged and made usable non modular (its so tiny its silly as
9 * a module taking up 2 pages).
10 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11 * to keep ip_forward happy.
12 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
14 * David Woodhouse : Perform some basic ICMP handling.
15 * IPIP Routing without decapsulation.
16 * Carlos Picoto : GRE over IP support
17 * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18 * I do not want to merge them together.
19 *
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
24 *
25 */
26
27 /* tunnel.c: an IP tunnel driver
28
29 The purpose of this driver is to provide an IP tunnel through
30 which you can tunnel network traffic transparently across subnets.
31
32 This was written by looking at Nick Holloway's dummy driver
33 Thanks for the great code!
34
35 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
36
37 Minor tweaks:
38 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39 dev->hard_header/hard_header_len changed to use no headers.
40 Comments/bracketing tweaked.
41 Made the tunnels use dev->name not tunnel: when error reporting.
42 Added tx_dropped stat
43
44 -Alan Cox (alan@lxorguk.ukuu.org.uk) 21 March 95
45
46 Reworked:
47 Changed to tunnel to destination gateway in addition to the
48 tunnel's pointopoint address
49 Almost completely rewritten
50 Note: There is currently no firewall or ICMP handling done.
51
52 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
53
54 */
55
56 /* Things I wish I had known when writing the tunnel driver:
57
58 When the tunnel_xmit() function is called, the skb contains the
59 packet to be sent (plus a great deal of extra info), and dev
60 contains the tunnel device that _we_ are.
61
62 When we are passed a packet, we are expected to fill in the
63 source address with our source IP address.
64
65 What is the proper way to allocate, copy and free a buffer?
66 After you allocate it, it is a "0 length" chunk of memory
67 starting at zero. If you want to add headers to the buffer
68 later, you'll have to call "skb_reserve(skb, amount)" with
69 the amount of memory you want reserved. Then, you call
70 "skb_put(skb, amount)" with the amount of space you want in
71 the buffer. skb_put() returns a pointer to the top (#0) of
72 that buffer. skb->len is set to the amount of space you have
73 "allocated" with skb_put(). You can then write up to skb->len
74 bytes to that buffer. If you need more, you can call skb_put()
75 again with the additional amount of space you need. You can
76 find out how much more space you can allocate by calling
77 "skb_tailroom(skb)".
78 Now, to add header space, call "skb_push(skb, header_len)".
79 This creates space at the beginning of the buffer and returns
80 a pointer to this new space. If later you need to strip a
81 header from a buffer, call "skb_pull(skb, header_len)".
82 skb_headroom() will return how much space is left at the top
83 of the buffer (before the main data). Remember, this headroom
84 space must be reserved before the skb_put() function is called.
85 */
86
87 /*
88 This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
89
90 For comments look at net/ipv4/ip_gre.c --ANK
91 */
92
93
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <asm/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/mroute.h>
107 #include <linux/init.h>
108 #include <linux/netfilter_ipv4.h>
109 #include <linux/if_ether.h>
110
111 #include <net/sock.h>
112 #include <net/ip.h>
113 #include <net/icmp.h>
114 #include <net/ipip.h>
115 #include <net/inet_ecn.h>
116 #include <net/xfrm.h>
117 #include <net/net_namespace.h>
118 #include <net/netns/generic.h>
119
/* Number of buckets in each address-keyed tunnel hash table. */
#define HASH_SIZE  16

/*
 * Fold an IPv4 address into a bucket index in the range 0..15.
 *
 * The argument is fully parenthesised so that a compound expression
 * (for example HASH(a ^ b)) is cast and shifted as a whole instead of
 * having the cast bind to its first operand only.  Note that the
 * argument is evaluated twice, so it must not have side effects.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)
122
/*
 * Module parameter: when true, ipip_rcv() emits a rate-limited log line
 * for packets whose ECN decapsulation reports an error.
 */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126
/* Key passed to net_generic() to obtain this module's per-namespace data. */
static int ipip_net_id __read_mostly;

/*
 * Per network namespace state of the IPIP driver.
 *
 * Tunnels are kept in four tables, selected by which of the tunnel
 * endpoints are configured (see __ipip_bucket()); tunnels[] gives
 * indexed access to the same four tables and is filled in by
 * ipip_init_net().
 */
struct ipip_net {
	/* tunnels with both remote and local address set */
	struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
	/* tunnels with only the remote address set */
	struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
	/* tunnels with only the local address set */
	struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
	/* wildcard slot: neither address set (holds the fallback tunnel) */
	struct ip_tunnel __rcu *tunnels_wc[1];
	/* [0] = wc, [1] = l, [2] = r, [3] = r_l */
	struct ip_tunnel __rcu **tunnels[4];

	/* the "tunl0" fallback device of this namespace */
	struct net_device *fb_tunnel_dev;
};

/* Forward declarations for symbols defined further down in this file. */
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);
static void ipip_dev_free(struct net_device *dev);
static struct rtnl_link_ops ipip_link_ops __read_mostly;
142
143 static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
144 struct rtnl_link_stats64 *tot)
145 {
146 int i;
147
148 for_each_possible_cpu(i) {
149 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
150 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
151 unsigned int start;
152
153 do {
154 start = u64_stats_fetch_begin_bh(&tstats->syncp);
155 rx_packets = tstats->rx_packets;
156 tx_packets = tstats->tx_packets;
157 rx_bytes = tstats->rx_bytes;
158 tx_bytes = tstats->tx_bytes;
159 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
160
161 tot->rx_packets += rx_packets;
162 tot->tx_packets += tx_packets;
163 tot->rx_bytes += rx_bytes;
164 tot->tx_bytes += tx_bytes;
165 }
166
167 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
168 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
169 tot->tx_dropped = dev->stats.tx_dropped;
170 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
171 tot->tx_errors = dev->stats.tx_errors;
172 tot->collisions = dev->stats.collisions;
173
174 return tot;
175 }
176
177 static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
178 __be32 remote, __be32 local)
179 {
180 unsigned int h0 = HASH(remote);
181 unsigned int h1 = HASH(local);
182 struct ip_tunnel *t;
183 struct ipip_net *ipn = net_generic(net, ipip_net_id);
184
185 for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
186 if (local == t->parms.iph.saddr &&
187 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
188 return t;
189
190 for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
191 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
192 return t;
193
194 for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
195 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
196 return t;
197
198 t = rcu_dereference(ipn->tunnels_wc[0]);
199 if (t && (t->dev->flags&IFF_UP))
200 return t;
201 return NULL;
202 }
203
204 static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
205 struct ip_tunnel_parm *parms)
206 {
207 __be32 remote = parms->iph.daddr;
208 __be32 local = parms->iph.saddr;
209 unsigned int h = 0;
210 int prio = 0;
211
212 if (remote) {
213 prio |= 2;
214 h ^= HASH(remote);
215 }
216 if (local) {
217 prio |= 1;
218 h ^= HASH(local);
219 }
220 return &ipn->tunnels[prio][h];
221 }
222
223 static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
224 struct ip_tunnel *t)
225 {
226 return __ipip_bucket(ipn, &t->parms);
227 }
228
229 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
230 {
231 struct ip_tunnel __rcu **tp;
232 struct ip_tunnel *iter;
233
234 for (tp = ipip_bucket(ipn, t);
235 (iter = rtnl_dereference(*tp)) != NULL;
236 tp = &iter->next) {
237 if (t == iter) {
238 rcu_assign_pointer(*tp, t->next);
239 break;
240 }
241 }
242 }
243
244 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
245 {
246 struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
247
248 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
249 rcu_assign_pointer(*tp, t);
250 }
251
252 static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
253 struct ip_tunnel_parm *parms, int create)
254 {
255 __be32 remote = parms->iph.daddr;
256 __be32 local = parms->iph.saddr;
257 struct ip_tunnel *t, *nt;
258 struct ip_tunnel __rcu **tp;
259 struct net_device *dev;
260 char name[IFNAMSIZ];
261 struct ipip_net *ipn = net_generic(net, ipip_net_id);
262
263 for (tp = __ipip_bucket(ipn, parms);
264 (t = rtnl_dereference(*tp)) != NULL;
265 tp = &t->next) {
266 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
267 return t;
268 }
269 if (!create)
270 return NULL;
271
272 if (parms->name[0])
273 strlcpy(name, parms->name, IFNAMSIZ);
274 else
275 strcpy(name, "tunl%d");
276
277 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
278 if (dev == NULL)
279 return NULL;
280
281 dev_net_set(dev, net);
282
283 nt = netdev_priv(dev);
284 nt->parms = *parms;
285
286 if (ipip_tunnel_init(dev) < 0)
287 goto failed_free;
288
289 if (register_netdevice(dev) < 0)
290 goto failed_free;
291
292 strcpy(nt->parms.name, dev->name);
293 dev->rtnl_link_ops = &ipip_link_ops;
294
295 dev_hold(dev);
296 ipip_tunnel_link(ipn, nt);
297 return nt;
298
299 failed_free:
300 ipip_dev_free(dev);
301 return NULL;
302 }
303
304 /* called with RTNL */
305 static void ipip_tunnel_uninit(struct net_device *dev)
306 {
307 struct net *net = dev_net(dev);
308 struct ipip_net *ipn = net_generic(net, ipip_net_id);
309
310 if (dev == ipn->fb_tunnel_dev)
311 RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
312 else
313 ipip_tunnel_unlink(ipn, netdev_priv(dev));
314 dev_put(dev);
315 }
316
317 static int ipip_err(struct sk_buff *skb, u32 info)
318 {
319
320 /* All the routers (except for Linux) return only
321 8 bytes of packet payload. It means, that precise relaying of
322 ICMP in the real Internet is absolutely infeasible.
323 */
324 const struct iphdr *iph = (const struct iphdr *)skb->data;
325 const int type = icmp_hdr(skb)->type;
326 const int code = icmp_hdr(skb)->code;
327 struct ip_tunnel *t;
328 int err;
329
330 switch (type) {
331 default:
332 case ICMP_PARAMETERPROB:
333 return 0;
334
335 case ICMP_DEST_UNREACH:
336 switch (code) {
337 case ICMP_SR_FAILED:
338 case ICMP_PORT_UNREACH:
339 /* Impossible event. */
340 return 0;
341 default:
342 /* All others are translated to HOST_UNREACH.
343 rfc2003 contains "deep thoughts" about NET_UNREACH,
344 I believe they are just ether pollution. --ANK
345 */
346 break;
347 }
348 break;
349 case ICMP_TIME_EXCEEDED:
350 if (code != ICMP_EXC_TTL)
351 return 0;
352 break;
353 case ICMP_REDIRECT:
354 break;
355 }
356
357 err = -ENOENT;
358 t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
359 if (t == NULL)
360 goto out;
361
362 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
363 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
364 t->dev->ifindex, 0, IPPROTO_IPIP, 0);
365 err = 0;
366 goto out;
367 }
368
369 if (type == ICMP_REDIRECT) {
370 ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
371 IPPROTO_IPIP, 0);
372 err = 0;
373 goto out;
374 }
375
376 if (t->parms.iph.daddr == 0)
377 goto out;
378
379 err = 0;
380 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
381 goto out;
382
383 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
384 t->err_count++;
385 else
386 t->err_count = 1;
387 t->err_time = jiffies;
388 out:
389
390 return err;
391 }
392
393 static int ipip_rcv(struct sk_buff *skb)
394 {
395 struct ip_tunnel *tunnel;
396 const struct iphdr *iph = ip_hdr(skb);
397 int err;
398
399 tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
400 if (tunnel != NULL) {
401 struct pcpu_tstats *tstats;
402
403 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
404 goto drop;
405
406 secpath_reset(skb);
407
408 skb->mac_header = skb->network_header;
409 skb_reset_network_header(skb);
410 skb->protocol = htons(ETH_P_IP);
411 skb->pkt_type = PACKET_HOST;
412
413 __skb_tunnel_rx(skb, tunnel->dev);
414
415 err = IP_ECN_decapsulate(iph, skb);
416 if (unlikely(err)) {
417 if (log_ecn_error)
418 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
419 &iph->saddr, iph->tos);
420 if (err > 1) {
421 ++tunnel->dev->stats.rx_frame_errors;
422 ++tunnel->dev->stats.rx_errors;
423 goto drop;
424 }
425 }
426
427 tstats = this_cpu_ptr(tunnel->dev->tstats);
428 u64_stats_update_begin(&tstats->syncp);
429 tstats->rx_packets++;
430 tstats->rx_bytes += skb->len;
431 u64_stats_update_end(&tstats->syncp);
432
433 netif_rx(skb);
434 return 0;
435 }
436
437 return -1;
438
439 drop:
440 kfree_skb(skb);
441 return 0;
442 }
443
/*
 *	Transmit handler: encapsulate an IPv4 packet in a new outer IPv4
 *	header and send it towards the tunnel's remote endpoint.
 *
 *	This function assumes it is being called from dev_queue_xmit()
 *	and that skb is filled properly by that function.
 *
 *	Every path returns NETDEV_TX_OK; on failure the skb is freed and
 *	the relevant dev->stats error counter is incremented.
 */

static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr  *tiph = &tunnel->parms.iph;
	u8     tos = tunnel->parms.iph.tos;
	__be16 df = tiph->frag_off;
	struct rtable *rt;     			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	const struct iphdr  *old_iph = ip_hdr(skb);
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	__be32 dst = tiph->daddr;
	struct flowi4 fl4;
	int    mtu;

	/* Only IPv4 payloads can be carried. */
	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

	/* Resolve a pending partial checksum before encapsulating. */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    skb_checksum_help(skb))
		goto tx_error;

	/* Low bit set in the configured TOS: take the TOS of the inner header. */
	if (tos & 1)
		tos = old_iph->tos;

	if (!dst) {
		/* NBMA tunnel */
		/* No fixed remote: use the next hop of the route attached to the skb. */
		if ((rt = skb_rtable(skb)) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}
		dst = rt_nexthop(rt, old_iph->daddr);
	}

	rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
				   dst, tiph->saddr,
				   0, 0,
				   IPPROTO_IPIP, RT_TOS(tos),
				   tunnel->parms.link);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error_icmp;
	}
	tdev = rt->dst.dev;

	/* The route points back at this tunnel device: refuse to loop. */
	if (tdev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	/* Add the DF bit of the inner packet to the configured one. */
	df |= old_iph->frag_off & htons(IP_DF);

	if (df) {
		/* MTU available to the inner packet after adding the outer header. */
		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);

		if (mtu < 68) {
			dev->stats.collisions++;
			ip_rt_put(rt);
			goto tx_error;
		}

		if (skb_dst(skb))
			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

		/* Inner packet has DF set and does not fit: report it to the sender. */
		if ((old_iph->frag_off & htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
				  htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}

	/*
	 * Errors recorded by ipip_err(): while inside the timeout window,
	 * signal one link failure per queued error; otherwise reset the count.
	 */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			dev->stats.tx_dropped++;
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		/* The old skb is gone: re-read the inner header from the new one. */
		old_iph = ip_hdr(skb);
	}

	/* The inner IP header becomes the transport header of the outer packet. */
	skb->transport_header = skb->network_header;
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	/* Replace the skb's dst with the route to the tunnel endpoint. */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/*
	 *	Push down and install the IPIP header.
	 */

	iph 			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
	iph->daddr		=	fl4.daddr;
	iph->saddr		=	fl4.saddr;

	/* A configured TTL of 0 means: copy the TTL of the inner header. */
	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl	=	old_iph->ttl;

	iptunnel_xmit(skb, dev);
	return NETDEV_TX_OK;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
588
589 static void ipip_tunnel_bind_dev(struct net_device *dev)
590 {
591 struct net_device *tdev = NULL;
592 struct ip_tunnel *tunnel;
593 const struct iphdr *iph;
594
595 tunnel = netdev_priv(dev);
596 iph = &tunnel->parms.iph;
597
598 if (iph->daddr) {
599 struct rtable *rt;
600 struct flowi4 fl4;
601
602 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
603 iph->daddr, iph->saddr,
604 0, 0,
605 IPPROTO_IPIP,
606 RT_TOS(iph->tos),
607 tunnel->parms.link);
608 if (!IS_ERR(rt)) {
609 tdev = rt->dst.dev;
610 ip_rt_put(rt);
611 }
612 dev->flags |= IFF_POINTOPOINT;
613 }
614
615 if (!tdev && tunnel->parms.link)
616 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
617
618 if (tdev) {
619 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
620 dev->mtu = tdev->mtu - sizeof(struct iphdr);
621 }
622 dev->iflink = tunnel->parms.link;
623 }
624
/*
 * Tunnel configuration ioctl (ndo_do_ioctl).
 *
 * Handles SIOCGETTUNNEL, SIOCADDTUNNEL, SIOCCHGTUNNEL and SIOCDELTUNNEL;
 * the struct ip_tunnel_parm is exchanged through ifr->ifr_ifru.ifru_data.
 * When issued on the fallback device, GET/CHG/DEL act on the tunnel
 * identified by the user-supplied parameters rather than on @dev.
 *
 * Returns 0 on success or a negative errno: -EFAULT on a failed user
 * copy, -EPERM without CAP_NET_ADMIN (or when deleting the fallback
 * device), -EINVAL for bad parameters or an unknown command, -EEXIST,
 * -ENOENT or -ENOBUFS as described below.
 */
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		/* On the fallback device, look up the tunnel described by the caller. */
		if (dev == ipn->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip_tunnel_locate(net, &p, 0);
		}
		/* Not the fallback device, or nothing matched: report @dev itself. */
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* Only a plain IPv4/IPIP header template with at most DF set is accepted. */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		/* A fixed TTL forces DF on the outer header. */
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		/* Creates the tunnel only for SIOCADDTUNNEL. */
		t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* The requested endpoints already belong to another device. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* Refuse to switch between point-to-point and non-point-to-point. */
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				/*
				 * Re-hash @dev under its new endpoints: unlink,
				 * call synchronize_net(), update the addresses,
				 * then link it back in.
				 */
				t = netdev_priv(dev);
				ipip_tunnel_unlink(ipn, t);
				synchronize_net();
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipip_tunnel_link(ipn, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					ipip_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			/* Hand the resulting parameters back to the caller. */
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			/* ADD: creation failed; CHG: no such tunnel. */
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipn->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			/* The fallback device itself cannot be deleted this way. */
			err = -EPERM;
			if (t->dev == ipn->fb_tunnel_dev)
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
740
741 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
742 {
743 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
744 return -EINVAL;
745 dev->mtu = new_mtu;
746 return 0;
747 }
748
749 static const struct net_device_ops ipip_netdev_ops = {
750 .ndo_uninit = ipip_tunnel_uninit,
751 .ndo_start_xmit = ipip_tunnel_xmit,
752 .ndo_do_ioctl = ipip_tunnel_ioctl,
753 .ndo_change_mtu = ipip_tunnel_change_mtu,
754 .ndo_get_stats64 = ipip_get_stats64,
755 };
756
/*
 * Release a tunnel device: free its per-CPU statistics block first,
 * then the net_device.  Installed as dev->destructor by
 * ipip_tunnel_setup() and also used on setup error paths.
 */
static void ipip_dev_free(struct net_device *dev)
{
	free_percpu(dev->tstats);
	free_netdev(dev);
}
762
763 #define IPIP_FEATURES (NETIF_F_SG | \
764 NETIF_F_FRAGLIST | \
765 NETIF_F_HIGHDMA | \
766 NETIF_F_HW_CSUM)
767
768 static void ipip_tunnel_setup(struct net_device *dev)
769 {
770 dev->netdev_ops = &ipip_netdev_ops;
771 dev->destructor = ipip_dev_free;
772
773 dev->type = ARPHRD_TUNNEL;
774 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
775 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
776 dev->flags = IFF_NOARP;
777 dev->iflink = 0;
778 dev->addr_len = 4;
779 dev->features |= NETIF_F_NETNS_LOCAL;
780 dev->features |= NETIF_F_LLTX;
781 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
782
783 dev->features |= IPIP_FEATURES;
784 dev->hw_features |= IPIP_FEATURES;
785 }
786
787 static int ipip_tunnel_init(struct net_device *dev)
788 {
789 struct ip_tunnel *tunnel = netdev_priv(dev);
790
791 tunnel->dev = dev;
792
793 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
794 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
795
796 ipip_tunnel_bind_dev(dev);
797
798 dev->tstats = alloc_percpu(struct pcpu_tstats);
799 if (!dev->tstats)
800 return -ENOMEM;
801
802 return 0;
803 }
804
805 static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
806 {
807 struct ip_tunnel *tunnel = netdev_priv(dev);
808 struct iphdr *iph = &tunnel->parms.iph;
809 struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
810
811 tunnel->dev = dev;
812 strcpy(tunnel->parms.name, dev->name);
813
814 iph->version = 4;
815 iph->protocol = IPPROTO_IPIP;
816 iph->ihl = 5;
817
818 dev->tstats = alloc_percpu(struct pcpu_tstats);
819 if (!dev->tstats)
820 return -ENOMEM;
821
822 dev_hold(dev);
823 rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
824 return 0;
825 }
826
827 static size_t ipip_get_size(const struct net_device *dev)
828 {
829 return
830 /* IFLA_IPTUN_LINK */
831 nla_total_size(4) +
832 /* IFLA_IPTUN_LOCAL */
833 nla_total_size(4) +
834 /* IFLA_IPTUN_REMOTE */
835 nla_total_size(4) +
836 /* IFLA_IPTUN_TTL */
837 nla_total_size(1) +
838 /* IFLA_IPTUN_TOS */
839 nla_total_size(1) +
840 0;
841 }
842
843 static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
844 {
845 struct ip_tunnel *tunnel = netdev_priv(dev);
846 struct ip_tunnel_parm *parm = &tunnel->parms;
847
848 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
849 nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
850 nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
851 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
852 nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos))
853 goto nla_put_failure;
854 return 0;
855
856 nla_put_failure:
857 return -EMSGSIZE;
858 }
859
860 static struct rtnl_link_ops ipip_link_ops __read_mostly = {
861 .kind = "ipip",
862 .maxtype = IFLA_IPTUN_MAX,
863 .priv_size = sizeof(struct ip_tunnel),
864 .get_size = ipip_get_size,
865 .fill_info = ipip_fill_info,
866 };
867
868 static struct xfrm_tunnel ipip_handler __read_mostly = {
869 .handler = ipip_rcv,
870 .err_handler = ipip_err,
871 .priority = 1,
872 };
873
/* Message printed once by ipip_init(); carries its own log level. */
static const char banner[] __initconst =
	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
876
877 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
878 {
879 int prio;
880
881 for (prio = 1; prio < 4; prio++) {
882 int h;
883 for (h = 0; h < HASH_SIZE; h++) {
884 struct ip_tunnel *t;
885
886 t = rtnl_dereference(ipn->tunnels[prio][h]);
887 while (t != NULL) {
888 unregister_netdevice_queue(t->dev, head);
889 t = rtnl_dereference(t->next);
890 }
891 }
892 }
893 }
894
895 static int __net_init ipip_init_net(struct net *net)
896 {
897 struct ipip_net *ipn = net_generic(net, ipip_net_id);
898 struct ip_tunnel *t;
899 int err;
900
901 ipn->tunnels[0] = ipn->tunnels_wc;
902 ipn->tunnels[1] = ipn->tunnels_l;
903 ipn->tunnels[2] = ipn->tunnels_r;
904 ipn->tunnels[3] = ipn->tunnels_r_l;
905
906 ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
907 "tunl0",
908 ipip_tunnel_setup);
909 if (!ipn->fb_tunnel_dev) {
910 err = -ENOMEM;
911 goto err_alloc_dev;
912 }
913 dev_net_set(ipn->fb_tunnel_dev, net);
914
915 err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
916 if (err)
917 goto err_reg_dev;
918
919 if ((err = register_netdev(ipn->fb_tunnel_dev)))
920 goto err_reg_dev;
921
922 t = netdev_priv(ipn->fb_tunnel_dev);
923
924 strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
925 return 0;
926
927 err_reg_dev:
928 ipip_dev_free(ipn->fb_tunnel_dev);
929 err_alloc_dev:
930 /* nothing */
931 return err;
932 }
933
934 static void __net_exit ipip_exit_net(struct net *net)
935 {
936 struct ipip_net *ipn = net_generic(net, ipip_net_id);
937 LIST_HEAD(list);
938
939 rtnl_lock();
940 ipip_destroy_tunnels(ipn, &list);
941 unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
942 unregister_netdevice_many(&list);
943 rtnl_unlock();
944 }
945
946 static struct pernet_operations ipip_net_ops = {
947 .init = ipip_init_net,
948 .exit = ipip_exit_net,
949 .id = &ipip_net_id,
950 .size = sizeof(struct ipip_net),
951 };
952
953 static int __init ipip_init(void)
954 {
955 int err;
956
957 printk(banner);
958
959 err = register_pernet_device(&ipip_net_ops);
960 if (err < 0)
961 return err;
962 err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
963 if (err < 0) {
964 pr_info("%s: can't register tunnel\n", __func__);
965 goto xfrm_tunnel_failed;
966 }
967 err = rtnl_link_register(&ipip_link_ops);
968 if (err < 0)
969 goto rtnl_link_failed;
970
971 out:
972 return err;
973
974 rtnl_link_failed:
975 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
976 xfrm_tunnel_failed:
977 unregister_pernet_device(&ipip_net_ops);
978 goto out;
979 }
980
981 static void __exit ipip_fini(void)
982 {
983 rtnl_link_unregister(&ipip_link_ops);
984 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
985 pr_info("%s: can't deregister tunnel\n", __func__);
986
987 unregister_pernet_device(&ipip_net_ops);
988 }
989
990 module_init(ipip_init);
991 module_exit(ipip_fini);
992 MODULE_LICENSE("GPL");
993 MODULE_ALIAS_NETDEV("tunl0");