[IPIP]: Make the fallback tunnel device per-net.
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv4 / ipip.c
1 /*
2 * Linux NET3: IP/IP protocol decoder.
3 *
4 * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5 *
6 * Authors:
7 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
8 *
9 * Fixes:
10 * Alan Cox : Merged and made usable non modular (its so tiny its silly as
11 * a module taking up 2 pages).
12 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13 * to keep ip_forward happy.
14 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
16 * David Woodhouse : Perform some basic ICMP handling.
17 * IPIP Routing without decapsulation.
18 * Carlos Picoto : GRE over IP support
19 * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20 * I do not want to merge them together.
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 *
27 */
28
29 /* tunnel.c: an IP tunnel driver
30
31 The purpose of this driver is to provide an IP tunnel through
32 which you can tunnel network traffic transparently across subnets.
33
34 This was written by looking at Nick Holloway's dummy driver
35 Thanks for the great code!
36
37 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
38
39 Minor tweaks:
40 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41 dev->hard_header/hard_header_len changed to use no headers.
42 Comments/bracketing tweaked.
43 Made the tunnels use dev->name not tunnel: when error reporting.
44 Added tx_dropped stat
45
46 -Alan Cox (Alan.Cox@linux.org) 21 March 95
47
48 Reworked:
49 Changed to tunnel to destination gateway in addition to the
50 tunnel's pointopoint address
51 Almost completely rewritten
52 Note: There is currently no firewall or ICMP handling done.
53
54 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
55
56 */
57
58 /* Things I wish I had known when writing the tunnel driver:
59
60 When the tunnel_xmit() function is called, the skb contains the
61 packet to be sent (plus a great deal of extra info), and dev
62 contains the tunnel device that _we_ are.
63
64 When we are passed a packet, we are expected to fill in the
65 source address with our source IP address.
66
67 What is the proper way to allocate, copy and free a buffer?
68 After you allocate it, it is a "0 length" chunk of memory
69 starting at zero. If you want to add headers to the buffer
70 later, you'll have to call "skb_reserve(skb, amount)" with
71 the amount of memory you want reserved. Then, you call
72 "skb_put(skb, amount)" with the amount of space you want in
73 the buffer. skb_put() returns a pointer to the top (#0) of
74 that buffer. skb->len is set to the amount of space you have
75 "allocated" with skb_put(). You can then write up to skb->len
76 bytes to that buffer. If you need more, you can call skb_put()
77 again with the additional amount of space you need. You can
78 find out how much more space you can allocate by calling
79 "skb_tailroom(skb)".
80 Now, to add header space, call "skb_push(skb, header_len)".
81 This creates space at the beginning of the buffer and returns
82 a pointer to this new space. If later you need to strip a
83 header from a buffer, call "skb_pull(skb, header_len)".
84 skb_headroom() will return how much space is left at the top
85 of the buffer (before the main data). Remember, this headroom
86 space must be reserved before the skb_put() function is called.
87 */
88
89 /*
90 This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
91
92 For comments look at net/ipv4/ip_gre.c --ANK
93 */
94
95
96 #include <linux/capability.h>
97 #include <linux/module.h>
98 #include <linux/types.h>
99 #include <linux/kernel.h>
100 #include <asm/uaccess.h>
101 #include <linux/skbuff.h>
102 #include <linux/netdevice.h>
103 #include <linux/in.h>
104 #include <linux/tcp.h>
105 #include <linux/udp.h>
106 #include <linux/if_arp.h>
107 #include <linux/mroute.h>
108 #include <linux/init.h>
109 #include <linux/netfilter_ipv4.h>
110 #include <linux/if_ether.h>
111
112 #include <net/sock.h>
113 #include <net/ip.h>
114 #include <net/icmp.h>
115 #include <net/ipip.h>
116 #include <net/inet_ecn.h>
117 #include <net/xfrm.h>
118 #include <net/net_namespace.h>
119 #include <net/netns/generic.h>
120
/* 16-bucket hash keyed on a tunnel endpoint address. */
#define HASH_SIZE  16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

/* Slot in the per-net generic pointer array that holds our struct ipip_net. */
static int ipip_net_id;

/* Per-network-namespace state: so far only the fallback "tunl0" device. */
struct ipip_net {
	struct net_device *fb_tunnel_dev;
};

static int ipip_fb_tunnel_init(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);

/* Tunnel hash lists, still global rather than per-net:
 *   tunnels_r_l - both remote and local endpoint set
 *   tunnels_r   - remote endpoint only
 *   tunnels_l   - local endpoint only
 *   tunnels_wc  - wildcard slot (the fallback device)
 * tunnels[] is indexed by the prio value built in __ipip_bucket(). */
static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
static struct ip_tunnel *tunnels_r[HASH_SIZE];
static struct ip_tunnel *tunnels_l[HASH_SIZE];
static struct ip_tunnel *tunnels_wc[1];
static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };

/* Guards the lists above: readers are the rcv/err softirq paths,
 * writers (link/unlink) run under RTNL. */
static DEFINE_RWLOCK(ipip_lock);
141 static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
142 {
143 unsigned h0 = HASH(remote);
144 unsigned h1 = HASH(local);
145 struct ip_tunnel *t;
146
147 for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
148 if (local == t->parms.iph.saddr &&
149 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
150 return t;
151 }
152 for (t = tunnels_r[h0]; t; t = t->next) {
153 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
154 return t;
155 }
156 for (t = tunnels_l[h1]; t; t = t->next) {
157 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
158 return t;
159 }
160 if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
161 return t;
162 return NULL;
163 }
164
165 static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
166 {
167 __be32 remote = parms->iph.daddr;
168 __be32 local = parms->iph.saddr;
169 unsigned h = 0;
170 int prio = 0;
171
172 if (remote) {
173 prio |= 2;
174 h ^= HASH(remote);
175 }
176 if (local) {
177 prio |= 1;
178 h ^= HASH(local);
179 }
180 return &tunnels[prio][h];
181 }
182
/* Hash-list head for an existing tunnel, derived from its parameters. */
static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
{
	return __ipip_bucket(&t->parms);
}
187
188 static void ipip_tunnel_unlink(struct ip_tunnel *t)
189 {
190 struct ip_tunnel **tp;
191
192 for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
193 if (t == *tp) {
194 write_lock_bh(&ipip_lock);
195 *tp = t->next;
196 write_unlock_bh(&ipip_lock);
197 break;
198 }
199 }
200 }
201
202 static void ipip_tunnel_link(struct ip_tunnel *t)
203 {
204 struct ip_tunnel **tp = ipip_bucket(t);
205
206 t->next = *tp;
207 write_lock_bh(&ipip_lock);
208 *tp = t;
209 write_unlock_bh(&ipip_lock);
210 }
211
212 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
213 {
214 __be32 remote = parms->iph.daddr;
215 __be32 local = parms->iph.saddr;
216 struct ip_tunnel *t, **tp, *nt;
217 struct net_device *dev;
218 char name[IFNAMSIZ];
219
220 for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
221 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
222 return t;
223 }
224 if (!create)
225 return NULL;
226
227 if (parms->name[0])
228 strlcpy(name, parms->name, IFNAMSIZ);
229 else
230 sprintf(name, "tunl%%d");
231
232 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
233 if (dev == NULL)
234 return NULL;
235
236 if (strchr(name, '%')) {
237 if (dev_alloc_name(dev, name) < 0)
238 goto failed_free;
239 }
240
241 nt = netdev_priv(dev);
242 dev->init = ipip_tunnel_init;
243 nt->parms = *parms;
244
245 if (register_netdevice(dev) < 0)
246 goto failed_free;
247
248 dev_hold(dev);
249 ipip_tunnel_link(nt);
250 return nt;
251
252 failed_free:
253 free_netdev(dev);
254 return NULL;
255 }
256
257 static void ipip_tunnel_uninit(struct net_device *dev)
258 {
259 struct net *net = dev_net(dev);
260 struct ipip_net *ipn = net_generic(net, ipip_net_id);
261
262 if (dev == ipn->fb_tunnel_dev) {
263 write_lock_bh(&ipip_lock);
264 tunnels_wc[0] = NULL;
265 write_unlock_bh(&ipip_lock);
266 } else
267 ipip_tunnel_unlink(netdev_priv(dev));
268 dev_put(dev);
269 }
270
/*
 * ICMP error handler for the outer (encapsulating) header.
 * Looks up the tunnel the failed packet belonged to and records an
 * error-burst counter on it, which ipip_tunnel_xmit() later uses to
 * report link failure to senders.  Returns 0 if the error was
 * consumed/ignored, -ENOENT if no matching tunnel was found.
 *
 * NOTE(review): the #else branch below is dead code — it references
 * identifiers (dp, len, key) that are not defined in this scope and
 * would not compile if I_WISH_WORLD_WERE_PERFECT were ever defined.
 */
static int ipip_err(struct sk_buff *skb, u32 info)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	/* skb->data points at the inner (original outgoing) IPv4 header
	   quoted in the ICMP payload. */
	struct iphdr *iph = (struct iphdr*)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	int err;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	}

	err = -ENOENT;

	read_lock(&ipip_lock);
	/* The quoted header is our own transmit, so its daddr is the
	   tunnel remote and saddr the tunnel local endpoint. */
	t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
	if (t == NULL || t->parms.iph.daddr == 0)
		goto out;

	err = 0;
	/* TTL errors on inherit-TTL tunnels are expected (e.g. traceroute
	   through the tunnel); do not count them. */
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	/* Errors inside IPTUNNEL_ERR_TIMEO of each other accumulate;
	   otherwise restart the burst counter. */
	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	read_unlock(&ipip_lock);
	return err;
#else
	/* DEAD CODE: never compiled (see NOTE above). Kept for history. */
	struct iphdr *iph = (struct iphdr*)dp;
	int hlen = iph->ihl<<2;
	struct iphdr *eiph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	int rel_type = 0;
	int rel_code = 0;
	__be32 rel_info = 0;
	__u32 n = 0;
	struct sk_buff *skb2;
	struct flowi fl;
	struct rtable *rt;

	if (len < hlen + sizeof(struct iphdr))
		return 0;
	eiph = (struct iphdr*)(dp + hlen);

	switch (type) {
	default:
		return 0;
	case ICMP_PARAMETERPROB:
		n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
		if (n < hlen)
			return 0;

		/* So... This guy found something strange INSIDE encapsulated
		   packet. Well, he is fool, but what can we do ?
		 */
		rel_type = ICMP_PARAMETERPROB;
		rel_info = htonl((n - hlen) << 24);
		break;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		case ICMP_FRAG_NEEDED:
			/* And it is the only really necessary thing :-) */
			n = ntohs(icmp_hdr(skb)->un.frag.mtu);
			if (n < hlen+68)
				return 0;
			n -= hlen;
			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
			if (n > ntohs(eiph->tot_len))
				return 0;
			rel_info = htonl(n);
			break;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe, it is just ether pollution. --ANK
			 */
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	}

	/* Prepare fake skb to feed it to icmp_send */
	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (skb2 == NULL)
		return 0;
	dst_release(skb2->dst);
	skb2->dst = NULL;
	skb_pull(skb2, skb->data - (u8*)eiph);
	skb_reset_network_header(skb2);

	/* Try to guess incoming interface */
	memset(&fl, 0, sizeof(fl));
	fl.fl4_daddr = eiph->saddr;
	fl.fl4_tos = RT_TOS(eiph->tos);
	fl.proto = IPPROTO_IPIP;
	if (ip_route_output_key(&init_net, &rt, &key)) {
		kfree_skb(skb2);
		return 0;
	}
	skb2->dev = rt->u.dst.dev;

	/* route "incoming" packet */
	if (rt->rt_flags&RTCF_LOCAL) {
		ip_rt_put(rt);
		rt = NULL;
		fl.fl4_daddr = eiph->daddr;
		fl.fl4_src = eiph->saddr;
		fl.fl4_tos = eiph->tos;
		if (ip_route_output_key(&init_net, &rt, &fl) ||
		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
			ip_rt_put(rt);
			kfree_skb(skb2);
			return 0;
		}
	} else {
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
		    skb2->dst->dev->type != ARPHRD_TUNNEL) {
			kfree_skb(skb2);
			return 0;
		}
	}

	/* change mtu on this route */
	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		if (n > dst_mtu(skb2->dst)) {
			kfree_skb(skb2);
			return 0;
		}
		skb2->dst->ops->update_pmtu(skb2->dst, n);
	} else if (type == ICMP_TIME_EXCEEDED) {
		struct ip_tunnel *t = netdev_priv(skb2->dev);
		if (t->parms.iph.ttl) {
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
		}
	}

	icmp_send(skb2, rel_type, rel_code, rel_info);
	kfree_skb(skb2);
	return 0;
#endif
}
460
461 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
462 struct sk_buff *skb)
463 {
464 struct iphdr *inner_iph = ip_hdr(skb);
465
466 if (INET_ECN_is_ce(outer_iph->tos))
467 IP_ECN_set_ce(inner_iph);
468 }
469
/*
 * Receive path: decapsulate an IPIP packet and hand the inner IPv4
 * packet back to the stack via the matching tunnel device.
 * Returns 0 when consumed, -1 to let other protocol handlers (or the
 * ICMP protocol-unreachable path) deal with the packet.
 */
static int ipip_rcv(struct sk_buff *skb)
{
	struct ip_tunnel *tunnel;
	const struct iphdr *iph = ip_hdr(skb);

	read_lock(&ipip_lock);
	/* Lookup by (outer saddr, outer daddr) = (tunnel remote, local). */
	if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
			/* IPsec policy forbids this packet; drop silently. */
			read_unlock(&ipip_lock);
			kfree_skb(skb);
			return 0;
		}

		secpath_reset(skb);

		/* Strip the outer header: the old network header becomes
		   the (unused) MAC header, the inner header the new
		   network header. */
		skb->mac_header = skb->network_header;
		skb_reset_network_header(skb);
		skb->protocol = htons(ETH_P_IP);
		skb->pkt_type = PACKET_HOST;

		tunnel->stat.rx_packets++;
		tunnel->stat.rx_bytes += skb->len;
		skb->dev = tunnel->dev;
		/* Drop the route of the outer packet before re-injecting. */
		dst_release(skb->dst);
		skb->dst = NULL;
		nf_reset(skb);
		ipip_ecn_decapsulate(iph, skb);
		netif_rx(skb);
		read_unlock(&ipip_lock);
		return 0;
	}
	read_unlock(&ipip_lock);

	return -1;
}
505
506 /*
507 * This function assumes it is being called from dev_queue_xmit()
508 * and that skb is filled properly by that function.
509 */
510
/*
 * Transmit path: encapsulate an IPv4 packet in an outer IPv4 header
 * and send it toward the tunnel endpoint.
 * Always returns 0 (the skb is either transmitted or freed here).
 */
static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net_device_stats *stats = &tunnel->stat;
	struct iphdr *tiph = &tunnel->parms.iph;
	u8 tos = tunnel->parms.iph.tos;
	__be16 df = tiph->frag_off;
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr *old_iph = ip_hdr(skb);
	struct iphdr *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	__be32 dst = tiph->daddr;
	int mtu;

	/* Guard against the tunnel routing back into itself. */
	if (tunnel->recursion++) {
		tunnel->stat.collisions++;
		goto tx_error;
	}

	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

	/* tos bit 0 set in parms means "inherit TOS from inner header". */
	if (tos&1)
		tos = old_iph->tos;

	if (!dst) {
		/* NBMA tunnel: derive the destination from the route's
		   gateway instead of a fixed endpoint. */
		if ((rt = skb->rtable) == NULL) {
			tunnel->stat.tx_fifo_errors++;
			goto tx_error;
		}
		if ((dst = rt->rt_gateway) == 0)
			goto tx_error_icmp;
	}

	{
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&init_net, &rt, &fl)) {
			tunnel->stat.tx_carrier_errors++;
			goto tx_error_icmp;
		}
	}
	tdev = rt->u.dst.dev;

	/* A route through ourselves would loop forever. */
	if (tdev == dev) {
		ip_rt_put(rt);
		tunnel->stat.collisions++;
		goto tx_error;
	}

	/* Effective inner MTU: outer path MTU minus encapsulation
	   overhead when DF is forced, else the inner route's MTU. */
	if (tiph->frag_off)
		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
	else
		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

	if (mtu < 68) {		/* below the IPv4 minimum MTU */
		tunnel->stat.collisions++;
		ip_rt_put(rt);
		goto tx_error;
	}
	if (skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	df |= (old_iph->frag_off&htons(IP_DF));

	/* DF set and packet too big: bounce FRAG_NEEDED to the sender. */
	if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		goto tx_error;
	}

	/* Relay recent ICMP errors (recorded by ipip_err) back to local
	   senders as link failures, one per queued error. */
	if (tunnel->err_count > 0) {
		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		/* Not enough headroom, or buffer not ours to scribble on:
		   reallocate with room for the outer headers. */
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			stats->tx_dropped++;
			dev_kfree_skb(skb);
			tunnel->recursion--;
			return 0;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb->transport_header = skb->network_header;
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 * Push down and install the IPIP header.
	 */

	iph = ip_hdr(skb);
	iph->version = 4;
	iph->ihl = sizeof(struct iphdr)>>2;
	iph->frag_off = df;
	iph->protocol = IPPROTO_IPIP;
	iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
	iph->daddr = rt->rt_dst;
	iph->saddr = rt->rt_src;

	/* TTL 0 in parms means "inherit TTL from inner header". */
	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl = old_iph->ttl;

	nf_reset(skb);

	IPTUNNEL_XMIT();
	tunnel->recursion--;
	return 0;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	tunnel->recursion--;
	return 0;
}
657
658 static void ipip_tunnel_bind_dev(struct net_device *dev)
659 {
660 struct net_device *tdev = NULL;
661 struct ip_tunnel *tunnel;
662 struct iphdr *iph;
663
664 tunnel = netdev_priv(dev);
665 iph = &tunnel->parms.iph;
666
667 if (iph->daddr) {
668 struct flowi fl = { .oif = tunnel->parms.link,
669 .nl_u = { .ip4_u =
670 { .daddr = iph->daddr,
671 .saddr = iph->saddr,
672 .tos = RT_TOS(iph->tos) } },
673 .proto = IPPROTO_IPIP };
674 struct rtable *rt;
675 if (!ip_route_output_key(&init_net, &rt, &fl)) {
676 tdev = rt->u.dst.dev;
677 ip_rt_put(rt);
678 }
679 dev->flags |= IFF_POINTOPOINT;
680 }
681
682 if (!tdev && tunnel->parms.link)
683 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
684
685 if (tdev) {
686 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
687 dev->mtu = tdev->mtu - sizeof(struct iphdr);
688 }
689 dev->iflink = tunnel->parms.link;
690 }
691
/*
 * Tunnel configuration ioctl handler (SIOCGETTUNNEL / SIOCADDTUNNEL /
 * SIOCCHGTUNNEL / SIOCDELTUNNEL), exchanging struct ip_tunnel_parm
 * with userspace via ifr->ifr_ifru.ifru_data.
 * Operations addressed at the fallback device act on the tunnel named
 * in the parm block; otherwise they act on @dev itself.
 * Add/change/delete require CAP_NET_ADMIN.
 */
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ipn->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip_tunnel_locate(&p, 0);
		}
		/* Fall back to this device's own parameters. */
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* Sanity-check user-supplied header template: must be a
		   plain 20-byte IPv4/IPIP header; only DF allowed. */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		/* Fixed TTL implies PMTU discovery, hence DF. */
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);

		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Endpoints already used by another device. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* Cannot toggle point-to-point-ness of an
				   existing device. */
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				/* Re-key: unlink, update endpoints, relink. */
				t = netdev_priv(dev);
				ipip_tunnel_unlink(t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipip_tunnel_link(t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					ipip_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			/* Return the effective parameters to userspace. */
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipn->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
				goto done;
			err = -EPERM;
			/* The fallback device itself may not be deleted. */
			if (t->dev == ipn->fb_tunnel_dev)
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
806
807 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
808 {
809 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
810 }
811
812 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
813 {
814 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
815 return -EINVAL;
816 dev->mtu = new_mtu;
817 return 0;
818 }
819
/*
 * alloc_netdev() setup callback: install the tunnel device operations
 * and defaults shared by the fallback and dynamically created devices.
 */
static void ipip_tunnel_setup(struct net_device *dev)
{
	dev->uninit		= ipip_tunnel_uninit;
	dev->hard_start_xmit	= ipip_tunnel_xmit;
	dev->get_stats		= ipip_tunnel_get_stats;
	dev->do_ioctl		= ipip_tunnel_ioctl;
	dev->change_mtu		= ipip_tunnel_change_mtu;
	dev->destructor		= free_netdev;

	dev->type		= ARPHRD_TUNNEL;
	/* Reserve worst-case link-layer space plus the outer IPv4 header. */
	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr);
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;	/* addresses are raw IPv4 endpoints */
}
836
837 static int ipip_tunnel_init(struct net_device *dev)
838 {
839 struct ip_tunnel *tunnel;
840
841 tunnel = netdev_priv(dev);
842
843 tunnel->dev = dev;
844 strcpy(tunnel->parms.name, dev->name);
845
846 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
847 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
848
849 ipip_tunnel_bind_dev(dev);
850
851 return 0;
852 }
853
854 static int ipip_fb_tunnel_init(struct net_device *dev)
855 {
856 struct ip_tunnel *tunnel = netdev_priv(dev);
857 struct iphdr *iph = &tunnel->parms.iph;
858
859 tunnel->dev = dev;
860 strcpy(tunnel->parms.name, dev->name);
861
862 iph->version = 4;
863 iph->protocol = IPPROTO_IPIP;
864 iph->ihl = 5;
865
866 dev_hold(dev);
867 tunnels_wc[0] = tunnel;
868 return 0;
869 }
870
/* IPPROTO_IPIP demux entry registered via xfrm4_tunnel_register();
 * priority orders us among competing tunnel handlers. */
static struct xfrm_tunnel ipip_handler = {
	.handler	=	ipip_rcv,
	.err_handler	=	ipip_err,
	.priority	=	1,
};

/* Printed once at module load. */
static char banner[] __initdata =
	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
879
880 static int ipip_init_net(struct net *net)
881 {
882 int err;
883 struct ipip_net *ipn;
884
885 err = -ENOMEM;
886 ipn = kmalloc(sizeof(struct ipip_net), GFP_KERNEL);
887 if (ipn == NULL)
888 goto err_alloc;
889
890 err = net_assign_generic(net, ipip_net_id, ipn);
891 if (err < 0)
892 goto err_assign;
893
894 ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
895 "tunl0",
896 ipip_tunnel_setup);
897 if (!ipn->fb_tunnel_dev) {
898 err = -ENOMEM;
899 goto err_alloc_dev;
900 }
901
902 ipn->fb_tunnel_dev->init = ipip_fb_tunnel_init;
903 dev_net_set(ipn->fb_tunnel_dev, net);
904
905 if ((err = register_netdev(ipn->fb_tunnel_dev)))
906 goto err_reg_dev;
907
908 return 0;
909
910 err_reg_dev:
911 free_netdev(ipn->fb_tunnel_dev);
912 err_alloc_dev:
913 /* nothing */
914 err_assign:
915 kfree(ipn);
916 err_alloc:
917 return err;
918 }
919
920 static void ipip_exit_net(struct net *net)
921 {
922 struct ipip_net *ipn;
923
924 ipn = net_generic(net, ipip_net_id);
925 rtnl_lock();
926 unregister_netdevice(ipn->fb_tunnel_dev);
927 rtnl_unlock();
928 kfree(ipn);
929 }
930
/* Pernet hooks: each network namespace gets its own fallback device. */
static struct pernet_operations ipip_net_ops = {
	.init = ipip_init_net,
	.exit = ipip_exit_net,
};
935
936 static int __init ipip_init(void)
937 {
938 int err;
939
940 printk(banner);
941
942 if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
943 printk(KERN_INFO "ipip init: can't register tunnel\n");
944 return -EAGAIN;
945 }
946
947 err = register_pernet_gen_device(&ipip_net_id, &ipip_net_ops);
948 if (err)
949 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
950
951 return err;
952 }
953
954 static void __exit ipip_destroy_tunnels(void)
955 {
956 int prio;
957
958 for (prio = 1; prio < 4; prio++) {
959 int h;
960 for (h = 0; h < HASH_SIZE; h++) {
961 struct ip_tunnel *t;
962 while ((t = tunnels[prio][h]) != NULL)
963 unregister_netdevice(t->dev);
964 }
965 }
966 }
967
/*
 * Module exit: stop new packets first (deregister the protocol
 * handler), then tear down all dynamic tunnels, then the per-net
 * state (which removes each namespace's fallback device).
 */
static void __exit ipip_fini(void)
{
	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
		printk(KERN_INFO "ipip close: can't deregister tunnel\n");

	rtnl_lock();
	ipip_destroy_tunnels();
	rtnl_unlock();

	unregister_pernet_gen_device(ipip_net_id, &ipip_net_ops);
}
979
/* Module entry points and license. */
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");