/*
 *      Linux NET3:     IP/IP protocol decoder.
 *
 *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
 *
 *      Authors:
 *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
 *
 *      Fixes:
 *              Alan Cox        :       Merged and made usable non modular (it's so
 *                                      tiny it's silly as a module taking up 2 pages).
 *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working
 *                                      (now needs to set skb->h.iph) to keep
 *                                      ip_forward happy.
 *              Alan Cox        :       More fixes for 1.3.21, and firewall fix.
 *                                      Maybe this will work soon 8).
 *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
 *              David Woodhouse :       Perform some basic ICMP handling.
 *                                      IPIP Routing without decapsulation.
 *              Carlos Picoto   :       GRE over IP support
 *              Alexey Kuznetsov:       Reworked. Really, now it is a truncated version
 *                                      of ipv4/ip_gre.c. I do not want to merge them
 *                                      together.
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 */

/* tunnel.c: an IP tunnel driver

        The purpose of this driver is to provide an IP tunnel through
        which you can tunnel network traffic transparently across subnets.

        This was written by looking at Nick Holloway's dummy driver
        Thanks for the great code!

                -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95

        Minor tweaks:
                Cleaned up the code a little and added some pre-1.3.0 tweaks.
                dev->hard_header/hard_header_len changed to use no headers.
                Comments/bracketing tweaked.
                Made the tunnels use dev->name rather than "tunnel:" when error reporting.
                Added tx_dropped stat

                -Alan Cox       (Alan.Cox@linux.org)    21 March 95

        Reworked:
                Changed to tunnel to the destination gateway in addition to the
                tunnel's pointopoint address
                Almost completely rewritten
                Note:  There is currently no firewall or ICMP handling done.

                -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/13/96

*/

/* Things I wish I had known when writing the tunnel driver:

        When the tunnel_xmit() function is called, the skb contains the
        packet to be sent (plus a great deal of extra info), and dev
        contains the tunnel device that _we_ are.

        When we are passed a packet, we are expected to fill in the
        source address with our source IP address.

        What is the proper way to allocate, copy and free a buffer?
        After you allocate it, it is a "0 length" chunk of memory
        starting at zero.  If you want to add headers to the buffer
        later, you'll have to call "skb_reserve(skb, amount)" with
        the amount of memory you want reserved.  Then, you call
        "skb_put(skb, amount)" with the amount of space you want in
        the buffer.  skb_put() returns a pointer to the top (#0) of
        that buffer.  skb->len is set to the amount of space you have
        "allocated" with skb_put().  You can then write up to skb->len
        bytes to that buffer.  If you need more, you can call skb_put()
        again with the additional amount of space you need.  You can
        find out how much more space you can allocate by calling
        "skb_tailroom(skb)".
        Now, to add header space, call "skb_push(skb, header_len)".
        This creates space at the beginning of the buffer and returns
        a pointer to this new space.  If later you need to strip a
        header from a buffer, call "skb_pull(skb, header_len)".
        skb_headroom() will return how much space is left at the top
        of the buffer (before the main data).  Remember, this headroom
        space must be reserved before the skb_put() function is called.
        */
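
/* A minimal sketch of the sequence described above; purely illustrative and
 * not used by this driver.  header_len, payload_len and data are made-up
 * placeholder names:
 *
 *      struct sk_buff *skb = alloc_skb(header_len + payload_len, GFP_ATOMIC);
 *      if (skb == NULL)
 *              return;                         // allocation can fail
 *      skb_reserve(skb, header_len);           // reserve headroom up front
 *      memcpy(skb_put(skb, payload_len),       // append payload_len bytes,
 *             data, payload_len);              // extending skb->len
 *      skb_push(skb, header_len);              // later, claim header space
 *                                              // from the reserved headroom
 */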

/*
   This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c

   For comments look at net/ipv4/ip_gre.c --ANK
 */


#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/ipip.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>

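/* Tunnels are kept in four small hash tables, selected by which endpoint
 * addresses are configured: both remote and local (tunnels_r_l), remote
 * only (tunnels_r), local only (tunnels_l), or neither (tunnels_wc, the
 * single wildcard/fallback tunnel).  HASH() folds the low byte of an
 * address into a 4-bit bucket index.
 */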
#define HASH_SIZE  16
#define HASH(addr) ((addr^(addr>>4))&0xF)

static int ipip_fb_tunnel_init(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);

static struct net_device *ipip_fb_tunnel_dev;

static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
static struct ip_tunnel *tunnels_r[HASH_SIZE];
static struct ip_tunnel *tunnels_l[HASH_SIZE];
static struct ip_tunnel *tunnels_wc[1];
static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };

static DEFINE_RWLOCK(ipip_lock);

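/* Find an IFF_UP tunnel matching the given outer addresses, preferring the
 * most specific match: (remote, local), then remote only, then local only,
 * and finally the wildcard fallback tunnel.  Callers hold ipip_lock.
 */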
static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
{
        unsigned h0 = HASH(remote);
        unsigned h1 = HASH(local);
        struct ip_tunnel *t;

        for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
                if (local == t->parms.iph.saddr &&
                    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
                        return t;
        }
        for (t = tunnels_r[h0]; t; t = t->next) {
                if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
                        return t;
        }
        for (t = tunnels_l[h1]; t; t = t->next) {
                if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
                        return t;
        }
        if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
                return t;
        return NULL;
}

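/* Return the hash-table slot a tunnel belongs in, chosen by which of its
 * endpoint addresses are set (prio bit 1: remote set, bit 0: local set).
 */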
static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
{
        u32 remote = t->parms.iph.daddr;
        u32 local = t->parms.iph.saddr;
        unsigned h = 0;
        int prio = 0;

        if (remote) {
                prio |= 2;
                h ^= HASH(remote);
        }
        if (local) {
                prio |= 1;
                h ^= HASH(local);
        }
        return &tunnels[prio][h];
}


static void ipip_tunnel_unlink(struct ip_tunnel *t)
{
        struct ip_tunnel **tp;

        for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
                if (t == *tp) {
                        write_lock_bh(&ipip_lock);
                        *tp = t->next;
                        write_unlock_bh(&ipip_lock);
                        break;
                }
        }
}

static void ipip_tunnel_link(struct ip_tunnel *t)
{
        struct ip_tunnel **tp = ipip_bucket(t);

        t->next = *tp;
        write_lock_bh(&ipip_lock);
        *tp = t;
        write_unlock_bh(&ipip_lock);
}

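/* Find a tunnel with exactly the given endpoint addresses.  If none exists
 * and 'create' is set, allocate and register a new tunnel device, named
 * after parms->name or the first free "tunl%d".
 */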
static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
{
        u32 remote = parms->iph.daddr;
        u32 local = parms->iph.saddr;
        struct ip_tunnel *t, **tp, *nt;
        struct net_device *dev;
        unsigned h = 0;
        int prio = 0;
        char name[IFNAMSIZ];

        if (remote) {
                prio |= 2;
                h ^= HASH(remote);
        }
        if (local) {
                prio |= 1;
                h ^= HASH(local);
        }
        for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
                if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
                        return t;
        }
        if (!create)
                return NULL;

        if (parms->name[0])
                strlcpy(name, parms->name, IFNAMSIZ);
        else {
                int i;
                for (i=1; i<100; i++) {
                        sprintf(name, "tunl%d", i);
                        if (__dev_get_by_name(name) == NULL)
                                break;
                }
                if (i==100)
                        goto failed;
        }

        dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
        if (dev == NULL)
                return NULL;

        nt = netdev_priv(dev);
        SET_MODULE_OWNER(dev);
        dev->init = ipip_tunnel_init;
        nt->parms = *parms;

        if (register_netdevice(dev) < 0) {
                free_netdev(dev);
                goto failed;
        }

        dev_hold(dev);
        ipip_tunnel_link(nt);
        return nt;

failed:
        return NULL;
}

static void ipip_tunnel_uninit(struct net_device *dev)
{
        if (dev == ipip_fb_tunnel_dev) {
                write_lock_bh(&ipip_lock);
                tunnels_wc[0] = NULL;
                write_unlock_bh(&ipip_lock);
        } else
                ipip_tunnel_unlink(netdev_priv(dev));
        dev_put(dev);
}

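/* ICMP error handler: an ICMP error arrived that quotes an outer IPIP
 * header we transmitted.  Find the tunnel it concerns and record the error
 * (err_count/err_time) so the transmit path can report link failures.
 */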
static int ipip_err(struct sk_buff *skb, u32 info)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
        struct iphdr *iph = (struct iphdr*)skb->data;
        int type = skb->h.icmph->type;
        int code = skb->h.icmph->code;
        struct ip_tunnel *t;
        int err;

        switch (type) {
        default:
        case ICMP_PARAMETERPROB:
                return 0;

        case ICMP_DEST_UNREACH:
                switch (code) {
                case ICMP_SR_FAILED:
                case ICMP_PORT_UNREACH:
                        /* Impossible event. */
                        return 0;
                case ICMP_FRAG_NEEDED:
                        /* Soft state for pmtu is maintained by IP core. */
                        return 0;
                default:
                        /* All others are translated to HOST_UNREACH.
                           rfc2003 contains "deep thoughts" about NET_UNREACH,
                           I believe they are just ether pollution. --ANK
                         */
                        break;
                }
                break;
        case ICMP_TIME_EXCEEDED:
                if (code != ICMP_EXC_TTL)
                        return 0;
                break;
        }

        err = -ENOENT;

        read_lock(&ipip_lock);
        t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
        if (t == NULL || t->parms.iph.daddr == 0)
                goto out;

        err = 0;
        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
                goto out;

        if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
                t->err_count++;
        else
                t->err_count = 1;
        t->err_time = jiffies;
out:
        read_unlock(&ipip_lock);
        return err;
#else
        struct iphdr *iph = (struct iphdr*)skb->data;
        int hlen = iph->ihl<<2;
        struct iphdr *eiph;
        int type = skb->h.icmph->type;
        int code = skb->h.icmph->code;
        int rel_type = 0;
        int rel_code = 0;
        int rel_info = 0;
        struct sk_buff *skb2;
        struct flowi fl;
        struct rtable *rt;

        if (skb->len < hlen + sizeof(struct iphdr))
                return 0;
        eiph = (struct iphdr*)(skb->data + hlen);

        switch (type) {
        default:
                return 0;
        case ICMP_PARAMETERPROB:
                if (skb->h.icmph->un.gateway < hlen)
                        return 0;

                /* So... This guy found something strange INSIDE encapsulated
                   packet. Well, he is fool, but what can we do ?
                 */
                rel_type = ICMP_PARAMETERPROB;
                rel_info = skb->h.icmph->un.gateway - hlen;
                break;

        case ICMP_DEST_UNREACH:
                switch (code) {
                case ICMP_SR_FAILED:
                case ICMP_PORT_UNREACH:
                        /* Impossible event. */
                        return 0;
                case ICMP_FRAG_NEEDED:
                        /* And it is the only really necessary thing :-) */
                        rel_info = ntohs(skb->h.icmph->un.frag.mtu);
                        if (rel_info < hlen+68)
                                return 0;
                        rel_info -= hlen;
                        /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
                        if (rel_info > ntohs(eiph->tot_len))
                                return 0;
                        break;
                default:
                        /* All others are translated to HOST_UNREACH.
                           rfc2003 contains "deep thoughts" about NET_UNREACH,
                           I believe, it is just ether pollution. --ANK
                         */
                        rel_type = ICMP_DEST_UNREACH;
                        rel_code = ICMP_HOST_UNREACH;
                        break;
                }
                break;
        case ICMP_TIME_EXCEEDED:
                if (code != ICMP_EXC_TTL)
                        return 0;
                break;
        }

        /* Prepare fake skb to feed it to icmp_send */
        skb2 = skb_clone(skb, GFP_ATOMIC);
        if (skb2 == NULL)
                return 0;
        dst_release(skb2->dst);
        skb2->dst = NULL;
        skb_pull(skb2, skb->data - (u8*)eiph);
        skb2->nh.raw = skb2->data;

        /* Try to guess incoming interface */
        memset(&fl, 0, sizeof(fl));
        fl.fl4_daddr = eiph->saddr;
        fl.fl4_tos = RT_TOS(eiph->tos);
        fl.proto = IPPROTO_IPIP;
        if (ip_route_output_key(&rt, &fl)) {
                kfree_skb(skb2);
                return 0;
        }
        skb2->dev = rt->u.dst.dev;

        /* route "incoming" packet */
        if (rt->rt_flags&RTCF_LOCAL) {
                ip_rt_put(rt);
                rt = NULL;
                fl.fl4_daddr = eiph->daddr;
                fl.fl4_src = eiph->saddr;
                fl.fl4_tos = eiph->tos;
                if (ip_route_output_key(&rt, &fl) ||
                    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
                        ip_rt_put(rt);
                        kfree_skb(skb2);
                        return 0;
                }
        } else {
                ip_rt_put(rt);
                if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
                    skb2->dst->dev->type != ARPHRD_TUNNEL) {
                        kfree_skb(skb2);
                        return 0;
                }
        }

        /* change mtu on this route */
        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
                if (rel_info > dst_mtu(skb2->dst)) {
                        kfree_skb(skb2);
                        return 0;
                }
                skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
                rel_info = htonl(rel_info);
        } else if (type == ICMP_TIME_EXCEEDED) {
                struct ip_tunnel *t = netdev_priv(skb2->dev);
                if (t->parms.iph.ttl) {
                        rel_type = ICMP_DEST_UNREACH;
                        rel_code = ICMP_HOST_UNREACH;
                }
        }

        icmp_send(skb2, rel_type, rel_code, rel_info);
        kfree_skb(skb2);
        return 0;
#endif
}

static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
{
        struct iphdr *inner_iph = skb->nh.iph;

        if (INET_ECN_is_ce(outer_iph->tos))
                IP_ECN_set_ce(inner_iph);
}

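/* Receive path, installed below via xfrm4_tunnel_register(): match the
 * packet to a tunnel by its outer addresses, drop the old dst and netfilter
 * state, propagate ECN from the outer header, and feed the inner packet
 * back to the stack with netif_rx().
 */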
static int ipip_rcv(struct sk_buff *skb)
{
        struct iphdr *iph;
        struct ip_tunnel *tunnel;

        iph = skb->nh.iph;

        read_lock(&ipip_lock);
        if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
                if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
                        read_unlock(&ipip_lock);
                        kfree_skb(skb);
                        return 0;
                }

                secpath_reset(skb);

                skb->mac.raw = skb->nh.raw;
                skb->nh.raw = skb->data;
                memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
                skb->protocol = htons(ETH_P_IP);
                skb->pkt_type = PACKET_HOST;

                tunnel->stat.rx_packets++;
                tunnel->stat.rx_bytes += skb->len;
                skb->dev = tunnel->dev;
                dst_release(skb->dst);
                skb->dst = NULL;
                nf_reset(skb);
                ipip_ecn_decapsulate(iph, skb);
                netif_rx(skb);
                read_unlock(&ipip_lock);
                return 0;
        }
        read_unlock(&ipip_lock);

        return -1;
}

/*
 *      This function assumes it is being called from dev_queue_xmit()
 *      and that skb is filled properly by that function.
 */

static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct net_device_stats *stats = &tunnel->stat;
        struct iphdr *tiph = &tunnel->parms.iph;
        u8 tos = tunnel->parms.iph.tos;
        u16 df = tiph->frag_off;
        struct rtable *rt;                      /* Route to the other host */
        struct net_device *tdev;                /* Device to other host */
        struct iphdr *old_iph = skb->nh.iph;
        struct iphdr *iph;                      /* Our new IP header */
        int max_headroom;                       /* The extra header space needed */
        u32 dst = tiph->daddr;
        int mtu;

        if (tunnel->recursion++) {
                tunnel->stat.collisions++;
                goto tx_error;
        }

        if (skb->protocol != htons(ETH_P_IP))
                goto tx_error;

        if (tos&1)
                tos = old_iph->tos;

        if (!dst) {
                /* NBMA tunnel */
                if ((rt = (struct rtable*)skb->dst) == NULL) {
                        tunnel->stat.tx_fifo_errors++;
                        goto tx_error;
                }
                if ((dst = rt->rt_gateway) == 0)
                        goto tx_error_icmp;
        }

        {
                struct flowi fl = { .oif = tunnel->parms.link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = dst,
                                                .saddr = tiph->saddr,
                                                .tos = RT_TOS(tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(&rt, &fl)) {
                        tunnel->stat.tx_carrier_errors++;
                        goto tx_error_icmp;
                }
        }
        tdev = rt->u.dst.dev;

        if (tdev == dev) {
                ip_rt_put(rt);
                tunnel->stat.collisions++;
                goto tx_error;
        }

        if (tiph->frag_off)
                mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
        else
                mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

        if (mtu < 68) {
                tunnel->stat.collisions++;
                ip_rt_put(rt);
                goto tx_error;
        }
        if (skb->dst)
                skb->dst->ops->update_pmtu(skb->dst, mtu);

        df |= (old_iph->frag_off&htons(IP_DF));

        if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                ip_rt_put(rt);
                goto tx_error;
        }

        if (tunnel->err_count > 0) {
                if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
                        tunnel->err_count--;
                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        /*
         * Okay, now see if we can stuff it in the buffer as-is.
         */
        max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

        if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
                if (!new_skb) {
                        ip_rt_put(rt);
                        stats->tx_dropped++;
                        dev_kfree_skb(skb);
                        tunnel->recursion--;
                        return 0;
                }
                if (skb->sk)
                        skb_set_owner_w(new_skb, skb->sk);
                dev_kfree_skb(skb);
                skb = new_skb;
                old_iph = skb->nh.iph;
        }

        skb->h.raw = skb->nh.raw;
        skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
                              IPSKB_REROUTED);
        dst_release(skb->dst);
        skb->dst = &rt->u.dst;

        /*
         *      Push down and install the IPIP header.
         */

        iph = skb->nh.iph;
        iph->version = 4;
        iph->ihl = sizeof(struct iphdr)>>2;
        iph->frag_off = df;
        iph->protocol = IPPROTO_IPIP;
        iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
        iph->daddr = rt->rt_dst;
        iph->saddr = rt->rt_src;

        if ((iph->ttl = tiph->ttl) == 0)
                iph->ttl = old_iph->ttl;

        nf_reset(skb);

        IPTUNNEL_XMIT();
        tunnel->recursion--;
        return 0;

tx_error_icmp:
        dst_link_failure(skb);
tx_error:
        stats->tx_errors++;
        dev_kfree_skb(skb);
        tunnel->recursion--;
        return 0;
}

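/* Tunnel configuration entry point for the SIOCGETTUNNEL, SIOCADDTUNNEL,
 * SIOCCHGTUNNEL and SIOCDELTUNNEL ioctls; the argument is a struct
 * ip_tunnel_parm passed via ifr->ifr_ifru.ifru_data.
 *
 * A rough userspace sketch of creating a tunnel through this interface
 * (illustrative only; error handling omitted and the endpoint addresses
 * are placeholders):
 *
 *      struct ip_tunnel_parm p;
 *      struct ifreq ifr;
 *      int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *      memset(&p, 0, sizeof(p));
 *      strcpy(p.name, "tunl1");
 *      p.iph.version  = 4;
 *      p.iph.ihl      = 5;
 *      p.iph.protocol = IPPROTO_IPIP;
 *      p.iph.saddr    = inet_addr("192.0.2.1");        // local endpoint (example)
 *      p.iph.daddr    = inet_addr("192.0.2.2");        // remote endpoint (example)
 *
 *      strcpy(ifr.ifr_name, "tunl0");                  // send the request to the
 *      ifr.ifr_ifru.ifru_data = (void *)&p;            // fallback device
 *      ioctl(fd, SIOCADDTUNNEL, &ifr);
 */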
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
        int err = 0;
        struct ip_tunnel_parm p;
        struct ip_tunnel *t;

        switch (cmd) {
        case SIOCGETTUNNEL:
                t = NULL;
                if (dev == ipip_fb_tunnel_dev) {
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
                                err = -EFAULT;
                                break;
                        }
                        t = ipip_tunnel_locate(&p, 0);
                }
                if (t == NULL)
                        t = netdev_priv(dev);
                memcpy(&p, &t->parms, sizeof(p));
                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
                        err = -EFAULT;
                break;

        case SIOCADDTUNNEL:
        case SIOCCHGTUNNEL:
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                err = -EFAULT;
                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                        goto done;

                err = -EINVAL;
                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
                    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
                        goto done;
                if (p.iph.ttl)
                        p.iph.frag_off |= htons(IP_DF);

                t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);

                if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
                        if (t != NULL) {
                                if (t->dev != dev) {
                                        err = -EEXIST;
                                        break;
                                }
                        } else {
                                if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
                                    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
                                        err = -EINVAL;
                                        break;
                                }
                                t = netdev_priv(dev);
                                ipip_tunnel_unlink(t);
                                t->parms.iph.saddr = p.iph.saddr;
                                t->parms.iph.daddr = p.iph.daddr;
                                memcpy(dev->dev_addr, &p.iph.saddr, 4);
                                memcpy(dev->broadcast, &p.iph.daddr, 4);
                                ipip_tunnel_link(t);
                                netdev_state_change(dev);
                        }
                }

                if (t) {
                        err = 0;
                        if (cmd == SIOCCHGTUNNEL) {
                                t->parms.iph.ttl = p.iph.ttl;
                                t->parms.iph.tos = p.iph.tos;
                                t->parms.iph.frag_off = p.iph.frag_off;
                        }
                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
                                err = -EFAULT;
                } else
                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
                break;

        case SIOCDELTUNNEL:
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                if (dev == ipip_fb_tunnel_dev) {
                        err = -EFAULT;
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                                goto done;
                        err = -ENOENT;
                        if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
                                goto done;
                        err = -EPERM;
                        if (t->dev == ipip_fb_tunnel_dev)
                                goto done;
                        dev = t->dev;
                }
                err = unregister_netdevice(dev);
                break;

        default:
                err = -EINVAL;
        }

done:
        return err;
}

static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
{
        return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
}

static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
        if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
                return -EINVAL;
        dev->mtu = new_mtu;
        return 0;
}

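/* Common netdevice setup for ipip tunnels: no ARP, ARPHRD_TUNNEL type, and
 * an MTU reduced by the size of the extra IPv4 header.
 */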
static void ipip_tunnel_setup(struct net_device *dev)
{
        SET_MODULE_OWNER(dev);
        dev->uninit = ipip_tunnel_uninit;
        dev->hard_start_xmit = ipip_tunnel_xmit;
        dev->get_stats = ipip_tunnel_get_stats;
        dev->do_ioctl = ipip_tunnel_ioctl;
        dev->change_mtu = ipip_tunnel_change_mtu;
        dev->destructor = free_netdev;

        dev->type = ARPHRD_TUNNEL;
        dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
        dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
        dev->flags = IFF_NOARP;
        dev->iflink = 0;
        dev->addr_len = 4;
}

static int ipip_tunnel_init(struct net_device *dev)
{
        struct net_device *tdev = NULL;
        struct ip_tunnel *tunnel;
        struct iphdr *iph;

        tunnel = netdev_priv(dev);
        iph = &tunnel->parms.iph;

        tunnel->dev = dev;
        strcpy(tunnel->parms.name, dev->name);

        memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
        memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

        if (iph->daddr) {
                struct flowi fl = { .oif = tunnel->parms.link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = iph->daddr,
                                                .saddr = iph->saddr,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                struct rtable *rt;
                if (!ip_route_output_key(&rt, &fl)) {
                        tdev = rt->u.dst.dev;
                        ip_rt_put(rt);
                }
                dev->flags |= IFF_POINTOPOINT;
        }

        if (!tdev && tunnel->parms.link)
                tdev = __dev_get_by_index(tunnel->parms.link);

        if (tdev) {
                dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
                dev->mtu = tdev->mtu - sizeof(struct iphdr);
        }
        dev->iflink = tunnel->parms.link;

        return 0;
}

static int __init ipip_fb_tunnel_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct iphdr *iph = &tunnel->parms.iph;

        tunnel->dev = dev;
        strcpy(tunnel->parms.name, dev->name);

        iph->version = 4;
        iph->protocol = IPPROTO_IPIP;
        iph->ihl = 5;

        dev_hold(dev);
        tunnels_wc[0] = tunnel;
        return 0;
}

static struct xfrm_tunnel ipip_handler = {
        .handler        =       ipip_rcv,
        .err_handler    =       ipip_err,
        .priority       =       1,
};

static char banner[] __initdata =
        KERN_INFO "IPv4 over IPv4 tunneling driver\n";

static int __init ipip_init(void)
{
        int err;

        printk(banner);

        if (xfrm4_tunnel_register(&ipip_handler)) {
                printk(KERN_INFO "ipip init: can't register tunnel\n");
                return -EAGAIN;
        }

        ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
                                          "tunl0",
                                          ipip_tunnel_setup);
        if (!ipip_fb_tunnel_dev) {
                err = -ENOMEM;
                goto err1;
        }

        ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;

        if ((err = register_netdev(ipip_fb_tunnel_dev)))
                goto err2;
 out:
        return err;
 err2:
        free_netdev(ipip_fb_tunnel_dev);
 err1:
        xfrm4_tunnel_deregister(&ipip_handler);
        goto out;
}

static void __exit ipip_destroy_tunnels(void)
{
        int prio;

        for (prio = 1; prio < 4; prio++) {
                int h;
                for (h = 0; h < HASH_SIZE; h++) {
                        struct ip_tunnel *t;
                        while ((t = tunnels[prio][h]) != NULL)
                                unregister_netdevice(t->dev);
                }
        }
}

static void __exit ipip_fini(void)
{
        if (xfrm4_tunnel_deregister(&ipip_handler))
                printk(KERN_INFO "ipip close: can't deregister tunnel\n");

        rtnl_lock();
        ipip_destroy_tunnels();
        unregister_netdevice(ipip_fb_tunnel_dev);
        rtnl_unlock();
}

module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");