[IPIP]: Make the fallback tunnel device per-net.
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv4 / ipip.c
1 /*
2 * Linux NET3: IP/IP protocol decoder.
3 *
4 * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5 *
6 * Authors:
7 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
8 *
9 * Fixes:
10 * Alan Cox : Merged and made usable non modular (its so tiny its silly as
11 * a module taking up 2 pages).
12 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13 * to keep ip_forward happy.
14 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
16 * David Woodhouse : Perform some basic ICMP handling.
17 * IPIP Routing without decapsulation.
18 * Carlos Picoto : GRE over IP support
19 * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20 * I do not want to merge them together.
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 *
27 */
28
29 /* tunnel.c: an IP tunnel driver
30
31 The purpose of this driver is to provide an IP tunnel through
32 which you can tunnel network traffic transparently across subnets.
33
34 This was written by looking at Nick Holloway's dummy driver
35 Thanks for the great code!
36
37 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
38
39 Minor tweaks:
40 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41 dev->hard_header/hard_header_len changed to use no headers.
42 Comments/bracketing tweaked.
43 Made the tunnels use dev->name not tunnel: when error reporting.
44 Added tx_dropped stat
45
46 -Alan Cox (Alan.Cox@linux.org) 21 March 95
47
48 Reworked:
49 Changed to tunnel to destination gateway in addition to the
50 tunnel's pointopoint address
51 Almost completely rewritten
52 Note: There is currently no firewall or ICMP handling done.
53
54 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
55
56 */
57
58 /* Things I wish I had known when writing the tunnel driver:
59
60 When the tunnel_xmit() function is called, the skb contains the
61 packet to be sent (plus a great deal of extra info), and dev
62 contains the tunnel device that _we_ are.
63
64 When we are passed a packet, we are expected to fill in the
65 source address with our source IP address.
66
67 What is the proper way to allocate, copy and free a buffer?
68 After you allocate it, it is a "0 length" chunk of memory
69 starting at zero. If you want to add headers to the buffer
70 later, you'll have to call "skb_reserve(skb, amount)" with
71 the amount of memory you want reserved. Then, you call
72 "skb_put(skb, amount)" with the amount of space you want in
73 the buffer. skb_put() returns a pointer to the top (#0) of
74 that buffer. skb->len is set to the amount of space you have
75 "allocated" with skb_put(). You can then write up to skb->len
76 bytes to that buffer. If you need more, you can call skb_put()
77 again with the additional amount of space you need. You can
78 find out how much more space you can allocate by calling
79 "skb_tailroom(skb)".
80 Now, to add header space, call "skb_push(skb, header_len)".
81 This creates space at the beginning of the buffer and returns
82 a pointer to this new space. If later you need to strip a
83 header from a buffer, call "skb_pull(skb, header_len)".
84 skb_headroom() will return how much space is left at the top
85 of the buffer (before the main data). Remember, this headroom
86 space must be reserved before the skb_put() function is called.
87 */
88
89 /*
90 This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
91
92 For comments look at net/ipv4/ip_gre.c --ANK
93 */
94
95
96 #include <linux/capability.h>
97 #include <linux/module.h>
98 #include <linux/types.h>
99 #include <linux/kernel.h>
100 #include <asm/uaccess.h>
101 #include <linux/skbuff.h>
102 #include <linux/netdevice.h>
103 #include <linux/in.h>
104 #include <linux/tcp.h>
105 #include <linux/udp.h>
106 #include <linux/if_arp.h>
107 #include <linux/mroute.h>
108 #include <linux/init.h>
109 #include <linux/netfilter_ipv4.h>
110 #include <linux/if_ether.h>
111
112 #include <net/sock.h>
113 #include <net/ip.h>
114 #include <net/icmp.h>
115 #include <net/ipip.h>
116 #include <net/inet_ecn.h>
117 #include <net/xfrm.h>
118 #include <net/net_namespace.h>
119 #include <net/netns/generic.h>
120
/* 16-bucket hash keyed on a tunnel endpoint address. */
#define HASH_SIZE  16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

/* Slot in the per-net generic pointer array that holds our struct ipip_net. */
static int ipip_net_id;

/* Per-network-namespace state: so far only the fallback "tunl0" device. */
struct ipip_net {
	struct net_device *fb_tunnel_dev;
};

static int ipip_fb_tunnel_init(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);

/* Tunnel hash lists, still global rather than per-net:
 *   tunnels_r_l - both remote and local endpoint set
 *   tunnels_r   - remote endpoint only
 *   tunnels_l   - local endpoint only
 *   tunnels_wc  - wildcard slot (the fallback device)
 * tunnels[] is indexed by the prio value built in __ipip_bucket(). */
static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
static struct ip_tunnel *tunnels_r[HASH_SIZE];
static struct ip_tunnel *tunnels_l[HASH_SIZE];
static struct ip_tunnel *tunnels_wc[1];
static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };

/* Guards the lists above: readers are the rcv/err softirq paths,
 * writers (link/unlink) run under RTNL. */
static DEFINE_RWLOCK(ipip_lock);
141 static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
142 {
143 unsigned h0 = HASH(remote);
144 unsigned h1 = HASH(local);
145 struct ip_tunnel *t;
146
147 for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
148 if (local == t->parms.iph.saddr &&
149 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
150 return t;
151 }
152 for (t = tunnels_r[h0]; t; t = t->next) {
153 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
154 return t;
155 }
156 for (t = tunnels_l[h1]; t; t = t->next) {
157 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
158 return t;
159 }
160 if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
161 return t;
162 return NULL;
163 }
164
165 static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
166 {
167 __be32 remote = parms->iph.daddr;
168 __be32 local = parms->iph.saddr;
169 unsigned h = 0;
170 int prio = 0;
171
172 if (remote) {
173 prio |= 2;
174 h ^= HASH(remote);
175 }
176 if (local) {
177 prio |= 1;
178 h ^= HASH(local);
179 }
180 return &tunnels[prio][h];
181 }
182
/* Hash-list head for an existing tunnel, derived from its parameters. */
static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
{
	return __ipip_bucket(&t->parms);
}
187
188 static void ipip_tunnel_unlink(struct ip_tunnel *t)
189 {
190 struct ip_tunnel **tp;
191
192 for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
193 if (t == *tp) {
194 write_lock_bh(&ipip_lock);
195 *tp = t->next;
196 write_unlock_bh(&ipip_lock);
197 break;
198 }
199 }
200 }
201
202 static void ipip_tunnel_link(struct ip_tunnel *t)
203 {
204 struct ip_tunnel **tp = ipip_bucket(t);
205
206 t->next = *tp;
207 write_lock_bh(&ipip_lock);
208 *tp = t;
209 write_unlock_bh(&ipip_lock);
210 }
211
212 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
213 {
214 __be32 remote = parms->iph.daddr;
215 __be32 local = parms->iph.saddr;
216 struct ip_tunnel *t, **tp, *nt;
217 struct net_device *dev;
218 char name[IFNAMSIZ];
219
220 for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
221 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
222 return t;
223 }
224 if (!create)
225 return NULL;
226
227 if (parms->name[0])
228 strlcpy(name, parms->name, IFNAMSIZ);
229 else
230 sprintf(name, "tunl%%d");
231
232 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
233 if (dev == NULL)
234 return NULL;
235
236 if (strchr(name, '%')) {
237 if (dev_alloc_name(dev, name) < 0)
238 goto failed_free;
239 }
240
241 nt = netdev_priv(dev);
242 dev->init = ipip_tunnel_init;
243 nt->parms = *parms;
244
245 if (register_netdevice(dev) < 0)
246 goto failed_free;
247
248 dev_hold(dev);
249 ipip_tunnel_link(nt);
250 return nt;
251
252 failed_free:
253 free_netdev(dev);
254 return NULL;
255 }
256
257 static void ipip_tunnel_uninit(struct net_device *dev)
258 {
259 struct net *net = dev_net(dev);
260 struct ipip_net *ipn = net_generic(net, ipip_net_id);
261
262 if (dev == ipn->fb_tunnel_dev) {
263 write_lock_bh(&ipip_lock);
264 tunnels_wc[0] = NULL;
265 write_unlock_bh(&ipip_lock);
266 } else
267 ipip_tunnel_unlink(netdev_priv(dev));
268 dev_put(dev);
269 }
270
/*
 * ICMP error handler for the outer (encapsulating) header.
 * Looks up the tunnel the failed packet belonged to and records an
 * error-burst counter on it, which ipip_tunnel_xmit() later uses to
 * report link failure to senders.  Returns 0 if the error was
 * consumed/ignored, -ENOENT if no matching tunnel was found.
 *
 * NOTE(review): the #else branch below is dead code — it references
 * identifiers (dp, len, key) that are not defined in this scope and
 * would not compile if I_WISH_WORLD_WERE_PERFECT were ever defined.
 */
static int ipip_err(struct sk_buff *skb, u32 info)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	/* skb->data points at the inner (original outgoing) IPv4 header
	   quoted in the ICMP payload. */
	struct iphdr *iph = (struct iphdr*)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	int err;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	}

	err = -ENOENT;

	read_lock(&ipip_lock);
	/* The quoted header is our own transmit, so its daddr is the
	   tunnel remote and saddr the tunnel local endpoint. */
	t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
	if (t == NULL || t->parms.iph.daddr == 0)
		goto out;

	err = 0;
	/* TTL errors on inherit-TTL tunnels are expected (e.g. traceroute
	   through the tunnel); do not count them. */
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	/* Errors inside IPTUNNEL_ERR_TIMEO of each other accumulate;
	   otherwise restart the burst counter. */
	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	read_unlock(&ipip_lock);
	return err;
#else
	/* DEAD CODE: never compiled (see NOTE above). Kept for history. */
	struct iphdr *iph = (struct iphdr*)dp;
	int hlen = iph->ihl<<2;
	struct iphdr *eiph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	int rel_type = 0;
	int rel_code = 0;
	__be32 rel_info = 0;
	__u32 n = 0;
	struct sk_buff *skb2;
	struct flowi fl;
	struct rtable *rt;

	if (len < hlen + sizeof(struct iphdr))
		return 0;
	eiph = (struct iphdr*)(dp + hlen);

	switch (type) {
	default:
		return 0;
	case ICMP_PARAMETERPROB:
		n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
		if (n < hlen)
			return 0;

		/* So... This guy found something strange INSIDE encapsulated
		   packet. Well, he is fool, but what can we do ?
		 */
		rel_type = ICMP_PARAMETERPROB;
		rel_info = htonl((n - hlen) << 24);
		break;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		case ICMP_FRAG_NEEDED:
			/* And it is the only really necessary thing :-) */
			n = ntohs(icmp_hdr(skb)->un.frag.mtu);
			if (n < hlen+68)
				return 0;
			n -= hlen;
			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
			if (n > ntohs(eiph->tot_len))
				return 0;
			rel_info = htonl(n);
			break;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe, it is just ether pollution. --ANK
			 */
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	}

	/* Prepare fake skb to feed it to icmp_send */
	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (skb2 == NULL)
		return 0;
	dst_release(skb2->dst);
	skb2->dst = NULL;
	skb_pull(skb2, skb->data - (u8*)eiph);
	skb_reset_network_header(skb2);

	/* Try to guess incoming interface */
	memset(&fl, 0, sizeof(fl));
	fl.fl4_daddr = eiph->saddr;
	fl.fl4_tos = RT_TOS(eiph->tos);
	fl.proto = IPPROTO_IPIP;
	if (ip_route_output_key(&init_net, &rt, &key)) {
		kfree_skb(skb2);
		return 0;
	}
	skb2->dev = rt->u.dst.dev;

	/* route "incoming" packet */
	if (rt->rt_flags&RTCF_LOCAL) {
		ip_rt_put(rt);
		rt = NULL;
		fl.fl4_daddr = eiph->daddr;
		fl.fl4_src = eiph->saddr;
		fl.fl4_tos = eiph->tos;
		if (ip_route_output_key(&init_net, &rt, &fl) ||
		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
			ip_rt_put(rt);
			kfree_skb(skb2);
			return 0;
		}
	} else {
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
		    skb2->dst->dev->type != ARPHRD_TUNNEL) {
			kfree_skb(skb2);
			return 0;
		}
	}

	/* change mtu on this route */
	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		if (n > dst_mtu(skb2->dst)) {
			kfree_skb(skb2);
			return 0;
		}
		skb2->dst->ops->update_pmtu(skb2->dst, n);
	} else if (type == ICMP_TIME_EXCEEDED) {
		struct ip_tunnel *t = netdev_priv(skb2->dev);
		if (t->parms.iph.ttl) {
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
		}
	}

	icmp_send(skb2, rel_type, rel_code, rel_info);
	kfree_skb(skb2);
	return 0;
#endif
}
460
461 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
462 struct sk_buff *skb)
463 {
464 struct iphdr *inner_iph = ip_hdr(skb);
465
466 if (INET_ECN_is_ce(outer_iph->tos))
467 IP_ECN_set_ce(inner_iph);
468 }
469
/*
 * Receive path: decapsulate an IPIP packet and hand the inner IPv4
 * packet back to the stack via the matching tunnel device.
 * Returns 0 when consumed, -1 to let other protocol handlers (or the
 * ICMP protocol-unreachable path) deal with the packet.
 */
static int ipip_rcv(struct sk_buff *skb)
{
	struct ip_tunnel *tunnel;
	const struct iphdr *iph = ip_hdr(skb);

	read_lock(&ipip_lock);
	/* Lookup by (outer saddr, outer daddr) = (tunnel remote, local). */
	if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
			/* IPsec policy forbids this packet; drop silently. */
			read_unlock(&ipip_lock);
			kfree_skb(skb);
			return 0;
		}

		secpath_reset(skb);

		/* Strip the outer header: the old network header becomes
		   the (unused) MAC header, the inner header the new
		   network header. */
		skb->mac_header = skb->network_header;
		skb_reset_network_header(skb);
		skb->protocol = htons(ETH_P_IP);
		skb->pkt_type = PACKET_HOST;

		tunnel->stat.rx_packets++;
		tunnel->stat.rx_bytes += skb->len;
		skb->dev = tunnel->dev;
		/* Drop the route of the outer packet before re-injecting. */
		dst_release(skb->dst);
		skb->dst = NULL;
		nf_reset(skb);
		ipip_ecn_decapsulate(iph, skb);
		netif_rx(skb);
		read_unlock(&ipip_lock);
		return 0;
	}
	read_unlock(&ipip_lock);

	return -1;
}
505
506 /*
507 * This function assumes it is being called from dev_queue_xmit()
508 * and that skb is filled properly by that function.
509 */
510
/*
 * Transmit path: encapsulate an IPv4 packet in an outer IPv4 header
 * and send it toward the tunnel endpoint.
 * Always returns 0 (the skb is either transmitted or freed here).
 */
static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net_device_stats *stats = &tunnel->stat;
	struct iphdr *tiph = &tunnel->parms.iph;
	u8 tos = tunnel->parms.iph.tos;
	__be16 df = tiph->frag_off;
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr *old_iph = ip_hdr(skb);
	struct iphdr *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	__be32 dst = tiph->daddr;
	int mtu;

	/* Guard against the tunnel routing back into itself. */
	if (tunnel->recursion++) {
		tunnel->stat.collisions++;
		goto tx_error;
	}

	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

	/* tos bit 0 set in parms means "inherit TOS from inner header". */
	if (tos&1)
		tos = old_iph->tos;

	if (!dst) {
		/* NBMA tunnel: derive the destination from the route's
		   gateway instead of a fixed endpoint. */
		if ((rt = skb->rtable) == NULL) {
			tunnel->stat.tx_fifo_errors++;
			goto tx_error;
		}
		if ((dst = rt->rt_gateway) == 0)
			goto tx_error_icmp;
	}

	{
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&init_net, &rt, &fl)) {
			tunnel->stat.tx_carrier_errors++;
			goto tx_error_icmp;
		}
	}
	tdev = rt->u.dst.dev;

	/* A route through ourselves would loop forever. */
	if (tdev == dev) {
		ip_rt_put(rt);
		tunnel->stat.collisions++;
		goto tx_error;
	}

	/* Effective inner MTU: outer path MTU minus encapsulation
	   overhead when DF is forced, else the inner route's MTU. */
	if (tiph->frag_off)
		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
	else
		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

	if (mtu < 68) {		/* below the IPv4 minimum MTU */
		tunnel->stat.collisions++;
		ip_rt_put(rt);
		goto tx_error;
	}
	if (skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	df |= (old_iph->frag_off&htons(IP_DF));

	/* DF set and packet too big: bounce FRAG_NEEDED to the sender. */
	if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		goto tx_error;
	}

	/* Relay recent ICMP errors (recorded by ipip_err) back to local
	   senders as link failures, one per queued error. */
	if (tunnel->err_count > 0) {
		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		/* Not enough headroom, or buffer not ours to scribble on:
		   reallocate with room for the outer headers. */
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			stats->tx_dropped++;
			dev_kfree_skb(skb);
			tunnel->recursion--;
			return 0;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb->transport_header = skb->network_header;
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 * Push down and install the IPIP header.
	 */

	iph = ip_hdr(skb);
	iph->version = 4;
	iph->ihl = sizeof(struct iphdr)>>2;
	iph->frag_off = df;
	iph->protocol = IPPROTO_IPIP;
	iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
	iph->daddr = rt->rt_dst;
	iph->saddr = rt->rt_src;

	/* TTL 0 in parms means "inherit TTL from inner header". */
	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl = old_iph->ttl;

	nf_reset(skb);

	IPTUNNEL_XMIT();
	tunnel->recursion--;
	return 0;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	tunnel->recursion--;
	return 0;
}
657
658 static void ipip_tunnel_bind_dev(struct net_device *dev)
659 {
660 struct net_device *tdev = NULL;
661 struct ip_tunnel *tunnel;
662 struct iphdr *iph;
663
664 tunnel = netdev_priv(dev);
665 iph = &tunnel->parms.iph;
666
667 if (iph->daddr) {
668 struct flowi fl = { .oif = tunnel->parms.link,
669 .nl_u = { .ip4_u =
670 { .daddr = iph->daddr,
671 .saddr = iph->saddr,
672 .tos = RT_TOS(iph->tos) } },
673 .proto = IPPROTO_IPIP };
674 struct rtable *rt;
675 if (!ip_route_output_key(&init_net, &rt, &fl)) {
676 tdev = rt->u.dst.dev;
677 ip_rt_put(rt);
678 }
679 dev->flags |= IFF_POINTOPOINT;
680 }
681
682 if (!tdev && tunnel->parms.link)
683 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
684
685 if (tdev) {
686 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
687 dev->mtu = tdev->mtu - sizeof(struct iphdr);
688 }
689 dev->iflink = tunnel->parms.link;
690 }
691
/*
 * Tunnel configuration ioctl handler (SIOCGETTUNNEL / SIOCADDTUNNEL /
 * SIOCCHGTUNNEL / SIOCDELTUNNEL), exchanging struct ip_tunnel_parm
 * with userspace via ifr->ifr_ifru.ifru_data.
 * Operations addressed at the fallback device act on the tunnel named
 * in the parm block; otherwise they act on @dev itself.
 * Add/change/delete require CAP_NET_ADMIN.
 */
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ipn->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip_tunnel_locate(&p, 0);
		}
		/* Fall back to this device's own parameters. */
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* Sanity-check user-supplied header template: must be a
		   plain 20-byte IPv4/IPIP header; only DF allowed. */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		/* Fixed TTL implies PMTU discovery, hence DF. */
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);

		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Endpoints already used by another device. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* Cannot toggle point-to-point-ness of an
				   existing device. */
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				/* Re-key: unlink, update endpoints, relink. */
				t = netdev_priv(dev);
				ipip_tunnel_unlink(t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipip_tunnel_link(t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					ipip_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			/* Return the effective parameters to userspace. */
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipn->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
				goto done;
			err = -EPERM;
			/* The fallback device itself may not be deleted. */
			if (t->dev == ipn->fb_tunnel_dev)
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
806
807 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
808 {
809 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
810 }
811
812 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
813 {
814 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
815 return -EINVAL;
816 dev->mtu = new_mtu;
817 return 0;
818 }
819
/*
 * alloc_netdev() setup callback: install the tunnel device operations
 * and defaults shared by the fallback and dynamically created devices.
 */
static void ipip_tunnel_setup(struct net_device *dev)
{
	dev->uninit		= ipip_tunnel_uninit;
	dev->hard_start_xmit	= ipip_tunnel_xmit;
	dev->get_stats		= ipip_tunnel_get_stats;
	dev->do_ioctl		= ipip_tunnel_ioctl;
	dev->change_mtu		= ipip_tunnel_change_mtu;
	dev->destructor		= free_netdev;

	dev->type		= ARPHRD_TUNNEL;
	/* Reserve worst-case link-layer space plus the outer IPv4 header. */
	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr);
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;	/* addresses are raw IPv4 endpoints */
}
836
837 static int ipip_tunnel_init(struct net_device *dev)
838 {
839 struct ip_tunnel *tunnel;
840
841 tunnel = netdev_priv(dev);
842
843 tunnel->dev = dev;
844 strcpy(tunnel->parms.name, dev->name);
845
846 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
847 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
848
849 ipip_tunnel_bind_dev(dev);
850
851 return 0;
852 }
853
854 static int ipip_fb_tunnel_init(struct net_device *dev)
855 {
856 struct ip_tunnel *tunnel = netdev_priv(dev);
857 struct iphdr *iph = &tunnel->parms.iph;
858
859 tunnel->dev = dev;
860 strcpy(tunnel->parms.name, dev->name);
861
862 iph->version = 4;
863 iph->protocol = IPPROTO_IPIP;
864 iph->ihl = 5;
865
866 dev_hold(dev);
867 tunnels_wc[0] = tunnel;
868 return 0;
869 }
870
/* IPPROTO_IPIP demux entry registered via xfrm4_tunnel_register();
 * priority orders us among competing tunnel handlers. */
static struct xfrm_tunnel ipip_handler = {
	.handler	=	ipip_rcv,
	.err_handler	=	ipip_err,
	.priority	=	1,
};

/* Printed once at module load. */
static char banner[] __initdata =
	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
879
880 static int ipip_init_net(struct net *net)
881 {
882 int err;
883 struct ipip_net *ipn;
884
885 err = -ENOMEM;
886 ipn = kmalloc(sizeof(struct ipip_net), GFP_KERNEL);
887 if (ipn == NULL)
888 goto err_alloc;
889
890 err = net_assign_generic(net, ipip_net_id, ipn);
891 if (err < 0)
892 goto err_assign;
893
894 ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
895 "tunl0",
896 ipip_tunnel_setup);
897 if (!ipn->fb_tunnel_dev) {
898 err = -ENOMEM;
899 goto err_alloc_dev;
900 }
901
902 ipn->fb_tunnel_dev->init = ipip_fb_tunnel_init;
903 dev_net_set(ipn->fb_tunnel_dev, net);
904
905 if ((err = register_netdev(ipn->fb_tunnel_dev)))
906 goto err_reg_dev;
907
908 return 0;
909
910 err_reg_dev:
911 free_netdev(ipn->fb_tunnel_dev);
912 err_alloc_dev:
913 /* nothing */
914 err_assign:
915 kfree(ipn);
916 err_alloc:
917 return err;
918 }
919
920 static void ipip_exit_net(struct net *net)
921 {
922 struct ipip_net *ipn;
923
924 ipn = net_generic(net, ipip_net_id);
925 rtnl_lock();
926 unregister_netdevice(ipn->fb_tunnel_dev);
927 rtnl_unlock();
928 kfree(ipn);
929 }
930
/* Pernet hooks: each network namespace gets its own fallback device. */
static struct pernet_operations ipip_net_ops = {
	.init = ipip_init_net,
	.exit = ipip_exit_net,
};
935
936 static int __init ipip_init(void)
937 {
938 int err;
939
940 printk(banner);
941
942 if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
943 printk(KERN_INFO "ipip init: can't register tunnel\n");
944 return -EAGAIN;
945 }
946
947 err = register_pernet_gen_device(&ipip_net_id, &ipip_net_ops);
948 if (err)
949 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
950
951 return err;
952 }
953
954 static void __exit ipip_destroy_tunnels(void)
955 {
956 int prio;
957
958 for (prio = 1; prio < 4; prio++) {
959 int h;
960 for (h = 0; h < HASH_SIZE; h++) {
961 struct ip_tunnel *t;
962 while ((t = tunnels[prio][h]) != NULL)
963 unregister_netdevice(t->dev);
964 }
965 }
966 }
967
/*
 * Module exit: stop new packets first (deregister the protocol
 * handler), then tear down all dynamic tunnels, then the per-net
 * state (which removes each namespace's fallback device).
 */
static void __exit ipip_fini(void)
{
	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
		printk(KERN_INFO "ipip close: can't deregister tunnel\n");

	rtnl_lock();
	ipip_destroy_tunnels();
	rtnl_unlock();

	unregister_pernet_gen_device(ipip_net_id, &ipip_net_ops);
}
979
/* Module entry points and license. */
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");