1 /*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
4 * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Fixes:
13 * Michael Chastain : Incorrect size of copying.
14 * Alan Cox : Added the cache manager code
15 * Alan Cox : Fixed the clone/copy bug and device race.
16 * Mike McLagan : Routing by source
17 * Malcolm Beattie : Buffer handling fixes.
18 * Alexey Kuznetsov : Double buffer free and other fixes.
19 * SVR Anand : Fixed several multicast bugs and problems.
20 * Alexey Kuznetsov : Status, optimisations and more.
21 * Brad Parker : Better behaviour on mrouted upcall
22 * overflow.
23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
 25 *	Relax this requirement to work with older peers.
26 *
27 */
28
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <linux/mm.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
40 #include <linux/in.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <net/net_namespace.h>
51 #include <net/ip.h>
52 #include <net/protocol.h>
53 #include <linux/skbuff.h>
54 #include <net/route.h>
55 #include <net/sock.h>
56 #include <net/icmp.h>
57 #include <net/udp.h>
58 #include <net/raw.h>
59 #include <linux/notifier.h>
60 #include <linux/if_arp.h>
61 #include <linux/netfilter_ipv4.h>
62 #include <net/ipip.h>
63 #include <net/checksum.h>
64 #include <net/netlink.h>
65
66 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67 #define CONFIG_IP_PIMSM 1
68 #endif
69
70 /* Big lock, protecting vif table, mrt cache and mroute socket state.
71 Note that the changes are semaphored via rtnl_lock.
72 */
73
74 static DEFINE_RWLOCK(mrt_lock);
75
76 /*
77 * Multicast router control variables
78 */
79
80 #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
81
82 static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
83
84 /* Special spinlock for queue of unresolved entries */
85 static DEFINE_SPINLOCK(mfc_unres_lock);
86
 87 /* We return to Alan's original scheme. The hash table of resolved
 88    entries is changed only in process context and is protected
 89    by the weak reader/writer lock mrt_lock. The queue of unresolved
 90    entries is protected by the strong spinlock mfc_unres_lock.
 91
 92    This way the data path is entirely free of exclusive locks.
 93  */
94
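/* A concrete sketch of how this works out in the code below (derived from the
   call sites, not an authoritative locking document): the forwarding path,
   ipmr_get_route() and the /proc readers take read_lock(&mrt_lock); updates
   driven from the mroute socket take write_lock_bh(&mrt_lock) while already
   holding rtnl_lock(); the unresolved queue and its expiry timer are
   serialized only by mfc_unres_lock.
 */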
95 static struct kmem_cache *mrt_cachep __read_mostly;
96
97 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
98 static int ipmr_cache_report(struct net *net,
99 struct sk_buff *pkt, vifi_t vifi, int assert);
100 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
101
102 static struct timer_list ipmr_expire_timer;
103
104 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
105
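/* Tear down the "dvmrp%d" tunnel that was created for a VIF: close the vif
   device, then ask the tunl0 control device to delete the tunnel through its
   ndo_do_ioctl(SIOCDELTUNNEL) handler, temporarily switching to KERNEL_DS
   because that ioctl expects a user-space pointer.
 */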
106 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
107 {
108 struct net *net = dev_net(dev);
109
110 dev_close(dev);
111
112 dev = __dev_get_by_name(net, "tunl0");
113 if (dev) {
114 const struct net_device_ops *ops = dev->netdev_ops;
115 struct ifreq ifr;
116 struct ip_tunnel_parm p;
117
118 memset(&p, 0, sizeof(p));
119 p.iph.daddr = v->vifc_rmt_addr.s_addr;
120 p.iph.saddr = v->vifc_lcl_addr.s_addr;
121 p.iph.version = 4;
122 p.iph.ihl = 5;
123 p.iph.protocol = IPPROTO_IPIP;
124 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
125 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
126
127 if (ops->ndo_do_ioctl) {
128 mm_segment_t oldfs = get_fs();
129
130 set_fs(KERNEL_DS);
131 ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
132 set_fs(oldfs);
133 }
134 }
135 }
136
137 static
138 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
139 {
140 struct net_device *dev;
141
142 dev = __dev_get_by_name(net, "tunl0");
143
144 if (dev) {
145 const struct net_device_ops *ops = dev->netdev_ops;
146 int err;
147 struct ifreq ifr;
148 struct ip_tunnel_parm p;
149 struct in_device *in_dev;
150
151 memset(&p, 0, sizeof(p));
152 p.iph.daddr = v->vifc_rmt_addr.s_addr;
153 p.iph.saddr = v->vifc_lcl_addr.s_addr;
154 p.iph.version = 4;
155 p.iph.ihl = 5;
156 p.iph.protocol = IPPROTO_IPIP;
157 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
158 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
159
160 if (ops->ndo_do_ioctl) {
161 mm_segment_t oldfs = get_fs();
162
163 set_fs(KERNEL_DS);
164 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
165 set_fs(oldfs);
166 } else
167 err = -EOPNOTSUPP;
168
169 dev = NULL;
170
171 if (err == 0 &&
172 (dev = __dev_get_by_name(net, p.name)) != NULL) {
173 dev->flags |= IFF_MULTICAST;
174
175 in_dev = __in_dev_get_rtnl(dev);
176 if (in_dev == NULL)
177 goto failure;
178
179 ipv4_devconf_setall(in_dev);
180 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
181
182 if (dev_open(dev))
183 goto failure;
184 dev_hold(dev);
185 }
186 }
187 return dev;
188
189 failure:
190 /* allow the register to be completed before unregistering. */
191 rtnl_unlock();
192 rtnl_lock();
193
194 unregister_netdevice(dev);
195 return NULL;
196 }
197
198 #ifdef CONFIG_IP_PIMSM
199
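/* Transmit handler for the PIM register device ("pimreg"). Packets routed out
   of this device are never sent on the wire: they are only counted in the
   device statistics and handed to the user-space PIM daemon as an
   IGMPMSG_WHOLEPKT upcall, then freed.
 */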
200 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
201 {
202 struct net *net = dev_net(dev);
203
204 read_lock(&mrt_lock);
205 dev->stats.tx_bytes += skb->len;
206 dev->stats.tx_packets++;
207 ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
208 IGMPMSG_WHOLEPKT);
209 read_unlock(&mrt_lock);
210 kfree_skb(skb);
211 return NETDEV_TX_OK;
212 }
213
214 static const struct net_device_ops reg_vif_netdev_ops = {
215 .ndo_start_xmit = reg_vif_xmit,
216 };
217
218 static void reg_vif_setup(struct net_device *dev)
219 {
220 dev->type = ARPHRD_PIMREG;
221 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
222 dev->flags = IFF_NOARP;
 223 	dev->netdev_ops		= &reg_vif_netdev_ops;
224 dev->destructor = free_netdev;
225 dev->features |= NETIF_F_NETNS_LOCAL;
226 }
227
228 static struct net_device *ipmr_reg_vif(struct net *net)
229 {
230 struct net_device *dev;
231 struct in_device *in_dev;
232
233 dev = alloc_netdev(0, "pimreg", reg_vif_setup);
234
235 if (dev == NULL)
236 return NULL;
237
238 dev_net_set(dev, net);
239
240 if (register_netdevice(dev)) {
241 free_netdev(dev);
242 return NULL;
243 }
244 dev->iflink = 0;
245
246 rcu_read_lock();
247 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
248 rcu_read_unlock();
249 goto failure;
250 }
251
252 ipv4_devconf_setall(in_dev);
253 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
254 rcu_read_unlock();
255
256 if (dev_open(dev))
257 goto failure;
258
259 dev_hold(dev);
260
261 return dev;
262
263 failure:
264 /* allow the register to be completed before unregistering. */
265 rtnl_unlock();
266 rtnl_lock();
267
268 unregister_netdevice(dev);
269 return NULL;
270 }
271 #endif
272
273 /*
274 * Delete a VIF entry
275 * @notify: Set to 1, if the caller is a notifier_call
276 */
277
278 static int vif_delete(struct net *net, int vifi, int notify)
279 {
280 struct vif_device *v;
281 struct net_device *dev;
282 struct in_device *in_dev;
283
284 if (vifi < 0 || vifi >= net->ipv4.maxvif)
285 return -EADDRNOTAVAIL;
286
287 v = &net->ipv4.vif_table[vifi];
288
289 write_lock_bh(&mrt_lock);
290 dev = v->dev;
291 v->dev = NULL;
292
293 if (!dev) {
294 write_unlock_bh(&mrt_lock);
295 return -EADDRNOTAVAIL;
296 }
297
298 #ifdef CONFIG_IP_PIMSM
299 if (vifi == net->ipv4.mroute_reg_vif_num)
300 net->ipv4.mroute_reg_vif_num = -1;
301 #endif
302
303 if (vifi+1 == net->ipv4.maxvif) {
304 int tmp;
305 for (tmp=vifi-1; tmp>=0; tmp--) {
306 if (VIF_EXISTS(net, tmp))
307 break;
308 }
309 net->ipv4.maxvif = tmp+1;
310 }
311
312 write_unlock_bh(&mrt_lock);
313
314 dev_set_allmulti(dev, -1);
315
316 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
317 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
318 ip_rt_multicast_event(in_dev);
319 }
320
321 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
322 unregister_netdevice(dev);
323
324 dev_put(dev);
325 return 0;
326 }
327
328 static inline void ipmr_cache_free(struct mfc_cache *c)
329 {
330 release_net(mfc_net(c));
331 kmem_cache_free(mrt_cachep, c);
332 }
333
334 /* Destroy an unresolved cache entry, killing queued skbs
335 and reporting error to netlink readers.
336 */
337
338 static void ipmr_destroy_unres(struct mfc_cache *c)
339 {
340 struct sk_buff *skb;
341 struct nlmsgerr *e;
342 struct net *net = mfc_net(c);
343
344 atomic_dec(&net->ipv4.cache_resolve_queue_len);
345
346 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
347 if (ip_hdr(skb)->version == 0) {
348 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
349 nlh->nlmsg_type = NLMSG_ERROR;
350 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
351 skb_trim(skb, nlh->nlmsg_len);
352 e = NLMSG_DATA(nlh);
353 e->error = -ETIMEDOUT;
354 memset(&e->msg, 0, sizeof(e->msg));
355
356 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
357 } else
358 kfree_skb(skb);
359 }
360
361 ipmr_cache_free(c);
362 }
363
364
365 /* Single timer process for all the unresolved queue. */
366
367 static void ipmr_expire_process(unsigned long dummy)
368 {
369 unsigned long now;
370 unsigned long expires;
371 struct mfc_cache *c, **cp;
372
373 if (!spin_trylock(&mfc_unres_lock)) {
374 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
375 return;
376 }
377
378 if (mfc_unres_queue == NULL)
379 goto out;
380
381 now = jiffies;
382 expires = 10*HZ;
383 cp = &mfc_unres_queue;
384
385 while ((c=*cp) != NULL) {
386 if (time_after(c->mfc_un.unres.expires, now)) {
387 unsigned long interval = c->mfc_un.unres.expires - now;
388 if (interval < expires)
389 expires = interval;
390 cp = &c->next;
391 continue;
392 }
393
394 *cp = c->next;
395
396 ipmr_destroy_unres(c);
397 }
398
399 if (mfc_unres_queue != NULL)
400 mod_timer(&ipmr_expire_timer, jiffies + expires);
401
402 out:
403 spin_unlock(&mfc_unres_lock);
404 }
405
406 /* Fill oifs list. It is called under write locked mrt_lock. */
407
408 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
409 {
410 int vifi;
411 struct net *net = mfc_net(cache);
412
413 cache->mfc_un.res.minvif = MAXVIFS;
414 cache->mfc_un.res.maxvif = 0;
415 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
416
417 for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
418 if (VIF_EXISTS(net, vifi) &&
419 ttls[vifi] && ttls[vifi] < 255) {
420 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
421 if (cache->mfc_un.res.minvif > vifi)
422 cache->mfc_un.res.minvif = vifi;
423 if (cache->mfc_un.res.maxvif <= vifi)
424 cache->mfc_un.res.maxvif = vifi + 1;
425 }
426 }
427 }
428
429 static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
430 {
431 int vifi = vifc->vifc_vifi;
432 struct vif_device *v = &net->ipv4.vif_table[vifi];
433 struct net_device *dev;
434 struct in_device *in_dev;
435 int err;
436
437 /* Is vif busy ? */
438 if (VIF_EXISTS(net, vifi))
439 return -EADDRINUSE;
440
441 switch (vifc->vifc_flags) {
442 #ifdef CONFIG_IP_PIMSM
443 case VIFF_REGISTER:
444 /*
445 * Special Purpose VIF in PIM
446 * All the packets will be sent to the daemon
447 */
448 if (net->ipv4.mroute_reg_vif_num >= 0)
449 return -EADDRINUSE;
450 dev = ipmr_reg_vif(net);
451 if (!dev)
452 return -ENOBUFS;
453 err = dev_set_allmulti(dev, 1);
454 if (err) {
455 unregister_netdevice(dev);
456 dev_put(dev);
457 return err;
458 }
459 break;
460 #endif
461 case VIFF_TUNNEL:
462 dev = ipmr_new_tunnel(net, vifc);
463 if (!dev)
464 return -ENOBUFS;
465 err = dev_set_allmulti(dev, 1);
466 if (err) {
467 ipmr_del_tunnel(dev, vifc);
468 dev_put(dev);
469 return err;
470 }
471 break;
472 case 0:
473 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
474 if (!dev)
475 return -EADDRNOTAVAIL;
476 err = dev_set_allmulti(dev, 1);
477 if (err) {
478 dev_put(dev);
479 return err;
480 }
481 break;
482 default:
483 return -EINVAL;
484 }
485
486 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
487 dev_put(dev);
488 return -EADDRNOTAVAIL;
489 }
490 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
491 ip_rt_multicast_event(in_dev);
492
493 /*
494 * Fill in the VIF structures
495 */
496 v->rate_limit = vifc->vifc_rate_limit;
497 v->local = vifc->vifc_lcl_addr.s_addr;
498 v->remote = vifc->vifc_rmt_addr.s_addr;
499 v->flags = vifc->vifc_flags;
500 if (!mrtsock)
501 v->flags |= VIFF_STATIC;
502 v->threshold = vifc->vifc_threshold;
503 v->bytes_in = 0;
504 v->bytes_out = 0;
505 v->pkt_in = 0;
506 v->pkt_out = 0;
507 v->link = dev->ifindex;
508 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
509 v->link = dev->iflink;
510
511 /* And finish update writing critical data */
512 write_lock_bh(&mrt_lock);
513 v->dev = dev;
514 #ifdef CONFIG_IP_PIMSM
515 if (v->flags&VIFF_REGISTER)
516 net->ipv4.mroute_reg_vif_num = vifi;
517 #endif
518 if (vifi+1 > net->ipv4.maxvif)
519 net->ipv4.maxvif = vifi+1;
520 write_unlock_bh(&mrt_lock);
521 return 0;
522 }
523
524 static struct mfc_cache *ipmr_cache_find(struct net *net,
525 __be32 origin,
526 __be32 mcastgrp)
527 {
528 int line = MFC_HASH(mcastgrp, origin);
529 struct mfc_cache *c;
530
531 for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
532 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
533 break;
534 }
535 return c;
536 }
537
538 /*
539 * Allocate a multicast cache entry
540 */
541 static struct mfc_cache *ipmr_cache_alloc(struct net *net)
542 {
543 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
544 if (c == NULL)
545 return NULL;
546 c->mfc_un.res.minvif = MAXVIFS;
547 mfc_net_set(c, net);
548 return c;
549 }
550
551 static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
552 {
553 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
554 if (c == NULL)
555 return NULL;
556 skb_queue_head_init(&c->mfc_un.unres.unresolved);
557 c->mfc_un.unres.expires = jiffies + 10*HZ;
558 mfc_net_set(c, net);
559 return c;
560 }
561
562 /*
563 * A cache entry has gone into a resolved state from queued
564 */
565
566 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
567 {
568 struct sk_buff *skb;
569 struct nlmsgerr *e;
570
571 /*
572 * Play the pending entries through our router
573 */
574
575 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
576 if (ip_hdr(skb)->version == 0) {
577 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
578
579 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
580 nlh->nlmsg_len = (skb_tail_pointer(skb) -
581 (u8 *)nlh);
582 } else {
583 nlh->nlmsg_type = NLMSG_ERROR;
584 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
585 skb_trim(skb, nlh->nlmsg_len);
586 e = NLMSG_DATA(nlh);
587 e->error = -EMSGSIZE;
588 memset(&e->msg, 0, sizeof(e->msg));
589 }
590
591 rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
592 } else
593 ip_mr_forward(skb, c, 0);
594 }
595 }
596
597 /*
598 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
599 * expects the following bizarre scheme.
600 *
601 * Called under mrt_lock.
602 */
603
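/* Sketch of the upcall layout as it is constructed below (this mirrors the
   code, it is not taken from a separate spec): for IGMPMSG_NOCACHE and
   IGMPMSG_WRONGVIF the copied IP header doubles as a struct igmpmsg, with the
   protocol field zeroed, im_msgtype set to the assert type and im_vif set to
   the arrival VIF; for IGMPMSG_WHOLEPKT the full packet is kept and only a
   pseudo IP header carrying IGMPMSG_WHOLEPKT is prepended. Either way the skb
   is queued on the mroute socket for mrouted/pimd to read.
 */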
604 static int ipmr_cache_report(struct net *net,
605 struct sk_buff *pkt, vifi_t vifi, int assert)
606 {
607 struct sk_buff *skb;
608 const int ihl = ip_hdrlen(pkt);
609 struct igmphdr *igmp;
610 struct igmpmsg *msg;
611 int ret;
612
613 #ifdef CONFIG_IP_PIMSM
614 if (assert == IGMPMSG_WHOLEPKT)
615 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
616 else
617 #endif
618 skb = alloc_skb(128, GFP_ATOMIC);
619
620 if (!skb)
621 return -ENOBUFS;
622
623 #ifdef CONFIG_IP_PIMSM
624 if (assert == IGMPMSG_WHOLEPKT) {
625 /* Ugly, but we have no choice with this interface.
626 Duplicate old header, fix ihl, length etc.
627 And all this only to mangle msg->im_msgtype and
628 to set msg->im_mbz to "mbz" :-)
629 */
630 skb_push(skb, sizeof(struct iphdr));
631 skb_reset_network_header(skb);
632 skb_reset_transport_header(skb);
633 msg = (struct igmpmsg *)skb_network_header(skb);
634 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
635 msg->im_msgtype = IGMPMSG_WHOLEPKT;
636 msg->im_mbz = 0;
637 msg->im_vif = net->ipv4.mroute_reg_vif_num;
638 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
639 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
640 sizeof(struct iphdr));
641 } else
642 #endif
643 {
644
645 /*
646 * Copy the IP header
647 */
648
649 skb->network_header = skb->tail;
650 skb_put(skb, ihl);
651 skb_copy_to_linear_data(skb, pkt->data, ihl);
652 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
653 msg = (struct igmpmsg *)skb_network_header(skb);
654 msg->im_vif = vifi;
655 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
656
657 /*
658 * Add our header
659 */
660
661 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
662 igmp->type =
663 msg->im_msgtype = assert;
664 igmp->code = 0;
665 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
666 skb->transport_header = skb->network_header;
667 }
668
669 if (net->ipv4.mroute_sk == NULL) {
670 kfree_skb(skb);
671 return -EINVAL;
672 }
673
674 /*
675 * Deliver to mrouted
676 */
677 ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
678 if (ret < 0) {
679 if (net_ratelimit())
680 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
681 kfree_skb(skb);
682 }
683
684 return ret;
685 }
686
 687 /*
 688  *	Queue a packet awaiting route resolution; the unresolved cache
 689  *	entry is looked up or created under mfc_unres_lock.
 690  */
690
691 static int
692 ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
693 {
694 int err;
695 struct mfc_cache *c;
696 const struct iphdr *iph = ip_hdr(skb);
697
698 spin_lock_bh(&mfc_unres_lock);
699 for (c=mfc_unres_queue; c; c=c->next) {
700 if (net_eq(mfc_net(c), net) &&
701 c->mfc_mcastgrp == iph->daddr &&
702 c->mfc_origin == iph->saddr)
703 break;
704 }
705
706 if (c == NULL) {
707 /*
708 * Create a new entry if allowable
709 */
710
711 if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
712 (c = ipmr_cache_alloc_unres(net)) == NULL) {
713 spin_unlock_bh(&mfc_unres_lock);
714
715 kfree_skb(skb);
716 return -ENOBUFS;
717 }
718
719 /*
720 * Fill in the new cache entry
721 */
722 c->mfc_parent = -1;
723 c->mfc_origin = iph->saddr;
724 c->mfc_mcastgrp = iph->daddr;
725
726 /*
727 * Reflect first query at mrouted.
728 */
729 err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
730 if (err < 0) {
731 /* If the report failed throw the cache entry
732 out - Brad Parker
733 */
734 spin_unlock_bh(&mfc_unres_lock);
735
736 ipmr_cache_free(c);
737 kfree_skb(skb);
738 return err;
739 }
740
741 atomic_inc(&net->ipv4.cache_resolve_queue_len);
742 c->next = mfc_unres_queue;
743 mfc_unres_queue = c;
744
745 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
746 }
747
748 /*
749 * See if we can append the packet
750 */
751 if (c->mfc_un.unres.unresolved.qlen>3) {
752 kfree_skb(skb);
753 err = -ENOBUFS;
754 } else {
755 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
756 err = 0;
757 }
758
759 spin_unlock_bh(&mfc_unres_lock);
760 return err;
761 }
762
763 /*
764 * MFC cache manipulation by user space mroute daemon
765 */
766
767 static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
768 {
769 int line;
770 struct mfc_cache *c, **cp;
771
772 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
773
774 for (cp = &net->ipv4.mfc_cache_array[line];
775 (c = *cp) != NULL; cp = &c->next) {
776 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
777 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
778 write_lock_bh(&mrt_lock);
779 *cp = c->next;
780 write_unlock_bh(&mrt_lock);
781
782 ipmr_cache_free(c);
783 return 0;
784 }
785 }
786 return -ENOENT;
787 }
788
789 static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
790 {
791 int line;
792 struct mfc_cache *uc, *c, **cp;
793
794 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
795
796 for (cp = &net->ipv4.mfc_cache_array[line];
797 (c = *cp) != NULL; cp = &c->next) {
798 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
799 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
800 break;
801 }
802
803 if (c != NULL) {
804 write_lock_bh(&mrt_lock);
805 c->mfc_parent = mfc->mfcc_parent;
806 ipmr_update_thresholds(c, mfc->mfcc_ttls);
807 if (!mrtsock)
808 c->mfc_flags |= MFC_STATIC;
809 write_unlock_bh(&mrt_lock);
810 return 0;
811 }
812
813 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
814 return -EINVAL;
815
816 c = ipmr_cache_alloc(net);
817 if (c == NULL)
818 return -ENOMEM;
819
820 c->mfc_origin = mfc->mfcc_origin.s_addr;
821 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
822 c->mfc_parent = mfc->mfcc_parent;
823 ipmr_update_thresholds(c, mfc->mfcc_ttls);
824 if (!mrtsock)
825 c->mfc_flags |= MFC_STATIC;
826
827 write_lock_bh(&mrt_lock);
828 c->next = net->ipv4.mfc_cache_array[line];
829 net->ipv4.mfc_cache_array[line] = c;
830 write_unlock_bh(&mrt_lock);
831
832 /*
833 * Check to see if we resolved a queued list. If so we
834 * need to send on the frames and tidy up.
835 */
836 spin_lock_bh(&mfc_unres_lock);
837 for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
838 cp = &uc->next) {
839 if (net_eq(mfc_net(uc), net) &&
840 uc->mfc_origin == c->mfc_origin &&
841 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
842 *cp = uc->next;
843 atomic_dec(&net->ipv4.cache_resolve_queue_len);
844 break;
845 }
846 }
847 if (mfc_unres_queue == NULL)
848 del_timer(&ipmr_expire_timer);
849 spin_unlock_bh(&mfc_unres_lock);
850
851 if (uc) {
852 ipmr_cache_resolve(uc, c);
853 ipmr_cache_free(uc);
854 }
855 return 0;
856 }
857
858 /*
859 * Close the multicast socket, and clear the vif tables etc
860 */
861
862 static void mroute_clean_tables(struct net *net)
863 {
864 int i;
865
866 /*
867 * Shut down all active vif entries
868 */
869 for (i = 0; i < net->ipv4.maxvif; i++) {
870 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
871 vif_delete(net, i, 0);
872 }
873
874 /*
875 * Wipe the cache
876 */
877 for (i=0; i<MFC_LINES; i++) {
878 struct mfc_cache *c, **cp;
879
880 cp = &net->ipv4.mfc_cache_array[i];
881 while ((c = *cp) != NULL) {
882 if (c->mfc_flags&MFC_STATIC) {
883 cp = &c->next;
884 continue;
885 }
886 write_lock_bh(&mrt_lock);
887 *cp = c->next;
888 write_unlock_bh(&mrt_lock);
889
890 ipmr_cache_free(c);
891 }
892 }
893
894 if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
895 struct mfc_cache *c, **cp;
896
897 spin_lock_bh(&mfc_unres_lock);
898 cp = &mfc_unres_queue;
899 while ((c = *cp) != NULL) {
900 if (!net_eq(mfc_net(c), net)) {
901 cp = &c->next;
902 continue;
903 }
904 *cp = c->next;
905
906 ipmr_destroy_unres(c);
907 }
908 spin_unlock_bh(&mfc_unres_lock);
909 }
910 }
911
912 static void mrtsock_destruct(struct sock *sk)
913 {
914 struct net *net = sock_net(sk);
915
916 rtnl_lock();
917 if (sk == net->ipv4.mroute_sk) {
918 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
919
920 write_lock_bh(&mrt_lock);
921 net->ipv4.mroute_sk = NULL;
922 write_unlock_bh(&mrt_lock);
923
924 mroute_clean_tables(net);
925 }
926 rtnl_unlock();
927 }
928
929 /*
930 * Socket options and virtual interface manipulation. The whole
931 * virtual interface system is a complete heap, but unfortunately
932 * that's how BSD mrouted happens to think. Maybe one day with a proper
933 * MOSPF/PIM router set up we can clean this up.
934 */
935
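/* Illustrative user-space usage of this interface (a sketch only, not code
   taken from mrouted/pimd):

	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
	int one = 1;
	struct vifctl vc = { .vifc_vifi = 0, ... };
	struct mfcctl mc = { ... };

	setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
	setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));

   MRT_INIT must come first, and only one socket per namespace may own the
   multicast routing state; see the checks below.
 */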
936 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
937 {
938 int ret;
939 struct vifctl vif;
940 struct mfcctl mfc;
941 struct net *net = sock_net(sk);
942
943 if (optname != MRT_INIT) {
944 if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
945 return -EACCES;
946 }
947
948 switch (optname) {
949 case MRT_INIT:
950 if (sk->sk_type != SOCK_RAW ||
951 inet_sk(sk)->num != IPPROTO_IGMP)
952 return -EOPNOTSUPP;
953 if (optlen != sizeof(int))
954 return -ENOPROTOOPT;
955
956 rtnl_lock();
957 if (net->ipv4.mroute_sk) {
958 rtnl_unlock();
959 return -EADDRINUSE;
960 }
961
962 ret = ip_ra_control(sk, 1, mrtsock_destruct);
963 if (ret == 0) {
964 write_lock_bh(&mrt_lock);
965 net->ipv4.mroute_sk = sk;
966 write_unlock_bh(&mrt_lock);
967
968 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
969 }
970 rtnl_unlock();
971 return ret;
972 case MRT_DONE:
973 if (sk != net->ipv4.mroute_sk)
974 return -EACCES;
975 return ip_ra_control(sk, 0, NULL);
976 case MRT_ADD_VIF:
977 case MRT_DEL_VIF:
978 if (optlen != sizeof(vif))
979 return -EINVAL;
980 if (copy_from_user(&vif, optval, sizeof(vif)))
981 return -EFAULT;
982 if (vif.vifc_vifi >= MAXVIFS)
983 return -ENFILE;
984 rtnl_lock();
985 if (optname == MRT_ADD_VIF) {
986 ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
987 } else {
988 ret = vif_delete(net, vif.vifc_vifi, 0);
989 }
990 rtnl_unlock();
991 return ret;
992
993 /*
994 * Manipulate the forwarding caches. These live
995 * in a sort of kernel/user symbiosis.
996 */
997 case MRT_ADD_MFC:
998 case MRT_DEL_MFC:
999 if (optlen != sizeof(mfc))
1000 return -EINVAL;
1001 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1002 return -EFAULT;
1003 rtnl_lock();
1004 if (optname == MRT_DEL_MFC)
1005 ret = ipmr_mfc_delete(net, &mfc);
1006 else
1007 ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
1008 rtnl_unlock();
1009 return ret;
1010 /*
1011 * Control PIM assert.
1012 */
1013 case MRT_ASSERT:
1014 {
1015 int v;
1016 if (get_user(v,(int __user *)optval))
1017 return -EFAULT;
1018 net->ipv4.mroute_do_assert = (v) ? 1 : 0;
1019 return 0;
1020 }
1021 #ifdef CONFIG_IP_PIMSM
1022 case MRT_PIM:
1023 {
1024 int v;
1025
1026 if (get_user(v,(int __user *)optval))
1027 return -EFAULT;
1028 v = (v) ? 1 : 0;
1029
1030 rtnl_lock();
1031 ret = 0;
1032 if (v != net->ipv4.mroute_do_pim) {
1033 net->ipv4.mroute_do_pim = v;
1034 net->ipv4.mroute_do_assert = v;
1035 }
1036 rtnl_unlock();
1037 return ret;
1038 }
1039 #endif
1040 /*
1041 * Spurious command, or MRT_VERSION which you cannot
1042 * set.
1043 */
1044 default:
1045 return -ENOPROTOOPT;
1046 }
1047 }
1048
1049 /*
1050 * Getsock opt support for the multicast routing system.
1051 */
1052
1053 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1054 {
1055 int olr;
1056 int val;
1057 struct net *net = sock_net(sk);
1058
1059 if (optname != MRT_VERSION &&
1060 #ifdef CONFIG_IP_PIMSM
1061 optname!=MRT_PIM &&
1062 #endif
1063 optname!=MRT_ASSERT)
1064 return -ENOPROTOOPT;
1065
1066 if (get_user(olr, optlen))
1067 return -EFAULT;
1068
1069 olr = min_t(unsigned int, olr, sizeof(int));
1070 if (olr < 0)
1071 return -EINVAL;
1072
1073 if (put_user(olr, optlen))
1074 return -EFAULT;
1075 if (optname == MRT_VERSION)
1076 val = 0x0305;
1077 #ifdef CONFIG_IP_PIMSM
1078 else if (optname == MRT_PIM)
1079 val = net->ipv4.mroute_do_pim;
1080 #endif
1081 else
1082 val = net->ipv4.mroute_do_assert;
1083 if (copy_to_user(optval, &val, olr))
1084 return -EFAULT;
1085 return 0;
1086 }
1087
1088 /*
1089 * The IP multicast ioctl support routines.
1090 */
1091
1092 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1093 {
1094 struct sioc_sg_req sr;
1095 struct sioc_vif_req vr;
1096 struct vif_device *vif;
1097 struct mfc_cache *c;
1098 struct net *net = sock_net(sk);
1099
1100 switch (cmd) {
1101 case SIOCGETVIFCNT:
1102 if (copy_from_user(&vr, arg, sizeof(vr)))
1103 return -EFAULT;
1104 if (vr.vifi >= net->ipv4.maxvif)
1105 return -EINVAL;
1106 read_lock(&mrt_lock);
1107 vif = &net->ipv4.vif_table[vr.vifi];
1108 if (VIF_EXISTS(net, vr.vifi)) {
1109 vr.icount = vif->pkt_in;
1110 vr.ocount = vif->pkt_out;
1111 vr.ibytes = vif->bytes_in;
1112 vr.obytes = vif->bytes_out;
1113 read_unlock(&mrt_lock);
1114
1115 if (copy_to_user(arg, &vr, sizeof(vr)))
1116 return -EFAULT;
1117 return 0;
1118 }
1119 read_unlock(&mrt_lock);
1120 return -EADDRNOTAVAIL;
1121 case SIOCGETSGCNT:
1122 if (copy_from_user(&sr, arg, sizeof(sr)))
1123 return -EFAULT;
1124
1125 read_lock(&mrt_lock);
1126 c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
1127 if (c) {
1128 sr.pktcnt = c->mfc_un.res.pkt;
1129 sr.bytecnt = c->mfc_un.res.bytes;
1130 sr.wrong_if = c->mfc_un.res.wrong_if;
1131 read_unlock(&mrt_lock);
1132
1133 if (copy_to_user(arg, &sr, sizeof(sr)))
1134 return -EFAULT;
1135 return 0;
1136 }
1137 read_unlock(&mrt_lock);
1138 return -EADDRNOTAVAIL;
1139 default:
1140 return -ENOIOCTLCMD;
1141 }
1142 }
1143
1144
1145 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1146 {
1147 struct net_device *dev = ptr;
1148 struct net *net = dev_net(dev);
1149 struct vif_device *v;
1150 int ct;
1151
1152 if (!net_eq(dev_net(dev), net))
1153 return NOTIFY_DONE;
1154
1155 if (event != NETDEV_UNREGISTER)
1156 return NOTIFY_DONE;
1157 v = &net->ipv4.vif_table[0];
1158 for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
1159 if (v->dev == dev)
1160 vif_delete(net, ct, 1);
1161 }
1162 return NOTIFY_DONE;
1163 }
1164
1165
1166 static struct notifier_block ip_mr_notifier = {
1167 .notifier_call = ipmr_device_event,
1168 };
1169
1170 /*
1171 * Encapsulate a packet by attaching a valid IPIP header to it.
1172 * This avoids tunnel drivers and other mess and gives us the speed so
1173 * important for multicast video.
1174 */
1175
1176 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1177 {
1178 struct iphdr *iph;
1179 struct iphdr *old_iph = ip_hdr(skb);
1180
1181 skb_push(skb, sizeof(struct iphdr));
1182 skb->transport_header = skb->network_header;
1183 skb_reset_network_header(skb);
1184 iph = ip_hdr(skb);
1185
1186 iph->version = 4;
1187 iph->tos = old_iph->tos;
1188 iph->ttl = old_iph->ttl;
1189 iph->frag_off = 0;
1190 iph->daddr = daddr;
1191 iph->saddr = saddr;
1192 iph->protocol = IPPROTO_IPIP;
1193 iph->ihl = 5;
1194 iph->tot_len = htons(skb->len);
1195 ip_select_ident(iph, skb_dst(skb), NULL);
1196 ip_send_check(iph);
1197
1198 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1199 nf_reset(skb);
1200 }
1201
1202 static inline int ipmr_forward_finish(struct sk_buff *skb)
1203 {
1204 struct ip_options * opt = &(IPCB(skb)->opt);
1205
1206 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1207
1208 if (unlikely(opt->optlen))
1209 ip_forward_options(skb);
1210
1211 return dst_output(skb);
1212 }
1213
1214 /*
1215 * Processing handlers for ipmr_forward
1216 */
1217
1218 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1219 {
1220 struct net *net = mfc_net(c);
1221 const struct iphdr *iph = ip_hdr(skb);
1222 struct vif_device *vif = &net->ipv4.vif_table[vifi];
1223 struct net_device *dev;
1224 struct rtable *rt;
1225 int encap = 0;
1226
1227 if (vif->dev == NULL)
1228 goto out_free;
1229
1230 #ifdef CONFIG_IP_PIMSM
1231 if (vif->flags & VIFF_REGISTER) {
1232 vif->pkt_out++;
1233 vif->bytes_out += skb->len;
1234 vif->dev->stats.tx_bytes += skb->len;
1235 vif->dev->stats.tx_packets++;
1236 ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
1237 goto out_free;
1238 }
1239 #endif
1240
1241 if (vif->flags&VIFF_TUNNEL) {
1242 struct flowi fl = { .oif = vif->link,
1243 .nl_u = { .ip4_u =
1244 { .daddr = vif->remote,
1245 .saddr = vif->local,
1246 .tos = RT_TOS(iph->tos) } },
1247 .proto = IPPROTO_IPIP };
1248 if (ip_route_output_key(net, &rt, &fl))
1249 goto out_free;
1250 encap = sizeof(struct iphdr);
1251 } else {
1252 struct flowi fl = { .oif = vif->link,
1253 .nl_u = { .ip4_u =
1254 { .daddr = iph->daddr,
1255 .tos = RT_TOS(iph->tos) } },
1256 .proto = IPPROTO_IPIP };
1257 if (ip_route_output_key(net, &rt, &fl))
1258 goto out_free;
1259 }
1260
1261 dev = rt->u.dst.dev;
1262
1263 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1264 		/* Do not fragment multicasts. Alas, IPv4 does not
1265 		   allow us to send an ICMP error here, so oversized
1266 		   packets silently disappear into a black hole.
1267 		 */
1268
1269 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1270 ip_rt_put(rt);
1271 goto out_free;
1272 }
1273
1274 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1275
1276 if (skb_cow(skb, encap)) {
1277 ip_rt_put(rt);
1278 goto out_free;
1279 }
1280
1281 vif->pkt_out++;
1282 vif->bytes_out += skb->len;
1283
1284 skb_dst_drop(skb);
1285 skb_dst_set(skb, &rt->u.dst);
1286 ip_decrease_ttl(ip_hdr(skb));
1287
1288 /* FIXME: forward and output firewalls used to be called here.
1289 * What do we do with netfilter? -- RR */
1290 if (vif->flags & VIFF_TUNNEL) {
1291 ip_encap(skb, vif->local, vif->remote);
1292 /* FIXME: extra output firewall step used to be here. --RR */
1293 vif->dev->stats.tx_packets++;
1294 vif->dev->stats.tx_bytes += skb->len;
1295 }
1296
1297 IPCB(skb)->flags |= IPSKB_FORWARDED;
1298
1299 	/*
1300 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1301 	 * not only before forwarding, but also after forwarding them on all
1302 	 * output interfaces. Clearly, if the mrouter also runs a multicast
1303 	 * application, that application should receive the packets regardless
1304 	 * of which interface it joined on.
1305 	 * If we did not do this, the application would have to join on all
1306 	 * interfaces. On the other hand, a multihomed host (or router, but
1307 	 * not an mrouter) cannot join on more than one interface, since that
1308 	 * would result in receiving duplicate packets.
1309 	 */
1310 NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1311 ipmr_forward_finish);
1312 return;
1313
1314 out_free:
1315 kfree_skb(skb);
1316 return;
1317 }
1318
1319 static int ipmr_find_vif(struct net_device *dev)
1320 {
1321 struct net *net = dev_net(dev);
1322 int ct;
1323 for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1324 if (net->ipv4.vif_table[ct].dev == dev)
1325 break;
1326 }
1327 return ct;
1328 }
1329
1330 /* "local" means that we should preserve one skb (for local delivery) */
1331
1332 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1333 {
1334 int psend = -1;
1335 int vif, ct;
1336 struct net *net = mfc_net(cache);
1337
1338 vif = cache->mfc_parent;
1339 cache->mfc_un.res.pkt++;
1340 cache->mfc_un.res.bytes += skb->len;
1341
1342 /*
1343 * Wrong interface: drop packet and (maybe) send PIM assert.
1344 */
1345 if (net->ipv4.vif_table[vif].dev != skb->dev) {
1346 int true_vifi;
1347
1348 if (skb_rtable(skb)->fl.iif == 0) {
1349 			/* It is our own packet, looped back.
1350 			   Very complicated situation...
1351
1352 			   The best workaround, until the routing daemons are
1353 			   fixed, is not to redistribute a packet if it was
1354 			   sent through the wrong interface. It means that
1355 			   multicast applications WILL NOT work for
1356 			   (S,G) entries whose default multicast route points
1357 			   to the wrong oif. In any case, it is not a good
1358 			   idea to run multicast applications on a router.
1359 			 */
1360 goto dont_forward;
1361 }
1362
1363 cache->mfc_un.res.wrong_if++;
1364 true_vifi = ipmr_find_vif(skb->dev);
1365
1366 if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
1367 		    /* PIM-SM uses asserts when switching from RPT to SPT,
1368 		       so we cannot check that the packet arrived on an oif.
1369 		       That is bad, but otherwise we would need to move a
1370 		       pretty large chunk of pimd into the kernel. Ough... --ANK
1371 		     */
1372 (net->ipv4.mroute_do_pim ||
1373 cache->mfc_un.res.ttls[true_vifi] < 255) &&
1374 time_after(jiffies,
1375 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1376 cache->mfc_un.res.last_assert = jiffies;
1377 ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
1378 }
1379 goto dont_forward;
1380 }
1381
1382 net->ipv4.vif_table[vif].pkt_in++;
1383 net->ipv4.vif_table[vif].bytes_in += skb->len;
1384
1385 /*
1386 * Forward the frame
1387 */
1388 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1389 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1390 if (psend != -1) {
1391 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1392 if (skb2)
1393 ipmr_queue_xmit(skb2, cache, psend);
1394 }
1395 psend = ct;
1396 }
1397 }
1398 if (psend != -1) {
1399 if (local) {
1400 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1401 if (skb2)
1402 ipmr_queue_xmit(skb2, cache, psend);
1403 } else {
1404 ipmr_queue_xmit(skb, cache, psend);
1405 return 0;
1406 }
1407 }
1408
1409 dont_forward:
1410 if (!local)
1411 kfree_skb(skb);
1412 return 0;
1413 }
1414
1415
1416 /*
1417 * Multicast packets for forwarding arrive here
1418 */
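/* Entry point for multicast packets handed to us by the routing code (it is
   installed as the dst input handler for multicast routes): look up the (S,G)
   cache entry, queue the packet for mrouted if it is still unresolved,
   otherwise forward it, preserving local delivery when RTCF_LOCAL is set.
 */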
1419
1420 int ip_mr_input(struct sk_buff *skb)
1421 {
1422 struct mfc_cache *cache;
1423 struct net *net = dev_net(skb->dev);
1424 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1425
1426 	/* A packet looped back after forwarding should not be forwarded
1427 	   a second time, but it can still be delivered locally.
1428 	 */
1429 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1430 goto dont_forward;
1431
1432 if (!local) {
1433 if (IPCB(skb)->opt.router_alert) {
1434 if (ip_call_ra_chain(skb))
1435 return 0;
1436 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1437 			/* IGMPv1 (and broken IGMPv2 implementations such as
1438 			   Cisco IOS <= 11.2(8)) do not put the router alert
1439 			   option in IGMP packets destined to routable
1440 			   groups. This is very bad, because it means
1441 			   that we can forward NO IGMP messages.
1442 			 */
1443 read_lock(&mrt_lock);
1444 if (net->ipv4.mroute_sk) {
1445 nf_reset(skb);
1446 raw_rcv(net->ipv4.mroute_sk, skb);
1447 read_unlock(&mrt_lock);
1448 return 0;
1449 }
1450 read_unlock(&mrt_lock);
1451 }
1452 }
1453
1454 read_lock(&mrt_lock);
1455 cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1456
1457 /*
1458 * No usable cache entry
1459 */
1460 if (cache == NULL) {
1461 int vif;
1462
1463 if (local) {
1464 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1465 ip_local_deliver(skb);
1466 if (skb2 == NULL) {
1467 read_unlock(&mrt_lock);
1468 return -ENOBUFS;
1469 }
1470 skb = skb2;
1471 }
1472
1473 vif = ipmr_find_vif(skb->dev);
1474 if (vif >= 0) {
1475 int err = ipmr_cache_unresolved(net, vif, skb);
1476 read_unlock(&mrt_lock);
1477
1478 return err;
1479 }
1480 read_unlock(&mrt_lock);
1481 kfree_skb(skb);
1482 return -ENODEV;
1483 }
1484
1485 ip_mr_forward(skb, cache, local);
1486
1487 read_unlock(&mrt_lock);
1488
1489 if (local)
1490 return ip_local_deliver(skb);
1491
1492 return 0;
1493
1494 dont_forward:
1495 if (local)
1496 return ip_local_deliver(skb);
1497 kfree_skb(skb);
1498 return 0;
1499 }
1500
1501 #ifdef CONFIG_IP_PIMSM
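/* Common PIM REGISTER handling for PIMv1 and PIMv2: validate the encapsulated
   IP packet, then strip the outer headers and re-inject it on the "pimreg"
   register device so that it is forwarded like any other multicast packet.
   Returns 0 if the skb was consumed, non-zero if the caller should drop it.
 */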
1502 static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1503 {
1504 struct net_device *reg_dev = NULL;
1505 struct iphdr *encap;
1506 struct net *net = dev_net(skb->dev);
1507
1508 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1509 /*
1510 Check that:
1511 	   a. packet is really destined to a multicast group
1512 b. packet is not a NULL-REGISTER
1513 c. packet is not truncated
1514 */
1515 if (!ipv4_is_multicast(encap->daddr) ||
1516 encap->tot_len == 0 ||
1517 ntohs(encap->tot_len) + pimlen > skb->len)
1518 return 1;
1519
1520 read_lock(&mrt_lock);
1521 if (net->ipv4.mroute_reg_vif_num >= 0)
1522 reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
1523 if (reg_dev)
1524 dev_hold(reg_dev);
1525 read_unlock(&mrt_lock);
1526
1527 if (reg_dev == NULL)
1528 return 1;
1529
1530 skb->mac_header = skb->network_header;
1531 skb_pull(skb, (u8*)encap - skb->data);
1532 skb_reset_network_header(skb);
1533 skb->dev = reg_dev;
1534 skb->protocol = htons(ETH_P_IP);
1535 skb->ip_summed = 0;
1536 skb->pkt_type = PACKET_HOST;
1537 skb_dst_drop(skb);
1538 reg_dev->stats.rx_bytes += skb->len;
1539 reg_dev->stats.rx_packets++;
1540 nf_reset(skb);
1541 netif_rx(skb);
1542 dev_put(reg_dev);
1543
1544 return 0;
1545 }
1546 #endif
1547
1548 #ifdef CONFIG_IP_PIMSM_V1
1549 /*
1550 * Handle IGMP messages of PIMv1
1551 */
1552
1553 int pim_rcv_v1(struct sk_buff * skb)
1554 {
1555 struct igmphdr *pim;
1556 struct net *net = dev_net(skb->dev);
1557
1558 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1559 goto drop;
1560
1561 pim = igmp_hdr(skb);
1562
1563 if (!net->ipv4.mroute_do_pim ||
1564 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1565 goto drop;
1566
1567 if (__pim_rcv(skb, sizeof(*pim))) {
1568 drop:
1569 kfree_skb(skb);
1570 }
1571 return 0;
1572 }
1573 #endif
1574
1575 #ifdef CONFIG_IP_PIMSM_V2
1576 static int pim_rcv(struct sk_buff * skb)
1577 {
1578 struct pimreghdr *pim;
1579
1580 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1581 goto drop;
1582
1583 pim = (struct pimreghdr *)skb_transport_header(skb);
1584 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1585 (pim->flags&PIM_NULL_REGISTER) ||
1586 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1587 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1588 goto drop;
1589
1590 if (__pim_rcv(skb, sizeof(*pim))) {
1591 drop:
1592 kfree_skb(skb);
1593 }
1594 return 0;
1595 }
1596 #endif
1597
1598 static int
1599 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1600 {
1601 int ct;
1602 struct rtnexthop *nhp;
1603 struct net *net = mfc_net(c);
1604 struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
1605 u8 *b = skb_tail_pointer(skb);
1606 struct rtattr *mp_head;
1607
1608 if (dev)
1609 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1610
1611 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1612
1613 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1614 if (c->mfc_un.res.ttls[ct] < 255) {
1615 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1616 goto rtattr_failure;
1617 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1618 nhp->rtnh_flags = 0;
1619 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1620 nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1621 nhp->rtnh_len = sizeof(*nhp);
1622 }
1623 }
1624 mp_head->rta_type = RTA_MULTIPATH;
1625 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1626 rtm->rtm_type = RTN_MULTICAST;
1627 return 1;
1628
1629 rtattr_failure:
1630 nlmsg_trim(skb, b);
1631 return -EMSGSIZE;
1632 }
1633
1634 int ipmr_get_route(struct net *net,
1635 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1636 {
1637 int err;
1638 struct mfc_cache *cache;
1639 struct rtable *rt = skb_rtable(skb);
1640
1641 read_lock(&mrt_lock);
1642 cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1643
1644 if (cache == NULL) {
1645 struct sk_buff *skb2;
1646 struct iphdr *iph;
1647 struct net_device *dev;
1648 int vif;
1649
1650 if (nowait) {
1651 read_unlock(&mrt_lock);
1652 return -EAGAIN;
1653 }
1654
1655 dev = skb->dev;
1656 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1657 read_unlock(&mrt_lock);
1658 return -ENODEV;
1659 }
1660 skb2 = skb_clone(skb, GFP_ATOMIC);
1661 if (!skb2) {
1662 read_unlock(&mrt_lock);
1663 return -ENOMEM;
1664 }
1665
1666 skb_push(skb2, sizeof(struct iphdr));
1667 skb_reset_network_header(skb2);
1668 iph = ip_hdr(skb2);
1669 iph->ihl = sizeof(struct iphdr) >> 2;
1670 iph->saddr = rt->rt_src;
1671 iph->daddr = rt->rt_dst;
1672 iph->version = 0;
1673 err = ipmr_cache_unresolved(net, vif, skb2);
1674 read_unlock(&mrt_lock);
1675 return err;
1676 }
1677
1678 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1679 cache->mfc_flags |= MFC_NOTIFY;
1680 err = ipmr_fill_mroute(skb, cache, rtm);
1681 read_unlock(&mrt_lock);
1682 return err;
1683 }
1684
1685 #ifdef CONFIG_PROC_FS
1686 /*
1687  *	The /proc interfaces to multicast routing: /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
1688 */
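/* Example of the table exposed through /proc/net/ip_mr_vif (the values below
   are illustrative only; the exact columns come from ipmr_vif_seq_show()):

	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
	 0 eth0             1500      10     3000      20 00000 0100000A 00000000
 */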
1689 struct ipmr_vif_iter {
1690 struct seq_net_private p;
1691 int ct;
1692 };
1693
1694 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1695 struct ipmr_vif_iter *iter,
1696 loff_t pos)
1697 {
1698 for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1699 if (!VIF_EXISTS(net, iter->ct))
1700 continue;
1701 if (pos-- == 0)
1702 return &net->ipv4.vif_table[iter->ct];
1703 }
1704 return NULL;
1705 }
1706
1707 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1708 __acquires(mrt_lock)
1709 {
1710 struct net *net = seq_file_net(seq);
1711
1712 read_lock(&mrt_lock);
1713 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1714 : SEQ_START_TOKEN;
1715 }
1716
1717 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1718 {
1719 struct ipmr_vif_iter *iter = seq->private;
1720 struct net *net = seq_file_net(seq);
1721
1722 ++*pos;
1723 if (v == SEQ_START_TOKEN)
1724 return ipmr_vif_seq_idx(net, iter, 0);
1725
1726 while (++iter->ct < net->ipv4.maxvif) {
1727 if (!VIF_EXISTS(net, iter->ct))
1728 continue;
1729 return &net->ipv4.vif_table[iter->ct];
1730 }
1731 return NULL;
1732 }
1733
1734 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1735 __releases(mrt_lock)
1736 {
1737 read_unlock(&mrt_lock);
1738 }
1739
1740 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1741 {
1742 struct net *net = seq_file_net(seq);
1743
1744 if (v == SEQ_START_TOKEN) {
1745 seq_puts(seq,
1746 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1747 } else {
1748 const struct vif_device *vif = v;
1749 const char *name = vif->dev ? vif->dev->name : "none";
1750
1751 seq_printf(seq,
1752 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
1753 vif - net->ipv4.vif_table,
1754 name, vif->bytes_in, vif->pkt_in,
1755 vif->bytes_out, vif->pkt_out,
1756 vif->flags, vif->local, vif->remote);
1757 }
1758 return 0;
1759 }
1760
1761 static const struct seq_operations ipmr_vif_seq_ops = {
1762 .start = ipmr_vif_seq_start,
1763 .next = ipmr_vif_seq_next,
1764 .stop = ipmr_vif_seq_stop,
1765 .show = ipmr_vif_seq_show,
1766 };
1767
1768 static int ipmr_vif_open(struct inode *inode, struct file *file)
1769 {
1770 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1771 sizeof(struct ipmr_vif_iter));
1772 }
1773
1774 static const struct file_operations ipmr_vif_fops = {
1775 .owner = THIS_MODULE,
1776 .open = ipmr_vif_open,
1777 .read = seq_read,
1778 .llseek = seq_lseek,
1779 .release = seq_release_net,
1780 };
1781
1782 struct ipmr_mfc_iter {
1783 struct seq_net_private p;
1784 struct mfc_cache **cache;
1785 int ct;
1786 };
1787
1788
1789 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1790 struct ipmr_mfc_iter *it, loff_t pos)
1791 {
1792 struct mfc_cache *mfc;
1793
1794 it->cache = net->ipv4.mfc_cache_array;
1795 read_lock(&mrt_lock);
1796 for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1797 for (mfc = net->ipv4.mfc_cache_array[it->ct];
1798 mfc; mfc = mfc->next)
1799 if (pos-- == 0)
1800 return mfc;
1801 read_unlock(&mrt_lock);
1802
1803 it->cache = &mfc_unres_queue;
1804 spin_lock_bh(&mfc_unres_lock);
1805 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1806 if (net_eq(mfc_net(mfc), net) &&
1807 pos-- == 0)
1808 return mfc;
1809 spin_unlock_bh(&mfc_unres_lock);
1810
1811 it->cache = NULL;
1812 return NULL;
1813 }
1814
1815
1816 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1817 {
1818 struct ipmr_mfc_iter *it = seq->private;
1819 struct net *net = seq_file_net(seq);
1820
1821 it->cache = NULL;
1822 it->ct = 0;
1823 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1824 : SEQ_START_TOKEN;
1825 }
1826
1827 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1828 {
1829 struct mfc_cache *mfc = v;
1830 struct ipmr_mfc_iter *it = seq->private;
1831 struct net *net = seq_file_net(seq);
1832
1833 ++*pos;
1834
1835 if (v == SEQ_START_TOKEN)
1836 return ipmr_mfc_seq_idx(net, seq->private, 0);
1837
1838 if (mfc->next)
1839 return mfc->next;
1840
1841 if (it->cache == &mfc_unres_queue)
1842 goto end_of_list;
1843
1844 BUG_ON(it->cache != net->ipv4.mfc_cache_array);
1845
1846 while (++it->ct < MFC_LINES) {
1847 mfc = net->ipv4.mfc_cache_array[it->ct];
1848 if (mfc)
1849 return mfc;
1850 }
1851
1852 /* exhausted cache_array, show unresolved */
1853 read_unlock(&mrt_lock);
1854 it->cache = &mfc_unres_queue;
1855 it->ct = 0;
1856
1857 spin_lock_bh(&mfc_unres_lock);
1858 mfc = mfc_unres_queue;
1859 while (mfc && !net_eq(mfc_net(mfc), net))
1860 mfc = mfc->next;
1861 if (mfc)
1862 return mfc;
1863
1864 end_of_list:
1865 spin_unlock_bh(&mfc_unres_lock);
1866 it->cache = NULL;
1867
1868 return NULL;
1869 }
1870
1871 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1872 {
1873 struct ipmr_mfc_iter *it = seq->private;
1874 struct net *net = seq_file_net(seq);
1875
1876 if (it->cache == &mfc_unres_queue)
1877 spin_unlock_bh(&mfc_unres_lock);
1878 else if (it->cache == net->ipv4.mfc_cache_array)
1879 read_unlock(&mrt_lock);
1880 }
1881
1882 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1883 {
1884 int n;
1885 struct net *net = seq_file_net(seq);
1886
1887 if (v == SEQ_START_TOKEN) {
1888 seq_puts(seq,
1889 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1890 } else {
1891 const struct mfc_cache *mfc = v;
1892 const struct ipmr_mfc_iter *it = seq->private;
1893
1894 seq_printf(seq, "%08lX %08lX %-3hd",
1895 (unsigned long) mfc->mfc_mcastgrp,
1896 (unsigned long) mfc->mfc_origin,
1897 mfc->mfc_parent);
1898
1899 if (it->cache != &mfc_unres_queue) {
1900 seq_printf(seq, " %8lu %8lu %8lu",
1901 mfc->mfc_un.res.pkt,
1902 mfc->mfc_un.res.bytes,
1903 mfc->mfc_un.res.wrong_if);
1904 for (n = mfc->mfc_un.res.minvif;
1905 n < mfc->mfc_un.res.maxvif; n++ ) {
1906 if (VIF_EXISTS(net, n) &&
1907 mfc->mfc_un.res.ttls[n] < 255)
1908 seq_printf(seq,
1909 " %2d:%-3d",
1910 n, mfc->mfc_un.res.ttls[n]);
1911 }
1912 } else {
1913 /* unresolved mfc_caches don't contain
1914 * pkt, bytes and wrong_if values
1915 */
1916 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1917 }
1918 seq_putc(seq, '\n');
1919 }
1920 return 0;
1921 }
1922
1923 static const struct seq_operations ipmr_mfc_seq_ops = {
1924 .start = ipmr_mfc_seq_start,
1925 .next = ipmr_mfc_seq_next,
1926 .stop = ipmr_mfc_seq_stop,
1927 .show = ipmr_mfc_seq_show,
1928 };
1929
1930 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1931 {
1932 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1933 sizeof(struct ipmr_mfc_iter));
1934 }
1935
1936 static const struct file_operations ipmr_mfc_fops = {
1937 .owner = THIS_MODULE,
1938 .open = ipmr_mfc_open,
1939 .read = seq_read,
1940 .llseek = seq_lseek,
1941 .release = seq_release_net,
1942 };
1943 #endif
1944
1945 #ifdef CONFIG_IP_PIMSM_V2
1946 static const struct net_protocol pim_protocol = {
1947 .handler = pim_rcv,
1948 .netns_ok = 1,
1949 };
1950 #endif
1951
1952
1953 /*
1954 * Setup for IP multicast routing
1955 */
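/* Per-namespace setup: allocate the vif table and the MFC hash array, reset
   the register-VIF index and create the /proc entries; ipmr_net_exit() below
   undoes all of this when the namespace goes away.
 */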
1956 static int __net_init ipmr_net_init(struct net *net)
1957 {
1958 int err = 0;
1959
1960 net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1961 GFP_KERNEL);
1962 if (!net->ipv4.vif_table) {
1963 err = -ENOMEM;
1964 goto fail;
1965 }
1966
1967 /* Forwarding cache */
1968 net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1969 sizeof(struct mfc_cache *),
1970 GFP_KERNEL);
1971 if (!net->ipv4.mfc_cache_array) {
1972 err = -ENOMEM;
1973 goto fail_mfc_cache;
1974 }
1975
1976 #ifdef CONFIG_IP_PIMSM
1977 net->ipv4.mroute_reg_vif_num = -1;
1978 #endif
1979
1980 #ifdef CONFIG_PROC_FS
1981 err = -ENOMEM;
1982 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1983 goto proc_vif_fail;
1984 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1985 goto proc_cache_fail;
1986 #endif
1987 return 0;
1988
1989 #ifdef CONFIG_PROC_FS
1990 proc_cache_fail:
1991 proc_net_remove(net, "ip_mr_vif");
1992 proc_vif_fail:
1993 kfree(net->ipv4.mfc_cache_array);
1994 #endif
1995 fail_mfc_cache:
1996 kfree(net->ipv4.vif_table);
1997 fail:
1998 return err;
1999 }
2000
2001 static void __net_exit ipmr_net_exit(struct net *net)
2002 {
2003 #ifdef CONFIG_PROC_FS
2004 proc_net_remove(net, "ip_mr_cache");
2005 proc_net_remove(net, "ip_mr_vif");
2006 #endif
2007 kfree(net->ipv4.mfc_cache_array);
2008 kfree(net->ipv4.vif_table);
2009 }
2010
2011 static struct pernet_operations ipmr_net_ops = {
2012 .init = ipmr_net_init,
2013 .exit = ipmr_net_exit,
2014 };
2015
2016 int __init ip_mr_init(void)
2017 {
2018 int err;
2019
2020 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2021 sizeof(struct mfc_cache),
2022 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2023 NULL);
2024 if (!mrt_cachep)
2025 return -ENOMEM;
2026
2027 err = register_pernet_subsys(&ipmr_net_ops);
2028 if (err)
2029 goto reg_pernet_fail;
2030
2031 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
2032 err = register_netdevice_notifier(&ip_mr_notifier);
2033 if (err)
2034 goto reg_notif_fail;
2035 #ifdef CONFIG_IP_PIMSM_V2
2036 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2037 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2038 err = -EAGAIN;
2039 goto add_proto_fail;
2040 }
2041 #endif
2042 return 0;
2043
2044 #ifdef CONFIG_IP_PIMSM_V2
2045 add_proto_fail:
2046 unregister_netdevice_notifier(&ip_mr_notifier);
2047 #endif
2048 reg_notif_fail:
2049 del_timer(&ipmr_expire_timer);
2050 unregister_pernet_subsys(&ipmr_net_ops);
2051 reg_pernet_fail:
2052 kmem_cache_destroy(mrt_cachep);
2053 return err;
2054 }