netns: ip6mr: declare counter cache_resolve_queue_len per-namespace
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / ip6mr.c
CommitLineData
7bc570c8
YH
1/*
2 * Linux IPv6 multicast routing support for BSD pim6sd
3 * Based on net/ipv4/ipmr.c.
4 *
5 * (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
6 * LSIIT Laboratory, Strasbourg, France
7 * (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
8 * 6WIND, Paris, France
9 * Copyright (C)2007,2008 USAGI/WIDE Project
10 * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 */
18
19#include <asm/system.h>
20#include <asm/uaccess.h>
21#include <linux/types.h>
22#include <linux/sched.h>
23#include <linux/errno.h>
24#include <linux/timer.h>
25#include <linux/mm.h>
26#include <linux/kernel.h>
27#include <linux/fcntl.h>
28#include <linux/stat.h>
29#include <linux/socket.h>
7bc570c8
YH
30#include <linux/inet.h>
31#include <linux/netdevice.h>
32#include <linux/inetdevice.h>
7bc570c8
YH
33#include <linux/proc_fs.h>
34#include <linux/seq_file.h>
7bc570c8 35#include <linux/init.h>
7bc570c8
YH
36#include <net/protocol.h>
37#include <linux/skbuff.h>
38#include <net/sock.h>
7bc570c8 39#include <net/raw.h>
7bc570c8
YH
40#include <linux/notifier.h>
41#include <linux/if_arp.h>
7bc570c8
YH
42#include <net/checksum.h>
43#include <net/netlink.h>
44
45#include <net/ipv6.h>
46#include <net/ip6_route.h>
47#include <linux/mroute6.h>
14fb64e1 48#include <linux/pim.h>
7bc570c8
YH
49#include <net/addrconf.h>
50#include <linux/netfilter_ipv6.h>
51
7bc570c8
YH
52/* Big lock, protecting vif table, mrt cache and mroute socket state.
53 Note that the changes are semaphored via rtnl_lock.
54 */
55
56static DEFINE_RWLOCK(mrt_lock);
57
58/*
59 * Multicast router control variables
60 */
61
4e16880c 62#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)
7bc570c8 63
14fb64e1
YH
64static int mroute_do_assert; /* Set in PIM assert */
65#ifdef CONFIG_IPV6_PIMSM_V2
66static int mroute_do_pim;
67#else
68#define mroute_do_pim 0
69#endif
70
7bc570c8 71static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */
7bc570c8
YH
72
73/* Special spinlock for queue of unresolved entries */
74static DEFINE_SPINLOCK(mfc_unres_lock);
75
76/* We return to original Alan's scheme. Hash table of resolved
77 entries is changed only in process context and protected
78 with weak lock mrt_lock. Queue of unresolved entries is protected
79 with strong spinlock mfc_unres_lock.
80
81 In this case data path is free of exclusive locks at all.
82 */
83
84static struct kmem_cache *mrt_cachep __read_mostly;
85
86static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
6ac7eb08 87static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
7bc570c8
YH
88static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
89
14fb64e1
YH
90#ifdef CONFIG_IPV6_PIMSM_V2
91static struct inet6_protocol pim6_protocol;
92#endif
93
7bc570c8
YH
94static struct timer_list ipmr_expire_timer;
95
96
97#ifdef CONFIG_PROC_FS
98
/* Private seq_file state for the MFC cache dump. */
struct ipmr_mfc_iter {
	/*
	 * List currently being walked: the resolved hash array,
	 * &mfc_unres_queue, or NULL once the walk is over.
	 */
	struct mfc6_cache **cache;
	/* Current hash line while walking the resolved array. */
	int ct;
};
103
104
105static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
106{
107 struct mfc6_cache *mfc;
108
4a6258a0 109 it->cache = init_net.ipv6.mfc6_cache_array;
7bc570c8 110 read_lock(&mrt_lock);
4a6258a0
BT
111 for (it->ct = 0; it->ct < MFC6_LINES; it->ct++)
112 for (mfc = init_net.ipv6.mfc6_cache_array[it->ct];
113 mfc; mfc = mfc->next)
7bc570c8
YH
114 if (pos-- == 0)
115 return mfc;
116 read_unlock(&mrt_lock);
117
118 it->cache = &mfc_unres_queue;
119 spin_lock_bh(&mfc_unres_lock);
120 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
121 if (pos-- == 0)
122 return mfc;
123 spin_unlock_bh(&mfc_unres_lock);
124
125 it->cache = NULL;
126 return NULL;
127}
128
129
130
131
132/*
133 * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
134 */
135
/* Private seq_file state for the vif table dump. */
struct ipmr_vif_iter {
	int ct;		/* index of the current slot in the vif table */
};
139
140static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
141 loff_t pos)
142{
4e16880c
BT
143 for (iter->ct = 0; iter->ct < init_net.ipv6.maxvif; ++iter->ct) {
144 if (!MIF_EXISTS(&init_net, iter->ct))
7bc570c8
YH
145 continue;
146 if (pos-- == 0)
4e16880c 147 return &init_net.ipv6.vif6_table[iter->ct];
7bc570c8
YH
148 }
149 return NULL;
150}
151
152static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
153 __acquires(mrt_lock)
154{
155 read_lock(&mrt_lock);
156 return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
157 : SEQ_START_TOKEN);
158}
159
160static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
161{
162 struct ipmr_vif_iter *iter = seq->private;
163
164 ++*pos;
165 if (v == SEQ_START_TOKEN)
166 return ip6mr_vif_seq_idx(iter, 0);
167
4e16880c
BT
168 while (++iter->ct < init_net.ipv6.maxvif) {
169 if (!MIF_EXISTS(&init_net, iter->ct))
7bc570c8 170 continue;
4e16880c 171 return &init_net.ipv6.vif6_table[iter->ct];
7bc570c8
YH
172 }
173 return NULL;
174}
175
176static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
177 __releases(mrt_lock)
178{
179 read_unlock(&mrt_lock);
180}
181
182static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
183{
184 if (v == SEQ_START_TOKEN) {
185 seq_puts(seq,
186 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
187 } else {
188 const struct mif_device *vif = v;
189 const char *name = vif->dev ? vif->dev->name : "none";
190
191 seq_printf(seq,
d430a227 192 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
4e16880c 193 vif - init_net.ipv6.vif6_table,
7bc570c8
YH
194 name, vif->bytes_in, vif->pkt_in,
195 vif->bytes_out, vif->pkt_out,
196 vif->flags);
197 }
198 return 0;
199}
200
201static struct seq_operations ip6mr_vif_seq_ops = {
202 .start = ip6mr_vif_seq_start,
203 .next = ip6mr_vif_seq_next,
204 .stop = ip6mr_vif_seq_stop,
205 .show = ip6mr_vif_seq_show,
206};
207
208static int ip6mr_vif_open(struct inode *inode, struct file *file)
209{
210 return seq_open_private(file, &ip6mr_vif_seq_ops,
211 sizeof(struct ipmr_vif_iter));
212}
213
214static struct file_operations ip6mr_vif_fops = {
215 .owner = THIS_MODULE,
216 .open = ip6mr_vif_open,
217 .read = seq_read,
218 .llseek = seq_lseek,
eedd726e 219 .release = seq_release_private,
7bc570c8
YH
220};
221
222static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
223{
224 return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
225 : SEQ_START_TOKEN);
226}
227
228static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
229{
230 struct mfc6_cache *mfc = v;
231 struct ipmr_mfc_iter *it = seq->private;
232
233 ++*pos;
234
235 if (v == SEQ_START_TOKEN)
236 return ipmr_mfc_seq_idx(seq->private, 0);
237
238 if (mfc->next)
239 return mfc->next;
240
241 if (it->cache == &mfc_unres_queue)
242 goto end_of_list;
243
4a6258a0 244 BUG_ON(it->cache != init_net.ipv6.mfc6_cache_array);
7bc570c8 245
4a6258a0
BT
246 while (++it->ct < MFC6_LINES) {
247 mfc = init_net.ipv6.mfc6_cache_array[it->ct];
7bc570c8
YH
248 if (mfc)
249 return mfc;
250 }
251
252 /* exhausted cache_array, show unresolved */
253 read_unlock(&mrt_lock);
254 it->cache = &mfc_unres_queue;
255 it->ct = 0;
256
257 spin_lock_bh(&mfc_unres_lock);
258 mfc = mfc_unres_queue;
259 if (mfc)
260 return mfc;
261
262 end_of_list:
263 spin_unlock_bh(&mfc_unres_lock);
264 it->cache = NULL;
265
266 return NULL;
267}
268
269static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
270{
271 struct ipmr_mfc_iter *it = seq->private;
272
273 if (it->cache == &mfc_unres_queue)
274 spin_unlock_bh(&mfc_unres_lock);
4a6258a0 275 else if (it->cache == init_net.ipv6.mfc6_cache_array)
7bc570c8
YH
276 read_unlock(&mrt_lock);
277}
278
279static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
280{
281 int n;
282
283 if (v == SEQ_START_TOKEN) {
284 seq_puts(seq,
285 "Group "
286 "Origin "
287 "Iif Pkts Bytes Wrong Oifs\n");
288 } else {
289 const struct mfc6_cache *mfc = v;
290 const struct ipmr_mfc_iter *it = seq->private;
291
999890b2 292 seq_printf(seq, "%pI6 %pI6 %-3hd",
0c6ce78a 293 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
1ea472e2 294 mfc->mf6c_parent);
7bc570c8
YH
295
296 if (it->cache != &mfc_unres_queue) {
1ea472e2
BT
297 seq_printf(seq, " %8lu %8lu %8lu",
298 mfc->mfc_un.res.pkt,
299 mfc->mfc_un.res.bytes,
300 mfc->mfc_un.res.wrong_if);
7bc570c8
YH
301 for (n = mfc->mfc_un.res.minvif;
302 n < mfc->mfc_un.res.maxvif; n++) {
4e16880c 303 if (MIF_EXISTS(&init_net, n) &&
7bc570c8
YH
304 mfc->mfc_un.res.ttls[n] < 255)
305 seq_printf(seq,
306 " %2d:%-3d",
307 n, mfc->mfc_un.res.ttls[n]);
308 }
1ea472e2
BT
309 } else {
310 /* unresolved mfc_caches don't contain
311 * pkt, bytes and wrong_if values
312 */
313 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
7bc570c8
YH
314 }
315 seq_putc(seq, '\n');
316 }
317 return 0;
318}
319
320static struct seq_operations ipmr_mfc_seq_ops = {
321 .start = ipmr_mfc_seq_start,
322 .next = ipmr_mfc_seq_next,
323 .stop = ipmr_mfc_seq_stop,
324 .show = ipmr_mfc_seq_show,
325};
326
327static int ipmr_mfc_open(struct inode *inode, struct file *file)
328{
329 return seq_open_private(file, &ipmr_mfc_seq_ops,
330 sizeof(struct ipmr_mfc_iter));
331}
332
333static struct file_operations ip6mr_mfc_fops = {
334 .owner = THIS_MODULE,
335 .open = ipmr_mfc_open,
336 .read = seq_read,
337 .llseek = seq_lseek,
eedd726e 338 .release = seq_release_private,
7bc570c8
YH
339};
340#endif
341
14fb64e1
YH
342#ifdef CONFIG_IPV6_PIMSM_V2
/* Index of the PIM register vif in the vif table, or -1 when there is none. */
static int reg_vif_num = -1;
344
345static int pim6_rcv(struct sk_buff *skb)
346{
347 struct pimreghdr *pim;
348 struct ipv6hdr *encap;
349 struct net_device *reg_dev = NULL;
350
351 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
352 goto drop;
353
354 pim = (struct pimreghdr *)skb_transport_header(skb);
355 if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
356 (pim->flags & PIM_NULL_REGISTER) ||
357 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
ec6b486f 358 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
14fb64e1
YH
359 goto drop;
360
361 /* check if the inner packet is destined to mcast group */
362 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
363 sizeof(*pim));
364
365 if (!ipv6_addr_is_multicast(&encap->daddr) ||
366 encap->payload_len == 0 ||
367 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
368 goto drop;
369
370 read_lock(&mrt_lock);
371 if (reg_vif_num >= 0)
4e16880c 372 reg_dev = init_net.ipv6.vif6_table[reg_vif_num].dev;
14fb64e1
YH
373 if (reg_dev)
374 dev_hold(reg_dev);
375 read_unlock(&mrt_lock);
376
377 if (reg_dev == NULL)
378 goto drop;
379
380 skb->mac_header = skb->network_header;
381 skb_pull(skb, (u8 *)encap - skb->data);
382 skb_reset_network_header(skb);
383 skb->dev = reg_dev;
384 skb->protocol = htons(ETH_P_IP);
385 skb->ip_summed = 0;
386 skb->pkt_type = PACKET_HOST;
387 dst_release(skb->dst);
dc58c78c
PE
388 reg_dev->stats.rx_bytes += skb->len;
389 reg_dev->stats.rx_packets++;
14fb64e1
YH
390 skb->dst = NULL;
391 nf_reset(skb);
392 netif_rx(skb);
393 dev_put(reg_dev);
394 return 0;
395 drop:
396 kfree_skb(skb);
397 return 0;
398}
399
400static struct inet6_protocol pim6_protocol = {
401 .handler = pim6_rcv,
402};
403
404/* Service routines creating virtual interfaces: PIMREG */
405
406static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
407{
408 read_lock(&mrt_lock);
dc58c78c
PE
409 dev->stats.tx_bytes += skb->len;
410 dev->stats.tx_packets++;
14fb64e1
YH
411 ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
412 read_unlock(&mrt_lock);
413 kfree_skb(skb);
414 return 0;
415}
416
007c3838
SH
417static const struct net_device_ops reg_vif_netdev_ops = {
418 .ndo_start_xmit = reg_vif_xmit,
419};
420
14fb64e1
YH
421static void reg_vif_setup(struct net_device *dev)
422{
423 dev->type = ARPHRD_PIMREG;
424 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
425 dev->flags = IFF_NOARP;
007c3838 426 dev->netdev_ops = &reg_vif_netdev_ops;
14fb64e1
YH
427 dev->destructor = free_netdev;
428}
429
430static struct net_device *ip6mr_reg_vif(void)
431{
432 struct net_device *dev;
14fb64e1 433
dc58c78c 434 dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
14fb64e1
YH
435 if (dev == NULL)
436 return NULL;
437
438 if (register_netdevice(dev)) {
439 free_netdev(dev);
440 return NULL;
441 }
442 dev->iflink = 0;
443
14fb64e1
YH
444 if (dev_open(dev))
445 goto failure;
446
7af3db78 447 dev_hold(dev);
14fb64e1
YH
448 return dev;
449
450failure:
451 /* allow the register to be completed before unregistering. */
452 rtnl_unlock();
453 rtnl_lock();
454
455 unregister_netdevice(dev);
456 return NULL;
457}
458#endif
459
7bc570c8
YH
460/*
461 * Delete a VIF entry
462 */
463
464static int mif6_delete(int vifi)
465{
466 struct mif_device *v;
467 struct net_device *dev;
4e16880c 468 if (vifi < 0 || vifi >= init_net.ipv6.maxvif)
7bc570c8
YH
469 return -EADDRNOTAVAIL;
470
4e16880c 471 v = &init_net.ipv6.vif6_table[vifi];
7bc570c8
YH
472
473 write_lock_bh(&mrt_lock);
474 dev = v->dev;
475 v->dev = NULL;
476
477 if (!dev) {
478 write_unlock_bh(&mrt_lock);
479 return -EADDRNOTAVAIL;
480 }
481
14fb64e1
YH
482#ifdef CONFIG_IPV6_PIMSM_V2
483 if (vifi == reg_vif_num)
484 reg_vif_num = -1;
485#endif
486
4e16880c 487 if (vifi + 1 == init_net.ipv6.maxvif) {
7bc570c8
YH
488 int tmp;
489 for (tmp = vifi - 1; tmp >= 0; tmp--) {
4e16880c 490 if (MIF_EXISTS(&init_net, tmp))
7bc570c8
YH
491 break;
492 }
4e16880c 493 init_net.ipv6.maxvif = tmp + 1;
7bc570c8
YH
494 }
495
496 write_unlock_bh(&mrt_lock);
497
498 dev_set_allmulti(dev, -1);
499
500 if (v->flags & MIFF_REGISTER)
501 unregister_netdevice(dev);
502
503 dev_put(dev);
504 return 0;
505}
506
58701ad4
BT
507static inline void ip6mr_cache_free(struct mfc6_cache *c)
508{
509 release_net(mfc6_net(c));
510 kmem_cache_free(mrt_cachep, c);
511}
512
7bc570c8
YH
513/* Destroy an unresolved cache entry, killing queued skbs
514 and reporting error to netlink readers.
515 */
516
517static void ip6mr_destroy_unres(struct mfc6_cache *c)
518{
519 struct sk_buff *skb;
520
4045e57c 521 atomic_dec(&init_net.ipv6.cache_resolve_queue_len);
7bc570c8
YH
522
523 while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
524 if (ipv6_hdr(skb)->version == 0) {
525 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
526 nlh->nlmsg_type = NLMSG_ERROR;
527 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
528 skb_trim(skb, nlh->nlmsg_len);
529 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
530 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
531 } else
532 kfree_skb(skb);
533 }
534
58701ad4 535 ip6mr_cache_free(c);
7bc570c8
YH
536}
537
538
539/* Single timer process for all the unresolved queue. */
540
541static void ipmr_do_expire_process(unsigned long dummy)
542{
543 unsigned long now = jiffies;
544 unsigned long expires = 10 * HZ;
545 struct mfc6_cache *c, **cp;
546
547 cp = &mfc_unres_queue;
548
549 while ((c = *cp) != NULL) {
550 if (time_after(c->mfc_un.unres.expires, now)) {
551 /* not yet... */
552 unsigned long interval = c->mfc_un.unres.expires - now;
553 if (interval < expires)
554 expires = interval;
555 cp = &c->next;
556 continue;
557 }
558
559 *cp = c->next;
560 ip6mr_destroy_unres(c);
561 }
562
4045e57c 563 if (mfc_unres_queue != NULL)
7bc570c8
YH
564 mod_timer(&ipmr_expire_timer, jiffies + expires);
565}
566
567static void ipmr_expire_process(unsigned long dummy)
568{
569 if (!spin_trylock(&mfc_unres_lock)) {
570 mod_timer(&ipmr_expire_timer, jiffies + 1);
571 return;
572 }
573
4045e57c 574 if (mfc_unres_queue != NULL)
7bc570c8
YH
575 ipmr_do_expire_process(dummy);
576
577 spin_unlock(&mfc_unres_lock);
578}
579
580/* Fill oifs list. It is called under write locked mrt_lock. */
581
582static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
583{
584 int vifi;
585
6ac7eb08 586 cache->mfc_un.res.minvif = MAXMIFS;
7bc570c8 587 cache->mfc_un.res.maxvif = 0;
6ac7eb08 588 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
7bc570c8 589
4e16880c
BT
590 for (vifi = 0; vifi < init_net.ipv6.maxvif; vifi++) {
591 if (MIF_EXISTS(&init_net, vifi) &&
592 ttls[vifi] && ttls[vifi] < 255) {
7bc570c8
YH
593 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
594 if (cache->mfc_un.res.minvif > vifi)
595 cache->mfc_un.res.minvif = vifi;
596 if (cache->mfc_un.res.maxvif <= vifi)
597 cache->mfc_un.res.maxvif = vifi + 1;
598 }
599 }
600}
601
602static int mif6_add(struct mif6ctl *vifc, int mrtsock)
603{
604 int vifi = vifc->mif6c_mifi;
4e16880c 605 struct mif_device *v = &init_net.ipv6.vif6_table[vifi];
7bc570c8 606 struct net_device *dev;
5ae7b444 607 int err;
7bc570c8
YH
608
609 /* Is vif busy ? */
4e16880c 610 if (MIF_EXISTS(&init_net, vifi))
7bc570c8
YH
611 return -EADDRINUSE;
612
613 switch (vifc->mif6c_flags) {
14fb64e1
YH
614#ifdef CONFIG_IPV6_PIMSM_V2
615 case MIFF_REGISTER:
616 /*
617 * Special Purpose VIF in PIM
618 * All the packets will be sent to the daemon
619 */
620 if (reg_vif_num >= 0)
621 return -EADDRINUSE;
622 dev = ip6mr_reg_vif();
623 if (!dev)
624 return -ENOBUFS;
5ae7b444
WC
625 err = dev_set_allmulti(dev, 1);
626 if (err) {
627 unregister_netdevice(dev);
7af3db78 628 dev_put(dev);
5ae7b444
WC
629 return err;
630 }
14fb64e1
YH
631 break;
632#endif
7bc570c8
YH
633 case 0:
634 dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
635 if (!dev)
636 return -EADDRNOTAVAIL;
5ae7b444 637 err = dev_set_allmulti(dev, 1);
7af3db78
WC
638 if (err) {
639 dev_put(dev);
5ae7b444 640 return err;
7af3db78 641 }
7bc570c8
YH
642 break;
643 default:
644 return -EINVAL;
645 }
646
7bc570c8
YH
647 /*
648 * Fill in the VIF structures
649 */
650 v->rate_limit = vifc->vifc_rate_limit;
651 v->flags = vifc->mif6c_flags;
652 if (!mrtsock)
653 v->flags |= VIFF_STATIC;
654 v->threshold = vifc->vifc_threshold;
655 v->bytes_in = 0;
656 v->bytes_out = 0;
657 v->pkt_in = 0;
658 v->pkt_out = 0;
659 v->link = dev->ifindex;
660 if (v->flags & MIFF_REGISTER)
661 v->link = dev->iflink;
662
663 /* And finish update writing critical data */
664 write_lock_bh(&mrt_lock);
7bc570c8 665 v->dev = dev;
14fb64e1
YH
666#ifdef CONFIG_IPV6_PIMSM_V2
667 if (v->flags & MIFF_REGISTER)
668 reg_vif_num = vifi;
669#endif
4e16880c
BT
670 if (vifi + 1 > init_net.ipv6.maxvif)
671 init_net.ipv6.maxvif = vifi + 1;
7bc570c8
YH
672 write_unlock_bh(&mrt_lock);
673 return 0;
674}
675
676static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
677{
678 int line = MFC6_HASH(mcastgrp, origin);
679 struct mfc6_cache *c;
680
4a6258a0 681 for (c = init_net.ipv6.mfc6_cache_array[line]; c; c = c->next) {
7bc570c8
YH
682 if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
683 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
684 break;
685 }
686 return c;
687}
688
689/*
690 * Allocate a multicast cache entry
691 */
58701ad4 692static struct mfc6_cache *ip6mr_cache_alloc(struct net *net)
7bc570c8 693{
36cbac59 694 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
7bc570c8
YH
695 if (c == NULL)
696 return NULL;
6ac7eb08 697 c->mfc_un.res.minvif = MAXMIFS;
58701ad4 698 mfc6_net_set(c, net);
7bc570c8
YH
699 return c;
700}
701
58701ad4 702static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
7bc570c8 703{
36cbac59 704 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
7bc570c8
YH
705 if (c == NULL)
706 return NULL;
7bc570c8
YH
707 skb_queue_head_init(&c->mfc_un.unres.unresolved);
708 c->mfc_un.unres.expires = jiffies + 10 * HZ;
58701ad4 709 mfc6_net_set(c, net);
7bc570c8
YH
710 return c;
711}
712
713/*
714 * A cache entry has gone into a resolved state from queued
715 */
716
717static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
718{
719 struct sk_buff *skb;
720
721 /*
722 * Play the pending entries through our router
723 */
724
725 while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
726 if (ipv6_hdr(skb)->version == 0) {
727 int err;
728 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
729
730 if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
549e028d 731 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
7bc570c8
YH
732 } else {
733 nlh->nlmsg_type = NLMSG_ERROR;
734 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
735 skb_trim(skb, nlh->nlmsg_len);
736 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
737 }
738 err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
739 } else
740 ip6_mr_forward(skb, c);
741 }
742}
743
744/*
745 * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
746 * expects the following bizarre scheme.
747 *
748 * Called under mrt_lock.
749 */
750
6ac7eb08 751static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
7bc570c8
YH
752{
753 struct sk_buff *skb;
754 struct mrt6msg *msg;
755 int ret;
756
14fb64e1
YH
757#ifdef CONFIG_IPV6_PIMSM_V2
758 if (assert == MRT6MSG_WHOLEPKT)
759 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
760 +sizeof(*msg));
761 else
762#endif
763 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
7bc570c8
YH
764
765 if (!skb)
766 return -ENOBUFS;
767
768 /* I suppose that internal messages
769 * do not require checksums */
770
771 skb->ip_summed = CHECKSUM_UNNECESSARY;
772
14fb64e1
YH
773#ifdef CONFIG_IPV6_PIMSM_V2
774 if (assert == MRT6MSG_WHOLEPKT) {
775 /* Ugly, but we have no choice with this interface.
776 Duplicate old header, fix length etc.
777 And all this only to mangle msg->im6_msgtype and
778 to set msg->im6_mbz to "mbz" :-)
779 */
780 skb_push(skb, -skb_network_offset(pkt));
781
782 skb_push(skb, sizeof(*msg));
783 skb_reset_transport_header(skb);
784 msg = (struct mrt6msg *)skb_transport_header(skb);
785 msg->im6_mbz = 0;
786 msg->im6_msgtype = MRT6MSG_WHOLEPKT;
787 msg->im6_mif = reg_vif_num;
788 msg->im6_pad = 0;
789 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
790 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
791
792 skb->ip_summed = CHECKSUM_UNNECESSARY;
793 } else
794#endif
795 {
7bc570c8
YH
796 /*
797 * Copy the IP header
798 */
799
800 skb_put(skb, sizeof(struct ipv6hdr));
801 skb_reset_network_header(skb);
802 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
803
804 /*
805 * Add our header
806 */
807 skb_put(skb, sizeof(*msg));
808 skb_reset_transport_header(skb);
809 msg = (struct mrt6msg *)skb_transport_header(skb);
810
811 msg->im6_mbz = 0;
812 msg->im6_msgtype = assert;
6ac7eb08 813 msg->im6_mif = mifi;
7bc570c8
YH
814 msg->im6_pad = 0;
815 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
816 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
817
818 skb->dst = dst_clone(pkt->dst);
819 skb->ip_summed = CHECKSUM_UNNECESSARY;
820
821 skb_pull(skb, sizeof(struct ipv6hdr));
14fb64e1 822 }
7bc570c8 823
bd91b8bf 824 if (init_net.ipv6.mroute6_sk == NULL) {
7bc570c8
YH
825 kfree_skb(skb);
826 return -EINVAL;
827 }
828
829 /*
830 * Deliver to user space multicast routing algorithms
831 */
bd91b8bf
BT
832 ret = sock_queue_rcv_skb(init_net.ipv6.mroute6_sk, skb);
833 if (ret < 0) {
7bc570c8
YH
834 if (net_ratelimit())
835 printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
836 kfree_skb(skb);
837 }
838
839 return ret;
840}
841
842/*
843 * Queue a packet for resolution. It gets locked cache entry!
844 */
845
846static int
6ac7eb08 847ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
7bc570c8
YH
848{
849 int err;
850 struct mfc6_cache *c;
851
852 spin_lock_bh(&mfc_unres_lock);
853 for (c = mfc_unres_queue; c; c = c->next) {
4045e57c
BT
854 if (net_eq(mfc6_net(c), &init_net) &&
855 ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
7bc570c8
YH
856 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
857 break;
858 }
859
860 if (c == NULL) {
861 /*
862 * Create a new entry if allowable
863 */
864
4045e57c 865 if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) >= 10 ||
58701ad4 866 (c = ip6mr_cache_alloc_unres(&init_net)) == NULL) {
7bc570c8
YH
867 spin_unlock_bh(&mfc_unres_lock);
868
869 kfree_skb(skb);
870 return -ENOBUFS;
871 }
872
873 /*
874 * Fill in the new cache entry
875 */
876 c->mf6c_parent = -1;
877 c->mf6c_origin = ipv6_hdr(skb)->saddr;
878 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
879
880 /*
881 * Reflect first query at pim6sd
882 */
6ac7eb08 883 if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
7bc570c8
YH
884 /* If the report failed throw the cache entry
885 out - Brad Parker
886 */
887 spin_unlock_bh(&mfc_unres_lock);
888
58701ad4 889 ip6mr_cache_free(c);
7bc570c8
YH
890 kfree_skb(skb);
891 return err;
892 }
893
4045e57c 894 atomic_inc(&init_net.ipv6.cache_resolve_queue_len);
7bc570c8
YH
895 c->next = mfc_unres_queue;
896 mfc_unres_queue = c;
897
898 ipmr_do_expire_process(1);
899 }
900
901 /*
902 * See if we can append the packet
903 */
904 if (c->mfc_un.unres.unresolved.qlen > 3) {
905 kfree_skb(skb);
906 err = -ENOBUFS;
907 } else {
908 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
909 err = 0;
910 }
911
912 spin_unlock_bh(&mfc_unres_lock);
913 return err;
914}
915
916/*
917 * MFC6 cache manipulation by user space
918 */
919
920static int ip6mr_mfc_delete(struct mf6cctl *mfc)
921{
922 int line;
923 struct mfc6_cache *c, **cp;
924
925 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
926
4a6258a0
BT
927 for (cp = &init_net.ipv6.mfc6_cache_array[line];
928 (c = *cp) != NULL; cp = &c->next) {
7bc570c8
YH
929 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
930 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
931 write_lock_bh(&mrt_lock);
932 *cp = c->next;
933 write_unlock_bh(&mrt_lock);
934
58701ad4 935 ip6mr_cache_free(c);
7bc570c8
YH
936 return 0;
937 }
938 }
939 return -ENOENT;
940}
941
942static int ip6mr_device_event(struct notifier_block *this,
943 unsigned long event, void *ptr)
944{
945 struct net_device *dev = ptr;
946 struct mif_device *v;
947 int ct;
948
721499e8 949 if (!net_eq(dev_net(dev), &init_net))
7bc570c8
YH
950 return NOTIFY_DONE;
951
952 if (event != NETDEV_UNREGISTER)
953 return NOTIFY_DONE;
954
4e16880c
BT
955 v = &init_net.ipv6.vif6_table[0];
956 for (ct = 0; ct < init_net.ipv6.maxvif; ct++, v++) {
7bc570c8
YH
957 if (v->dev == dev)
958 mif6_delete(ct);
959 }
960 return NOTIFY_DONE;
961}
962
963static struct notifier_block ip6_mr_notifier = {
964 .notifier_call = ip6mr_device_event
965};
966
967/*
968 * Setup for IP multicast routing
969 */
970
4e16880c
BT
971static int __net_init ip6mr_net_init(struct net *net)
972{
973 int err = 0;
974
975 net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
976 GFP_KERNEL);
977 if (!net->ipv6.vif6_table) {
978 err = -ENOMEM;
979 goto fail;
980 }
4a6258a0
BT
981
982 /* Forwarding cache */
983 net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
984 sizeof(struct mfc6_cache *),
985 GFP_KERNEL);
986 if (!net->ipv6.mfc6_cache_array) {
987 err = -ENOMEM;
988 goto fail_mfc6_cache;
989 }
990 return 0;
991
992fail_mfc6_cache:
993 kfree(net->ipv6.vif6_table);
4e16880c
BT
994fail:
995 return err;
996}
997
998static void __net_exit ip6mr_net_exit(struct net *net)
999{
4a6258a0 1000 kfree(net->ipv6.mfc6_cache_array);
4e16880c
BT
1001 kfree(net->ipv6.vif6_table);
1002}
1003
1004static struct pernet_operations ip6mr_net_ops = {
1005 .init = ip6mr_net_init,
1006 .exit = ip6mr_net_exit,
1007};
1008
623d1a1a 1009int __init ip6_mr_init(void)
7bc570c8 1010{
623d1a1a
WC
1011 int err;
1012
7bc570c8
YH
1013 mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1014 sizeof(struct mfc6_cache),
1015 0, SLAB_HWCACHE_ALIGN,
1016 NULL);
1017 if (!mrt_cachep)
623d1a1a 1018 return -ENOMEM;
7bc570c8 1019
4e16880c
BT
1020 err = register_pernet_subsys(&ip6mr_net_ops);
1021 if (err)
1022 goto reg_pernet_fail;
1023
7bc570c8 1024 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
623d1a1a
WC
1025 err = register_netdevice_notifier(&ip6_mr_notifier);
1026 if (err)
1027 goto reg_notif_fail;
1028#ifdef CONFIG_PROC_FS
1029 err = -ENOMEM;
1030 if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1031 goto proc_vif_fail;
1032 if (!proc_net_fops_create(&init_net, "ip6_mr_cache",
1033 0, &ip6mr_mfc_fops))
1034 goto proc_cache_fail;
1035#endif
1036 return 0;
7bc570c8 1037#ifdef CONFIG_PROC_FS
623d1a1a
WC
1038proc_cache_fail:
1039 proc_net_remove(&init_net, "ip6_mr_vif");
87b30a65
BT
1040proc_vif_fail:
1041 unregister_netdevice_notifier(&ip6_mr_notifier);
7bc570c8 1042#endif
87b30a65
BT
1043reg_notif_fail:
1044 del_timer(&ipmr_expire_timer);
4e16880c
BT
1045 unregister_pernet_subsys(&ip6mr_net_ops);
1046reg_pernet_fail:
87b30a65 1047 kmem_cache_destroy(mrt_cachep);
623d1a1a 1048 return err;
7bc570c8
YH
1049}
1050
623d1a1a
WC
1051void ip6_mr_cleanup(void)
1052{
1053#ifdef CONFIG_PROC_FS
1054 proc_net_remove(&init_net, "ip6_mr_cache");
1055 proc_net_remove(&init_net, "ip6_mr_vif");
1056#endif
1057 unregister_netdevice_notifier(&ip6_mr_notifier);
1058 del_timer(&ipmr_expire_timer);
4e16880c 1059 unregister_pernet_subsys(&ip6mr_net_ops);
623d1a1a
WC
1060 kmem_cache_destroy(mrt_cachep);
1061}
7bc570c8
YH
1062
1063static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
1064{
1065 int line;
1066 struct mfc6_cache *uc, *c, **cp;
6ac7eb08 1067 unsigned char ttls[MAXMIFS];
7bc570c8
YH
1068 int i;
1069
6ac7eb08
RR
1070 memset(ttls, 255, MAXMIFS);
1071 for (i = 0; i < MAXMIFS; i++) {
7bc570c8
YH
1072 if (IF_ISSET(i, &mfc->mf6cc_ifset))
1073 ttls[i] = 1;
1074
1075 }
1076
1077 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1078
4a6258a0
BT
1079 for (cp = &init_net.ipv6.mfc6_cache_array[line];
1080 (c = *cp) != NULL; cp = &c->next) {
7bc570c8
YH
1081 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1082 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
1083 break;
1084 }
1085
1086 if (c != NULL) {
1087 write_lock_bh(&mrt_lock);
1088 c->mf6c_parent = mfc->mf6cc_parent;
1089 ip6mr_update_thresholds(c, ttls);
1090 if (!mrtsock)
1091 c->mfc_flags |= MFC_STATIC;
1092 write_unlock_bh(&mrt_lock);
1093 return 0;
1094 }
1095
1096 if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1097 return -EINVAL;
1098
58701ad4 1099 c = ip6mr_cache_alloc(&init_net);
7bc570c8
YH
1100 if (c == NULL)
1101 return -ENOMEM;
1102
1103 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1104 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1105 c->mf6c_parent = mfc->mf6cc_parent;
1106 ip6mr_update_thresholds(c, ttls);
1107 if (!mrtsock)
1108 c->mfc_flags |= MFC_STATIC;
1109
1110 write_lock_bh(&mrt_lock);
4a6258a0
BT
1111 c->next = init_net.ipv6.mfc6_cache_array[line];
1112 init_net.ipv6.mfc6_cache_array[line] = c;
7bc570c8
YH
1113 write_unlock_bh(&mrt_lock);
1114
1115 /*
1116 * Check to see if we resolved a queued list. If so we
1117 * need to send on the frames and tidy up.
1118 */
1119 spin_lock_bh(&mfc_unres_lock);
1120 for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
1121 cp = &uc->next) {
4045e57c
BT
1122 if (net_eq(mfc6_net(uc), &init_net) &&
1123 ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
7bc570c8
YH
1124 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1125 *cp = uc->next;
4045e57c 1126 atomic_dec(&init_net.ipv6.cache_resolve_queue_len);
7bc570c8
YH
1127 break;
1128 }
1129 }
4045e57c
BT
1130 if (mfc_unres_queue == NULL)
1131 del_timer(&ipmr_expire_timer);
7bc570c8
YH
1132 spin_unlock_bh(&mfc_unres_lock);
1133
1134 if (uc) {
1135 ip6mr_cache_resolve(uc, c);
58701ad4 1136 ip6mr_cache_free(uc);
7bc570c8
YH
1137 }
1138 return 0;
1139}
1140
1141/*
1142 * Close the multicast socket, and clear the vif tables etc
1143 */
1144
1145static void mroute_clean_tables(struct sock *sk)
1146{
1147 int i;
1148
1149 /*
1150 * Shut down all active vif entries
1151 */
4e16880c
BT
1152 for (i = 0; i < init_net.ipv6.maxvif; i++) {
1153 if (!(init_net.ipv6.vif6_table[i].flags & VIFF_STATIC))
7bc570c8
YH
1154 mif6_delete(i);
1155 }
1156
1157 /*
1158 * Wipe the cache
1159 */
4a6258a0 1160 for (i = 0; i < MFC6_LINES; i++) {
7bc570c8
YH
1161 struct mfc6_cache *c, **cp;
1162
4a6258a0 1163 cp = &init_net.ipv6.mfc6_cache_array[i];
7bc570c8
YH
1164 while ((c = *cp) != NULL) {
1165 if (c->mfc_flags & MFC_STATIC) {
1166 cp = &c->next;
1167 continue;
1168 }
1169 write_lock_bh(&mrt_lock);
1170 *cp = c->next;
1171 write_unlock_bh(&mrt_lock);
1172
58701ad4 1173 ip6mr_cache_free(c);
7bc570c8
YH
1174 }
1175 }
1176
4045e57c
BT
1177 if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) != 0) {
1178 struct mfc6_cache *c, **cp;
7bc570c8
YH
1179
1180 spin_lock_bh(&mfc_unres_lock);
4045e57c
BT
1181 cp = &mfc_unres_queue;
1182 while ((c = *cp) != NULL) {
1183 if (!net_eq(mfc6_net(c), &init_net)) {
1184 cp = &c->next;
1185 continue;
1186 }
1187 *cp = c->next;
7bc570c8 1188 ip6mr_destroy_unres(c);
7bc570c8
YH
1189 }
1190 spin_unlock_bh(&mfc_unres_lock);
1191 }
1192}
1193
1194static int ip6mr_sk_init(struct sock *sk)
1195{
1196 int err = 0;
1197
1198 rtnl_lock();
1199 write_lock_bh(&mrt_lock);
bd91b8bf
BT
1200 if (likely(init_net.ipv6.mroute6_sk == NULL))
1201 init_net.ipv6.mroute6_sk = sk;
7bc570c8
YH
1202 else
1203 err = -EADDRINUSE;
1204 write_unlock_bh(&mrt_lock);
1205
1206 rtnl_unlock();
1207
1208 return err;
1209}
1210
1211int ip6mr_sk_done(struct sock *sk)
1212{
1213 int err = 0;
1214
1215 rtnl_lock();
bd91b8bf 1216 if (sk == init_net.ipv6.mroute6_sk) {
7bc570c8 1217 write_lock_bh(&mrt_lock);
bd91b8bf 1218 init_net.ipv6.mroute6_sk = NULL;
7bc570c8
YH
1219 write_unlock_bh(&mrt_lock);
1220
1221 mroute_clean_tables(sk);
1222 } else
1223 err = -EACCES;
1224 rtnl_unlock();
1225
1226 return err;
1227}
1228
1229/*
1230 * Socket options and virtual interface manipulation. The whole
1231 * virtual interface system is a complete heap, but unfortunately
1232 * that's how BSD mrouted happens to think. Maybe one day with a proper
1233 * MOSPF/PIM router set up we can clean this up.
1234 */
1235
1236int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
1237{
1238 int ret;
1239 struct mif6ctl vif;
1240 struct mf6cctl mfc;
1241 mifi_t mifi;
1242
1243 if (optname != MRT6_INIT) {
bd91b8bf 1244 if (sk != init_net.ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
7bc570c8
YH
1245 return -EACCES;
1246 }
1247
1248 switch (optname) {
1249 case MRT6_INIT:
1250 if (sk->sk_type != SOCK_RAW ||
1251 inet_sk(sk)->num != IPPROTO_ICMPV6)
1252 return -EOPNOTSUPP;
1253 if (optlen < sizeof(int))
1254 return -EINVAL;
1255
1256 return ip6mr_sk_init(sk);
1257
1258 case MRT6_DONE:
1259 return ip6mr_sk_done(sk);
1260
1261 case MRT6_ADD_MIF:
1262 if (optlen < sizeof(vif))
1263 return -EINVAL;
1264 if (copy_from_user(&vif, optval, sizeof(vif)))
1265 return -EFAULT;
6ac7eb08 1266 if (vif.mif6c_mifi >= MAXMIFS)
7bc570c8
YH
1267 return -ENFILE;
1268 rtnl_lock();
bd91b8bf 1269 ret = mif6_add(&vif, sk == init_net.ipv6.mroute6_sk);
7bc570c8
YH
1270 rtnl_unlock();
1271 return ret;
1272
1273 case MRT6_DEL_MIF:
1274 if (optlen < sizeof(mifi_t))
1275 return -EINVAL;
1276 if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1277 return -EFAULT;
1278 rtnl_lock();
1279 ret = mif6_delete(mifi);
1280 rtnl_unlock();
1281 return ret;
1282
1283 /*
1284 * Manipulate the forwarding caches. These live
1285 * in a sort of kernel/user symbiosis.
1286 */
1287 case MRT6_ADD_MFC:
1288 case MRT6_DEL_MFC:
1289 if (optlen < sizeof(mfc))
1290 return -EINVAL;
1291 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1292 return -EFAULT;
1293 rtnl_lock();
1294 if (optname == MRT6_DEL_MFC)
1295 ret = ip6mr_mfc_delete(&mfc);
1296 else
bd91b8bf 1297 ret = ip6mr_mfc_add(&mfc, sk == init_net.ipv6.mroute6_sk);
7bc570c8
YH
1298 rtnl_unlock();
1299 return ret;
1300
14fb64e1
YH
1301 /*
1302 * Control PIM assert (to activate pim will activate assert)
1303 */
1304 case MRT6_ASSERT:
1305 {
1306 int v;
1307 if (get_user(v, (int __user *)optval))
1308 return -EFAULT;
1309 mroute_do_assert = !!v;
1310 return 0;
1311 }
1312
1313#ifdef CONFIG_IPV6_PIMSM_V2
1314 case MRT6_PIM:
1315 {
a9f83bf3 1316 int v;
14fb64e1
YH
1317 if (get_user(v, (int __user *)optval))
1318 return -EFAULT;
1319 v = !!v;
1320 rtnl_lock();
1321 ret = 0;
1322 if (v != mroute_do_pim) {
1323 mroute_do_pim = v;
1324 mroute_do_assert = v;
1325 if (mroute_do_pim)
1326 ret = inet6_add_protocol(&pim6_protocol,
1327 IPPROTO_PIM);
1328 else
1329 ret = inet6_del_protocol(&pim6_protocol,
1330 IPPROTO_PIM);
1331 if (ret < 0)
1332 ret = -EAGAIN;
1333 }
1334 rtnl_unlock();
1335 return ret;
1336 }
1337
1338#endif
7bc570c8 1339 /*
7d120c55 1340 * Spurious command, or MRT6_VERSION which you cannot
7bc570c8
YH
1341 * set.
1342 */
1343 default:
1344 return -ENOPROTOOPT;
1345 }
1346}
1347
1348/*
1349 * Getsock opt support for the multicast routing system.
1350 */
1351
1352int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1353 int __user *optlen)
1354{
1355 int olr;
1356 int val;
1357
1358 switch (optname) {
1359 case MRT6_VERSION:
1360 val = 0x0305;
1361 break;
14fb64e1
YH
1362#ifdef CONFIG_IPV6_PIMSM_V2
1363 case MRT6_PIM:
1364 val = mroute_do_pim;
1365 break;
1366#endif
1367 case MRT6_ASSERT:
1368 val = mroute_do_assert;
1369 break;
7bc570c8
YH
1370 default:
1371 return -ENOPROTOOPT;
1372 }
1373
1374 if (get_user(olr, optlen))
1375 return -EFAULT;
1376
1377 olr = min_t(int, olr, sizeof(int));
1378 if (olr < 0)
1379 return -EINVAL;
1380
1381 if (put_user(olr, optlen))
1382 return -EFAULT;
1383 if (copy_to_user(optval, &val, olr))
1384 return -EFAULT;
1385 return 0;
1386}
1387
1388/*
1389 * The IP multicast ioctl support routines.
1390 */
1391
1392int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1393{
1394 struct sioc_sg_req6 sr;
1395 struct sioc_mif_req6 vr;
1396 struct mif_device *vif;
1397 struct mfc6_cache *c;
1398
1399 switch (cmd) {
1400 case SIOCGETMIFCNT_IN6:
1401 if (copy_from_user(&vr, arg, sizeof(vr)))
1402 return -EFAULT;
4e16880c 1403 if (vr.mifi >= init_net.ipv6.maxvif)
7bc570c8
YH
1404 return -EINVAL;
1405 read_lock(&mrt_lock);
4e16880c
BT
1406 vif = &init_net.ipv6.vif6_table[vr.mifi];
1407 if (MIF_EXISTS(&init_net, vr.mifi)) {
7bc570c8
YH
1408 vr.icount = vif->pkt_in;
1409 vr.ocount = vif->pkt_out;
1410 vr.ibytes = vif->bytes_in;
1411 vr.obytes = vif->bytes_out;
1412 read_unlock(&mrt_lock);
1413
1414 if (copy_to_user(arg, &vr, sizeof(vr)))
1415 return -EFAULT;
1416 return 0;
1417 }
1418 read_unlock(&mrt_lock);
1419 return -EADDRNOTAVAIL;
1420 case SIOCGETSGCNT_IN6:
1421 if (copy_from_user(&sr, arg, sizeof(sr)))
1422 return -EFAULT;
1423
1424 read_lock(&mrt_lock);
1425 c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
1426 if (c) {
1427 sr.pktcnt = c->mfc_un.res.pkt;
1428 sr.bytecnt = c->mfc_un.res.bytes;
1429 sr.wrong_if = c->mfc_un.res.wrong_if;
1430 read_unlock(&mrt_lock);
1431
1432 if (copy_to_user(arg, &sr, sizeof(sr)))
1433 return -EFAULT;
1434 return 0;
1435 }
1436 read_unlock(&mrt_lock);
1437 return -EADDRNOTAVAIL;
1438 default:
1439 return -ENOIOCTLCMD;
1440 }
1441}
1442
1443
1444static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1445{
483a47d2
DL
1446 IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
1447 IPSTATS_MIB_OUTFORWDATAGRAMS);
7bc570c8
YH
1448 return dst_output(skb);
1449}
1450
1451/*
1452 * Processing handlers for ip6mr_forward
1453 */
1454
1455static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1456{
1457 struct ipv6hdr *ipv6h;
4e16880c 1458 struct mif_device *vif = &init_net.ipv6.vif6_table[vifi];
7bc570c8
YH
1459 struct net_device *dev;
1460 struct dst_entry *dst;
1461 struct flowi fl;
1462
1463 if (vif->dev == NULL)
1464 goto out_free;
1465
14fb64e1
YH
1466#ifdef CONFIG_IPV6_PIMSM_V2
1467 if (vif->flags & MIFF_REGISTER) {
1468 vif->pkt_out++;
1469 vif->bytes_out += skb->len;
dc58c78c
PE
1470 vif->dev->stats.tx_bytes += skb->len;
1471 vif->dev->stats.tx_packets++;
14fb64e1
YH
1472 ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
1473 kfree_skb(skb);
1474 return 0;
1475 }
1476#endif
1477
7bc570c8
YH
1478 ipv6h = ipv6_hdr(skb);
1479
1480 fl = (struct flowi) {
1481 .oif = vif->link,
1482 .nl_u = { .ip6_u =
1483 { .daddr = ipv6h->daddr, }
1484 }
1485 };
1486
1487 dst = ip6_route_output(&init_net, NULL, &fl);
1488 if (!dst)
1489 goto out_free;
1490
1491 dst_release(skb->dst);
1492 skb->dst = dst;
1493
1494 /*
1495 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1496 * not only before forwarding, but after forwarding on all output
1497 * interfaces. It is clear, if mrouter runs a multicasting
1498 * program, it should receive packets not depending to what interface
1499 * program is joined.
1500 * If we will not make it, the program will have to join on all
1501 * interfaces. On the other hand, multihoming host (or router, but
1502 * not mrouter) cannot join to more than one interface - it will
1503 * result in receiving multiple packets.
1504 */
1505 dev = vif->dev;
1506 skb->dev = dev;
1507 vif->pkt_out++;
1508 vif->bytes_out += skb->len;
1509
1510 /* We are about to write */
1511 /* XXX: extension headers? */
1512 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1513 goto out_free;
1514
1515 ipv6h = ipv6_hdr(skb);
1516 ipv6h->hop_limit--;
1517
1518 IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1519
1520 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
1521 ip6mr_forward2_finish);
1522
1523out_free:
1524 kfree_skb(skb);
1525 return 0;
1526}
1527
1528static int ip6mr_find_vif(struct net_device *dev)
1529{
1530 int ct;
4e16880c
BT
1531 for (ct = init_net.ipv6.maxvif - 1; ct >= 0; ct--) {
1532 if (init_net.ipv6.vif6_table[ct].dev == dev)
7bc570c8
YH
1533 break;
1534 }
1535 return ct;
1536}
1537
1538static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
1539{
1540 int psend = -1;
1541 int vif, ct;
1542
1543 vif = cache->mf6c_parent;
1544 cache->mfc_un.res.pkt++;
1545 cache->mfc_un.res.bytes += skb->len;
1546
14fb64e1
YH
1547 /*
1548 * Wrong interface: drop packet and (maybe) send PIM assert.
1549 */
4e16880c 1550 if (init_net.ipv6.vif6_table[vif].dev != skb->dev) {
14fb64e1
YH
1551 int true_vifi;
1552
1553 cache->mfc_un.res.wrong_if++;
1554 true_vifi = ip6mr_find_vif(skb->dev);
1555
1556 if (true_vifi >= 0 && mroute_do_assert &&
1557 /* pimsm uses asserts, when switching from RPT to SPT,
1558 so that we cannot check that packet arrived on an oif.
1559 It is bad, but otherwise we would need to move pretty
1560 large chunk of pimd to kernel. Ough... --ANK
1561 */
1562 (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
1563 time_after(jiffies,
1564 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1565 cache->mfc_un.res.last_assert = jiffies;
1566 ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
1567 }
1568 goto dont_forward;
1569 }
1570
4e16880c
BT
1571 init_net.ipv6.vif6_table[vif].pkt_in++;
1572 init_net.ipv6.vif6_table[vif].bytes_in += skb->len;
7bc570c8
YH
1573
1574 /*
1575 * Forward the frame
1576 */
1577 for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
1578 if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
1579 if (psend != -1) {
1580 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1581 if (skb2)
1582 ip6mr_forward2(skb2, cache, psend);
1583 }
1584 psend = ct;
1585 }
1586 }
1587 if (psend != -1) {
1588 ip6mr_forward2(skb, cache, psend);
1589 return 0;
1590 }
1591
14fb64e1 1592dont_forward:
7bc570c8
YH
1593 kfree_skb(skb);
1594 return 0;
1595}
1596
1597
1598/*
1599 * Multicast packets for forwarding arrive here
1600 */
1601
1602int ip6_mr_input(struct sk_buff *skb)
1603{
1604 struct mfc6_cache *cache;
1605
1606 read_lock(&mrt_lock);
1607 cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1608
1609 /*
1610 * No usable cache entry
1611 */
1612 if (cache == NULL) {
1613 int vif;
1614
1615 vif = ip6mr_find_vif(skb->dev);
1616 if (vif >= 0) {
1617 int err = ip6mr_cache_unresolved(vif, skb);
1618 read_unlock(&mrt_lock);
1619
1620 return err;
1621 }
1622 read_unlock(&mrt_lock);
1623 kfree_skb(skb);
1624 return -ENODEV;
1625 }
1626
1627 ip6_mr_forward(skb, cache);
1628
1629 read_unlock(&mrt_lock);
1630
1631 return 0;
1632}
1633
1634
1635static int
1636ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
1637{
1638 int ct;
1639 struct rtnexthop *nhp;
4e16880c 1640 struct net_device *dev = init_net.ipv6.vif6_table[c->mf6c_parent].dev;
549e028d 1641 u8 *b = skb_tail_pointer(skb);
7bc570c8
YH
1642 struct rtattr *mp_head;
1643
1644 if (dev)
1645 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1646
1647 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1648
1649 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1650 if (c->mfc_un.res.ttls[ct] < 255) {
1651 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1652 goto rtattr_failure;
1653 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1654 nhp->rtnh_flags = 0;
1655 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
4e16880c 1656 nhp->rtnh_ifindex = init_net.ipv6.vif6_table[ct].dev->ifindex;
7bc570c8
YH
1657 nhp->rtnh_len = sizeof(*nhp);
1658 }
1659 }
1660 mp_head->rta_type = RTA_MULTIPATH;
549e028d 1661 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
7bc570c8
YH
1662 rtm->rtm_type = RTN_MULTICAST;
1663 return 1;
1664
1665rtattr_failure:
1666 nlmsg_trim(skb, b);
1667 return -EMSGSIZE;
1668}
1669
1670int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1671{
1672 int err;
1673 struct mfc6_cache *cache;
1674 struct rt6_info *rt = (struct rt6_info *)skb->dst;
1675
1676 read_lock(&mrt_lock);
1677 cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);
1678
1679 if (!cache) {
1680 struct sk_buff *skb2;
1681 struct ipv6hdr *iph;
1682 struct net_device *dev;
1683 int vif;
1684
1685 if (nowait) {
1686 read_unlock(&mrt_lock);
1687 return -EAGAIN;
1688 }
1689
1690 dev = skb->dev;
1691 if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
1692 read_unlock(&mrt_lock);
1693 return -ENODEV;
1694 }
1695
1696 /* really correct? */
1697 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
1698 if (!skb2) {
1699 read_unlock(&mrt_lock);
1700 return -ENOMEM;
1701 }
1702
1703 skb_reset_transport_header(skb2);
1704
1705 skb_put(skb2, sizeof(struct ipv6hdr));
1706 skb_reset_network_header(skb2);
1707
1708 iph = ipv6_hdr(skb2);
1709 iph->version = 0;
1710 iph->priority = 0;
1711 iph->flow_lbl[0] = 0;
1712 iph->flow_lbl[1] = 0;
1713 iph->flow_lbl[2] = 0;
1714 iph->payload_len = 0;
1715 iph->nexthdr = IPPROTO_NONE;
1716 iph->hop_limit = 0;
1717 ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
1718 ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
1719
1720 err = ip6mr_cache_unresolved(vif, skb2);
1721 read_unlock(&mrt_lock);
1722
1723 return err;
1724 }
1725
1726 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1727 cache->mfc_flags |= MFC_NOTIFY;
1728
1729 err = ip6mr_fill_mroute(skb, cache, rtm);
1730 read_unlock(&mrt_lock);
1731 return err;
1732}
1733