[IPV4]: Increase number of possible routing tables to 2^32
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
37#include <linux/init.h>
38#include <linux/netlink.h>
39#include <linux/if_arp.h>
40
41#ifdef CONFIG_PROC_FS
42#include <linux/proc_fs.h>
43#include <linux/seq_file.h>
44#endif
45
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
8d71740c 56#include <net/netevent.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
64/* Set to 3 to get tracing. */
65#define RT6_DEBUG 2
66
67#if RT6_DEBUG >= 3
68#define RDBG(x) printk x
69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
70#else
71#define RDBG(x)
72#define RT6_TRACE(x...) do { ; } while (0)
73#endif
74
519fbd87 75#define CLONE_OFFLINK_ROUTE 0
1da177e4 76
554cfb7e
YH
77#define RT6_SELECT_F_IFACE 0x1
78#define RT6_SELECT_F_REACHABLE 0x2
79
1da177e4
LT
80static int ip6_rt_max_size = 4096;
81static int ip6_rt_gc_min_interval = HZ / 2;
82static int ip6_rt_gc_timeout = 60*HZ;
83int ip6_rt_gc_interval = 30*HZ;
84static int ip6_rt_gc_elasticity = 9;
85static int ip6_rt_mtu_expires = 10*60*HZ;
86static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94static int ip6_dst_gc(void);
95
96static int ip6_pkt_discard(struct sk_buff *skb);
97static int ip6_pkt_discard_out(struct sk_buff *skb);
98static void ip6_link_failure(struct sk_buff *skb);
99static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
70ceb4f5
YH
101#ifdef CONFIG_IPV6_ROUTE_INFO
102static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
104 unsigned pref);
105static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
107#endif
108
1da177e4
LT
109static struct dst_ops ip6_dst_ops = {
110 .family = AF_INET6,
111 .protocol = __constant_htons(ETH_P_IPV6),
112 .gc = ip6_dst_gc,
113 .gc_thresh = 1024,
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
121};
122
123struct rt6_info ip6_null_entry = {
124 .u = {
125 .dst = {
126 .__refcnt = ATOMIC_INIT(1),
127 .__use = 1,
128 .dev = &loopback_dev,
129 .obsolete = -1,
130 .error = -ENETUNREACH,
131 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
132 .input = ip6_pkt_discard,
133 .output = ip6_pkt_discard_out,
134 .ops = &ip6_dst_ops,
135 .path = (struct dst_entry*)&ip6_null_entry,
136 }
137 },
138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
141};
142
101367c2
TG
143#ifdef CONFIG_IPV6_MULTIPLE_TABLES
144
145struct rt6_info ip6_prohibit_entry = {
146 .u = {
147 .dst = {
148 .__refcnt = ATOMIC_INIT(1),
149 .__use = 1,
150 .dev = &loopback_dev,
151 .obsolete = -1,
152 .error = -EACCES,
153 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
154 .input = ip6_pkt_discard,
155 .output = ip6_pkt_discard_out,
156 .ops = &ip6_dst_ops,
157 .path = (struct dst_entry*)&ip6_prohibit_entry,
158 }
159 },
160 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
161 .rt6i_metric = ~(u32) 0,
162 .rt6i_ref = ATOMIC_INIT(1),
163};
164
165struct rt6_info ip6_blk_hole_entry = {
166 .u = {
167 .dst = {
168 .__refcnt = ATOMIC_INIT(1),
169 .__use = 1,
170 .dev = &loopback_dev,
171 .obsolete = -1,
172 .error = -EINVAL,
173 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
174 .input = ip6_pkt_discard,
175 .output = ip6_pkt_discard_out,
176 .ops = &ip6_dst_ops,
177 .path = (struct dst_entry*)&ip6_blk_hole_entry,
178 }
179 },
180 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
181 .rt6i_metric = ~(u32) 0,
182 .rt6i_ref = ATOMIC_INIT(1),
183};
184
185#endif
186
1da177e4
LT
187/* allocate dst with ip6_dst_ops */
188static __inline__ struct rt6_info *ip6_dst_alloc(void)
189{
190 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
191}
192
193static void ip6_dst_destroy(struct dst_entry *dst)
194{
195 struct rt6_info *rt = (struct rt6_info *)dst;
196 struct inet6_dev *idev = rt->rt6i_idev;
197
198 if (idev != NULL) {
199 rt->rt6i_idev = NULL;
200 in6_dev_put(idev);
201 }
202}
203
204static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
205 int how)
206{
207 struct rt6_info *rt = (struct rt6_info *)dst;
208 struct inet6_dev *idev = rt->rt6i_idev;
209
210 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
211 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
212 if (loopback_idev != NULL) {
213 rt->rt6i_idev = loopback_idev;
214 in6_dev_put(idev);
215 }
216 }
217}
218
219static __inline__ int rt6_check_expired(const struct rt6_info *rt)
220{
221 return (rt->rt6i_flags & RTF_EXPIRES &&
222 time_after(jiffies, rt->rt6i_expires));
223}
224
c71099ac
TG
225static inline int rt6_need_strict(struct in6_addr *daddr)
226{
227 return (ipv6_addr_type(daddr) &
228 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
229}
230
1da177e4 231/*
c71099ac 232 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
233 */
234
235static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
236 int oif,
237 int strict)
238{
239 struct rt6_info *local = NULL;
240 struct rt6_info *sprt;
241
242 if (oif) {
243 for (sprt = rt; sprt; sprt = sprt->u.next) {
244 struct net_device *dev = sprt->rt6i_dev;
245 if (dev->ifindex == oif)
246 return sprt;
247 if (dev->flags & IFF_LOOPBACK) {
248 if (sprt->rt6i_idev == NULL ||
249 sprt->rt6i_idev->dev->ifindex != oif) {
250 if (strict && oif)
251 continue;
252 if (local && (!oif ||
253 local->rt6i_idev->dev->ifindex == oif))
254 continue;
255 }
256 local = sprt;
257 }
258 }
259
260 if (local)
261 return local;
262
263 if (strict)
264 return &ip6_null_entry;
265 }
266 return rt;
267}
268
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If @rt has a next-hop neighbour that is not in a NUD_VALID state and
 * more than cnf.rtr_probe_interval has passed since neigh->updated, stamp
 * neigh->updated and send a Neighbour Solicitation to the solicited-node
 * multicast address of the neighbour.  A NULL @rt is tolerated.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	/* cheap unlocked test first */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
304
1da177e4 305/*
554cfb7e 306 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 307 */
554cfb7e
YH
308static int inline rt6_check_dev(struct rt6_info *rt, int oif)
309{
310 struct net_device *dev = rt->rt6i_dev;
311 if (!oif || dev->ifindex == oif)
312 return 2;
313 if ((dev->flags & IFF_LOOPBACK) &&
314 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
315 return 1;
316 return 0;
317}
1da177e4 318
554cfb7e 319static int inline rt6_check_neigh(struct rt6_info *rt)
1da177e4 320{
554cfb7e
YH
321 struct neighbour *neigh = rt->rt6i_nexthop;
322 int m = 0;
4d0c5911
YH
323 if (rt->rt6i_flags & RTF_NONEXTHOP ||
324 !(rt->rt6i_flags & RTF_GATEWAY))
325 m = 1;
326 else if (neigh) {
554cfb7e
YH
327 read_lock_bh(&neigh->lock);
328 if (neigh->nud_state & NUD_VALID)
4d0c5911 329 m = 2;
554cfb7e 330 read_unlock_bh(&neigh->lock);
1da177e4 331 }
554cfb7e 332 return m;
1da177e4
LT
333}
334
554cfb7e
YH
335static int rt6_score_route(struct rt6_info *rt, int oif,
336 int strict)
1da177e4 337{
4d0c5911
YH
338 int m, n;
339
340 m = rt6_check_dev(rt, oif);
554cfb7e
YH
341 if (!m && (strict & RT6_SELECT_F_IFACE))
342 return -1;
ebacaaa0
YH
343#ifdef CONFIG_IPV6_ROUTER_PREF
344 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
345#endif
4d0c5911
YH
346 n = rt6_check_neigh(rt);
347 if (n > 1)
ebacaaa0 348 m |= 16;
4d0c5911 349 else if (!n && strict & RT6_SELECT_F_REACHABLE)
554cfb7e
YH
350 return -1;
351 return m;
352}
353
354static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
355 int strict)
356{
357 struct rt6_info *match = NULL, *last = NULL;
358 struct rt6_info *rt, *rt0 = *head;
359 u32 metric;
360 int mpri = -1;
1da177e4 361
554cfb7e
YH
362 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
363 __FUNCTION__, head, head ? *head : NULL, oif);
1da177e4 364
554cfb7e 365 for (rt = rt0, metric = rt0->rt6i_metric;
c302e6d5 366 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
554cfb7e
YH
367 rt = rt->u.next) {
368 int m;
1da177e4 369
554cfb7e 370 if (rt6_check_expired(rt))
1da177e4
LT
371 continue;
372
554cfb7e
YH
373 last = rt;
374
375 m = rt6_score_route(rt, oif, strict);
376 if (m < 0)
1da177e4 377 continue;
1da177e4 378
554cfb7e 379 if (m > mpri) {
27097255 380 rt6_probe(match);
554cfb7e 381 match = rt;
1da177e4 382 mpri = m;
27097255
YH
383 } else {
384 rt6_probe(rt);
1da177e4
LT
385 }
386 }
387
554cfb7e
YH
388 if (!match &&
389 (strict & RT6_SELECT_F_REACHABLE) &&
390 last && last != rt0) {
391 /* no entries matched; do round-robin */
34af946a 392 static DEFINE_SPINLOCK(lock);
c302e6d5 393 spin_lock(&lock);
554cfb7e
YH
394 *head = rt0->u.next;
395 rt0->u.next = last->u.next;
396 last->u.next = rt0;
c302e6d5 397 spin_unlock(&lock);
1da177e4 398 }
1da177e4 399
554cfb7e
YH
400 RT6_TRACE("%s() => %p, score=%d\n",
401 __FUNCTION__, match, mpri);
1da177e4 402
554cfb7e 403 return (match ? match : &ip6_null_entry);
1da177e4
LT
404}
405
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process one Route Information option.
 *
 * @dev:    device the option arrived on (its ifindex keys the route)
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: gateway address to use for the route
 *
 * Validates the option's length/prefix_len combination, then looks up the
 * matching route-info route via rt6_get_route_info().  A zero lifetime
 * deletes an existing route; a non-zero lifetime creates the route if it
 * is missing, or otherwise refreshes its preference flags.  Finally the
 * expiry is set (or RTF_EXPIRES cleared for the 0xffffffff "infinite"
 * lifetime) and the reference to the route is dropped.
 *
 * Returns 0 on success, -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/* the lifetime arrives in network byte order: convert to host order */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/*
		 * The option holds fewer than 16 prefix bytes; build a full
		 * address in prefix_buf from the first prefix_len bits.
		 */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* lifetime 0 withdraws the route */
	if (rt && !lifetime) {
		ip6_del_rt(rt, NULL, NULL, NULL);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
483
c71099ac
TG
/*
 * BACKTRACK() - climb towards the tree root after a failed strict lookup.
 *
 * Expands in the caller's scope and relies on the caller providing the
 * variables `rt`, `fn` and `flags` and the labels `restart` and `out`.
 *
 * When the current result is &ip6_null_entry and RT6_F_STRICT is set, walk
 * fn->parent upwards: on reaching a node flagged RTN_TL_ROOT take a
 * reference on rt and jump to `out`; on reaching a node flagged RTN_RTINFO
 * jump to `restart` so the caller retries there.  If the parent chain ends
 * first, execution simply continues after the macro.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is one statement and
 * cannot capture a following `else`.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && (flags & RT6_F_STRICT)) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_TL_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
495
496static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
497 struct flowi *fl, int flags)
1da177e4
LT
498{
499 struct fib6_node *fn;
500 struct rt6_info *rt;
501
c71099ac
TG
502 read_lock_bh(&table->tb6_lock);
503 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
504restart:
505 rt = fn->leaf;
506 rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
507 BACKTRACK();
1da177e4 508 dst_hold(&rt->u.dst);
c71099ac
TG
509out:
510 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
511
512 rt->u.dst.lastuse = jiffies;
c71099ac
TG
513 rt->u.dst.__use++;
514
515 return rt;
516
517}
518
519struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
520 int oif, int strict)
521{
522 struct flowi fl = {
523 .oif = oif,
524 .nl_u = {
525 .ip6_u = {
526 .daddr = *daddr,
527 /* TODO: saddr */
528 },
529 },
530 };
531 struct dst_entry *dst;
532 int flags = strict ? RT6_F_STRICT : 0;
533
534 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
535 if (dst->error == 0)
536 return (struct rt6_info *) dst;
537
538 dst_release(dst);
539
1da177e4
LT
540 return NULL;
541}
542
c71099ac 543/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
544 It takes new route entry, the addition fails by any reason the
545 route is freed. In any case, if caller does not hold it, it may
546 be destroyed.
547 */
548
0d51aa80
JHS
549int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
550 void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
551{
552 int err;
c71099ac 553 struct fib6_table *table;
1da177e4 554
c71099ac
TG
555 table = rt->rt6i_table;
556 write_lock_bh(&table->tb6_lock);
557 err = fib6_add(&table->tb6_root, rt, nlh, _rtattr, req);
558 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
559
560 return err;
561}
562
95a9a5ba
YH
563static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
564 struct in6_addr *saddr)
1da177e4 565{
1da177e4
LT
566 struct rt6_info *rt;
567
568 /*
569 * Clone the route.
570 */
571
572 rt = ip6_rt_copy(ort);
573
574 if (rt) {
58c4fb86
YH
575 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
576 if (rt->rt6i_dst.plen != 128 &&
577 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
578 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 579 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 580 }
1da177e4 581
58c4fb86 582 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
583 rt->rt6i_dst.plen = 128;
584 rt->rt6i_flags |= RTF_CACHE;
585 rt->u.dst.flags |= DST_HOST;
586
587#ifdef CONFIG_IPV6_SUBTREES
588 if (rt->rt6i_src.plen && saddr) {
589 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
590 rt->rt6i_src.plen = 128;
591 }
592#endif
593
594 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
595
95a9a5ba 596 }
1da177e4 597
95a9a5ba
YH
598 return rt;
599}
1da177e4 600
299d9939
YH
601static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
602{
603 struct rt6_info *rt = ip6_rt_copy(ort);
604 if (rt) {
605 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
606 rt->rt6i_dst.plen = 128;
607 rt->rt6i_flags |= RTF_CACHE;
608 if (rt->rt6i_flags & RTF_REJECT)
609 rt->u.dst.error = ort->u.dst.error;
610 rt->u.dst.flags |= DST_HOST;
611 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
612 }
613 return rt;
614}
615
8ce11e6a
AB
616static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
617 struct flowi *fl, int flags)
1da177e4
LT
618{
619 struct fib6_node *fn;
519fbd87 620 struct rt6_info *rt, *nrt;
c71099ac 621 int strict = 0;
1da177e4 622 int attempts = 3;
519fbd87 623 int err;
8238dd06 624 int reachable = RT6_SELECT_F_REACHABLE;
1da177e4 625
c71099ac
TG
626 if (flags & RT6_F_STRICT)
627 strict = RT6_SELECT_F_IFACE;
1da177e4
LT
628
629relookup:
c71099ac 630 read_lock_bh(&table->tb6_lock);
1da177e4 631
8238dd06 632restart_2:
c71099ac 633 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
634
635restart:
c71099ac 636 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
1da177e4 637 BACKTRACK();
8238dd06
YH
638 if (rt == &ip6_null_entry ||
639 rt->rt6i_flags & RTF_CACHE)
1ddef044 640 goto out;
1da177e4 641
fb9de91e 642 dst_hold(&rt->u.dst);
c71099ac 643 read_unlock_bh(&table->tb6_lock);
fb9de91e 644
519fbd87 645 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 646 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
647 else {
648#if CLONE_OFFLINK_ROUTE
c71099ac 649 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
650#else
651 goto out2;
652#endif
653 }
e40cf353 654
519fbd87
YH
655 dst_release(&rt->u.dst);
656 rt = nrt ? : &ip6_null_entry;
1da177e4 657
519fbd87
YH
658 dst_hold(&rt->u.dst);
659 if (nrt) {
c71099ac 660 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
519fbd87 661 if (!err)
1da177e4 662 goto out2;
1da177e4 663 }
1da177e4 664
519fbd87
YH
665 if (--attempts <= 0)
666 goto out2;
667
668 /*
c71099ac 669 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
670 * released someone could insert this route. Relookup.
671 */
672 dst_release(&rt->u.dst);
673 goto relookup;
674
675out:
8238dd06
YH
676 if (reachable) {
677 reachable = 0;
678 goto restart_2;
679 }
519fbd87 680 dst_hold(&rt->u.dst);
c71099ac 681 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
682out2:
683 rt->u.dst.lastuse = jiffies;
684 rt->u.dst.__use++;
c71099ac
TG
685
686 return rt;
1da177e4
LT
687}
688
c71099ac
TG
689void ip6_route_input(struct sk_buff *skb)
690{
691 struct ipv6hdr *iph = skb->nh.ipv6h;
692 struct flowi fl = {
693 .iif = skb->dev->ifindex,
694 .nl_u = {
695 .ip6_u = {
696 .daddr = iph->daddr,
697 .saddr = iph->saddr,
698 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
699 },
700 },
701 .proto = iph->nexthdr,
702 };
703 int flags = 0;
704
705 if (rt6_need_strict(&iph->daddr))
706 flags |= RT6_F_STRICT;
707
708 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
709}
710
711static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
712 struct flowi *fl, int flags)
1da177e4
LT
713{
714 struct fib6_node *fn;
519fbd87 715 struct rt6_info *rt, *nrt;
c71099ac 716 int strict = 0;
1da177e4 717 int attempts = 3;
519fbd87 718 int err;
8238dd06 719 int reachable = RT6_SELECT_F_REACHABLE;
1da177e4 720
c71099ac
TG
721 if (flags & RT6_F_STRICT)
722 strict = RT6_SELECT_F_IFACE;
1da177e4
LT
723
724relookup:
c71099ac 725 read_lock_bh(&table->tb6_lock);
1da177e4 726
8238dd06 727restart_2:
c71099ac 728 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
729
730restart:
8238dd06 731 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
1ddef044 732 BACKTRACK();
8238dd06
YH
733 if (rt == &ip6_null_entry ||
734 rt->rt6i_flags & RTF_CACHE)
1da177e4 735 goto out;
1da177e4 736
fb9de91e 737 dst_hold(&rt->u.dst);
c71099ac 738 read_unlock_bh(&table->tb6_lock);
fb9de91e 739
519fbd87 740 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
e40cf353 741 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
742 else {
743#if CLONE_OFFLINK_ROUTE
744 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
745#else
746 goto out2;
747#endif
748 }
1da177e4 749
519fbd87
YH
750 dst_release(&rt->u.dst);
751 rt = nrt ? : &ip6_null_entry;
1da177e4 752
519fbd87
YH
753 dst_hold(&rt->u.dst);
754 if (nrt) {
755 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
756 if (!err)
1da177e4 757 goto out2;
1da177e4 758 }
e40cf353 759
519fbd87
YH
760 if (--attempts <= 0)
761 goto out2;
762
763 /*
c71099ac 764 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
765 * released someone could insert this route. Relookup.
766 */
767 dst_release(&rt->u.dst);
768 goto relookup;
769
770out:
8238dd06
YH
771 if (reachable) {
772 reachable = 0;
773 goto restart_2;
774 }
519fbd87 775 dst_hold(&rt->u.dst);
c71099ac 776 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
777out2:
778 rt->u.dst.lastuse = jiffies;
779 rt->u.dst.__use++;
c71099ac
TG
780 return rt;
781}
782
783struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
784{
785 int flags = 0;
786
787 if (rt6_need_strict(&fl->fl6_dst))
788 flags |= RT6_F_STRICT;
789
790 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
1da177e4
LT
791}
792
793
794/*
795 * Destination cache support functions
796 */
797
798static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
799{
800 struct rt6_info *rt;
801
802 rt = (struct rt6_info *) dst;
803
804 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
805 return dst;
806
807 return NULL;
808}
809
810static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
811{
812 struct rt6_info *rt = (struct rt6_info *) dst;
813
814 if (rt) {
815 if (rt->rt6i_flags & RTF_CACHE)
0d51aa80 816 ip6_del_rt(rt, NULL, NULL, NULL);
1da177e4
LT
817 else
818 dst_release(dst);
819 }
820 return NULL;
821}
822
823static void ip6_link_failure(struct sk_buff *skb)
824{
825 struct rt6_info *rt;
826
827 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
828
829 rt = (struct rt6_info *) skb->dst;
830 if (rt) {
831 if (rt->rt6i_flags&RTF_CACHE) {
832 dst_set_expires(&rt->u.dst, 0);
833 rt->rt6i_flags |= RTF_EXPIRES;
834 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
835 rt->rt6i_node->fn_sernum = -1;
836 }
837}
838
839static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
840{
841 struct rt6_info *rt6 = (struct rt6_info*)dst;
842
843 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
844 rt6->rt6i_flags |= RTF_MODIFIED;
845 if (mtu < IPV6_MIN_MTU) {
846 mtu = IPV6_MIN_MTU;
847 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
848 }
849 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 850 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
851 }
852}
853
1da177e4
LT
854static int ipv6_get_mtu(struct net_device *dev);
855
856static inline unsigned int ipv6_advmss(unsigned int mtu)
857{
858 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
859
860 if (mtu < ip6_rt_min_advmss)
861 mtu = ip6_rt_min_advmss;
862
863 /*
864 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
865 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
866 * IPV6_MAXPLEN is also valid and means: "any MSS,
867 * rely only on pmtu discovery"
868 */
869 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
870 mtu = IPV6_MAXPLEN;
871 return mtu;
872}
873
5d0bbeeb 874static struct dst_entry *ndisc_dst_gc_list;
8ce11e6a 875static DEFINE_SPINLOCK(ndisc_lock);
5d0bbeeb 876
1da177e4
LT
877struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
878 struct neighbour *neigh,
879 struct in6_addr *addr,
880 int (*output)(struct sk_buff *))
881{
882 struct rt6_info *rt;
883 struct inet6_dev *idev = in6_dev_get(dev);
884
885 if (unlikely(idev == NULL))
886 return NULL;
887
888 rt = ip6_dst_alloc();
889 if (unlikely(rt == NULL)) {
890 in6_dev_put(idev);
891 goto out;
892 }
893
894 dev_hold(dev);
895 if (neigh)
896 neigh_hold(neigh);
897 else
898 neigh = ndisc_get_neigh(dev, addr);
899
900 rt->rt6i_dev = dev;
901 rt->rt6i_idev = idev;
902 rt->rt6i_nexthop = neigh;
903 atomic_set(&rt->u.dst.__refcnt, 1);
904 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
905 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
906 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
907 rt->u.dst.output = output;
908
909#if 0 /* there's no chance to use these for ndisc */
910 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
911 ? DST_HOST
912 : 0;
913 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
914 rt->rt6i_dst.plen = 128;
915#endif
916
5d0bbeeb 917 spin_lock_bh(&ndisc_lock);
1da177e4
LT
918 rt->u.dst.next = ndisc_dst_gc_list;
919 ndisc_dst_gc_list = &rt->u.dst;
5d0bbeeb 920 spin_unlock_bh(&ndisc_lock);
1da177e4
LT
921
922 fib6_force_start_gc();
923
924out:
925 return (struct dst_entry *)rt;
926}
927
928int ndisc_dst_gc(int *more)
929{
930 struct dst_entry *dst, *next, **pprev;
931 int freed;
932
933 next = NULL;
5d0bbeeb
TG
934 freed = 0;
935
936 spin_lock_bh(&ndisc_lock);
1da177e4 937 pprev = &ndisc_dst_gc_list;
5d0bbeeb 938
1da177e4
LT
939 while ((dst = *pprev) != NULL) {
940 if (!atomic_read(&dst->__refcnt)) {
941 *pprev = dst->next;
942 dst_free(dst);
943 freed++;
944 } else {
945 pprev = &dst->next;
946 (*more)++;
947 }
948 }
949
5d0bbeeb
TG
950 spin_unlock_bh(&ndisc_lock);
951
1da177e4
LT
952 return freed;
953}
954
955static int ip6_dst_gc(void)
956{
957 static unsigned expire = 30*HZ;
958 static unsigned long last_gc;
959 unsigned long now = jiffies;
960
961 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
962 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
963 goto out;
964
965 expire++;
966 fib6_run_gc(expire);
967 last_gc = now;
968 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
969 expire = ip6_rt_gc_timeout>>1;
970
971out:
972 expire -= expire>>ip6_rt_gc_elasticity;
973 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
974}
975
976/* Clean host part of a prefix. Not necessary in radix tree,
977 but results in cleaner routing tables.
978
979 Remove it only when all the things will work!
980 */
981
982static int ipv6_get_mtu(struct net_device *dev)
983{
984 int mtu = IPV6_MIN_MTU;
985 struct inet6_dev *idev;
986
987 idev = in6_dev_get(dev);
988 if (idev) {
989 mtu = idev->cnf.mtu6;
990 in6_dev_put(idev);
991 }
992 return mtu;
993}
994
995int ipv6_get_hoplimit(struct net_device *dev)
996{
997 int hoplimit = ipv6_devconf.hop_limit;
998 struct inet6_dev *idev;
999
1000 idev = in6_dev_get(dev);
1001 if (idev) {
1002 hoplimit = idev->cnf.hop_limit;
1003 in6_dev_put(idev);
1004 }
1005 return hoplimit;
1006}
1007
1008/*
1009 *
1010 */
1011
0d51aa80 1012int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
c71099ac
TG
1013 void *_rtattr, struct netlink_skb_parms *req,
1014 u32 table_id)
1da177e4
LT
1015{
1016 int err;
1017 struct rtmsg *r;
1018 struct rtattr **rta;
1019 struct rt6_info *rt = NULL;
1020 struct net_device *dev = NULL;
1021 struct inet6_dev *idev = NULL;
c71099ac 1022 struct fib6_table *table;
1da177e4
LT
1023 int addr_type;
1024
1025 rta = (struct rtattr **) _rtattr;
1026
1027 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
1028 return -EINVAL;
1029#ifndef CONFIG_IPV6_SUBTREES
1030 if (rtmsg->rtmsg_src_len)
1031 return -EINVAL;
1032#endif
1033 if (rtmsg->rtmsg_ifindex) {
1034 err = -ENODEV;
1035 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
1036 if (!dev)
1037 goto out;
1038 idev = in6_dev_get(dev);
1039 if (!idev)
1040 goto out;
1041 }
1042
1043 if (rtmsg->rtmsg_metric == 0)
1044 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
1045
c71099ac
TG
1046 table = fib6_new_table(table_id);
1047 if (table == NULL) {
1048 err = -ENOBUFS;
1049 goto out;
1050 }
1051
1da177e4
LT
1052 rt = ip6_dst_alloc();
1053
1054 if (rt == NULL) {
1055 err = -ENOMEM;
1056 goto out;
1057 }
1058
1059 rt->u.dst.obsolete = -1;
3dd4bc68 1060 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
1da177e4
LT
1061 if (nlh && (r = NLMSG_DATA(nlh))) {
1062 rt->rt6i_protocol = r->rtm_protocol;
1063 } else {
1064 rt->rt6i_protocol = RTPROT_BOOT;
1065 }
1066
1067 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
1068
1069 if (addr_type & IPV6_ADDR_MULTICAST)
1070 rt->u.dst.input = ip6_mc_input;
1071 else
1072 rt->u.dst.input = ip6_forward;
1073
1074 rt->u.dst.output = ip6_output;
1075
1076 ipv6_addr_prefix(&rt->rt6i_dst.addr,
1077 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
1078 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
1079 if (rt->rt6i_dst.plen == 128)
1080 rt->u.dst.flags = DST_HOST;
1081
1082#ifdef CONFIG_IPV6_SUBTREES
1083 ipv6_addr_prefix(&rt->rt6i_src.addr,
1084 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1085 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
1086#endif
1087
1088 rt->rt6i_metric = rtmsg->rtmsg_metric;
1089
1090 /* We cannot add true routes via loopback here,
1091 they would result in kernel looping; promote them to reject routes
1092 */
1093 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
1094 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1095 /* hold loopback dev/idev if we haven't done so. */
1096 if (dev != &loopback_dev) {
1097 if (dev) {
1098 dev_put(dev);
1099 in6_dev_put(idev);
1100 }
1101 dev = &loopback_dev;
1102 dev_hold(dev);
1103 idev = in6_dev_get(dev);
1104 if (!idev) {
1105 err = -ENODEV;
1106 goto out;
1107 }
1108 }
1109 rt->u.dst.output = ip6_pkt_discard_out;
1110 rt->u.dst.input = ip6_pkt_discard;
1111 rt->u.dst.error = -ENETUNREACH;
1112 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1113 goto install_route;
1114 }
1115
1116 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
1117 struct in6_addr *gw_addr;
1118 int gwa_type;
1119
1120 gw_addr = &rtmsg->rtmsg_gateway;
1121 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
1122 gwa_type = ipv6_addr_type(gw_addr);
1123
1124 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1125 struct rt6_info *grt;
1126
1127 /* IPv6 strictly inhibits using not link-local
1128 addresses as nexthop address.
1129 Otherwise, router will not able to send redirects.
1130 It is very good, but in some (rare!) circumstances
1131 (SIT, PtP, NBMA NOARP links) it is handy to allow
1132 some exceptions. --ANK
1133 */
1134 err = -EINVAL;
1135 if (!(gwa_type&IPV6_ADDR_UNICAST))
1136 goto out;
1137
1138 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
1139
1140 err = -EHOSTUNREACH;
1141 if (grt == NULL)
1142 goto out;
1143 if (dev) {
1144 if (dev != grt->rt6i_dev) {
1145 dst_release(&grt->u.dst);
1146 goto out;
1147 }
1148 } else {
1149 dev = grt->rt6i_dev;
1150 idev = grt->rt6i_idev;
1151 dev_hold(dev);
1152 in6_dev_hold(grt->rt6i_idev);
1153 }
1154 if (!(grt->rt6i_flags&RTF_GATEWAY))
1155 err = 0;
1156 dst_release(&grt->u.dst);
1157
1158 if (err)
1159 goto out;
1160 }
1161 err = -EINVAL;
1162 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1163 goto out;
1164 }
1165
1166 err = -ENODEV;
1167 if (dev == NULL)
1168 goto out;
1169
1170 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
1171 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1172 if (IS_ERR(rt->rt6i_nexthop)) {
1173 err = PTR_ERR(rt->rt6i_nexthop);
1174 rt->rt6i_nexthop = NULL;
1175 goto out;
1176 }
1177 }
1178
1179 rt->rt6i_flags = rtmsg->rtmsg_flags;
1180
1181install_route:
1182 if (rta && rta[RTA_METRICS-1]) {
1183 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
1184 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
1185
1186 while (RTA_OK(attr, attrlen)) {
1187 unsigned flavor = attr->rta_type;
1188 if (flavor) {
1189 if (flavor > RTAX_MAX) {
1190 err = -EINVAL;
1191 goto out;
1192 }
1193 rt->u.dst.metrics[flavor-1] =
1194 *(u32 *)RTA_DATA(attr);
1195 }
1196 attr = RTA_NEXT(attr, attrlen);
1197 }
1198 }
1199
1200 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1201 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1202 if (!rt->u.dst.metrics[RTAX_MTU-1])
1203 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1204 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1205 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1206 rt->u.dst.dev = dev;
1207 rt->rt6i_idev = idev;
c71099ac 1208 rt->rt6i_table = table;
0d51aa80 1209 return ip6_ins_rt(rt, nlh, _rtattr, req);
1da177e4
LT
1210
1211out:
1212 if (dev)
1213 dev_put(dev);
1214 if (idev)
1215 in6_dev_put(idev);
1216 if (rt)
1217 dst_free((struct dst_entry *) rt);
1218 return err;
1219}
1220
0d51aa80 1221int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
1222{
1223 int err;
c71099ac 1224 struct fib6_table *table;
1da177e4 1225
6c813a72
PM
1226 if (rt == &ip6_null_entry)
1227 return -ENOENT;
1228
c71099ac
TG
1229 table = rt->rt6i_table;
1230 write_lock_bh(&table->tb6_lock);
1da177e4 1231
0d51aa80 1232 err = fib6_del(rt, nlh, _rtattr, req);
1da177e4
LT
1233 dst_release(&rt->u.dst);
1234
c71099ac 1235 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1236
1237 return err;
1238}
1239
c71099ac
TG
1240static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
1241 void *_rtattr, struct netlink_skb_parms *req,
1242 u32 table_id)
1da177e4 1243{
c71099ac 1244 struct fib6_table *table;
1da177e4
LT
1245 struct fib6_node *fn;
1246 struct rt6_info *rt;
1247 int err = -ESRCH;
1248
c71099ac
TG
1249 table = fib6_get_table(table_id);
1250 if (table == NULL)
1251 return err;
1252
1253 read_lock_bh(&table->tb6_lock);
1da177e4 1254
c71099ac 1255 fn = fib6_locate(&table->tb6_root,
1da177e4
LT
1256 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1257 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1258
1259 if (fn) {
1260 for (rt = fn->leaf; rt; rt = rt->u.next) {
1261 if (rtmsg->rtmsg_ifindex &&
1262 (rt->rt6i_dev == NULL ||
1263 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1264 continue;
1265 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1266 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1267 continue;
1268 if (rtmsg->rtmsg_metric &&
1269 rtmsg->rtmsg_metric != rt->rt6i_metric)
1270 continue;
1271 dst_hold(&rt->u.dst);
c71099ac 1272 read_unlock_bh(&table->tb6_lock);
1da177e4 1273
0d51aa80 1274 return ip6_del_rt(rt, nlh, _rtattr, req);
1da177e4
LT
1275 }
1276 }
c71099ac 1277 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1278
1279 return err;
1280}
1281
1282/*
1283 * Handle redirects
1284 */
1285void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1286 struct neighbour *neigh, u8 *lladdr, int on_link)
1287{
e843b9e1 1288 struct rt6_info *rt, *nrt = NULL;
e843b9e1 1289 struct fib6_node *fn;
c71099ac 1290 struct fib6_table *table;
8d71740c 1291 struct netevent_redirect netevent;
1da177e4 1292
c71099ac
TG
1293 /* TODO: Very lazy, might need to check all tables */
1294 table = fib6_get_table(RT6_TABLE_MAIN);
1295 if (table == NULL)
1296 return;
1297
1da177e4 1298 /*
e843b9e1
YH
1299 * Get the "current" route for this destination and
1300 * check if the redirect has come from approriate router.
1301 *
1302 * RFC 2461 specifies that redirects should only be
1303 * accepted if they come from the nexthop to the target.
1304 * Due to the way the routes are chosen, this notion
1305 * is a bit fuzzy and one might need to check all possible
1306 * routes.
1da177e4 1307 */
1da177e4 1308
c71099ac
TG
1309 read_lock_bh(&table->tb6_lock);
1310 fn = fib6_lookup(&table->tb6_root, dest, NULL);
e843b9e1
YH
1311restart:
1312 for (rt = fn->leaf; rt; rt = rt->u.next) {
1313 /*
1314 * Current route is on-link; redirect is always invalid.
1315 *
1316 * Seems, previous statement is not true. It could
1317 * be node, which looks for us as on-link (f.e. proxy ndisc)
1318 * But then router serving it might decide, that we should
1319 * know truth 8)8) --ANK (980726).
1320 */
1321 if (rt6_check_expired(rt))
1322 continue;
1323 if (!(rt->rt6i_flags & RTF_GATEWAY))
1324 continue;
1325 if (neigh->dev != rt->rt6i_dev)
1326 continue;
1327 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
1328 continue;
1329 break;
1330 }
1331 if (rt)
1332 dst_hold(&rt->u.dst);
c71099ac 1333 else if (rt6_need_strict(dest)) {
e843b9e1
YH
1334 while ((fn = fn->parent) != NULL) {
1335 if (fn->fn_flags & RTN_ROOT)
1336 break;
1337 if (fn->fn_flags & RTN_RTINFO)
1338 goto restart;
1da177e4 1339 }
e843b9e1 1340 }
c71099ac 1341 read_unlock_bh(&table->tb6_lock);
e843b9e1
YH
1342
1343 if (!rt) {
1da177e4
LT
1344 if (net_ratelimit())
1345 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1346 "for redirect target\n");
e843b9e1 1347 return;
1da177e4
LT
1348 }
1349
1da177e4
LT
1350 /*
1351 * We have finally decided to accept it.
1352 */
1353
1354 neigh_update(neigh, lladdr, NUD_STALE,
1355 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1356 NEIGH_UPDATE_F_OVERRIDE|
1357 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1358 NEIGH_UPDATE_F_ISROUTER))
1359 );
1360
1361 /*
1362 * Redirect received -> path was valid.
1363 * Look, redirects are sent only in response to data packets,
1364 * so that this nexthop apparently is reachable. --ANK
1365 */
1366 dst_confirm(&rt->u.dst);
1367
1368 /* Duplicate redirect: silently ignore. */
1369 if (neigh == rt->u.dst.neighbour)
1370 goto out;
1371
1372 nrt = ip6_rt_copy(rt);
1373 if (nrt == NULL)
1374 goto out;
1375
1376 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1377 if (on_link)
1378 nrt->rt6i_flags &= ~RTF_GATEWAY;
1379
1380 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1381 nrt->rt6i_dst.plen = 128;
1382 nrt->u.dst.flags |= DST_HOST;
1383
1384 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1385 nrt->rt6i_nexthop = neigh_clone(neigh);
1386 /* Reset pmtu, it may be better */
1387 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1388 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1389
0d51aa80 1390 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1da177e4
LT
1391 goto out;
1392
8d71740c
TT
1393 netevent.old = &rt->u.dst;
1394 netevent.new = &nrt->u.dst;
1395 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1396
1da177e4 1397 if (rt->rt6i_flags&RTF_CACHE) {
0d51aa80 1398 ip6_del_rt(rt, NULL, NULL, NULL);
1da177e4
LT
1399 return;
1400 }
1401
1402out:
1403 dst_release(&rt->u.dst);
1404 return;
1405}
1406
1407/*
1408 * Handle ICMP "packet too big" messages
1409 * i.e. Path MTU discovery
1410 */
1411
1412void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1413 struct net_device *dev, u32 pmtu)
1414{
1415 struct rt6_info *rt, *nrt;
1416 int allfrag = 0;
1417
1418 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1419 if (rt == NULL)
1420 return;
1421
1422 if (pmtu >= dst_mtu(&rt->u.dst))
1423 goto out;
1424
1425 if (pmtu < IPV6_MIN_MTU) {
1426 /*
1427 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1428 * MTU (1280) and a fragment header should always be included
1429 * after a node receiving Too Big message reporting PMTU is
1430 * less than the IPv6 Minimum Link MTU.
1431 */
1432 pmtu = IPV6_MIN_MTU;
1433 allfrag = 1;
1434 }
1435
1436 /* New mtu received -> path was valid.
1437 They are sent only in response to data packets,
1438 so that this nexthop apparently is reachable. --ANK
1439 */
1440 dst_confirm(&rt->u.dst);
1441
1442 /* Host route. If it is static, it would be better
1443 not to override it, but add new one, so that
1444 when cache entry will expire old pmtu
1445 would return automatically.
1446 */
1447 if (rt->rt6i_flags & RTF_CACHE) {
1448 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1449 if (allfrag)
1450 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1451 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1452 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1453 goto out;
1454 }
1455
1456 /* Network route.
1457 Two cases are possible:
1458 1. It is connected route. Action: COW
1459 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1460 */
d5315b50 1461 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1462 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1463 else
1464 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1465
d5315b50 1466 if (nrt) {
a1e78363
YH
1467 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1468 if (allfrag)
1469 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1470
1471 /* According to RFC 1981, detecting PMTU increase shouldn't be
1472 * happened within 5 mins, the recommended timer is 10 mins.
1473 * Here this route expiration time is set to ip6_rt_mtu_expires
1474 * which is 10 mins. After 10 mins the decreased pmtu is expired
1475 * and detecting PMTU increase will be automatically happened.
1476 */
1477 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1478 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1479
1480 ip6_ins_rt(nrt, NULL, NULL, NULL);
1da177e4 1481 }
1da177e4
LT
1482out:
1483 dst_release(&rt->u.dst);
1484}
1485
1486/*
1487 * Misc support functions
1488 */
1489
1490static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1491{
1492 struct rt6_info *rt = ip6_dst_alloc();
1493
1494 if (rt) {
1495 rt->u.dst.input = ort->u.dst.input;
1496 rt->u.dst.output = ort->u.dst.output;
1497
1498 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1499 rt->u.dst.dev = ort->u.dst.dev;
1500 if (rt->u.dst.dev)
1501 dev_hold(rt->u.dst.dev);
1502 rt->rt6i_idev = ort->rt6i_idev;
1503 if (rt->rt6i_idev)
1504 in6_dev_hold(rt->rt6i_idev);
1505 rt->u.dst.lastuse = jiffies;
1506 rt->rt6i_expires = 0;
1507
1508 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1509 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1510 rt->rt6i_metric = 0;
1511
1512 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1513#ifdef CONFIG_IPV6_SUBTREES
1514 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1515#endif
c71099ac 1516 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1517 }
1518 return rt;
1519}
1520
70ceb4f5
YH
1521#ifdef CONFIG_IPV6_ROUTE_INFO
1522static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1523 struct in6_addr *gwaddr, int ifindex)
1524{
1525 struct fib6_node *fn;
1526 struct rt6_info *rt = NULL;
c71099ac
TG
1527 struct fib6_table *table;
1528
1529 table = fib6_get_table(RT6_TABLE_INFO);
1530 if (table == NULL)
1531 return NULL;
70ceb4f5 1532
c71099ac
TG
1533 write_lock_bh(&table->tb6_lock);
1534 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1535 if (!fn)
1536 goto out;
1537
1538 for (rt = fn->leaf; rt; rt = rt->u.next) {
1539 if (rt->rt6i_dev->ifindex != ifindex)
1540 continue;
1541 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1542 continue;
1543 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1544 continue;
1545 dst_hold(&rt->u.dst);
1546 break;
1547 }
1548out:
c71099ac 1549 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1550 return rt;
1551}
1552
1553static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1554 struct in6_addr *gwaddr, int ifindex,
1555 unsigned pref)
1556{
1557 struct in6_rtmsg rtmsg;
1558
1559 memset(&rtmsg, 0, sizeof(rtmsg));
1560 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1561 ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
1562 rtmsg.rtmsg_dst_len = prefixlen;
1563 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1564 rtmsg.rtmsg_metric = 1024;
1565 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
e317da96
YH
1566 /* We should treat it as a default route if prefix length is 0. */
1567 if (!prefixlen)
1568 rtmsg.rtmsg_flags |= RTF_DEFAULT;
70ceb4f5
YH
1569 rtmsg.rtmsg_ifindex = ifindex;
1570
c71099ac 1571 ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_INFO);
70ceb4f5
YH
1572
1573 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1574}
1575#endif
1576
1da177e4
LT
1577struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1578{
1579 struct rt6_info *rt;
c71099ac 1580 struct fib6_table *table;
1da177e4 1581
c71099ac
TG
1582 table = fib6_get_table(RT6_TABLE_DFLT);
1583 if (table == NULL)
1584 return NULL;
1da177e4 1585
c71099ac
TG
1586 write_lock_bh(&table->tb6_lock);
1587 for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
1da177e4 1588 if (dev == rt->rt6i_dev &&
045927ff 1589 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1590 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1591 break;
1592 }
1593 if (rt)
1594 dst_hold(&rt->u.dst);
c71099ac 1595 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1596 return rt;
1597}
1598
1599struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1600 struct net_device *dev,
1601 unsigned int pref)
1da177e4
LT
1602{
1603 struct in6_rtmsg rtmsg;
1604
1605 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1606 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1607 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1608 rtmsg.rtmsg_metric = 1024;
ebacaaa0
YH
1609 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
1610 RTF_PREF(pref);
1da177e4
LT
1611
1612 rtmsg.rtmsg_ifindex = dev->ifindex;
1613
c71099ac 1614 ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_DFLT);
1da177e4
LT
1615 return rt6_get_dflt_router(gwaddr, dev);
1616}
1617
1618void rt6_purge_dflt_routers(void)
1619{
1620 struct rt6_info *rt;
c71099ac
TG
1621 struct fib6_table *table;
1622
1623 /* NOTE: Keep consistent with rt6_get_dflt_router */
1624 table = fib6_get_table(RT6_TABLE_DFLT);
1625 if (table == NULL)
1626 return;
1da177e4
LT
1627
1628restart:
c71099ac
TG
1629 read_lock_bh(&table->tb6_lock);
1630 for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
1da177e4
LT
1631 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1632 dst_hold(&rt->u.dst);
c71099ac 1633 read_unlock_bh(&table->tb6_lock);
0d51aa80 1634 ip6_del_rt(rt, NULL, NULL, NULL);
1da177e4
LT
1635 goto restart;
1636 }
1637 }
c71099ac 1638 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1639}
1640
1641int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1642{
1643 struct in6_rtmsg rtmsg;
1644 int err;
1645
1646 switch(cmd) {
1647 case SIOCADDRT: /* Add a route */
1648 case SIOCDELRT: /* Delete a route */
1649 if (!capable(CAP_NET_ADMIN))
1650 return -EPERM;
1651 err = copy_from_user(&rtmsg, arg,
1652 sizeof(struct in6_rtmsg));
1653 if (err)
1654 return -EFAULT;
1655
1656 rtnl_lock();
1657 switch (cmd) {
1658 case SIOCADDRT:
c71099ac
TG
1659 err = ip6_route_add(&rtmsg, NULL, NULL, NULL,
1660 RT6_TABLE_MAIN);
1da177e4
LT
1661 break;
1662 case SIOCDELRT:
c71099ac
TG
1663 err = ip6_route_del(&rtmsg, NULL, NULL, NULL,
1664 RT6_TABLE_MAIN);
1da177e4
LT
1665 break;
1666 default:
1667 err = -EINVAL;
1668 }
1669 rtnl_unlock();
1670
1671 return err;
1672 };
1673
1674 return -EINVAL;
1675}
1676
1677/*
1678 * Drop the packet on the floor
1679 */
1680
20380731 1681static int ip6_pkt_discard(struct sk_buff *skb)
1da177e4 1682{
76d0cc1b
LL
1683 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1684 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1685 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1686
1da177e4
LT
1687 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1688 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1689 kfree_skb(skb);
1690 return 0;
1691}
1692
20380731 1693static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1694{
1695 skb->dev = skb->dst->dev;
1696 return ip6_pkt_discard(skb);
1697}
1698
1699/*
1700 * Allocate a dst for local (unicast / anycast) address.
1701 */
1702
1703struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1704 const struct in6_addr *addr,
1705 int anycast)
1706{
1707 struct rt6_info *rt = ip6_dst_alloc();
1708
1709 if (rt == NULL)
1710 return ERR_PTR(-ENOMEM);
1711
1712 dev_hold(&loopback_dev);
1713 in6_dev_hold(idev);
1714
1715 rt->u.dst.flags = DST_HOST;
1716 rt->u.dst.input = ip6_input;
1717 rt->u.dst.output = ip6_output;
1718 rt->rt6i_dev = &loopback_dev;
1719 rt->rt6i_idev = idev;
1720 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1721 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1722 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1723 rt->u.dst.obsolete = -1;
1724
1725 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1726 if (anycast)
1727 rt->rt6i_flags |= RTF_ANYCAST;
1728 else
1da177e4
LT
1729 rt->rt6i_flags |= RTF_LOCAL;
1730 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1731 if (rt->rt6i_nexthop == NULL) {
1732 dst_free((struct dst_entry *) rt);
1733 return ERR_PTR(-ENOMEM);
1734 }
1735
1736 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1737 rt->rt6i_dst.plen = 128;
c71099ac 1738 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1da177e4
LT
1739
1740 atomic_set(&rt->u.dst.__refcnt, 1);
1741
1742 return rt;
1743}
1744
1745static int fib6_ifdown(struct rt6_info *rt, void *arg)
1746{
1747 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1748 rt != &ip6_null_entry) {
1749 RT6_TRACE("deleted by ifdown %p\n", rt);
1750 return -1;
1751 }
1752 return 0;
1753}
1754
1755void rt6_ifdown(struct net_device *dev)
1756{
c71099ac 1757 fib6_clean_all(fib6_ifdown, 0, dev);
1da177e4
LT
1758}
1759
/* Argument block handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
1765
1766static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1767{
1768 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1769 struct inet6_dev *idev;
1770
1771 /* In IPv6 pmtu discovery is not optional,
1772 so that RTAX_MTU lock cannot disable it.
1773 We still use this lock to block changes
1774 caused by addrconf/ndisc.
1775 */
1776
1777 idev = __in6_dev_get(arg->dev);
1778 if (idev == NULL)
1779 return 0;
1780
1781 /* For administrative MTU increase, there is no way to discover
1782 IPv6 PMTU increase, so PMTU increase should be updated here.
1783 Since RFC 1981 doesn't include administrative MTU increase
1784 update PMTU increase is a MUST. (i.e. jumbo frame)
1785 */
1786 /*
1787 If new MTU is less than route PMTU, this new MTU will be the
1788 lowest MTU in the path, update the route PMTU to reflect PMTU
1789 decreases; if new MTU is greater than route PMTU, and the
1790 old MTU is the lowest MTU in the path, update the route PMTU
1791 to reflect the increase. In this case if the other nodes' MTU
1792 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1793 PMTU discouvery.
1794 */
1795 if (rt->rt6i_dev == arg->dev &&
1796 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1797 (dst_mtu(&rt->u.dst) > arg->mtu ||
1798 (dst_mtu(&rt->u.dst) < arg->mtu &&
1799 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1800 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1801 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1802 return 0;
1803}
1804
1805void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1806{
c71099ac
TG
1807 struct rt6_mtu_change_arg arg = {
1808 .dev = dev,
1809 .mtu = mtu,
1810 };
1da177e4 1811
c71099ac 1812 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1da177e4
LT
1813}
1814
1815static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1816 struct in6_rtmsg *rtmsg)
1817{
1818 memset(rtmsg, 0, sizeof(*rtmsg));
1819
1820 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1821 rtmsg->rtmsg_src_len = r->rtm_src_len;
1822 rtmsg->rtmsg_flags = RTF_UP;
1823 if (r->rtm_type == RTN_UNREACHABLE)
1824 rtmsg->rtmsg_flags |= RTF_REJECT;
1825
1826 if (rta[RTA_GATEWAY-1]) {
1827 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1828 return -EINVAL;
1829 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1830 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1831 }
1832 if (rta[RTA_DST-1]) {
1833 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1834 return -EINVAL;
1835 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1836 }
1837 if (rta[RTA_SRC-1]) {
1838 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1839 return -EINVAL;
1840 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1841 }
1842 if (rta[RTA_OIF-1]) {
1843 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1844 return -EINVAL;
1845 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1846 }
1847 if (rta[RTA_PRIORITY-1]) {
1848 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1849 return -EINVAL;
1850 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1851 }
1852 return 0;
1853}
1854
1855int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1856{
1857 struct rtmsg *r = NLMSG_DATA(nlh);
1858 struct in6_rtmsg rtmsg;
1859
1860 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1861 return -EINVAL;
9e762a4a
PM
1862 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb),
1863 rtm_get_table(arg, r->rtm_table));
1da177e4
LT
1864}
1865
1866int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1867{
1868 struct rtmsg *r = NLMSG_DATA(nlh);
1869 struct in6_rtmsg rtmsg;
1870
1871 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1872 return -EINVAL;
9e762a4a
PM
1873 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb),
1874 rtm_get_table(arg, r->rtm_table));
1da177e4
LT
1875}
1876
/* Per-call context passed to rt6_dump_route() through the FIB walker. */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* buffer the dump is written into */
	struct netlink_callback *cb;	/* netlink dump callback state */
};
1882
1883static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
1884 struct in6_addr *dst, struct in6_addr *src,
1885 int iif, int type, u32 pid, u32 seq,
1886 int prefix, unsigned int flags)
1da177e4
LT
1887{
1888 struct rtmsg *rtm;
1889 struct nlmsghdr *nlh;
1890 unsigned char *b = skb->tail;
1891 struct rta_cacheinfo ci;
9e762a4a 1892 u32 table;
1da177e4
LT
1893
1894 if (prefix) { /* user wants prefix routes only */
1895 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1896 /* success since this is not a prefix route */
1897 return 1;
1898 }
1899 }
1900
b6544c0b 1901 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1da177e4
LT
1902 rtm = NLMSG_DATA(nlh);
1903 rtm->rtm_family = AF_INET6;
1904 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1905 rtm->rtm_src_len = rt->rt6i_src.plen;
1906 rtm->rtm_tos = 0;
c71099ac 1907 if (rt->rt6i_table)
9e762a4a 1908 table = rt->rt6i_table->tb6_id;
c71099ac 1909 else
9e762a4a
PM
1910 table = RT6_TABLE_UNSPEC;
1911 rtm->rtm_table = table;
1912 RTA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
1913 if (rt->rt6i_flags&RTF_REJECT)
1914 rtm->rtm_type = RTN_UNREACHABLE;
1915 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1916 rtm->rtm_type = RTN_LOCAL;
1917 else
1918 rtm->rtm_type = RTN_UNICAST;
1919 rtm->rtm_flags = 0;
1920 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1921 rtm->rtm_protocol = rt->rt6i_protocol;
1922 if (rt->rt6i_flags&RTF_DYNAMIC)
1923 rtm->rtm_protocol = RTPROT_REDIRECT;
1924 else if (rt->rt6i_flags & RTF_ADDRCONF)
1925 rtm->rtm_protocol = RTPROT_KERNEL;
1926 else if (rt->rt6i_flags&RTF_DEFAULT)
1927 rtm->rtm_protocol = RTPROT_RA;
1928
1929 if (rt->rt6i_flags&RTF_CACHE)
1930 rtm->rtm_flags |= RTM_F_CLONED;
1931
1932 if (dst) {
1933 RTA_PUT(skb, RTA_DST, 16, dst);
1934 rtm->rtm_dst_len = 128;
1935 } else if (rtm->rtm_dst_len)
1936 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1937#ifdef CONFIG_IPV6_SUBTREES
1938 if (src) {
1939 RTA_PUT(skb, RTA_SRC, 16, src);
1940 rtm->rtm_src_len = 128;
1941 } else if (rtm->rtm_src_len)
1942 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1943#endif
1944 if (iif)
1945 RTA_PUT(skb, RTA_IIF, 4, &iif);
1946 else if (dst) {
1947 struct in6_addr saddr_buf;
1948 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1949 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1950 }
1951 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1952 goto rtattr_failure;
1953 if (rt->u.dst.neighbour)
1954 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1955 if (rt->u.dst.dev)
1956 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1957 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1958 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1959 if (rt->rt6i_expires)
1960 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1961 else
1962 ci.rta_expires = 0;
1963 ci.rta_used = rt->u.dst.__use;
1964 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1965 ci.rta_error = rt->u.dst.error;
1966 ci.rta_id = 0;
1967 ci.rta_ts = 0;
1968 ci.rta_tsage = 0;
1969 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1970 nlh->nlmsg_len = skb->tail - b;
1971 return skb->len;
1972
1973nlmsg_failure:
1974rtattr_failure:
1975 skb_trim(skb, b - skb->data);
1976 return -1;
1977}
1978
1979static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1980{
1981 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1982 int prefix;
1983
1984 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1985 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1986 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1987 } else
1988 prefix = 0;
1989
1990 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1991 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 1992 prefix, NLM_F_MULTI);
1da177e4
LT
1993}
1994
1995static int fib6_dump_node(struct fib6_walker_t *w)
1996{
1997 int res;
1998 struct rt6_info *rt;
1999
2000 for (rt = w->leaf; rt; rt = rt->u.next) {
2001 res = rt6_dump_route(rt, w->args);
2002 if (res < 0) {
2003 /* Frame is full, suspend walking */
2004 w->leaf = rt;
2005 return 1;
2006 }
2007 BUG_TRAP(res!=0);
2008 }
2009 w->leaf = NULL;
2010 return 0;
2011}
2012
2013static void fib6_dump_end(struct netlink_callback *cb)
2014{
2015 struct fib6_walker_t *w = (void*)cb->args[0];
2016
2017 if (w) {
2018 cb->args[0] = 0;
1da177e4
LT
2019 kfree(w);
2020 }
efacfbcb
HX
2021 cb->done = (void*)cb->args[1];
2022 cb->args[1] = 0;
1da177e4
LT
2023}
2024
2025static int fib6_dump_done(struct netlink_callback *cb)
2026{
2027 fib6_dump_end(cb);
a8f74b22 2028 return cb->done ? cb->done(cb) : 0;
1da177e4
LT
2029}
2030
2031int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
2032{
c71099ac 2033 struct fib6_table *table;
1da177e4
LT
2034 struct rt6_rtnl_dump_arg arg;
2035 struct fib6_walker_t *w;
c71099ac 2036 int i, res = 0;
1da177e4
LT
2037
2038 arg.skb = skb;
2039 arg.cb = cb;
2040
c71099ac
TG
2041 /*
2042 * cb->args[0] = pointer to walker structure
2043 * cb->args[1] = saved cb->done() pointer
2044 * cb->args[2] = current table being dumped
2045 */
2046
1da177e4
LT
2047 w = (void*)cb->args[0];
2048 if (w == NULL) {
2049 /* New dump:
2050 *
2051 * 1. hook callback destructor.
2052 */
2053 cb->args[1] = (long)cb->done;
2054 cb->done = fib6_dump_done;
2055
2056 /*
2057 * 2. allocate and initialize walker.
2058 */
0c600eda 2059 w = kzalloc(sizeof(*w), GFP_ATOMIC);
1da177e4
LT
2060 if (w == NULL)
2061 return -ENOMEM;
1da177e4
LT
2062 w->func = fib6_dump_node;
2063 w->args = &arg;
2064 cb->args[0] = (long)w;
c71099ac 2065 cb->args[2] = FIB6_TABLE_MIN;
1da177e4
LT
2066 } else {
2067 w->args = &arg;
c71099ac
TG
2068 i = cb->args[2];
2069 if (i > FIB6_TABLE_MAX)
2070 goto end;
2071
2072 table = fib6_get_table(i);
2073 if (table != NULL) {
2074 read_lock_bh(&table->tb6_lock);
2075 w->root = &table->tb6_root;
2076 res = fib6_walk_continue(w);
2077 read_unlock_bh(&table->tb6_lock);
2078 if (res != 0) {
2079 if (res < 0)
2080 fib6_walker_unlink(w);
2081 goto end;
2082 }
2083 }
2084
2085 fib6_walker_unlink(w);
2086 cb->args[2] = ++i;
1da177e4 2087 }
c71099ac
TG
2088
2089 for (i = cb->args[2]; i <= FIB6_TABLE_MAX; i++) {
2090 table = fib6_get_table(i);
2091 if (table == NULL)
2092 continue;
2093
2094 read_lock_bh(&table->tb6_lock);
2095 w->root = &table->tb6_root;
2096 res = fib6_walk(w);
2097 read_unlock_bh(&table->tb6_lock);
2098 if (res)
2099 break;
2100 }
2101end:
2102 cb->args[2] = i;
2103
1da177e4
LT
2104 res = res < 0 ? res : skb->len;
2105 /* res < 0 is an error. (really, impossible)
2106 res == 0 means that dump is complete, but skb still can contain data.
2107 res > 0 dump is not complete, but frame is full.
2108 */
2109 /* Destroy walker, if dump of this table is complete. */
2110 if (res <= 0)
2111 fib6_dump_end(cb);
2112 return res;
2113}
2114
2115int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2116{
2117 struct rtattr **rta = arg;
2118 int iif = 0;
2119 int err = -ENOBUFS;
2120 struct sk_buff *skb;
2121 struct flowi fl;
2122 struct rt6_info *rt;
2123
2124 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2125 if (skb == NULL)
2126 goto out;
2127
2128 /* Reserve room for dummy headers, this skb can pass
2129 through good chunk of routing engine.
2130 */
2131 skb->mac.raw = skb->data;
2132 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2133
2134 memset(&fl, 0, sizeof(fl));
2135 if (rta[RTA_SRC-1])
2136 ipv6_addr_copy(&fl.fl6_src,
2137 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
2138 if (rta[RTA_DST-1])
2139 ipv6_addr_copy(&fl.fl6_dst,
2140 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
2141
2142 if (rta[RTA_IIF-1])
2143 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
2144
2145 if (iif) {
2146 struct net_device *dev;
2147 dev = __dev_get_by_index(iif);
2148 if (!dev) {
2149 err = -ENODEV;
2150 goto out_free;
2151 }
2152 }
2153
2154 fl.oif = 0;
2155 if (rta[RTA_OIF-1])
2156 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
2157
2158 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
2159
2160 skb->dst = &rt->u.dst;
2161
2162 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
2163 err = rt6_fill_node(skb, rt,
2164 &fl.fl6_dst, &fl.fl6_src,
2165 iif,
2166 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 2167 nlh->nlmsg_seq, 0, 0);
1da177e4
LT
2168 if (err < 0) {
2169 err = -EMSGSIZE;
2170 goto out_free;
2171 }
2172
2173 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
2174 if (err > 0)
2175 err = 0;
2176out:
2177 return err;
2178out_free:
2179 kfree_skb(skb);
2180 goto out;
2181}
2182
0d51aa80
JHS
2183void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
2184 struct netlink_skb_parms *req)
1da177e4
LT
2185{
2186 struct sk_buff *skb;
2187 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
0d51aa80
JHS
2188 u32 pid = current->pid;
2189 u32 seq = 0;
1da177e4 2190
0d51aa80
JHS
2191 if (req)
2192 pid = req->pid;
2193 if (nlh)
2194 seq = nlh->nlmsg_seq;
2195
1da177e4
LT
2196 skb = alloc_skb(size, gfp_any());
2197 if (!skb) {
ac6d439d 2198 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1da177e4
LT
2199 return;
2200 }
0d51aa80 2201 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1da177e4 2202 kfree_skb(skb);
ac6d439d 2203 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1da177e4
LT
2204 return;
2205 }
ac6d439d
PM
2206 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
2207 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1da177e4
LT
2208}
2209
2210/*
2211 * /proc
2212 */
2213
2214#ifdef CONFIG_PROC_FS
2215
2216#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2217
/* State shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* records skipped so far to reach offset */
	int len;	/* bytes written to buffer so far */
};
2226
2227static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2228{
2229 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2230 int i;
2231
2232 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2233 arg->skip++;
2234 return 0;
2235 }
2236
2237 if (arg->len >= arg->length)
2238 return 0;
2239
2240 for (i=0; i<16; i++) {
2241 sprintf(arg->buffer + arg->len, "%02x",
2242 rt->rt6i_dst.addr.s6_addr[i]);
2243 arg->len += 2;
2244 }
2245 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2246 rt->rt6i_dst.plen);
2247
2248#ifdef CONFIG_IPV6_SUBTREES
2249 for (i=0; i<16; i++) {
2250 sprintf(arg->buffer + arg->len, "%02x",
2251 rt->rt6i_src.addr.s6_addr[i]);
2252 arg->len += 2;
2253 }
2254 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2255 rt->rt6i_src.plen);
2256#else
2257 sprintf(arg->buffer + arg->len,
2258 "00000000000000000000000000000000 00 ");
2259 arg->len += 36;
2260#endif
2261
2262 if (rt->rt6i_nexthop) {
2263 for (i=0; i<16; i++) {
2264 sprintf(arg->buffer + arg->len, "%02x",
2265 rt->rt6i_nexthop->primary_key[i]);
2266 arg->len += 2;
2267 }
2268 } else {
2269 sprintf(arg->buffer + arg->len,
2270 "00000000000000000000000000000000");
2271 arg->len += 32;
2272 }
2273 arg->len += sprintf(arg->buffer + arg->len,
2274 " %08x %08x %08x %08x %8s\n",
2275 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2276 rt->u.dst.__use, rt->rt6i_flags,
2277 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2278 return 0;
2279}
2280
2281static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2282{
c71099ac
TG
2283 struct rt6_proc_arg arg = {
2284 .buffer = buffer,
2285 .offset = offset,
2286 .length = length,
2287 };
1da177e4 2288
c71099ac 2289 fib6_clean_all(rt6_info_route, 0, &arg);
1da177e4
LT
2290
2291 *start = buffer;
2292 if (offset)
2293 *start += offset % RT6_INFO_LEN;
2294
2295 arg.len -= offset % RT6_INFO_LEN;
2296
2297 if (arg.len > length)
2298 arg.len = length;
2299 if (arg.len < 0)
2300 arg.len = 0;
2301
2302 return arg.len;
2303}
2304
1da177e4
LT
2305static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2306{
2307 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2308 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2309 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2310 rt6_stats.fib_rt_cache,
2311 atomic_read(&ip6_dst_ops.entries),
2312 rt6_stats.fib_discarded_routes);
2313
2314 return 0;
2315}
2316
2317static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2318{
2319 return single_open(file, rt6_stats_seq_show, NULL);
2320}
2321
2322static struct file_operations rt6_stats_seq_fops = {
2323 .owner = THIS_MODULE,
2324 .open = rt6_stats_seq_open,
2325 .read = seq_read,
2326 .llseek = seq_lseek,
2327 .release = single_release,
2328};
2329#endif /* CONFIG_PROC_FS */
2330
2331#ifdef CONFIG_SYSCTL
2332
2333static int flush_delay;
2334
2335static
2336int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2337 void __user *buffer, size_t *lenp, loff_t *ppos)
2338{
2339 if (write) {
2340 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2341 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2342 return 0;
2343 } else
2344 return -EINVAL;
2345}
2346
2347ctl_table ipv6_route_table[] = {
2348 {
2349 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2350 .procname = "flush",
2351 .data = &flush_delay,
2352 .maxlen = sizeof(int),
89c8b3a1 2353 .mode = 0200,
1da177e4
LT
2354 .proc_handler = &ipv6_sysctl_rtcache_flush
2355 },
2356 {
2357 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2358 .procname = "gc_thresh",
2359 .data = &ip6_dst_ops.gc_thresh,
2360 .maxlen = sizeof(int),
2361 .mode = 0644,
2362 .proc_handler = &proc_dointvec,
2363 },
2364 {
2365 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2366 .procname = "max_size",
2367 .data = &ip6_rt_max_size,
2368 .maxlen = sizeof(int),
2369 .mode = 0644,
2370 .proc_handler = &proc_dointvec,
2371 },
2372 {
2373 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2374 .procname = "gc_min_interval",
2375 .data = &ip6_rt_gc_min_interval,
2376 .maxlen = sizeof(int),
2377 .mode = 0644,
2378 .proc_handler = &proc_dointvec_jiffies,
2379 .strategy = &sysctl_jiffies,
2380 },
2381 {
2382 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2383 .procname = "gc_timeout",
2384 .data = &ip6_rt_gc_timeout,
2385 .maxlen = sizeof(int),
2386 .mode = 0644,
2387 .proc_handler = &proc_dointvec_jiffies,
2388 .strategy = &sysctl_jiffies,
2389 },
2390 {
2391 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2392 .procname = "gc_interval",
2393 .data = &ip6_rt_gc_interval,
2394 .maxlen = sizeof(int),
2395 .mode = 0644,
2396 .proc_handler = &proc_dointvec_jiffies,
2397 .strategy = &sysctl_jiffies,
2398 },
2399 {
2400 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2401 .procname = "gc_elasticity",
2402 .data = &ip6_rt_gc_elasticity,
2403 .maxlen = sizeof(int),
2404 .mode = 0644,
2405 .proc_handler = &proc_dointvec_jiffies,
2406 .strategy = &sysctl_jiffies,
2407 },
2408 {
2409 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2410 .procname = "mtu_expires",
2411 .data = &ip6_rt_mtu_expires,
2412 .maxlen = sizeof(int),
2413 .mode = 0644,
2414 .proc_handler = &proc_dointvec_jiffies,
2415 .strategy = &sysctl_jiffies,
2416 },
2417 {
2418 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2419 .procname = "min_adv_mss",
2420 .data = &ip6_rt_min_advmss,
2421 .maxlen = sizeof(int),
2422 .mode = 0644,
2423 .proc_handler = &proc_dointvec_jiffies,
2424 .strategy = &sysctl_jiffies,
2425 },
2426 {
2427 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2428 .procname = "gc_min_interval_ms",
2429 .data = &ip6_rt_gc_min_interval,
2430 .maxlen = sizeof(int),
2431 .mode = 0644,
2432 .proc_handler = &proc_dointvec_ms_jiffies,
2433 .strategy = &sysctl_ms_jiffies,
2434 },
2435 { .ctl_name = 0 }
2436};
2437
2438#endif
2439
2440void __init ip6_route_init(void)
2441{
2442 struct proc_dir_entry *p;
2443
2444 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2445 sizeof(struct rt6_info),
2446 0, SLAB_HWCACHE_ALIGN,
2447 NULL, NULL);
2448 if (!ip6_dst_ops.kmem_cachep)
2449 panic("cannot create ip6_dst_cache");
2450
2451 fib6_init();
2452#ifdef CONFIG_PROC_FS
2453 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2454 if (p)
2455 p->owner = THIS_MODULE;
2456
2457 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2458#endif
2459#ifdef CONFIG_XFRM
2460 xfrm6_init();
2461#endif
101367c2
TG
2462#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2463 fib6_rules_init();
2464#endif
1da177e4
LT
2465}
2466
2467void ip6_route_cleanup(void)
2468{
101367c2
TG
2469#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2470 fib6_rules_cleanup();
2471#endif
1da177e4
LT
2472#ifdef CONFIG_PROC_FS
2473 proc_net_remove("ipv6_route");
2474 proc_net_remove("rt6_stats");
2475#endif
2476#ifdef CONFIG_XFRM
2477 xfrm6_fini();
2478#endif
2479 rt6_ifdown(NULL);
2480 fib6_gc_cleanup();
2481 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2482}