[IPV6]: ROUTE: Add experimental support for Route Information Option in RA (RFC4191).
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/config.h>
29#include <linux/errno.h>
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
38#include <linux/init.h>
39#include <linux/netlink.h>
40#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  With RT6_DEBUG >= 3 both RDBG() and
 * RT6_TRACE() expand to printk calls; otherwise they compile to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif
74
/*
 * When 0, ip6_route_input()/ip6_route_output() use a route that already
 * has a nexthop (or is RTF_NONEXTHOP) directly instead of cloning it.
 */
#define CLONE_OFFLINK_ROUTE 0

/*
 * Flags for the "strict" argument of rt6_select()/rt6_score_route():
 *   RT6_SELECT_F_IFACE     - reject routes whose device does not match oif
 *   RT6_SELECT_F_REACHABLE - reject routes whose nexthop is not NUD_VALID
 */
#define RT6_SELECT_F_IFACE	0x1
#define RT6_SELECT_F_REACHABLE	0x2
79
1da177e4
LT
80static int ip6_rt_max_size = 4096;
81static int ip6_rt_gc_min_interval = HZ / 2;
82static int ip6_rt_gc_timeout = 60*HZ;
83int ip6_rt_gc_interval = 30*HZ;
84static int ip6_rt_gc_elasticity = 9;
85static int ip6_rt_mtu_expires = 10*60*HZ;
86static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94static int ip6_dst_gc(void);
95
96static int ip6_pkt_discard(struct sk_buff *skb);
97static int ip6_pkt_discard_out(struct sk_buff *skb);
98static void ip6_link_failure(struct sk_buff *skb);
99static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
70ceb4f5
YH
101#ifdef CONFIG_IPV6_ROUTE_INFO
102static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
104 unsigned pref);
105static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
107#endif
108
1da177e4
LT
109static struct dst_ops ip6_dst_ops = {
110 .family = AF_INET6,
111 .protocol = __constant_htons(ETH_P_IPV6),
112 .gc = ip6_dst_gc,
113 .gc_thresh = 1024,
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
121};
122
123struct rt6_info ip6_null_entry = {
124 .u = {
125 .dst = {
126 .__refcnt = ATOMIC_INIT(1),
127 .__use = 1,
128 .dev = &loopback_dev,
129 .obsolete = -1,
130 .error = -ENETUNREACH,
131 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
132 .input = ip6_pkt_discard,
133 .output = ip6_pkt_discard_out,
134 .ops = &ip6_dst_ops,
135 .path = (struct dst_entry*)&ip6_null_entry,
136 }
137 },
138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
141};
142
143struct fib6_node ip6_routing_table = {
144 .leaf = &ip6_null_entry,
145 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
146};
147
148/* Protects all the ip6 fib */
149
150DEFINE_RWLOCK(rt6_lock);
151
152
153/* allocate dst with ip6_dst_ops */
154static __inline__ struct rt6_info *ip6_dst_alloc(void)
155{
156 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
157}
158
159static void ip6_dst_destroy(struct dst_entry *dst)
160{
161 struct rt6_info *rt = (struct rt6_info *)dst;
162 struct inet6_dev *idev = rt->rt6i_idev;
163
164 if (idev != NULL) {
165 rt->rt6i_idev = NULL;
166 in6_dev_put(idev);
167 }
168}
169
170static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
171 int how)
172{
173 struct rt6_info *rt = (struct rt6_info *)dst;
174 struct inet6_dev *idev = rt->rt6i_idev;
175
176 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
177 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
178 if (loopback_idev != NULL) {
179 rt->rt6i_idev = loopback_idev;
180 in6_dev_put(idev);
181 }
182 }
183}
184
185static __inline__ int rt6_check_expired(const struct rt6_info *rt)
186{
187 return (rt->rt6i_flags & RTF_EXPIRES &&
188 time_after(jiffies, rt->rt6i_expires));
189}
190
191/*
192 * Route lookup. Any rt6_lock is implied.
193 */
194
195static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
196 int oif,
197 int strict)
198{
199 struct rt6_info *local = NULL;
200 struct rt6_info *sprt;
201
202 if (oif) {
203 for (sprt = rt; sprt; sprt = sprt->u.next) {
204 struct net_device *dev = sprt->rt6i_dev;
205 if (dev->ifindex == oif)
206 return sprt;
207 if (dev->flags & IFF_LOOPBACK) {
208 if (sprt->rt6i_idev == NULL ||
209 sprt->rt6i_idev->dev->ifindex != oif) {
210 if (strict && oif)
211 continue;
212 if (local && (!oif ||
213 local->rt6i_idev->dev->ifindex == oif))
214 continue;
215 }
216 local = sprt;
217 }
218 }
219
220 if (local)
221 return local;
222
223 if (strict)
224 return &ip6_null_entry;
225 }
226 return rt;
227}
228
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the nexthop of rt is not in a
 * NUD_VALID state and the per-device rtr_probe_interval has elapsed
 * since the neighbour was last updated, send a Neighbour Solicitation
 * to the solicited-node multicast address of the nexthop.
 *
 * Accepts rt == NULL (nothing to do).  The rate limit is enforced by
 * stamping neigh->updated before the solicitation goes out.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int send_probe = 0;

	/* Cheap unlocked check first; it is repeated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		neigh->updated = jiffies;
		send_probe = 1;
	}
	read_unlock_bh(&neigh->lock);

	if (!send_probe)
		return;

	/* The solicitation itself is sent without holding the neighbour lock. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
264
1da177e4 265/*
554cfb7e 266 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 267 */
554cfb7e
YH
268static int inline rt6_check_dev(struct rt6_info *rt, int oif)
269{
270 struct net_device *dev = rt->rt6i_dev;
271 if (!oif || dev->ifindex == oif)
272 return 2;
273 if ((dev->flags & IFF_LOOPBACK) &&
274 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
275 return 1;
276 return 0;
277}
1da177e4 278
554cfb7e 279static int inline rt6_check_neigh(struct rt6_info *rt)
1da177e4 280{
554cfb7e
YH
281 struct neighbour *neigh = rt->rt6i_nexthop;
282 int m = 0;
283 if (neigh) {
284 read_lock_bh(&neigh->lock);
285 if (neigh->nud_state & NUD_VALID)
286 m = 1;
287 read_unlock_bh(&neigh->lock);
1da177e4 288 }
554cfb7e 289 return m;
1da177e4
LT
290}
291
554cfb7e
YH
292static int rt6_score_route(struct rt6_info *rt, int oif,
293 int strict)
1da177e4 294{
554cfb7e
YH
295 int m = rt6_check_dev(rt, oif);
296 if (!m && (strict & RT6_SELECT_F_IFACE))
297 return -1;
ebacaaa0
YH
298#ifdef CONFIG_IPV6_ROUTER_PREF
299 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
300#endif
554cfb7e 301 if (rt6_check_neigh(rt))
ebacaaa0 302 m |= 16;
554cfb7e
YH
303 else if (strict & RT6_SELECT_F_REACHABLE)
304 return -1;
305 return m;
306}
307
308static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
309 int strict)
310{
311 struct rt6_info *match = NULL, *last = NULL;
312 struct rt6_info *rt, *rt0 = *head;
313 u32 metric;
314 int mpri = -1;
1da177e4 315
554cfb7e
YH
316 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
317 __FUNCTION__, head, head ? *head : NULL, oif);
1da177e4 318
554cfb7e
YH
319 for (rt = rt0, metric = rt0->rt6i_metric;
320 rt && rt->rt6i_metric == metric;
321 rt = rt->u.next) {
322 int m;
1da177e4 323
554cfb7e 324 if (rt6_check_expired(rt))
1da177e4
LT
325 continue;
326
554cfb7e
YH
327 last = rt;
328
329 m = rt6_score_route(rt, oif, strict);
330 if (m < 0)
1da177e4 331 continue;
1da177e4 332
554cfb7e 333 if (m > mpri) {
27097255 334 rt6_probe(match);
554cfb7e 335 match = rt;
1da177e4 336 mpri = m;
27097255
YH
337 } else {
338 rt6_probe(rt);
1da177e4
LT
339 }
340 }
341
554cfb7e
YH
342 if (!match &&
343 (strict & RT6_SELECT_F_REACHABLE) &&
344 last && last != rt0) {
345 /* no entries matched; do round-robin */
346 *head = rt0->u.next;
347 rt0->u.next = last->u.next;
348 last->u.next = rt0;
1da177e4 349 }
1da177e4 350
554cfb7e
YH
351 RT6_TRACE("%s() => %p, score=%d\n",
352 __FUNCTION__, match, mpri);
1da177e4 353
554cfb7e 354 return (match ? match : &ip6_null_entry);
1da177e4
LT
355}
356
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process one Route Information Option received in a Router
 * Advertisement (RFC 4191, section 2.3).
 *
 * @dev:    device the advertisement arrived on
 * @opt:    start of the option (type octet first)
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router, used as the gateway
 *
 * A lifetime of 0 removes a matching route, 0xffffffff installs one that
 * never expires, anything else (re)arms the expiry timer.  The preference
 * of an existing route is refreshed.
 *
 * Returns 0 on success and -EINVAL when the option is malformed and has
 * been ignored.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned int min_length;
	u32 lifetime;
	struct rt6_info *rt;

	/* Signed comparison on purpose: a negative len must not pass. */
	if (len < (int)sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length.  RFC 4191 2.3: Length
	 * (in units of 8 octets) must be 3 if Prefix Length > 64, 2 or 3
	 * if Prefix Length > 0, and 1, 2 or 3 if Prefix Length is 0.
	 * Accepting a shorter option would make the prefix copy below
	 * read past the end of the option.
	 */
	if (rinfo->prefix_len > 128)
		return -EINVAL;

	if (rinfo->prefix_len > 64)
		min_length = 3;
	else if (rinfo->prefix_len > 0)
		min_length = 2;
	else
		min_length = 1;

	if (rinfo->length < min_length || rinfo->length > 3)
		return -EINVAL;

	/* The option must fit in the buffer we were given. */
	if (len < (int)(rinfo->length << 3))
		return -EINVAL;

	/*
	 * RFC 4191 2.3: if the reserved preference value is received, the
	 * Route Information Option MUST be ignored (it is not treated as
	 * medium, unlike the default router preference of the RA header).
	 */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	/* The lifetime is carried in network byte order. */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* Copy only prefix_len bits; the option is shorter than 16 prefix octets. */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		/* ip6_del_rt() drops the reference we hold on rt. */
		ip6_del_rt(rt, NULL, NULL, NULL);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
434
1da177e4
LT
435struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
436 int oif, int strict)
437{
438 struct fib6_node *fn;
439 struct rt6_info *rt;
440
441 read_lock_bh(&rt6_lock);
442 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
443 rt = rt6_device_match(fn->leaf, oif, strict);
444 dst_hold(&rt->u.dst);
445 rt->u.dst.__use++;
446 read_unlock_bh(&rt6_lock);
447
448 rt->u.dst.lastuse = jiffies;
449 if (rt->u.dst.error == 0)
450 return rt;
451 dst_release(&rt->u.dst);
452 return NULL;
453}
454
455/* ip6_ins_rt is called with FREE rt6_lock.
456 It takes new route entry, the addition fails by any reason the
457 route is freed. In any case, if caller does not hold it, it may
458 be destroyed.
459 */
460
0d51aa80
JHS
461int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
462 void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
463{
464 int err;
465
466 write_lock_bh(&rt6_lock);
0d51aa80 467 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
1da177e4
LT
468 write_unlock_bh(&rt6_lock);
469
470 return err;
471}
472
95a9a5ba
YH
473static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
474 struct in6_addr *saddr)
1da177e4 475{
1da177e4
LT
476 struct rt6_info *rt;
477
478 /*
479 * Clone the route.
480 */
481
482 rt = ip6_rt_copy(ort);
483
484 if (rt) {
58c4fb86
YH
485 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
486 if (rt->rt6i_dst.plen != 128 &&
487 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
488 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 489 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 490 }
1da177e4 491
58c4fb86 492 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
493 rt->rt6i_dst.plen = 128;
494 rt->rt6i_flags |= RTF_CACHE;
495 rt->u.dst.flags |= DST_HOST;
496
497#ifdef CONFIG_IPV6_SUBTREES
498 if (rt->rt6i_src.plen && saddr) {
499 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
500 rt->rt6i_src.plen = 128;
501 }
502#endif
503
504 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
505
95a9a5ba 506 }
1da177e4 507
95a9a5ba
YH
508 return rt;
509}
1da177e4 510
299d9939
YH
511static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
512{
513 struct rt6_info *rt = ip6_rt_copy(ort);
514 if (rt) {
515 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
516 rt->rt6i_dst.plen = 128;
517 rt->rt6i_flags |= RTF_CACHE;
518 if (rt->rt6i_flags & RTF_REJECT)
519 rt->u.dst.error = ort->u.dst.error;
520 rt->u.dst.flags |= DST_HOST;
521 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
522 }
523 return rt;
524}
525
/*
 * Backtrack towards the root of the FIB tree after a failed selection.
 *
 * Relies on the expansion site providing:
 *   - locals "rt" (struct rt6_info *) and "fn" (struct fib6_node *),
 *   - labels "out" and "restart".
 *
 * If rt is the null entry, fn is moved up parent by parent.  Reaching a
 * node flagged RTN_ROOT jumps to "out"; reaching a node that carries
 * route information (RTN_RTINFO) jumps to "restart" to retry there.  If
 * the parent chain ends, execution simply continues after the macro.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is a single
 * statement and cannot capture a following "else".
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
536
537
538void ip6_route_input(struct sk_buff *skb)
539{
540 struct fib6_node *fn;
519fbd87 541 struct rt6_info *rt, *nrt;
1da177e4
LT
542 int strict;
543 int attempts = 3;
519fbd87 544 int err;
8238dd06 545 int reachable = RT6_SELECT_F_REACHABLE;
1da177e4 546
118f8c16 547 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
1da177e4
LT
548
549relookup:
550 read_lock_bh(&rt6_lock);
551
8238dd06 552restart_2:
1da177e4
LT
553 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
554 &skb->nh.ipv6h->saddr);
555
556restart:
8238dd06 557 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
1da177e4 558 BACKTRACK();
8238dd06
YH
559 if (rt == &ip6_null_entry ||
560 rt->rt6i_flags & RTF_CACHE)
1ddef044 561 goto out;
1da177e4 562
fb9de91e
YH
563 dst_hold(&rt->u.dst);
564 read_unlock_bh(&rt6_lock);
565
519fbd87
YH
566 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
567 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
568 else {
569#if CLONE_OFFLINK_ROUTE
570 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
571#else
572 goto out2;
573#endif
574 }
e40cf353 575
519fbd87
YH
576 dst_release(&rt->u.dst);
577 rt = nrt ? : &ip6_null_entry;
1da177e4 578
519fbd87
YH
579 dst_hold(&rt->u.dst);
580 if (nrt) {
581 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
582 if (!err)
1da177e4 583 goto out2;
1da177e4 584 }
1da177e4 585
519fbd87
YH
586 if (--attempts <= 0)
587 goto out2;
588
589 /*
590 * Race condition! In the gap, when rt6_lock was
591 * released someone could insert this route. Relookup.
592 */
593 dst_release(&rt->u.dst);
594 goto relookup;
595
596out:
8238dd06
YH
597 if (reachable) {
598 reachable = 0;
599 goto restart_2;
600 }
519fbd87
YH
601 dst_hold(&rt->u.dst);
602 read_unlock_bh(&rt6_lock);
1da177e4
LT
603out2:
604 rt->u.dst.lastuse = jiffies;
605 rt->u.dst.__use++;
606 skb->dst = (struct dst_entry *) rt;
fb9de91e 607 return;
1da177e4
LT
608}
609
610struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
611{
612 struct fib6_node *fn;
519fbd87 613 struct rt6_info *rt, *nrt;
1da177e4
LT
614 int strict;
615 int attempts = 3;
519fbd87 616 int err;
8238dd06 617 int reachable = RT6_SELECT_F_REACHABLE;
1da177e4 618
554cfb7e 619 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
1da177e4
LT
620
621relookup:
622 read_lock_bh(&rt6_lock);
623
8238dd06 624restart_2:
1da177e4
LT
625 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
626
627restart:
8238dd06 628 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
1ddef044 629 BACKTRACK();
8238dd06
YH
630 if (rt == &ip6_null_entry ||
631 rt->rt6i_flags & RTF_CACHE)
1da177e4 632 goto out;
1da177e4 633
fb9de91e
YH
634 dst_hold(&rt->u.dst);
635 read_unlock_bh(&rt6_lock);
636
519fbd87 637 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
e40cf353 638 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
639 else {
640#if CLONE_OFFLINK_ROUTE
641 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
642#else
643 goto out2;
644#endif
645 }
1da177e4 646
519fbd87
YH
647 dst_release(&rt->u.dst);
648 rt = nrt ? : &ip6_null_entry;
1da177e4 649
519fbd87
YH
650 dst_hold(&rt->u.dst);
651 if (nrt) {
652 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
653 if (!err)
1da177e4 654 goto out2;
1da177e4 655 }
e40cf353 656
519fbd87
YH
657 if (--attempts <= 0)
658 goto out2;
659
660 /*
661 * Race condition! In the gap, when rt6_lock was
662 * released someone could insert this route. Relookup.
663 */
664 dst_release(&rt->u.dst);
665 goto relookup;
666
667out:
8238dd06
YH
668 if (reachable) {
669 reachable = 0;
670 goto restart_2;
671 }
519fbd87
YH
672 dst_hold(&rt->u.dst);
673 read_unlock_bh(&rt6_lock);
1da177e4
LT
674out2:
675 rt->u.dst.lastuse = jiffies;
676 rt->u.dst.__use++;
677 return &rt->u.dst;
678}
679
680
681/*
682 * Destination cache support functions
683 */
684
685static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
686{
687 struct rt6_info *rt;
688
689 rt = (struct rt6_info *) dst;
690
691 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
692 return dst;
693
694 return NULL;
695}
696
697static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
698{
699 struct rt6_info *rt = (struct rt6_info *) dst;
700
701 if (rt) {
702 if (rt->rt6i_flags & RTF_CACHE)
0d51aa80 703 ip6_del_rt(rt, NULL, NULL, NULL);
1da177e4
LT
704 else
705 dst_release(dst);
706 }
707 return NULL;
708}
709
710static void ip6_link_failure(struct sk_buff *skb)
711{
712 struct rt6_info *rt;
713
714 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
715
716 rt = (struct rt6_info *) skb->dst;
717 if (rt) {
718 if (rt->rt6i_flags&RTF_CACHE) {
719 dst_set_expires(&rt->u.dst, 0);
720 rt->rt6i_flags |= RTF_EXPIRES;
721 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
722 rt->rt6i_node->fn_sernum = -1;
723 }
724}
725
726static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
727{
728 struct rt6_info *rt6 = (struct rt6_info*)dst;
729
730 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
731 rt6->rt6i_flags |= RTF_MODIFIED;
732 if (mtu < IPV6_MIN_MTU) {
733 mtu = IPV6_MIN_MTU;
734 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
735 }
736 dst->metrics[RTAX_MTU-1] = mtu;
737 }
738}
739
/*
 * List of dst entries created by ndisc_dst_alloc(), linked through
 * dst->next and reaped by ndisc_dst_gc().  Protected by rt6_lock.
 */
static struct dst_entry *ndisc_dst_gc_list;
742static int ipv6_get_mtu(struct net_device *dev);
743
744static inline unsigned int ipv6_advmss(unsigned int mtu)
745{
746 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
747
748 if (mtu < ip6_rt_min_advmss)
749 mtu = ip6_rt_min_advmss;
750
751 /*
752 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
753 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
754 * IPV6_MAXPLEN is also valid and means: "any MSS,
755 * rely only on pmtu discovery"
756 */
757 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
758 mtu = IPV6_MAXPLEN;
759 return mtu;
760}
761
762struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
763 struct neighbour *neigh,
764 struct in6_addr *addr,
765 int (*output)(struct sk_buff *))
766{
767 struct rt6_info *rt;
768 struct inet6_dev *idev = in6_dev_get(dev);
769
770 if (unlikely(idev == NULL))
771 return NULL;
772
773 rt = ip6_dst_alloc();
774 if (unlikely(rt == NULL)) {
775 in6_dev_put(idev);
776 goto out;
777 }
778
779 dev_hold(dev);
780 if (neigh)
781 neigh_hold(neigh);
782 else
783 neigh = ndisc_get_neigh(dev, addr);
784
785 rt->rt6i_dev = dev;
786 rt->rt6i_idev = idev;
787 rt->rt6i_nexthop = neigh;
788 atomic_set(&rt->u.dst.__refcnt, 1);
789 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
790 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
791 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
792 rt->u.dst.output = output;
793
794#if 0 /* there's no chance to use these for ndisc */
795 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
796 ? DST_HOST
797 : 0;
798 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
799 rt->rt6i_dst.plen = 128;
800#endif
801
802 write_lock_bh(&rt6_lock);
803 rt->u.dst.next = ndisc_dst_gc_list;
804 ndisc_dst_gc_list = &rt->u.dst;
805 write_unlock_bh(&rt6_lock);
806
807 fib6_force_start_gc();
808
809out:
810 return (struct dst_entry *)rt;
811}
812
813int ndisc_dst_gc(int *more)
814{
815 struct dst_entry *dst, *next, **pprev;
816 int freed;
817
818 next = NULL;
819 pprev = &ndisc_dst_gc_list;
820 freed = 0;
821 while ((dst = *pprev) != NULL) {
822 if (!atomic_read(&dst->__refcnt)) {
823 *pprev = dst->next;
824 dst_free(dst);
825 freed++;
826 } else {
827 pprev = &dst->next;
828 (*more)++;
829 }
830 }
831
832 return freed;
833}
834
835static int ip6_dst_gc(void)
836{
837 static unsigned expire = 30*HZ;
838 static unsigned long last_gc;
839 unsigned long now = jiffies;
840
841 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
842 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
843 goto out;
844
845 expire++;
846 fib6_run_gc(expire);
847 last_gc = now;
848 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
849 expire = ip6_rt_gc_timeout>>1;
850
851out:
852 expire -= expire>>ip6_rt_gc_elasticity;
853 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
854}
855
856/* Clean host part of a prefix. Not necessary in radix tree,
857 but results in cleaner routing tables.
858
859 Remove it only when all the things will work!
860 */
861
862static int ipv6_get_mtu(struct net_device *dev)
863{
864 int mtu = IPV6_MIN_MTU;
865 struct inet6_dev *idev;
866
867 idev = in6_dev_get(dev);
868 if (idev) {
869 mtu = idev->cnf.mtu6;
870 in6_dev_put(idev);
871 }
872 return mtu;
873}
874
875int ipv6_get_hoplimit(struct net_device *dev)
876{
877 int hoplimit = ipv6_devconf.hop_limit;
878 struct inet6_dev *idev;
879
880 idev = in6_dev_get(dev);
881 if (idev) {
882 hoplimit = idev->cnf.hop_limit;
883 in6_dev_put(idev);
884 }
885 return hoplimit;
886}
887
888/*
889 *
890 */
891
0d51aa80
JHS
892int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
893 void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
894{
895 int err;
896 struct rtmsg *r;
897 struct rtattr **rta;
898 struct rt6_info *rt = NULL;
899 struct net_device *dev = NULL;
900 struct inet6_dev *idev = NULL;
901 int addr_type;
902
903 rta = (struct rtattr **) _rtattr;
904
905 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
906 return -EINVAL;
907#ifndef CONFIG_IPV6_SUBTREES
908 if (rtmsg->rtmsg_src_len)
909 return -EINVAL;
910#endif
911 if (rtmsg->rtmsg_ifindex) {
912 err = -ENODEV;
913 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
914 if (!dev)
915 goto out;
916 idev = in6_dev_get(dev);
917 if (!idev)
918 goto out;
919 }
920
921 if (rtmsg->rtmsg_metric == 0)
922 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
923
924 rt = ip6_dst_alloc();
925
926 if (rt == NULL) {
927 err = -ENOMEM;
928 goto out;
929 }
930
931 rt->u.dst.obsolete = -1;
3dd4bc68 932 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
1da177e4
LT
933 if (nlh && (r = NLMSG_DATA(nlh))) {
934 rt->rt6i_protocol = r->rtm_protocol;
935 } else {
936 rt->rt6i_protocol = RTPROT_BOOT;
937 }
938
939 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
940
941 if (addr_type & IPV6_ADDR_MULTICAST)
942 rt->u.dst.input = ip6_mc_input;
943 else
944 rt->u.dst.input = ip6_forward;
945
946 rt->u.dst.output = ip6_output;
947
948 ipv6_addr_prefix(&rt->rt6i_dst.addr,
949 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
950 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
951 if (rt->rt6i_dst.plen == 128)
952 rt->u.dst.flags = DST_HOST;
953
954#ifdef CONFIG_IPV6_SUBTREES
955 ipv6_addr_prefix(&rt->rt6i_src.addr,
956 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
957 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
958#endif
959
960 rt->rt6i_metric = rtmsg->rtmsg_metric;
961
962 /* We cannot add true routes via loopback here,
963 they would result in kernel looping; promote them to reject routes
964 */
965 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
966 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
967 /* hold loopback dev/idev if we haven't done so. */
968 if (dev != &loopback_dev) {
969 if (dev) {
970 dev_put(dev);
971 in6_dev_put(idev);
972 }
973 dev = &loopback_dev;
974 dev_hold(dev);
975 idev = in6_dev_get(dev);
976 if (!idev) {
977 err = -ENODEV;
978 goto out;
979 }
980 }
981 rt->u.dst.output = ip6_pkt_discard_out;
982 rt->u.dst.input = ip6_pkt_discard;
983 rt->u.dst.error = -ENETUNREACH;
984 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
985 goto install_route;
986 }
987
988 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
989 struct in6_addr *gw_addr;
990 int gwa_type;
991
992 gw_addr = &rtmsg->rtmsg_gateway;
993 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
994 gwa_type = ipv6_addr_type(gw_addr);
995
996 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
997 struct rt6_info *grt;
998
999 /* IPv6 strictly inhibits using not link-local
1000 addresses as nexthop address.
1001 Otherwise, router will not able to send redirects.
1002 It is very good, but in some (rare!) circumstances
1003 (SIT, PtP, NBMA NOARP links) it is handy to allow
1004 some exceptions. --ANK
1005 */
1006 err = -EINVAL;
1007 if (!(gwa_type&IPV6_ADDR_UNICAST))
1008 goto out;
1009
1010 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
1011
1012 err = -EHOSTUNREACH;
1013 if (grt == NULL)
1014 goto out;
1015 if (dev) {
1016 if (dev != grt->rt6i_dev) {
1017 dst_release(&grt->u.dst);
1018 goto out;
1019 }
1020 } else {
1021 dev = grt->rt6i_dev;
1022 idev = grt->rt6i_idev;
1023 dev_hold(dev);
1024 in6_dev_hold(grt->rt6i_idev);
1025 }
1026 if (!(grt->rt6i_flags&RTF_GATEWAY))
1027 err = 0;
1028 dst_release(&grt->u.dst);
1029
1030 if (err)
1031 goto out;
1032 }
1033 err = -EINVAL;
1034 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1035 goto out;
1036 }
1037
1038 err = -ENODEV;
1039 if (dev == NULL)
1040 goto out;
1041
1042 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
1043 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1044 if (IS_ERR(rt->rt6i_nexthop)) {
1045 err = PTR_ERR(rt->rt6i_nexthop);
1046 rt->rt6i_nexthop = NULL;
1047 goto out;
1048 }
1049 }
1050
1051 rt->rt6i_flags = rtmsg->rtmsg_flags;
1052
1053install_route:
1054 if (rta && rta[RTA_METRICS-1]) {
1055 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
1056 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
1057
1058 while (RTA_OK(attr, attrlen)) {
1059 unsigned flavor = attr->rta_type;
1060 if (flavor) {
1061 if (flavor > RTAX_MAX) {
1062 err = -EINVAL;
1063 goto out;
1064 }
1065 rt->u.dst.metrics[flavor-1] =
1066 *(u32 *)RTA_DATA(attr);
1067 }
1068 attr = RTA_NEXT(attr, attrlen);
1069 }
1070 }
1071
1072 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1073 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1074 if (!rt->u.dst.metrics[RTAX_MTU-1])
1075 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1076 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1077 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1078 rt->u.dst.dev = dev;
1079 rt->rt6i_idev = idev;
0d51aa80 1080 return ip6_ins_rt(rt, nlh, _rtattr, req);
1da177e4
LT
1081
1082out:
1083 if (dev)
1084 dev_put(dev);
1085 if (idev)
1086 in6_dev_put(idev);
1087 if (rt)
1088 dst_free((struct dst_entry *) rt);
1089 return err;
1090}
1091
0d51aa80 1092int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
1093{
1094 int err;
1095
1096 write_lock_bh(&rt6_lock);
1097
0d51aa80 1098 err = fib6_del(rt, nlh, _rtattr, req);
1da177e4
LT
1099 dst_release(&rt->u.dst);
1100
1101 write_unlock_bh(&rt6_lock);
1102
1103 return err;
1104}
1105
0d51aa80 1106static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
1107{
1108 struct fib6_node *fn;
1109 struct rt6_info *rt;
1110 int err = -ESRCH;
1111
1112 read_lock_bh(&rt6_lock);
1113
1114 fn = fib6_locate(&ip6_routing_table,
1115 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1116 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1117
1118 if (fn) {
1119 for (rt = fn->leaf; rt; rt = rt->u.next) {
1120 if (rtmsg->rtmsg_ifindex &&
1121 (rt->rt6i_dev == NULL ||
1122 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1123 continue;
1124 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1125 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1126 continue;
1127 if (rtmsg->rtmsg_metric &&
1128 rtmsg->rtmsg_metric != rt->rt6i_metric)
1129 continue;
1130 dst_hold(&rt->u.dst);
1131 read_unlock_bh(&rt6_lock);
1132
0d51aa80 1133 return ip6_del_rt(rt, nlh, _rtattr, req);
1da177e4
LT
1134 }
1135 }
1136 read_unlock_bh(&rt6_lock);
1137
1138 return err;
1139}
1140
1141/*
1142 * Handle redirects
1143 */
1144void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1145 struct neighbour *neigh, u8 *lladdr, int on_link)
1146{
1147 struct rt6_info *rt, *nrt;
1148
1149 /* Locate old route to this destination. */
1150 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1151
1152 if (rt == NULL)
1153 return;
1154
1155 if (neigh->dev != rt->rt6i_dev)
1156 goto out;
1157
1158 /*
1159 * Current route is on-link; redirect is always invalid.
1160 *
1161 * Seems, previous statement is not true. It could
1162 * be node, which looks for us as on-link (f.e. proxy ndisc)
1163 * But then router serving it might decide, that we should
1164 * know truth 8)8) --ANK (980726).
1165 */
1166 if (!(rt->rt6i_flags&RTF_GATEWAY))
1167 goto out;
1168
1169 /*
1170 * RFC 2461 specifies that redirects should only be
1171 * accepted if they come from the nexthop to the target.
1172 * Due to the way default routers are chosen, this notion
1173 * is a bit fuzzy and one might need to check all default
1174 * routers.
1175 */
1176 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1177 if (rt->rt6i_flags & RTF_DEFAULT) {
1178 struct rt6_info *rt1;
1179
1180 read_lock(&rt6_lock);
1181 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1182 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1183 dst_hold(&rt1->u.dst);
1184 dst_release(&rt->u.dst);
1185 read_unlock(&rt6_lock);
1186 rt = rt1;
1187 goto source_ok;
1188 }
1189 }
1190 read_unlock(&rt6_lock);
1191 }
1192 if (net_ratelimit())
1193 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1194 "for redirect target\n");
1195 goto out;
1196 }
1197
1198source_ok:
1199
1200 /*
1201 * We have finally decided to accept it.
1202 */
1203
1204 neigh_update(neigh, lladdr, NUD_STALE,
1205 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1206 NEIGH_UPDATE_F_OVERRIDE|
1207 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1208 NEIGH_UPDATE_F_ISROUTER))
1209 );
1210
1211 /*
1212 * Redirect received -> path was valid.
1213 * Look, redirects are sent only in response to data packets,
1214 * so that this nexthop apparently is reachable. --ANK
1215 */
1216 dst_confirm(&rt->u.dst);
1217
1218 /* Duplicate redirect: silently ignore. */
1219 if (neigh == rt->u.dst.neighbour)
1220 goto out;
1221
1222 nrt = ip6_rt_copy(rt);
1223 if (nrt == NULL)
1224 goto out;
1225
1226 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1227 if (on_link)
1228 nrt->rt6i_flags &= ~RTF_GATEWAY;
1229
1230 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1231 nrt->rt6i_dst.plen = 128;
1232 nrt->u.dst.flags |= DST_HOST;
1233
1234 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1235 nrt->rt6i_nexthop = neigh_clone(neigh);
1236 /* Reset pmtu, it may be better */
1237 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1238 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1239
0d51aa80 1240 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1da177e4
LT
1241 goto out;
1242
1243 if (rt->rt6i_flags&RTF_CACHE) {
0d51aa80 1244 ip6_del_rt(rt, NULL, NULL, NULL);
1da177e4
LT
1245 return;
1246 }
1247
1248out:
1249 dst_release(&rt->u.dst);
1250 return;
1251}
1252
1253/*
1254 * Handle ICMP "packet too big" messages
1255 * i.e. Path MTU discovery
1256 */
1257
1258void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1259 struct net_device *dev, u32 pmtu)
1260{
1261 struct rt6_info *rt, *nrt;
1262 int allfrag = 0;
1263
1264 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1265 if (rt == NULL)
1266 return;
1267
1268 if (pmtu >= dst_mtu(&rt->u.dst))
1269 goto out;
1270
1271 if (pmtu < IPV6_MIN_MTU) {
1272 /*
1273 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1274 * MTU (1280) and a fragment header should always be included
1275 * after a node receiving Too Big message reporting PMTU is
1276 * less than the IPv6 Minimum Link MTU.
1277 */
1278 pmtu = IPV6_MIN_MTU;
1279 allfrag = 1;
1280 }
1281
1282 /* New mtu received -> path was valid.
1283 They are sent only in response to data packets,
1284 so that this nexthop apparently is reachable. --ANK
1285 */
1286 dst_confirm(&rt->u.dst);
1287
1288 /* Host route. If it is static, it would be better
1289 not to override it, but add new one, so that
1290 when cache entry will expire old pmtu
1291 would return automatically.
1292 */
1293 if (rt->rt6i_flags & RTF_CACHE) {
1294 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1295 if (allfrag)
1296 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1297 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1298 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1299 goto out;
1300 }
1301
1302 /* Network route.
1303 Two cases are possible:
1304 1. It is connected route. Action: COW
1305 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1306 */
d5315b50 1307 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1308 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1309 else
1310 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1311
d5315b50 1312 if (nrt) {
a1e78363
YH
1313 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1314 if (allfrag)
1315 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1316
1317 /* According to RFC 1981, detecting PMTU increase shouldn't be
1318 * happened within 5 mins, the recommended timer is 10 mins.
1319 * Here this route expiration time is set to ip6_rt_mtu_expires
1320 * which is 10 mins. After 10 mins the decreased pmtu is expired
1321 * and detecting PMTU increase will be automatically happened.
1322 */
1323 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1324 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1325
1326 ip6_ins_rt(nrt, NULL, NULL, NULL);
1da177e4 1327 }
1da177e4
LT
1328out:
1329 dst_release(&rt->u.dst);
1330}
1331
1332/*
1333 * Misc support functions
1334 */
1335
1336static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1337{
1338 struct rt6_info *rt = ip6_dst_alloc();
1339
1340 if (rt) {
1341 rt->u.dst.input = ort->u.dst.input;
1342 rt->u.dst.output = ort->u.dst.output;
1343
1344 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1345 rt->u.dst.dev = ort->u.dst.dev;
1346 if (rt->u.dst.dev)
1347 dev_hold(rt->u.dst.dev);
1348 rt->rt6i_idev = ort->rt6i_idev;
1349 if (rt->rt6i_idev)
1350 in6_dev_hold(rt->rt6i_idev);
1351 rt->u.dst.lastuse = jiffies;
1352 rt->rt6i_expires = 0;
1353
1354 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1355 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1356 rt->rt6i_metric = 0;
1357
1358 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1359#ifdef CONFIG_IPV6_SUBTREES
1360 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1361#endif
1362 }
1363 return rt;
1364}
1365
70ceb4f5
YH
1366#ifdef CONFIG_IPV6_ROUTE_INFO
1367static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1368 struct in6_addr *gwaddr, int ifindex)
1369{
1370 struct fib6_node *fn;
1371 struct rt6_info *rt = NULL;
1372
1373 write_lock_bh(&rt6_lock);
1374 fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0);
1375 if (!fn)
1376 goto out;
1377
1378 for (rt = fn->leaf; rt; rt = rt->u.next) {
1379 if (rt->rt6i_dev->ifindex != ifindex)
1380 continue;
1381 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1382 continue;
1383 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1384 continue;
1385 dst_hold(&rt->u.dst);
1386 break;
1387 }
1388out:
1389 write_unlock_bh(&rt6_lock);
1390 return rt;
1391}
1392
1393static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1394 struct in6_addr *gwaddr, int ifindex,
1395 unsigned pref)
1396{
1397 struct in6_rtmsg rtmsg;
1398
1399 memset(&rtmsg, 0, sizeof(rtmsg));
1400 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1401 ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
1402 rtmsg.rtmsg_dst_len = prefixlen;
1403 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1404 rtmsg.rtmsg_metric = 1024;
1405 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
1406 rtmsg.rtmsg_ifindex = ifindex;
1407
1408 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1409
1410 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1411}
1412#endif
1413
1da177e4
LT
1414struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1415{
1416 struct rt6_info *rt;
1417 struct fib6_node *fn;
1418
1419 fn = &ip6_routing_table;
1420
1421 write_lock_bh(&rt6_lock);
1422 for (rt = fn->leaf; rt; rt=rt->u.next) {
1423 if (dev == rt->rt6i_dev &&
045927ff 1424 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1425 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1426 break;
1427 }
1428 if (rt)
1429 dst_hold(&rt->u.dst);
1430 write_unlock_bh(&rt6_lock);
1431 return rt;
1432}
1433
1434struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1435 struct net_device *dev,
1436 unsigned int pref)
1da177e4
LT
1437{
1438 struct in6_rtmsg rtmsg;
1439
1440 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1441 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1442 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1443 rtmsg.rtmsg_metric = 1024;
ebacaaa0
YH
1444 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
1445 RTF_PREF(pref);
1da177e4
LT
1446
1447 rtmsg.rtmsg_ifindex = dev->ifindex;
1448
0d51aa80 1449 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1da177e4
LT
1450 return rt6_get_dflt_router(gwaddr, dev);
1451}
1452
1453void rt6_purge_dflt_routers(void)
1454{
1455 struct rt6_info *rt;
1456
1457restart:
1458 read_lock_bh(&rt6_lock);
1459 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1460 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1461 dst_hold(&rt->u.dst);
1462
1da177e4
LT
1463 read_unlock_bh(&rt6_lock);
1464
0d51aa80 1465 ip6_del_rt(rt, NULL, NULL, NULL);
1da177e4
LT
1466
1467 goto restart;
1468 }
1469 }
1470 read_unlock_bh(&rt6_lock);
1471}
1472
1473int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1474{
1475 struct in6_rtmsg rtmsg;
1476 int err;
1477
1478 switch(cmd) {
1479 case SIOCADDRT: /* Add a route */
1480 case SIOCDELRT: /* Delete a route */
1481 if (!capable(CAP_NET_ADMIN))
1482 return -EPERM;
1483 err = copy_from_user(&rtmsg, arg,
1484 sizeof(struct in6_rtmsg));
1485 if (err)
1486 return -EFAULT;
1487
1488 rtnl_lock();
1489 switch (cmd) {
1490 case SIOCADDRT:
0d51aa80 1491 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1da177e4
LT
1492 break;
1493 case SIOCDELRT:
0d51aa80 1494 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1da177e4
LT
1495 break;
1496 default:
1497 err = -EINVAL;
1498 }
1499 rtnl_unlock();
1500
1501 return err;
1502 };
1503
1504 return -EINVAL;
1505}
1506
1507/*
1508 * Drop the packet on the floor
1509 */
1510
20380731 1511static int ip6_pkt_discard(struct sk_buff *skb)
1da177e4
LT
1512{
1513 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1514 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1515 kfree_skb(skb);
1516 return 0;
1517}
1518
20380731 1519static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1520{
1521 skb->dev = skb->dst->dev;
1522 return ip6_pkt_discard(skb);
1523}
1524
1525/*
1526 * Allocate a dst for local (unicast / anycast) address.
1527 */
1528
1529struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1530 const struct in6_addr *addr,
1531 int anycast)
1532{
1533 struct rt6_info *rt = ip6_dst_alloc();
1534
1535 if (rt == NULL)
1536 return ERR_PTR(-ENOMEM);
1537
1538 dev_hold(&loopback_dev);
1539 in6_dev_hold(idev);
1540
1541 rt->u.dst.flags = DST_HOST;
1542 rt->u.dst.input = ip6_input;
1543 rt->u.dst.output = ip6_output;
1544 rt->rt6i_dev = &loopback_dev;
1545 rt->rt6i_idev = idev;
1546 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1547 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1548 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1549 rt->u.dst.obsolete = -1;
1550
1551 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1552 if (anycast)
1553 rt->rt6i_flags |= RTF_ANYCAST;
1554 else
1da177e4
LT
1555 rt->rt6i_flags |= RTF_LOCAL;
1556 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1557 if (rt->rt6i_nexthop == NULL) {
1558 dst_free((struct dst_entry *) rt);
1559 return ERR_PTR(-ENOMEM);
1560 }
1561
1562 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1563 rt->rt6i_dst.plen = 128;
1564
1565 atomic_set(&rt->u.dst.__refcnt, 1);
1566
1567 return rt;
1568}
1569
1570static int fib6_ifdown(struct rt6_info *rt, void *arg)
1571{
1572 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1573 rt != &ip6_null_entry) {
1574 RT6_TRACE("deleted by ifdown %p\n", rt);
1575 return -1;
1576 }
1577 return 0;
1578}
1579
1580void rt6_ifdown(struct net_device *dev)
1581{
1582 write_lock_bh(&rt6_lock);
1583 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1584 write_unlock_bh(&rt6_lock);
1585}
1586
/* Argument handed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned mtu;			/* the new MTU of that device */
};
1592
1593static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1594{
1595 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1596 struct inet6_dev *idev;
1597
1598 /* In IPv6 pmtu discovery is not optional,
1599 so that RTAX_MTU lock cannot disable it.
1600 We still use this lock to block changes
1601 caused by addrconf/ndisc.
1602 */
1603
1604 idev = __in6_dev_get(arg->dev);
1605 if (idev == NULL)
1606 return 0;
1607
1608 /* For administrative MTU increase, there is no way to discover
1609 IPv6 PMTU increase, so PMTU increase should be updated here.
1610 Since RFC 1981 doesn't include administrative MTU increase
1611 update PMTU increase is a MUST. (i.e. jumbo frame)
1612 */
1613 /*
1614 If new MTU is less than route PMTU, this new MTU will be the
1615 lowest MTU in the path, update the route PMTU to reflect PMTU
1616 decreases; if new MTU is greater than route PMTU, and the
1617 old MTU is the lowest MTU in the path, update the route PMTU
1618 to reflect the increase. In this case if the other nodes' MTU
1619 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1620 PMTU discouvery.
1621 */
1622 if (rt->rt6i_dev == arg->dev &&
1623 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1624 (dst_mtu(&rt->u.dst) > arg->mtu ||
1625 (dst_mtu(&rt->u.dst) < arg->mtu &&
1626 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1627 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1628 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1629 return 0;
1630}
1631
1632void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1633{
1634 struct rt6_mtu_change_arg arg;
1635
1636 arg.dev = dev;
1637 arg.mtu = mtu;
1638 read_lock_bh(&rt6_lock);
1639 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1640 read_unlock_bh(&rt6_lock);
1641}
1642
1643static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1644 struct in6_rtmsg *rtmsg)
1645{
1646 memset(rtmsg, 0, sizeof(*rtmsg));
1647
1648 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1649 rtmsg->rtmsg_src_len = r->rtm_src_len;
1650 rtmsg->rtmsg_flags = RTF_UP;
1651 if (r->rtm_type == RTN_UNREACHABLE)
1652 rtmsg->rtmsg_flags |= RTF_REJECT;
1653
1654 if (rta[RTA_GATEWAY-1]) {
1655 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1656 return -EINVAL;
1657 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1658 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1659 }
1660 if (rta[RTA_DST-1]) {
1661 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1662 return -EINVAL;
1663 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1664 }
1665 if (rta[RTA_SRC-1]) {
1666 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1667 return -EINVAL;
1668 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1669 }
1670 if (rta[RTA_OIF-1]) {
1671 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1672 return -EINVAL;
1673 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1674 }
1675 if (rta[RTA_PRIORITY-1]) {
1676 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1677 return -EINVAL;
1678 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1679 }
1680 return 0;
1681}
1682
1683int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1684{
1685 struct rtmsg *r = NLMSG_DATA(nlh);
1686 struct in6_rtmsg rtmsg;
1687
1688 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1689 return -EINVAL;
0d51aa80 1690 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1da177e4
LT
1691}
1692
1693int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1694{
1695 struct rtmsg *r = NLMSG_DATA(nlh);
1696 struct in6_rtmsg rtmsg;
1697
1698 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1699 return -EINVAL;
0d51aa80 1700 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1da177e4
LT
1701}
1702
/* Per-call state passed through the FIB walker to rt6_dump_route(). */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* buffer the dump is written into */
	struct netlink_callback *cb;	/* netlink dump callback state */
};
1708
1709static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
1710 struct in6_addr *dst, struct in6_addr *src,
1711 int iif, int type, u32 pid, u32 seq,
1712 int prefix, unsigned int flags)
1da177e4
LT
1713{
1714 struct rtmsg *rtm;
1715 struct nlmsghdr *nlh;
1716 unsigned char *b = skb->tail;
1717 struct rta_cacheinfo ci;
1718
1719 if (prefix) { /* user wants prefix routes only */
1720 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1721 /* success since this is not a prefix route */
1722 return 1;
1723 }
1724 }
1725
b6544c0b 1726 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1da177e4
LT
1727 rtm = NLMSG_DATA(nlh);
1728 rtm->rtm_family = AF_INET6;
1729 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1730 rtm->rtm_src_len = rt->rt6i_src.plen;
1731 rtm->rtm_tos = 0;
1732 rtm->rtm_table = RT_TABLE_MAIN;
1733 if (rt->rt6i_flags&RTF_REJECT)
1734 rtm->rtm_type = RTN_UNREACHABLE;
1735 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1736 rtm->rtm_type = RTN_LOCAL;
1737 else
1738 rtm->rtm_type = RTN_UNICAST;
1739 rtm->rtm_flags = 0;
1740 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1741 rtm->rtm_protocol = rt->rt6i_protocol;
1742 if (rt->rt6i_flags&RTF_DYNAMIC)
1743 rtm->rtm_protocol = RTPROT_REDIRECT;
1744 else if (rt->rt6i_flags & RTF_ADDRCONF)
1745 rtm->rtm_protocol = RTPROT_KERNEL;
1746 else if (rt->rt6i_flags&RTF_DEFAULT)
1747 rtm->rtm_protocol = RTPROT_RA;
1748
1749 if (rt->rt6i_flags&RTF_CACHE)
1750 rtm->rtm_flags |= RTM_F_CLONED;
1751
1752 if (dst) {
1753 RTA_PUT(skb, RTA_DST, 16, dst);
1754 rtm->rtm_dst_len = 128;
1755 } else if (rtm->rtm_dst_len)
1756 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1757#ifdef CONFIG_IPV6_SUBTREES
1758 if (src) {
1759 RTA_PUT(skb, RTA_SRC, 16, src);
1760 rtm->rtm_src_len = 128;
1761 } else if (rtm->rtm_src_len)
1762 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1763#endif
1764 if (iif)
1765 RTA_PUT(skb, RTA_IIF, 4, &iif);
1766 else if (dst) {
1767 struct in6_addr saddr_buf;
1768 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1769 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1770 }
1771 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1772 goto rtattr_failure;
1773 if (rt->u.dst.neighbour)
1774 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1775 if (rt->u.dst.dev)
1776 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1777 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1778 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1779 if (rt->rt6i_expires)
1780 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1781 else
1782 ci.rta_expires = 0;
1783 ci.rta_used = rt->u.dst.__use;
1784 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1785 ci.rta_error = rt->u.dst.error;
1786 ci.rta_id = 0;
1787 ci.rta_ts = 0;
1788 ci.rta_tsage = 0;
1789 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1790 nlh->nlmsg_len = skb->tail - b;
1791 return skb->len;
1792
1793nlmsg_failure:
1794rtattr_failure:
1795 skb_trim(skb, b - skb->data);
1796 return -1;
1797}
1798
1799static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1800{
1801 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1802 int prefix;
1803
1804 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1805 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1806 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1807 } else
1808 prefix = 0;
1809
1810 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1811 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 1812 prefix, NLM_F_MULTI);
1da177e4
LT
1813}
1814
1815static int fib6_dump_node(struct fib6_walker_t *w)
1816{
1817 int res;
1818 struct rt6_info *rt;
1819
1820 for (rt = w->leaf; rt; rt = rt->u.next) {
1821 res = rt6_dump_route(rt, w->args);
1822 if (res < 0) {
1823 /* Frame is full, suspend walking */
1824 w->leaf = rt;
1825 return 1;
1826 }
1827 BUG_TRAP(res!=0);
1828 }
1829 w->leaf = NULL;
1830 return 0;
1831}
1832
1833static void fib6_dump_end(struct netlink_callback *cb)
1834{
1835 struct fib6_walker_t *w = (void*)cb->args[0];
1836
1837 if (w) {
1838 cb->args[0] = 0;
1839 fib6_walker_unlink(w);
1840 kfree(w);
1841 }
efacfbcb
HX
1842 cb->done = (void*)cb->args[1];
1843 cb->args[1] = 0;
1da177e4
LT
1844}
1845
1846static int fib6_dump_done(struct netlink_callback *cb)
1847{
1848 fib6_dump_end(cb);
a8f74b22 1849 return cb->done ? cb->done(cb) : 0;
1da177e4
LT
1850}
1851
1852int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1853{
1854 struct rt6_rtnl_dump_arg arg;
1855 struct fib6_walker_t *w;
1856 int res;
1857
1858 arg.skb = skb;
1859 arg.cb = cb;
1860
1861 w = (void*)cb->args[0];
1862 if (w == NULL) {
1863 /* New dump:
1864 *
1865 * 1. hook callback destructor.
1866 */
1867 cb->args[1] = (long)cb->done;
1868 cb->done = fib6_dump_done;
1869
1870 /*
1871 * 2. allocate and initialize walker.
1872 */
9e147a1c 1873 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1da177e4
LT
1874 if (w == NULL)
1875 return -ENOMEM;
1876 RT6_TRACE("dump<%p", w);
1877 memset(w, 0, sizeof(*w));
1878 w->root = &ip6_routing_table;
1879 w->func = fib6_dump_node;
1880 w->args = &arg;
1881 cb->args[0] = (long)w;
1882 read_lock_bh(&rt6_lock);
1883 res = fib6_walk(w);
1884 read_unlock_bh(&rt6_lock);
1885 } else {
1886 w->args = &arg;
1887 read_lock_bh(&rt6_lock);
1888 res = fib6_walk_continue(w);
1889 read_unlock_bh(&rt6_lock);
1890 }
1891#if RT6_DEBUG >= 3
1892 if (res <= 0 && skb->len == 0)
1893 RT6_TRACE("%p>dump end\n", w);
1894#endif
1895 res = res < 0 ? res : skb->len;
1896 /* res < 0 is an error. (really, impossible)
1897 res == 0 means that dump is complete, but skb still can contain data.
1898 res > 0 dump is not complete, but frame is full.
1899 */
1900 /* Destroy walker, if dump of this table is complete. */
1901 if (res <= 0)
1902 fib6_dump_end(cb);
1903 return res;
1904}
1905
1906int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1907{
1908 struct rtattr **rta = arg;
1909 int iif = 0;
1910 int err = -ENOBUFS;
1911 struct sk_buff *skb;
1912 struct flowi fl;
1913 struct rt6_info *rt;
1914
1915 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1916 if (skb == NULL)
1917 goto out;
1918
1919 /* Reserve room for dummy headers, this skb can pass
1920 through good chunk of routing engine.
1921 */
1922 skb->mac.raw = skb->data;
1923 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1924
1925 memset(&fl, 0, sizeof(fl));
1926 if (rta[RTA_SRC-1])
1927 ipv6_addr_copy(&fl.fl6_src,
1928 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1929 if (rta[RTA_DST-1])
1930 ipv6_addr_copy(&fl.fl6_dst,
1931 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1932
1933 if (rta[RTA_IIF-1])
1934 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1935
1936 if (iif) {
1937 struct net_device *dev;
1938 dev = __dev_get_by_index(iif);
1939 if (!dev) {
1940 err = -ENODEV;
1941 goto out_free;
1942 }
1943 }
1944
1945 fl.oif = 0;
1946 if (rta[RTA_OIF-1])
1947 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1948
1949 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1950
1951 skb->dst = &rt->u.dst;
1952
1953 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1954 err = rt6_fill_node(skb, rt,
1955 &fl.fl6_dst, &fl.fl6_src,
1956 iif,
1957 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 1958 nlh->nlmsg_seq, 0, 0);
1da177e4
LT
1959 if (err < 0) {
1960 err = -EMSGSIZE;
1961 goto out_free;
1962 }
1963
1964 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1965 if (err > 0)
1966 err = 0;
1967out:
1968 return err;
1969out_free:
1970 kfree_skb(skb);
1971 goto out;
1972}
1973
0d51aa80
JHS
1974void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1975 struct netlink_skb_parms *req)
1da177e4
LT
1976{
1977 struct sk_buff *skb;
1978 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
0d51aa80
JHS
1979 u32 pid = current->pid;
1980 u32 seq = 0;
1da177e4 1981
0d51aa80
JHS
1982 if (req)
1983 pid = req->pid;
1984 if (nlh)
1985 seq = nlh->nlmsg_seq;
1986
1da177e4
LT
1987 skb = alloc_skb(size, gfp_any());
1988 if (!skb) {
ac6d439d 1989 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1da177e4
LT
1990 return;
1991 }
0d51aa80 1992 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1da177e4 1993 kfree_skb(skb);
ac6d439d 1994 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1da177e4
LT
1995 return;
1996 }
ac6d439d
PM
1997 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1998 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1da177e4
LT
1999}
2000
2001/*
2002 * /proc
2003 */
2004
2005#ifdef CONFIG_PROC_FS
2006
/*
 * Length of one route line as used by the /proc offset arithmetic below:
 * three 32-digit hex addresses, two 4-character prefix-length fields,
 * 40 characters of numeric columns, 5 more for the device name column
 * and the newline.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* offset the reader asked for */
	int length;	/* size the reader asked for */
	int skip;	/* routes skipped so far */
	int len;	/* characters written to buffer so far */
};
2017
2018static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2019{
2020 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2021 int i;
2022
2023 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2024 arg->skip++;
2025 return 0;
2026 }
2027
2028 if (arg->len >= arg->length)
2029 return 0;
2030
2031 for (i=0; i<16; i++) {
2032 sprintf(arg->buffer + arg->len, "%02x",
2033 rt->rt6i_dst.addr.s6_addr[i]);
2034 arg->len += 2;
2035 }
2036 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2037 rt->rt6i_dst.plen);
2038
2039#ifdef CONFIG_IPV6_SUBTREES
2040 for (i=0; i<16; i++) {
2041 sprintf(arg->buffer + arg->len, "%02x",
2042 rt->rt6i_src.addr.s6_addr[i]);
2043 arg->len += 2;
2044 }
2045 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2046 rt->rt6i_src.plen);
2047#else
2048 sprintf(arg->buffer + arg->len,
2049 "00000000000000000000000000000000 00 ");
2050 arg->len += 36;
2051#endif
2052
2053 if (rt->rt6i_nexthop) {
2054 for (i=0; i<16; i++) {
2055 sprintf(arg->buffer + arg->len, "%02x",
2056 rt->rt6i_nexthop->primary_key[i]);
2057 arg->len += 2;
2058 }
2059 } else {
2060 sprintf(arg->buffer + arg->len,
2061 "00000000000000000000000000000000");
2062 arg->len += 32;
2063 }
2064 arg->len += sprintf(arg->buffer + arg->len,
2065 " %08x %08x %08x %08x %8s\n",
2066 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2067 rt->u.dst.__use, rt->rt6i_flags,
2068 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2069 return 0;
2070}
2071
2072static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2073{
2074 struct rt6_proc_arg arg;
2075 arg.buffer = buffer;
2076 arg.offset = offset;
2077 arg.length = length;
2078 arg.skip = 0;
2079 arg.len = 0;
2080
2081 read_lock_bh(&rt6_lock);
2082 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
2083 read_unlock_bh(&rt6_lock);
2084
2085 *start = buffer;
2086 if (offset)
2087 *start += offset % RT6_INFO_LEN;
2088
2089 arg.len -= offset % RT6_INFO_LEN;
2090
2091 if (arg.len > length)
2092 arg.len = length;
2093 if (arg.len < 0)
2094 arg.len = 0;
2095
2096 return arg.len;
2097}
2098
1da177e4
LT
2099static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2100{
2101 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2102 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2103 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2104 rt6_stats.fib_rt_cache,
2105 atomic_read(&ip6_dst_ops.entries),
2106 rt6_stats.fib_discarded_routes);
2107
2108 return 0;
2109}
2110
2111static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2112{
2113 return single_open(file, rt6_stats_seq_show, NULL);
2114}
2115
2116static struct file_operations rt6_stats_seq_fops = {
2117 .owner = THIS_MODULE,
2118 .open = rt6_stats_seq_open,
2119 .read = seq_read,
2120 .llseek = seq_lseek,
2121 .release = single_release,
2122};
2123#endif /* CONFIG_PROC_FS */
2124
2125#ifdef CONFIG_SYSCTL
2126
2127static int flush_delay;
2128
2129static
2130int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2131 void __user *buffer, size_t *lenp, loff_t *ppos)
2132{
2133 if (write) {
2134 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2135 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2136 return 0;
2137 } else
2138 return -EINVAL;
2139}
2140
2141ctl_table ipv6_route_table[] = {
2142 {
2143 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2144 .procname = "flush",
2145 .data = &flush_delay,
2146 .maxlen = sizeof(int),
89c8b3a1 2147 .mode = 0200,
1da177e4
LT
2148 .proc_handler = &ipv6_sysctl_rtcache_flush
2149 },
2150 {
2151 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2152 .procname = "gc_thresh",
2153 .data = &ip6_dst_ops.gc_thresh,
2154 .maxlen = sizeof(int),
2155 .mode = 0644,
2156 .proc_handler = &proc_dointvec,
2157 },
2158 {
2159 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2160 .procname = "max_size",
2161 .data = &ip6_rt_max_size,
2162 .maxlen = sizeof(int),
2163 .mode = 0644,
2164 .proc_handler = &proc_dointvec,
2165 },
2166 {
2167 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2168 .procname = "gc_min_interval",
2169 .data = &ip6_rt_gc_min_interval,
2170 .maxlen = sizeof(int),
2171 .mode = 0644,
2172 .proc_handler = &proc_dointvec_jiffies,
2173 .strategy = &sysctl_jiffies,
2174 },
2175 {
2176 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2177 .procname = "gc_timeout",
2178 .data = &ip6_rt_gc_timeout,
2179 .maxlen = sizeof(int),
2180 .mode = 0644,
2181 .proc_handler = &proc_dointvec_jiffies,
2182 .strategy = &sysctl_jiffies,
2183 },
2184 {
2185 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2186 .procname = "gc_interval",
2187 .data = &ip6_rt_gc_interval,
2188 .maxlen = sizeof(int),
2189 .mode = 0644,
2190 .proc_handler = &proc_dointvec_jiffies,
2191 .strategy = &sysctl_jiffies,
2192 },
2193 {
2194 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2195 .procname = "gc_elasticity",
2196 .data = &ip6_rt_gc_elasticity,
2197 .maxlen = sizeof(int),
2198 .mode = 0644,
2199 .proc_handler = &proc_dointvec_jiffies,
2200 .strategy = &sysctl_jiffies,
2201 },
2202 {
2203 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2204 .procname = "mtu_expires",
2205 .data = &ip6_rt_mtu_expires,
2206 .maxlen = sizeof(int),
2207 .mode = 0644,
2208 .proc_handler = &proc_dointvec_jiffies,
2209 .strategy = &sysctl_jiffies,
2210 },
2211 {
2212 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2213 .procname = "min_adv_mss",
2214 .data = &ip6_rt_min_advmss,
2215 .maxlen = sizeof(int),
2216 .mode = 0644,
2217 .proc_handler = &proc_dointvec_jiffies,
2218 .strategy = &sysctl_jiffies,
2219 },
2220 {
2221 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2222 .procname = "gc_min_interval_ms",
2223 .data = &ip6_rt_gc_min_interval,
2224 .maxlen = sizeof(int),
2225 .mode = 0644,
2226 .proc_handler = &proc_dointvec_ms_jiffies,
2227 .strategy = &sysctl_ms_jiffies,
2228 },
2229 { .ctl_name = 0 }
2230};
2231
2232#endif
2233
2234void __init ip6_route_init(void)
2235{
2236 struct proc_dir_entry *p;
2237
2238 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2239 sizeof(struct rt6_info),
2240 0, SLAB_HWCACHE_ALIGN,
2241 NULL, NULL);
2242 if (!ip6_dst_ops.kmem_cachep)
2243 panic("cannot create ip6_dst_cache");
2244
2245 fib6_init();
2246#ifdef CONFIG_PROC_FS
2247 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2248 if (p)
2249 p->owner = THIS_MODULE;
2250
2251 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2252#endif
2253#ifdef CONFIG_XFRM
2254 xfrm6_init();
2255#endif
2256}
2257
2258void ip6_route_cleanup(void)
2259{
2260#ifdef CONFIG_PROC_FS
2261 proc_net_remove("ipv6_route");
2262 proc_net_remove("rt6_stats");
2263#endif
2264#ifdef CONFIG_XFRM
2265 xfrm6_fini();
2266#endif
2267 rt6_ifdown(NULL);
2268 fib6_gc_cleanup();
2269 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2270}