Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wirel...
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
5a0e3ad6 43#include <linux/slab.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug level for this file.  RDBG() and RT6_TRACE() only produce output
 * when RT6_DEBUG is 3 or higher; below that they expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
1da177e4
LT
75static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 77static unsigned int ip6_default_advmss(const struct dst_entry *dst);
d33e4553 78static unsigned int ip6_default_mtu(const struct dst_entry *dst);
1da177e4
LT
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Route-information helpers used by rt6_route_rcv(); defined later. */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   struct in6_addr *prefix,
					   int prefixlen,
					   struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix,
					   int prefixlen,
					   struct in6_addr *gwaddr,
					   int ifindex);
#endif
99
06582540
DM
100static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
101{
102 struct rt6_info *rt = (struct rt6_info *) dst;
103 struct inet_peer *peer;
104 u32 *p = NULL;
105
106 if (!rt->rt6i_peer)
107 rt6_bind_peer(rt, 1);
108
109 peer = rt->rt6i_peer;
110 if (peer) {
111 u32 *old_p = __DST_METRICS_PTR(old);
112 unsigned long prev, new;
113
114 p = peer->metrics;
115 if (inet_metrics_new(peer))
116 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
117
118 new = (unsigned long) p;
119 prev = cmpxchg(&dst->_metrics, old, new);
120
121 if (prev != old) {
122 p = __DST_METRICS_PTR(prev);
123 if (prev & DST_METRICS_READ_ONLY)
124 p = NULL;
125 }
126 }
127 return p;
128}
129
9a7ec3a9 130static struct dst_ops ip6_dst_ops_template = {
1da177e4 131 .family = AF_INET6,
09640e63 132 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
133 .gc = ip6_dst_gc,
134 .gc_thresh = 1024,
135 .check = ip6_dst_check,
0dbaee3b 136 .default_advmss = ip6_default_advmss,
d33e4553 137 .default_mtu = ip6_default_mtu,
06582540 138 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
139 .destroy = ip6_dst_destroy,
140 .ifdown = ip6_dst_ifdown,
141 .negative_advice = ip6_negative_advice,
142 .link_failure = ip6_link_failure,
143 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 144 .local_out = __ip6_local_out,
1da177e4
LT
145};
146
ec831ea7
RD
/* default_mtu callback for blackhole routes: always reports 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	(void)dst;	/* unused */
	return 0U;
}
151
14e50e57
DM
152static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
153{
154}
155
156static struct dst_ops ip6_dst_blackhole_ops = {
157 .family = AF_INET6,
09640e63 158 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
159 .destroy = ip6_dst_destroy,
160 .check = ip6_dst_check,
ec831ea7 161 .default_mtu = ip6_blackhole_default_mtu,
214f45c9 162 .default_advmss = ip6_default_advmss,
14e50e57 163 .update_pmtu = ip6_rt_blackhole_update_pmtu,
14e50e57
DM
164};
165
62fa8a84
DM
166static const u32 ip6_template_metrics[RTAX_MAX] = {
167 [RTAX_HOPLIMIT - 1] = 255,
168};
169
bdb3289f 170static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
171 .dst = {
172 .__refcnt = ATOMIC_INIT(1),
173 .__use = 1,
174 .obsolete = -1,
175 .error = -ENETUNREACH,
d8d1f30b
CG
176 .input = ip6_pkt_discard,
177 .output = ip6_pkt_discard_out,
1da177e4
LT
178 },
179 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 180 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
181 .rt6i_metric = ~(u32) 0,
182 .rt6i_ref = ATOMIC_INIT(1),
183};
184
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* Template for the "prohibit" route: reject entry reporting -EACCES. */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route: reject entry reporting -EINVAL whose
 * input and output handlers are both dst_discard.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
221
/*
 * Allocate a route entry from @ops via dst_alloc(); the result is returned
 * as a struct rt6_info pointer (NULL when dst_alloc() returns NULL).
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct dst_entry *dst = dst_alloc(ops, 0);

	return (struct rt6_info *)dst;
}
227
228static void ip6_dst_destroy(struct dst_entry *dst)
229{
230 struct rt6_info *rt = (struct rt6_info *)dst;
231 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 232 struct inet_peer *peer = rt->rt6i_peer;
1da177e4
LT
233
234 if (idev != NULL) {
235 rt->rt6i_idev = NULL;
236 in6_dev_put(idev);
1ab1457c 237 }
b3419363 238 if (peer) {
b3419363
DM
239 rt->rt6i_peer = NULL;
240 inet_putpeer(peer);
241 }
242}
243
6431cbc2
DM
244static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
245
246static u32 rt6_peer_genid(void)
247{
248 return atomic_read(&__rt6_peer_genid);
249}
250
b3419363
DM
251void rt6_bind_peer(struct rt6_info *rt, int create)
252{
253 struct inet_peer *peer;
254
b3419363
DM
255 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
256 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
257 inet_putpeer(peer);
6431cbc2
DM
258 else
259 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
260}
261
262static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
263 int how)
264{
265 struct rt6_info *rt = (struct rt6_info *)dst;
266 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 267 struct net_device *loopback_dev =
c346dca1 268 dev_net(dev)->loopback_dev;
1da177e4 269
5a3e55d6
DL
270 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
271 struct inet6_dev *loopback_idev =
272 in6_dev_get(loopback_dev);
1da177e4
LT
273 if (loopback_idev != NULL) {
274 rt->rt6i_idev = loopback_idev;
275 in6_dev_put(idev);
276 }
277 }
278}
279
280static __inline__ int rt6_check_expired(const struct rt6_info *rt)
281{
a02cec21
ED
282 return (rt->rt6i_flags & RTF_EXPIRES) &&
283 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
284}
285
c71099ac
TG
286static inline int rt6_need_strict(struct in6_addr *daddr)
287{
a02cec21
ED
288 return ipv6_addr_type(daddr) &
289 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
290}
291
1da177e4 292/*
c71099ac 293 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
294 */
295
8ed67789
DL
296static inline struct rt6_info *rt6_device_match(struct net *net,
297 struct rt6_info *rt,
dd3abc4e 298 struct in6_addr *saddr,
1da177e4 299 int oif,
d420895e 300 int flags)
1da177e4
LT
301{
302 struct rt6_info *local = NULL;
303 struct rt6_info *sprt;
304
dd3abc4e
YH
305 if (!oif && ipv6_addr_any(saddr))
306 goto out;
307
d8d1f30b 308 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
309 struct net_device *dev = sprt->rt6i_dev;
310
311 if (oif) {
1da177e4
LT
312 if (dev->ifindex == oif)
313 return sprt;
314 if (dev->flags & IFF_LOOPBACK) {
315 if (sprt->rt6i_idev == NULL ||
316 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 317 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 318 continue;
1ab1457c 319 if (local && (!oif ||
1da177e4
LT
320 local->rt6i_idev->dev->ifindex == oif))
321 continue;
322 }
323 local = sprt;
324 }
dd3abc4e
YH
325 } else {
326 if (ipv6_chk_addr(net, saddr, dev,
327 flags & RT6_LOOKUP_F_IFACE))
328 return sprt;
1da177e4 329 }
dd3abc4e 330 }
1da177e4 331
dd3abc4e 332 if (oif) {
1da177e4
LT
333 if (local)
334 return local;
335
d420895e 336 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 337 return net->ipv6.ip6_null_entry;
1da177e4 338 }
dd3abc4e 339out:
1da177e4
LT
340 return rt;
341}
342
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the route's next-hop neighbour is not in
 * a NUD_VALID state and at least rtr_probe_interval has elapsed since the
 * neighbour entry was last updated, send a Neighbour Solicitation to the
 * neighbour's solicited-node multicast address.
 *
 * Probes must be rate-limited, which is done by stamping neigh->updated.
 * The stamp is written under the neighbour's *write* lock so that the
 * "interval elapsed?" test and the update are atomic; with only the read
 * lock held, concurrent callers could all pass the test and each send a
 * probe.
 *
 * A NULL @rt, or a route without a next-hop neighbour, is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;

	/* Cheap unlocked pre-check; re-validated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* Send the solicitation after the lock has been released. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
}
#else
/* Router preference support is disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
377
1da177e4 378/*
554cfb7e 379 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 380 */
b6f99a21 381static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
382{
383 struct net_device *dev = rt->rt6i_dev;
161980f4 384 if (!oif || dev->ifindex == oif)
554cfb7e 385 return 2;
161980f4
DM
386 if ((dev->flags & IFF_LOOPBACK) &&
387 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
388 return 1;
389 return 0;
554cfb7e 390}
1da177e4 391
b6f99a21 392static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 393{
554cfb7e 394 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 395 int m;
4d0c5911
YH
396 if (rt->rt6i_flags & RTF_NONEXTHOP ||
397 !(rt->rt6i_flags & RTF_GATEWAY))
398 m = 1;
399 else if (neigh) {
554cfb7e
YH
400 read_lock_bh(&neigh->lock);
401 if (neigh->nud_state & NUD_VALID)
4d0c5911 402 m = 2;
398bcbeb
YH
403#ifdef CONFIG_IPV6_ROUTER_PREF
404 else if (neigh->nud_state & NUD_FAILED)
405 m = 0;
406#endif
407 else
ea73ee23 408 m = 1;
554cfb7e 409 read_unlock_bh(&neigh->lock);
398bcbeb
YH
410 } else
411 m = 0;
554cfb7e 412 return m;
1da177e4
LT
413}
414
554cfb7e
YH
415static int rt6_score_route(struct rt6_info *rt, int oif,
416 int strict)
1da177e4 417{
4d0c5911 418 int m, n;
1ab1457c 419
4d0c5911 420 m = rt6_check_dev(rt, oif);
77d16f45 421 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 422 return -1;
ebacaaa0
YH
423#ifdef CONFIG_IPV6_ROUTER_PREF
424 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
425#endif
4d0c5911 426 n = rt6_check_neigh(rt);
557e92ef 427 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
428 return -1;
429 return m;
430}
431
f11e6659
DM
432static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
433 int *mpri, struct rt6_info *match)
554cfb7e 434{
f11e6659
DM
435 int m;
436
437 if (rt6_check_expired(rt))
438 goto out;
439
440 m = rt6_score_route(rt, oif, strict);
441 if (m < 0)
442 goto out;
443
444 if (m > *mpri) {
445 if (strict & RT6_LOOKUP_F_REACHABLE)
446 rt6_probe(match);
447 *mpri = m;
448 match = rt;
449 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
450 rt6_probe(rt);
451 }
452
453out:
454 return match;
455}
456
457static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
458 struct rt6_info *rr_head,
459 u32 metric, int oif, int strict)
460{
461 struct rt6_info *rt, *match;
554cfb7e 462 int mpri = -1;
1da177e4 463
f11e6659
DM
464 match = NULL;
465 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 466 rt = rt->dst.rt6_next)
f11e6659
DM
467 match = find_match(rt, oif, strict, &mpri, match);
468 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 469 rt = rt->dst.rt6_next)
f11e6659 470 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 471
f11e6659
DM
472 return match;
473}
1da177e4 474
f11e6659
DM
475static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
476{
477 struct rt6_info *match, *rt0;
8ed67789 478 struct net *net;
1da177e4 479
f11e6659 480 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 481 __func__, fn->leaf, oif);
554cfb7e 482
f11e6659
DM
483 rt0 = fn->rr_ptr;
484 if (!rt0)
485 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 486
f11e6659 487 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 488
554cfb7e 489 if (!match &&
f11e6659 490 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 491 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 492
554cfb7e 493 /* no entries matched; do round-robin */
f11e6659
DM
494 if (!next || next->rt6i_metric != rt0->rt6i_metric)
495 next = fn->leaf;
496
497 if (next != rt0)
498 fn->rr_ptr = next;
1da177e4 499 }
1da177e4 500
f11e6659 501 RT6_TRACE("%s() => %p\n",
0dc47877 502 __func__, match);
1da177e4 503
c346dca1 504 net = dev_net(rt0->rt6i_dev);
a02cec21 505 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
506}
507
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option received on @dev from @gwaddr.
 *
 * @opt/@len describe the raw option.  The option is rejected with -EINVAL
 * when it is shorter than struct route_info, when its length field is
 * inconsistent with the prefix length, or when the preference is
 * ICMPV6_ROUTER_PREF_INVALID.
 *
 * A zero lifetime deletes an existing matching route; a non-zero lifetime
 * adds the route if it is missing or refreshes the preference bits of an
 * existing one.  The expiry is then set (or cleared for a non-finite
 * lifetime) and the route reference is released.
 *
 * Returns 0 on success.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws the route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
581
/*
 * BACKTRACK - climb the fib6 tree after a failed lookup.
 *
 * Relies on the expanding function providing the variables 'rt' and 'fn'
 * and the labels 'out' and 'restart'.  Does nothing unless 'rt' is the
 * namespace's null entry.  Otherwise it moves 'fn' to its parent, or into
 * the parent's source subtree (looked up by @saddr) when that subtree
 * exists and is not where we came from.  It jumps to 'restart' as soon as
 * the new node has RTN_RTINFO set, and to 'out' when a node flagged
 * RTN_TL_ROOT is reached.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (!FIB6_SUBTREE(pn) || FIB6_SUBTREE(pn) == fn) \
				fn = pn; \
			else \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
c71099ac 599
8ed67789
DL
600static struct rt6_info *ip6_pol_route_lookup(struct net *net,
601 struct fib6_table *table,
4c9483b2 602 struct flowi6 *fl6, int flags)
1da177e4
LT
603{
604 struct fib6_node *fn;
605 struct rt6_info *rt;
606
c71099ac 607 read_lock_bh(&table->tb6_lock);
4c9483b2 608 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
609restart:
610 rt = fn->leaf;
4c9483b2
DM
611 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
612 BACKTRACK(net, &fl6->saddr);
c71099ac 613out:
d8d1f30b 614 dst_use(&rt->dst, jiffies);
c71099ac 615 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
616 return rt;
617
618}
619
9acd9f3a
YH
620struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
621 const struct in6_addr *saddr, int oif, int strict)
c71099ac 622{
4c9483b2
DM
623 struct flowi6 fl6 = {
624 .flowi6_oif = oif,
625 .daddr = *daddr,
c71099ac
TG
626 };
627 struct dst_entry *dst;
77d16f45 628 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 629
adaa70bb 630 if (saddr) {
4c9483b2 631 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
632 flags |= RT6_LOOKUP_F_HAS_SADDR;
633 }
634
4c9483b2 635 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
636 if (dst->error == 0)
637 return (struct rt6_info *) dst;
638
639 dst_release(dst);
640
1da177e4
LT
641 return NULL;
642}
643
7159039a
YH
644EXPORT_SYMBOL(rt6_lookup);
645
c71099ac 646/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
647 It takes new route entry, the addition fails by any reason the
648 route is freed. In any case, if caller does not hold it, it may
649 be destroyed.
650 */
651
86872cb5 652static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
653{
654 int err;
c71099ac 655 struct fib6_table *table;
1da177e4 656
c71099ac
TG
657 table = rt->rt6i_table;
658 write_lock_bh(&table->tb6_lock);
86872cb5 659 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 660 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
661
662 return err;
663}
664
40e22e8f
TG
665int ip6_ins_rt(struct rt6_info *rt)
666{
4d1169c1 667 struct nl_info info = {
c346dca1 668 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 669 };
528c4ceb 670 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
671}
672
95a9a5ba
YH
673static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
674 struct in6_addr *saddr)
1da177e4 675{
1da177e4
LT
676 struct rt6_info *rt;
677
678 /*
679 * Clone the route.
680 */
681
682 rt = ip6_rt_copy(ort);
683
684 if (rt) {
14deae41
DM
685 struct neighbour *neigh;
686 int attempts = !in_softirq();
687
58c4fb86
YH
688 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
689 if (rt->rt6i_dst.plen != 128 &&
690 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
691 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 692 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 693 }
1da177e4 694
58c4fb86 695 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
696 rt->rt6i_dst.plen = 128;
697 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 698 rt->dst.flags |= DST_HOST;
1da177e4
LT
699
700#ifdef CONFIG_IPV6_SUBTREES
701 if (rt->rt6i_src.plen && saddr) {
702 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
703 rt->rt6i_src.plen = 128;
704 }
705#endif
706
14deae41
DM
707 retry:
708 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
709 if (IS_ERR(neigh)) {
710 struct net *net = dev_net(rt->rt6i_dev);
711 int saved_rt_min_interval =
712 net->ipv6.sysctl.ip6_rt_gc_min_interval;
713 int saved_rt_elasticity =
714 net->ipv6.sysctl.ip6_rt_gc_elasticity;
715
716 if (attempts-- > 0) {
717 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
718 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
719
86393e52 720 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
721
722 net->ipv6.sysctl.ip6_rt_gc_elasticity =
723 saved_rt_elasticity;
724 net->ipv6.sysctl.ip6_rt_gc_min_interval =
725 saved_rt_min_interval;
726 goto retry;
727 }
728
729 if (net_ratelimit())
730 printk(KERN_WARNING
7e1b33e5 731 "ipv6: Neighbour table overflow.\n");
d8d1f30b 732 dst_free(&rt->dst);
14deae41
DM
733 return NULL;
734 }
735 rt->rt6i_nexthop = neigh;
1da177e4 736
95a9a5ba 737 }
1da177e4 738
95a9a5ba
YH
739 return rt;
740}
1da177e4 741
299d9939
YH
742static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
743{
744 struct rt6_info *rt = ip6_rt_copy(ort);
745 if (rt) {
746 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
747 rt->rt6i_dst.plen = 128;
748 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 749 rt->dst.flags |= DST_HOST;
299d9939
YH
750 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
751 }
752 return rt;
753}
754
8ed67789 755static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 756 struct flowi6 *fl6, int flags)
1da177e4
LT
757{
758 struct fib6_node *fn;
519fbd87 759 struct rt6_info *rt, *nrt;
c71099ac 760 int strict = 0;
1da177e4 761 int attempts = 3;
519fbd87 762 int err;
53b7997f 763 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 764
77d16f45 765 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
766
767relookup:
c71099ac 768 read_lock_bh(&table->tb6_lock);
1da177e4 769
8238dd06 770restart_2:
4c9483b2 771 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
772
773restart:
4acad72d 774 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 775
4c9483b2 776 BACKTRACK(net, &fl6->saddr);
8ed67789 777 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 778 rt->rt6i_flags & RTF_CACHE)
1ddef044 779 goto out;
1da177e4 780
d8d1f30b 781 dst_hold(&rt->dst);
c71099ac 782 read_unlock_bh(&table->tb6_lock);
fb9de91e 783
519fbd87 784 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 785 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 786 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 787 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
788 else
789 goto out2;
e40cf353 790
d8d1f30b 791 dst_release(&rt->dst);
8ed67789 792 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 793
d8d1f30b 794 dst_hold(&rt->dst);
519fbd87 795 if (nrt) {
40e22e8f 796 err = ip6_ins_rt(nrt);
519fbd87 797 if (!err)
1da177e4 798 goto out2;
1da177e4 799 }
1da177e4 800
519fbd87
YH
801 if (--attempts <= 0)
802 goto out2;
803
804 /*
c71099ac 805 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
806 * released someone could insert this route. Relookup.
807 */
d8d1f30b 808 dst_release(&rt->dst);
519fbd87
YH
809 goto relookup;
810
811out:
8238dd06
YH
812 if (reachable) {
813 reachable = 0;
814 goto restart_2;
815 }
d8d1f30b 816 dst_hold(&rt->dst);
c71099ac 817 read_unlock_bh(&table->tb6_lock);
1da177e4 818out2:
d8d1f30b
CG
819 rt->dst.lastuse = jiffies;
820 rt->dst.__use++;
c71099ac
TG
821
822 return rt;
1da177e4
LT
823}
824
8ed67789 825static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 826 struct flowi6 *fl6, int flags)
4acad72d 827{
4c9483b2 828 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
829}
830
c71099ac
TG
831void ip6_route_input(struct sk_buff *skb)
832{
0660e03f 833 struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 834 struct net *net = dev_net(skb->dev);
adaa70bb 835 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
836 struct flowi6 fl6 = {
837 .flowi6_iif = skb->dev->ifindex,
838 .daddr = iph->daddr,
839 .saddr = iph->saddr,
840 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
841 .flowi6_mark = skb->mark,
842 .flowi6_proto = iph->nexthdr,
c71099ac 843 };
adaa70bb 844
1d6e55f1 845 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 846 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 847
4c9483b2 848 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
c71099ac
TG
849}
850
8ed67789 851static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 852 struct flowi6 *fl6, int flags)
1da177e4 853{
4c9483b2 854 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
855}
856
4591db4f 857struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
4c9483b2 858 struct flowi6 *fl6)
c71099ac
TG
859{
860 int flags = 0;
861
4c9483b2 862 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 863 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 864
4c9483b2 865 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 866 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
867 else if (sk)
868 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 869
4c9483b2 870 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
871}
872
7159039a 873EXPORT_SYMBOL(ip6_route_output);
1da177e4 874
2774c131 875struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 876{
69ead7af
DM
877 struct rt6_info *rt = dst_alloc(&ip6_dst_blackhole_ops, 1);
878 struct rt6_info *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
879 struct dst_entry *new = NULL;
880
881 if (rt) {
d8d1f30b 882 new = &rt->dst;
14e50e57 883
14e50e57 884 new->__use = 1;
352e512c
HX
885 new->input = dst_discard;
886 new->output = dst_discard;
14e50e57 887
defb3519 888 dst_copy_metrics(new, &ort->dst);
d8d1f30b 889 new->dev = ort->dst.dev;
14e50e57
DM
890 if (new->dev)
891 dev_hold(new->dev);
892 rt->rt6i_idev = ort->rt6i_idev;
893 if (rt->rt6i_idev)
894 in6_dev_hold(rt->rt6i_idev);
895 rt->rt6i_expires = 0;
896
897 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
898 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
899 rt->rt6i_metric = 0;
900
901 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
902#ifdef CONFIG_IPV6_SUBTREES
903 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
904#endif
905
906 dst_free(new);
907 }
908
69ead7af
DM
909 dst_release(dst_orig);
910 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 911}
14e50e57 912
1da177e4
LT
913/*
914 * Destination cache support functions
915 */
916
917static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
918{
919 struct rt6_info *rt;
920
921 rt = (struct rt6_info *) dst;
922
6431cbc2
DM
923 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
924 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
925 if (!rt->rt6i_peer)
926 rt6_bind_peer(rt, 0);
927 rt->rt6i_peer_genid = rt6_peer_genid();
928 }
1da177e4 929 return dst;
6431cbc2 930 }
1da177e4
LT
931 return NULL;
932}
933
934static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
935{
936 struct rt6_info *rt = (struct rt6_info *) dst;
937
938 if (rt) {
54c1a859
YH
939 if (rt->rt6i_flags & RTF_CACHE) {
940 if (rt6_check_expired(rt)) {
941 ip6_del_rt(rt);
942 dst = NULL;
943 }
944 } else {
1da177e4 945 dst_release(dst);
54c1a859
YH
946 dst = NULL;
947 }
1da177e4 948 }
54c1a859 949 return dst;
1da177e4
LT
950}
951
952static void ip6_link_failure(struct sk_buff *skb)
953{
954 struct rt6_info *rt;
955
3ffe533c 956 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 957
adf30907 958 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
959 if (rt) {
960 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 961 dst_set_expires(&rt->dst, 0);
1da177e4
LT
962 rt->rt6i_flags |= RTF_EXPIRES;
963 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
964 rt->rt6i_node->fn_sernum = -1;
965 }
966}
967
968static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
969{
970 struct rt6_info *rt6 = (struct rt6_info*)dst;
971
972 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
973 rt6->rt6i_flags |= RTF_MODIFIED;
974 if (mtu < IPV6_MIN_MTU) {
defb3519 975 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 976 mtu = IPV6_MIN_MTU;
defb3519
DM
977 features |= RTAX_FEATURE_ALLFRAG;
978 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 979 }
defb3519 980 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
981 }
982}
983
0dbaee3b 984static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 985{
0dbaee3b
DM
986 struct net_device *dev = dst->dev;
987 unsigned int mtu = dst_mtu(dst);
988 struct net *net = dev_net(dev);
989
1da177e4
LT
990 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
991
5578689a
DL
992 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
993 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
994
995 /*
1ab1457c
YH
996 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
997 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
998 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
999 * rely only on pmtu discovery"
1000 */
1001 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1002 mtu = IPV6_MAXPLEN;
1003 return mtu;
1004}
1005
d33e4553
DM
1006static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1007{
1008 unsigned int mtu = IPV6_MIN_MTU;
1009 struct inet6_dev *idev;
1010
1011 rcu_read_lock();
1012 idev = __in6_dev_get(dst->dev);
1013 if (idev)
1014 mtu = idev->cnf.mtu6;
1015 rcu_read_unlock();
1016
1017 return mtu;
1018}
1019
3b00944c
YH
/*
 * Singly linked list (through dst->next) of the dst entries handed out by
 * icmp6_dst_alloc(), and the spinlock that protects it.
 */
static DEFINE_SPINLOCK(icmp6_dst_lock);
static struct dst_entry *icmp6_dst_gc_list;
5d0bbeeb 1022
3b00944c 1023struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1024 struct neighbour *neigh,
9acd9f3a 1025 const struct in6_addr *addr)
1da177e4
LT
1026{
1027 struct rt6_info *rt;
1028 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1029 struct net *net = dev_net(dev);
1da177e4
LT
1030
1031 if (unlikely(idev == NULL))
1032 return NULL;
1033
86393e52 1034 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1035 if (unlikely(rt == NULL)) {
1036 in6_dev_put(idev);
1037 goto out;
1038 }
1039
1040 dev_hold(dev);
1041 if (neigh)
1042 neigh_hold(neigh);
14deae41 1043 else {
1da177e4 1044 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
1045 if (IS_ERR(neigh))
1046 neigh = NULL;
1047 }
1da177e4
LT
1048
1049 rt->rt6i_dev = dev;
1050 rt->rt6i_idev = idev;
1051 rt->rt6i_nexthop = neigh;
d8d1f30b 1052 atomic_set(&rt->dst.__refcnt, 1);
defb3519 1053 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
d8d1f30b 1054 rt->dst.output = ip6_output;
1da177e4
LT
1055
1056#if 0 /* there's no chance to use these for ndisc */
d8d1f30b 1057 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1ab1457c 1058 ? DST_HOST
1da177e4
LT
1059 : 0;
1060 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1061 rt->rt6i_dst.plen = 128;
1062#endif
1063
3b00944c 1064 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1065 rt->dst.next = icmp6_dst_gc_list;
1066 icmp6_dst_gc_list = &rt->dst;
3b00944c 1067 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1068
5578689a 1069 fib6_force_start_gc(net);
1da177e4
LT
1070
1071out:
d8d1f30b 1072 return &rt->dst;
1da177e4
LT
1073}
1074
3d0f24a7 1075int icmp6_dst_gc(void)
1da177e4 1076{
e9476e95 1077 struct dst_entry *dst, **pprev;
3d0f24a7 1078 int more = 0;
1da177e4 1079
3b00944c
YH
1080 spin_lock_bh(&icmp6_dst_lock);
1081 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1082
1da177e4
LT
1083 while ((dst = *pprev) != NULL) {
1084 if (!atomic_read(&dst->__refcnt)) {
1085 *pprev = dst->next;
1086 dst_free(dst);
1da177e4
LT
1087 } else {
1088 pprev = &dst->next;
3d0f24a7 1089 ++more;
1da177e4
LT
1090 }
1091 }
1092
3b00944c 1093 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1094
3d0f24a7 1095 return more;
1da177e4
LT
1096}
1097
1e493d19
DM
1098static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1099 void *arg)
1100{
1101 struct dst_entry *dst, **pprev;
1102
1103 spin_lock_bh(&icmp6_dst_lock);
1104 pprev = &icmp6_dst_gc_list;
1105 while ((dst = *pprev) != NULL) {
1106 struct rt6_info *rt = (struct rt6_info *) dst;
1107 if (func(rt, arg)) {
1108 *pprev = dst->next;
1109 dst_free(dst);
1110 } else {
1111 pprev = &dst->next;
1112 }
1113 }
1114 spin_unlock_bh(&icmp6_dst_lock);
1115}
1116
569d3645 1117static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1118{
1da177e4 1119 unsigned long now = jiffies;
86393e52 1120 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1121 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1122 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1123 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1124 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1125 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1126 int entries;
7019b78e 1127
fc66f95c 1128 entries = dst_entries_get_fast(ops);
7019b78e 1129 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1130 entries <= rt_max_size)
1da177e4
LT
1131 goto out;
1132
6891a346
BT
1133 net->ipv6.ip6_rt_gc_expire++;
1134 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1135 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1136 entries = dst_entries_get_slow(ops);
1137 if (entries < ops->gc_thresh)
7019b78e 1138 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1139out:
7019b78e 1140 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1141 return entries > rt_max_size;
1da177e4
LT
1142}
1143
1144/* Clean host part of a prefix. Not necessary in radix tree,
1145 but results in cleaner routing tables.
1146
1147 Remove it only when all the things will work!
1148 */
1149
6b75d090 1150int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1151{
5170ae82 1152 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1153 if (hoplimit == 0) {
6b75d090 1154 struct net_device *dev = dst->dev;
c68f24cc
ED
1155 struct inet6_dev *idev;
1156
1157 rcu_read_lock();
1158 idev = __in6_dev_get(dev);
1159 if (idev)
6b75d090 1160 hoplimit = idev->cnf.hop_limit;
c68f24cc 1161 else
53b7997f 1162 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1163 rcu_read_unlock();
1da177e4
LT
1164 }
1165 return hoplimit;
1166}
abbf46ae 1167EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1168
1169/*
1170 *
1171 */
1172
86872cb5 1173int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1174{
1175 int err;
5578689a 1176 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1177 struct rt6_info *rt = NULL;
1178 struct net_device *dev = NULL;
1179 struct inet6_dev *idev = NULL;
c71099ac 1180 struct fib6_table *table;
1da177e4
LT
1181 int addr_type;
1182
86872cb5 1183 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1184 return -EINVAL;
1185#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1186 if (cfg->fc_src_len)
1da177e4
LT
1187 return -EINVAL;
1188#endif
86872cb5 1189 if (cfg->fc_ifindex) {
1da177e4 1190 err = -ENODEV;
5578689a 1191 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1192 if (!dev)
1193 goto out;
1194 idev = in6_dev_get(dev);
1195 if (!idev)
1196 goto out;
1197 }
1198
86872cb5
TG
1199 if (cfg->fc_metric == 0)
1200 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1201
5578689a 1202 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1203 if (table == NULL) {
1204 err = -ENOBUFS;
1205 goto out;
1206 }
1207
86393e52 1208 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1209
1210 if (rt == NULL) {
1211 err = -ENOMEM;
1212 goto out;
1213 }
1214
d8d1f30b 1215 rt->dst.obsolete = -1;
6f704992
YH
1216 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1217 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1218 0;
1da177e4 1219
86872cb5
TG
1220 if (cfg->fc_protocol == RTPROT_UNSPEC)
1221 cfg->fc_protocol = RTPROT_BOOT;
1222 rt->rt6i_protocol = cfg->fc_protocol;
1223
1224 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1225
1226 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1227 rt->dst.input = ip6_mc_input;
ab79ad14
1228 else if (cfg->fc_flags & RTF_LOCAL)
1229 rt->dst.input = ip6_input;
1da177e4 1230 else
d8d1f30b 1231 rt->dst.input = ip6_forward;
1da177e4 1232
d8d1f30b 1233 rt->dst.output = ip6_output;
1da177e4 1234
86872cb5
TG
1235 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1236 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1237 if (rt->rt6i_dst.plen == 128)
d8d1f30b 1238 rt->dst.flags = DST_HOST;
1da177e4
LT
1239
1240#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1241 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1242 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1243#endif
1244
86872cb5 1245 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1246
1247 /* We cannot add true routes via loopback here,
1248 they would result in kernel looping; promote them to reject routes
1249 */
86872cb5 1250 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1251 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1252 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1253 /* hold loopback dev/idev if we haven't done so. */
5578689a 1254 if (dev != net->loopback_dev) {
1da177e4
LT
1255 if (dev) {
1256 dev_put(dev);
1257 in6_dev_put(idev);
1258 }
5578689a 1259 dev = net->loopback_dev;
1da177e4
LT
1260 dev_hold(dev);
1261 idev = in6_dev_get(dev);
1262 if (!idev) {
1263 err = -ENODEV;
1264 goto out;
1265 }
1266 }
d8d1f30b
CG
1267 rt->dst.output = ip6_pkt_discard_out;
1268 rt->dst.input = ip6_pkt_discard;
1269 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1270 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1271 goto install_route;
1272 }
1273
86872cb5 1274 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1275 struct in6_addr *gw_addr;
1276 int gwa_type;
1277
86872cb5
TG
1278 gw_addr = &cfg->fc_gateway;
1279 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1280 gwa_type = ipv6_addr_type(gw_addr);
1281
1282 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1283 struct rt6_info *grt;
1284
1285 /* IPv6 strictly inhibits using not link-local
1286 addresses as nexthop address.
1287 Otherwise, router will not able to send redirects.
1288 It is very good, but in some (rare!) circumstances
1289 (SIT, PtP, NBMA NOARP links) it is handy to allow
1290 some exceptions. --ANK
1291 */
1292 err = -EINVAL;
1293 if (!(gwa_type&IPV6_ADDR_UNICAST))
1294 goto out;
1295
5578689a 1296 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1297
1298 err = -EHOSTUNREACH;
1299 if (grt == NULL)
1300 goto out;
1301 if (dev) {
1302 if (dev != grt->rt6i_dev) {
d8d1f30b 1303 dst_release(&grt->dst);
1da177e4
LT
1304 goto out;
1305 }
1306 } else {
1307 dev = grt->rt6i_dev;
1308 idev = grt->rt6i_idev;
1309 dev_hold(dev);
1310 in6_dev_hold(grt->rt6i_idev);
1311 }
1312 if (!(grt->rt6i_flags&RTF_GATEWAY))
1313 err = 0;
d8d1f30b 1314 dst_release(&grt->dst);
1da177e4
LT
1315
1316 if (err)
1317 goto out;
1318 }
1319 err = -EINVAL;
1320 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1321 goto out;
1322 }
1323
1324 err = -ENODEV;
1325 if (dev == NULL)
1326 goto out;
1327
86872cb5 1328 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1329 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1330 if (IS_ERR(rt->rt6i_nexthop)) {
1331 err = PTR_ERR(rt->rt6i_nexthop);
1332 rt->rt6i_nexthop = NULL;
1333 goto out;
1334 }
1335 }
1336
86872cb5 1337 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1338
1339install_route:
86872cb5
TG
1340 if (cfg->fc_mx) {
1341 struct nlattr *nla;
1342 int remaining;
1343
1344 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1345 int type = nla_type(nla);
86872cb5
TG
1346
1347 if (type) {
1348 if (type > RTAX_MAX) {
1da177e4
LT
1349 err = -EINVAL;
1350 goto out;
1351 }
86872cb5 1352
defb3519 1353 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1354 }
1da177e4
LT
1355 }
1356 }
1357
d8d1f30b 1358 rt->dst.dev = dev;
1da177e4 1359 rt->rt6i_idev = idev;
c71099ac 1360 rt->rt6i_table = table;
63152fc0 1361
c346dca1 1362 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1363
86872cb5 1364 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1365
1366out:
1367 if (dev)
1368 dev_put(dev);
1369 if (idev)
1370 in6_dev_put(idev);
1371 if (rt)
d8d1f30b 1372 dst_free(&rt->dst);
1da177e4
LT
1373 return err;
1374}
1375
86872cb5 1376static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1377{
1378 int err;
c71099ac 1379 struct fib6_table *table;
c346dca1 1380 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1381
8ed67789 1382 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1383 return -ENOENT;
1384
c71099ac
TG
1385 table = rt->rt6i_table;
1386 write_lock_bh(&table->tb6_lock);
1da177e4 1387
86872cb5 1388 err = fib6_del(rt, info);
d8d1f30b 1389 dst_release(&rt->dst);
1da177e4 1390
c71099ac 1391 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1392
1393 return err;
1394}
1395
e0a1ad73
TG
1396int ip6_del_rt(struct rt6_info *rt)
1397{
4d1169c1 1398 struct nl_info info = {
c346dca1 1399 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1400 };
528c4ceb 1401 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1402}
1403
86872cb5 1404static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1405{
c71099ac 1406 struct fib6_table *table;
1da177e4
LT
1407 struct fib6_node *fn;
1408 struct rt6_info *rt;
1409 int err = -ESRCH;
1410
5578689a 1411 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1412 if (table == NULL)
1413 return err;
1414
1415 read_lock_bh(&table->tb6_lock);
1da177e4 1416
c71099ac 1417 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1418 &cfg->fc_dst, cfg->fc_dst_len,
1419 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1420
1da177e4 1421 if (fn) {
d8d1f30b 1422 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1423 if (cfg->fc_ifindex &&
1da177e4 1424 (rt->rt6i_dev == NULL ||
86872cb5 1425 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1426 continue;
86872cb5
TG
1427 if (cfg->fc_flags & RTF_GATEWAY &&
1428 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1429 continue;
86872cb5 1430 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1431 continue;
d8d1f30b 1432 dst_hold(&rt->dst);
c71099ac 1433 read_unlock_bh(&table->tb6_lock);
1da177e4 1434
86872cb5 1435 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1436 }
1437 }
c71099ac 1438 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1439
1440 return err;
1441}
1442
1443/*
1444 * Handle redirects
1445 */
a6279458 1446struct ip6rd_flowi {
4c9483b2 1447 struct flowi6 fl6;
a6279458
YH
1448 struct in6_addr gateway;
1449};
1450
8ed67789
DL
1451static struct rt6_info *__ip6_route_redirect(struct net *net,
1452 struct fib6_table *table,
4c9483b2 1453 struct flowi6 *fl6,
a6279458 1454 int flags)
1da177e4 1455{
4c9483b2 1456 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1457 struct rt6_info *rt;
e843b9e1 1458 struct fib6_node *fn;
c71099ac 1459
1da177e4 1460 /*
e843b9e1
YH
1461 * Get the "current" route for this destination and
1462 * check if the redirect has come from approriate router.
1463 *
1464 * RFC 2461 specifies that redirects should only be
1465 * accepted if they come from the nexthop to the target.
1466 * Due to the way the routes are chosen, this notion
1467 * is a bit fuzzy and one might need to check all possible
1468 * routes.
1da177e4 1469 */
1da177e4 1470
c71099ac 1471 read_lock_bh(&table->tb6_lock);
4c9483b2 1472 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1473restart:
d8d1f30b 1474 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1475 /*
1476 * Current route is on-link; redirect is always invalid.
1477 *
1478 * Seems, previous statement is not true. It could
1479 * be node, which looks for us as on-link (f.e. proxy ndisc)
1480 * But then router serving it might decide, that we should
1481 * know truth 8)8) --ANK (980726).
1482 */
1483 if (rt6_check_expired(rt))
1484 continue;
1485 if (!(rt->rt6i_flags & RTF_GATEWAY))
1486 continue;
4c9483b2 1487 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
e843b9e1 1488 continue;
a6279458 1489 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1490 continue;
1491 break;
1492 }
a6279458 1493
cb15d9c2 1494 if (!rt)
8ed67789 1495 rt = net->ipv6.ip6_null_entry;
4c9483b2 1496 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1497out:
d8d1f30b 1498 dst_hold(&rt->dst);
a6279458 1499
c71099ac 1500 read_unlock_bh(&table->tb6_lock);
e843b9e1 1501
a6279458
YH
1502 return rt;
1503};
1504
1505static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1506 struct in6_addr *src,
1507 struct in6_addr *gateway,
1508 struct net_device *dev)
1509{
adaa70bb 1510 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1511 struct net *net = dev_net(dev);
a6279458 1512 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1513 .fl6 = {
1514 .flowi6_oif = dev->ifindex,
1515 .daddr = *dest,
1516 .saddr = *src,
a6279458 1517 },
a6279458 1518 };
adaa70bb 1519
86c36ce4
BH
1520 ipv6_addr_copy(&rdfl.gateway, gateway);
1521
adaa70bb
TG
1522 if (rt6_need_strict(dest))
1523 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1524
4c9483b2 1525 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1526 flags, __ip6_route_redirect);
a6279458
YH
1527}
1528
1529void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1530 struct in6_addr *saddr,
1531 struct neighbour *neigh, u8 *lladdr, int on_link)
1532{
1533 struct rt6_info *rt, *nrt = NULL;
1534 struct netevent_redirect netevent;
c346dca1 1535 struct net *net = dev_net(neigh->dev);
a6279458
YH
1536
1537 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1538
8ed67789 1539 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1540 if (net_ratelimit())
1541 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1542 "for redirect target\n");
a6279458 1543 goto out;
1da177e4
LT
1544 }
1545
1da177e4
LT
1546 /*
1547 * We have finally decided to accept it.
1548 */
1549
1ab1457c 1550 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1551 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1552 NEIGH_UPDATE_F_OVERRIDE|
1553 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1554 NEIGH_UPDATE_F_ISROUTER))
1555 );
1556
1557 /*
1558 * Redirect received -> path was valid.
1559 * Look, redirects are sent only in response to data packets,
1560 * so that this nexthop apparently is reachable. --ANK
1561 */
d8d1f30b 1562 dst_confirm(&rt->dst);
1da177e4
LT
1563
1564 /* Duplicate redirect: silently ignore. */
d8d1f30b 1565 if (neigh == rt->dst.neighbour)
1da177e4
LT
1566 goto out;
1567
1568 nrt = ip6_rt_copy(rt);
1569 if (nrt == NULL)
1570 goto out;
1571
1572 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1573 if (on_link)
1574 nrt->rt6i_flags &= ~RTF_GATEWAY;
1575
1576 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1577 nrt->rt6i_dst.plen = 128;
d8d1f30b 1578 nrt->dst.flags |= DST_HOST;
1da177e4
LT
1579
1580 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1581 nrt->rt6i_nexthop = neigh_clone(neigh);
1da177e4 1582
40e22e8f 1583 if (ip6_ins_rt(nrt))
1da177e4
LT
1584 goto out;
1585
d8d1f30b
CG
1586 netevent.old = &rt->dst;
1587 netevent.new = &nrt->dst;
8d71740c
TT
1588 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1589
1da177e4 1590 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1591 ip6_del_rt(rt);
1da177e4
LT
1592 return;
1593 }
1594
1595out:
d8d1f30b 1596 dst_release(&rt->dst);
1da177e4
LT
1597}
1598
1599/*
1600 * Handle ICMP "packet too big" messages
1601 * i.e. Path MTU discovery
1602 */
1603
ae878ae2
1604static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1605 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1606{
1607 struct rt6_info *rt, *nrt;
1608 int allfrag = 0;
d3052b55 1609again:
ae878ae2 1610 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1611 if (rt == NULL)
1612 return;
1613
d3052b55
AV
1614 if (rt6_check_expired(rt)) {
1615 ip6_del_rt(rt);
1616 goto again;
1617 }
1618
d8d1f30b 1619 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1620 goto out;
1621
1622 if (pmtu < IPV6_MIN_MTU) {
1623 /*
1ab1457c 1624 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1625 * MTU (1280) and a fragment header should always be included
1626 * after a node receiving Too Big message reporting PMTU is
1627 * less than the IPv6 Minimum Link MTU.
1628 */
1629 pmtu = IPV6_MIN_MTU;
1630 allfrag = 1;
1631 }
1632
1633 /* New mtu received -> path was valid.
1634 They are sent only in response to data packets,
1635 so that this nexthop apparently is reachable. --ANK
1636 */
d8d1f30b 1637 dst_confirm(&rt->dst);
1da177e4
LT
1638
1639 /* Host route. If it is static, it would be better
1640 not to override it, but add new one, so that
1641 when cache entry will expire old pmtu
1642 would return automatically.
1643 */
1644 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1645 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1646 if (allfrag) {
1647 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1648 features |= RTAX_FEATURE_ALLFRAG;
1649 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1650 }
d8d1f30b 1651 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1652 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1653 goto out;
1654 }
1655
1656 /* Network route.
1657 Two cases are possible:
1658 1. It is connected route. Action: COW
1659 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1660 */
d5315b50 1661 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1662 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1663 else
1664 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1665
d5315b50 1666 if (nrt) {
defb3519
DM
1667 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1668 if (allfrag) {
1669 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1670 features |= RTAX_FEATURE_ALLFRAG;
1671 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1672 }
a1e78363
YH
1673
1674 /* According to RFC 1981, detecting PMTU increase shouldn't be
1675 * happened within 5 mins, the recommended timer is 10 mins.
1676 * Here this route expiration time is set to ip6_rt_mtu_expires
1677 * which is 10 mins. After 10 mins the decreased pmtu is expired
1678 * and detecting PMTU increase will be automatically happened.
1679 */
d8d1f30b 1680 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1681 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1682
40e22e8f 1683 ip6_ins_rt(nrt);
1da177e4 1684 }
1da177e4 1685out:
d8d1f30b 1686 dst_release(&rt->dst);
1da177e4
LT
1687}
1688
ae878ae2
1689void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1690 struct net_device *dev, u32 pmtu)
1691{
1692 struct net *net = dev_net(dev);
1693
1694 /*
1695 * RFC 1981 states that a node "MUST reduce the size of the packets it
1696 * is sending along the path" that caused the Packet Too Big message.
1697 * Since it's not possible in the general case to determine which
1698 * interface was used to send the original packet, we update the MTU
1699 * on the interface that will be used to send future packets. We also
1700 * update the MTU on the interface that received the Packet Too Big in
1701 * case the original packet was forced out that interface with
1702 * SO_BINDTODEVICE or similar. This is the next best thing to the
1703 * correct behaviour, which would be to update the MTU on all
1704 * interfaces.
1705 */
1706 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1707 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1708}
1709
1da177e4
LT
1710/*
1711 * Misc support functions
1712 */
1713
1714static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1715{
c346dca1 1716 struct net *net = dev_net(ort->rt6i_dev);
86393e52 1717 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1718
1719 if (rt) {
d8d1f30b
CG
1720 rt->dst.input = ort->dst.input;
1721 rt->dst.output = ort->dst.output;
1722
defb3519 1723 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b
CG
1724 rt->dst.error = ort->dst.error;
1725 rt->dst.dev = ort->dst.dev;
1726 if (rt->dst.dev)
1727 dev_hold(rt->dst.dev);
1da177e4
LT
1728 rt->rt6i_idev = ort->rt6i_idev;
1729 if (rt->rt6i_idev)
1730 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1731 rt->dst.lastuse = jiffies;
1da177e4
LT
1732 rt->rt6i_expires = 0;
1733
1734 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1735 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1736 rt->rt6i_metric = 0;
1737
1738 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1739#ifdef CONFIG_IPV6_SUBTREES
1740 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1741#endif
c71099ac 1742 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1743 }
1744 return rt;
1745}
1746
70ceb4f5 1747#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1748static struct rt6_info *rt6_get_route_info(struct net *net,
1749 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1750 struct in6_addr *gwaddr, int ifindex)
1751{
1752 struct fib6_node *fn;
1753 struct rt6_info *rt = NULL;
c71099ac
TG
1754 struct fib6_table *table;
1755
efa2cea0 1756 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1757 if (table == NULL)
1758 return NULL;
70ceb4f5 1759
c71099ac
TG
1760 write_lock_bh(&table->tb6_lock);
1761 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1762 if (!fn)
1763 goto out;
1764
d8d1f30b 1765 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1766 if (rt->rt6i_dev->ifindex != ifindex)
1767 continue;
1768 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1769 continue;
1770 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1771 continue;
d8d1f30b 1772 dst_hold(&rt->dst);
70ceb4f5
YH
1773 break;
1774 }
1775out:
c71099ac 1776 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1777 return rt;
1778}
1779
efa2cea0
DL
1780static struct rt6_info *rt6_add_route_info(struct net *net,
1781 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1782 struct in6_addr *gwaddr, int ifindex,
1783 unsigned pref)
1784{
86872cb5
TG
1785 struct fib6_config cfg = {
1786 .fc_table = RT6_TABLE_INFO,
238fc7ea 1787 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1788 .fc_ifindex = ifindex,
1789 .fc_dst_len = prefixlen,
1790 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1791 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1792 .fc_nlinfo.pid = 0,
1793 .fc_nlinfo.nlh = NULL,
1794 .fc_nlinfo.nl_net = net,
86872cb5
TG
1795 };
1796
1797 ipv6_addr_copy(&cfg.fc_dst, prefix);
1798 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1799
e317da96
YH
1800 /* We should treat it as a default route if prefix length is 0. */
1801 if (!prefixlen)
86872cb5 1802 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1803
86872cb5 1804 ip6_route_add(&cfg);
70ceb4f5 1805
efa2cea0 1806 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1807}
1808#endif
1809
1da177e4 1810struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1811{
1da177e4 1812 struct rt6_info *rt;
c71099ac 1813 struct fib6_table *table;
1da177e4 1814
c346dca1 1815 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1816 if (table == NULL)
1817 return NULL;
1da177e4 1818
c71099ac 1819 write_lock_bh(&table->tb6_lock);
d8d1f30b 1820 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1821 if (dev == rt->rt6i_dev &&
045927ff 1822 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1823 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1824 break;
1825 }
1826 if (rt)
d8d1f30b 1827 dst_hold(&rt->dst);
c71099ac 1828 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1829 return rt;
1830}
1831
1832struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1833 struct net_device *dev,
1834 unsigned int pref)
1da177e4 1835{
86872cb5
TG
1836 struct fib6_config cfg = {
1837 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1838 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1839 .fc_ifindex = dev->ifindex,
1840 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1841 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1842 .fc_nlinfo.pid = 0,
1843 .fc_nlinfo.nlh = NULL,
c346dca1 1844 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1845 };
1da177e4 1846
86872cb5 1847 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1848
86872cb5 1849 ip6_route_add(&cfg);
1da177e4 1850
1da177e4
LT
1851 return rt6_get_dflt_router(gwaddr, dev);
1852}
1853
7b4da532 1854void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1855{
1856 struct rt6_info *rt;
c71099ac
TG
1857 struct fib6_table *table;
1858
1859 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1860 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1861 if (table == NULL)
1862 return;
1da177e4
LT
1863
1864restart:
c71099ac 1865 read_lock_bh(&table->tb6_lock);
d8d1f30b 1866 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1867 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1868 dst_hold(&rt->dst);
c71099ac 1869 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1870 ip6_del_rt(rt);
1da177e4
LT
1871 goto restart;
1872 }
1873 }
c71099ac 1874 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1875}
1876
5578689a
DL
1877static void rtmsg_to_fib6_config(struct net *net,
1878 struct in6_rtmsg *rtmsg,
86872cb5
TG
1879 struct fib6_config *cfg)
1880{
1881 memset(cfg, 0, sizeof(*cfg));
1882
1883 cfg->fc_table = RT6_TABLE_MAIN;
1884 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1885 cfg->fc_metric = rtmsg->rtmsg_metric;
1886 cfg->fc_expires = rtmsg->rtmsg_info;
1887 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1888 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1889 cfg->fc_flags = rtmsg->rtmsg_flags;
1890
5578689a 1891 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1892
86872cb5
TG
1893 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1894 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1895 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1896}
1897
5578689a 1898int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1899{
86872cb5 1900 struct fib6_config cfg;
1da177e4
LT
1901 struct in6_rtmsg rtmsg;
1902 int err;
1903
1904 switch(cmd) {
1905 case SIOCADDRT: /* Add a route */
1906 case SIOCDELRT: /* Delete a route */
1907 if (!capable(CAP_NET_ADMIN))
1908 return -EPERM;
1909 err = copy_from_user(&rtmsg, arg,
1910 sizeof(struct in6_rtmsg));
1911 if (err)
1912 return -EFAULT;
86872cb5 1913
5578689a 1914 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1915
1da177e4
LT
1916 rtnl_lock();
1917 switch (cmd) {
1918 case SIOCADDRT:
86872cb5 1919 err = ip6_route_add(&cfg);
1da177e4
LT
1920 break;
1921 case SIOCDELRT:
86872cb5 1922 err = ip6_route_del(&cfg);
1da177e4
LT
1923 break;
1924 default:
1925 err = -EINVAL;
1926 }
1927 rtnl_unlock();
1928
1929 return err;
3ff50b79 1930 }
1da177e4
LT
1931
1932 return -EINVAL;
1933}
1934
1935/*
1936 * Drop the packet on the floor
1937 */
1938
d5fdd6ba 1939static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1940{
612f09e8 1941 int type;
adf30907 1942 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1943 switch (ipstats_mib_noroutes) {
1944 case IPSTATS_MIB_INNOROUTES:
0660e03f 1945 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1946 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1947 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1948 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1949 break;
1950 }
1951 /* FALLTHROUGH */
1952 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1953 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1954 ipstats_mib_noroutes);
612f09e8
YH
1955 break;
1956 }
3ffe533c 1957 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1958 kfree_skb(skb);
1959 return 0;
1960}
1961
9ce8ade0
TG
1962static int ip6_pkt_discard(struct sk_buff *skb)
1963{
612f09e8 1964 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1965}
1966
20380731 1967static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 1968{
adf30907 1969 skb->dev = skb_dst(skb)->dev;
612f09e8 1970 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1971}
1972
6723ab54
DM
1973#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1974
9ce8ade0
TG
1975static int ip6_pkt_prohibit(struct sk_buff *skb)
1976{
612f09e8 1977 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1978}
1979
1980static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1981{
adf30907 1982 skb->dev = skb_dst(skb)->dev;
612f09e8 1983 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1984}
1985
6723ab54
DM
1986#endif
1987
1da177e4
LT
1988/*
1989 * Allocate a dst for local (unicast / anycast) address.
1990 */
1991
1992struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1993 const struct in6_addr *addr,
1994 int anycast)
1995{
c346dca1 1996 struct net *net = dev_net(idev->dev);
86393e52 1997 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
14deae41 1998 struct neighbour *neigh;
1da177e4 1999
40385653
BG
2000 if (rt == NULL) {
2001 if (net_ratelimit())
2002 pr_warning("IPv6: Maximum number of routes reached,"
2003 " consider increasing route/max_size.\n");
1da177e4 2004 return ERR_PTR(-ENOMEM);
40385653 2005 }
1da177e4 2006
5578689a 2007 dev_hold(net->loopback_dev);
1da177e4
LT
2008 in6_dev_hold(idev);
2009
d8d1f30b
CG
2010 rt->dst.flags = DST_HOST;
2011 rt->dst.input = ip6_input;
2012 rt->dst.output = ip6_output;
5578689a 2013 rt->rt6i_dev = net->loopback_dev;
1da177e4 2014 rt->rt6i_idev = idev;
defb3519 2015 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
d8d1f30b 2016 rt->dst.obsolete = -1;
1da177e4
LT
2017
2018 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2019 if (anycast)
2020 rt->rt6i_flags |= RTF_ANYCAST;
2021 else
1da177e4 2022 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
2023 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2024 if (IS_ERR(neigh)) {
d8d1f30b 2025 dst_free(&rt->dst);
14deae41 2026
29546a64 2027 return ERR_CAST(neigh);
1da177e4 2028 }
14deae41 2029 rt->rt6i_nexthop = neigh;
1da177e4
LT
2030
2031 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2032 rt->rt6i_dst.plen = 128;
5578689a 2033 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2034
d8d1f30b 2035 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2036
2037 return rt;
2038}
2039
8ed67789
DL
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches any */
	struct net *net;	/* namespace whose null entry is spared */
};
2044
1da177e4
LT
2045static int fib6_ifdown(struct rt6_info *rt, void *arg)
2046{
bc3ef660 2047 const struct arg_dev_net *adn = arg;
2048 const struct net_device *dev = adn->dev;
8ed67789 2049
bc3ef660 2050 if ((rt->rt6i_dev == dev || dev == NULL) &&
2051 rt != adn->net->ipv6.ip6_null_entry) {
1da177e4
LT
2052 RT6_TRACE("deleted by ifdown %p\n", rt);
2053 return -1;
2054 }
2055 return 0;
2056}
2057
f3db4851 2058void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2059{
8ed67789
DL
2060 struct arg_dev_net adn = {
2061 .dev = dev,
2062 .net = net,
2063 };
2064
2065 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2066 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2067}
2068
/*
 * Argument bundle for rt6_mtu_change_route(): the device whose MTU
 * changed and the new MTU value.
 */
struct rt6_mtu_change_arg {
	struct net_device *dev;
	unsigned mtu;
};
2074
2075static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2076{
2077 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2078 struct inet6_dev *idev;
2079
2080 /* In IPv6 pmtu discovery is not optional,
2081 so that RTAX_MTU lock cannot disable it.
2082 We still use this lock to block changes
2083 caused by addrconf/ndisc.
2084 */
2085
2086 idev = __in6_dev_get(arg->dev);
2087 if (idev == NULL)
2088 return 0;
2089
2090 /* For administrative MTU increase, there is no way to discover
2091 IPv6 PMTU increase, so PMTU increase should be updated here.
2092 Since RFC 1981 doesn't include administrative MTU increase
2093 update PMTU increase is a MUST. (i.e. jumbo frame)
2094 */
2095 /*
2096 If new MTU is less than route PMTU, this new MTU will be the
2097 lowest MTU in the path, update the route PMTU to reflect PMTU
2098 decreases; if new MTU is greater than route PMTU, and the
2099 old MTU is the lowest MTU in the path, update the route PMTU
2100 to reflect the increase. In this case if the other nodes' MTU
2101 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2102 PMTU discouvery.
2103 */
2104 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2105 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2106 (dst_mtu(&rt->dst) >= arg->mtu ||
2107 (dst_mtu(&rt->dst) < arg->mtu &&
2108 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2109 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2110 }
1da177e4
LT
2111 return 0;
2112}
2113
2114void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2115{
c71099ac
TG
2116 struct rt6_mtu_change_arg arg = {
2117 .dev = dev,
2118 .mtu = mtu,
2119 };
1da177e4 2120
c346dca1 2121 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2122}
2123
ef7c79ed 2124static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2125 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2126 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2127 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2128 [RTA_PRIORITY] = { .type = NLA_U32 },
2129 [RTA_METRICS] = { .type = NLA_NESTED },
2130};
2131
2132static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2133 struct fib6_config *cfg)
1da177e4 2134{
86872cb5
TG
2135 struct rtmsg *rtm;
2136 struct nlattr *tb[RTA_MAX+1];
2137 int err;
1da177e4 2138
86872cb5
TG
2139 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2140 if (err < 0)
2141 goto errout;
1da177e4 2142
86872cb5
TG
2143 err = -EINVAL;
2144 rtm = nlmsg_data(nlh);
2145 memset(cfg, 0, sizeof(*cfg));
2146
2147 cfg->fc_table = rtm->rtm_table;
2148 cfg->fc_dst_len = rtm->rtm_dst_len;
2149 cfg->fc_src_len = rtm->rtm_src_len;
2150 cfg->fc_flags = RTF_UP;
2151 cfg->fc_protocol = rtm->rtm_protocol;
2152
2153 if (rtm->rtm_type == RTN_UNREACHABLE)
2154 cfg->fc_flags |= RTF_REJECT;
2155
ab79ad14
2156 if (rtm->rtm_type == RTN_LOCAL)
2157 cfg->fc_flags |= RTF_LOCAL;
2158
86872cb5
TG
2159 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2160 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2161 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2162
2163 if (tb[RTA_GATEWAY]) {
2164 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2165 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2166 }
86872cb5
TG
2167
2168 if (tb[RTA_DST]) {
2169 int plen = (rtm->rtm_dst_len + 7) >> 3;
2170
2171 if (nla_len(tb[RTA_DST]) < plen)
2172 goto errout;
2173
2174 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2175 }
86872cb5
TG
2176
2177 if (tb[RTA_SRC]) {
2178 int plen = (rtm->rtm_src_len + 7) >> 3;
2179
2180 if (nla_len(tb[RTA_SRC]) < plen)
2181 goto errout;
2182
2183 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2184 }
86872cb5
TG
2185
2186 if (tb[RTA_OIF])
2187 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2188
2189 if (tb[RTA_PRIORITY])
2190 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2191
2192 if (tb[RTA_METRICS]) {
2193 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2194 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2195 }
86872cb5
TG
2196
2197 if (tb[RTA_TABLE])
2198 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2199
2200 err = 0;
2201errout:
2202 return err;
1da177e4
LT
2203}
2204
c127ea2c 2205static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2206{
86872cb5
TG
2207 struct fib6_config cfg;
2208 int err;
1da177e4 2209
86872cb5
TG
2210 err = rtm_to_fib6_config(skb, nlh, &cfg);
2211 if (err < 0)
2212 return err;
2213
2214 return ip6_route_del(&cfg);
1da177e4
LT
2215}
2216
c127ea2c 2217static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2218{
86872cb5
TG
2219 struct fib6_config cfg;
2220 int err;
1da177e4 2221
86872cb5
TG
2222 err = rtm_to_fib6_config(skb, nlh, &cfg);
2223 if (err < 0)
2224 return err;
2225
2226 return ip6_route_add(&cfg);
1da177e4
LT
2227}
2228
339bf98f
TG
2229static inline size_t rt6_nlmsg_size(void)
2230{
2231 return NLMSG_ALIGN(sizeof(struct rtmsg))
2232 + nla_total_size(16) /* RTA_SRC */
2233 + nla_total_size(16) /* RTA_DST */
2234 + nla_total_size(16) /* RTA_GATEWAY */
2235 + nla_total_size(16) /* RTA_PREFSRC */
2236 + nla_total_size(4) /* RTA_TABLE */
2237 + nla_total_size(4) /* RTA_IIF */
2238 + nla_total_size(4) /* RTA_OIF */
2239 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2240 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2241 + nla_total_size(sizeof(struct rta_cacheinfo));
2242}
2243
191cd582
BH
2244static int rt6_fill_node(struct net *net,
2245 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2246 struct in6_addr *dst, struct in6_addr *src,
2247 int iif, int type, u32 pid, u32 seq,
7bc570c8 2248 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2249{
2250 struct rtmsg *rtm;
2d7202bf 2251 struct nlmsghdr *nlh;
e3703b3d 2252 long expires;
9e762a4a 2253 u32 table;
1da177e4
LT
2254
2255 if (prefix) { /* user wants prefix routes only */
2256 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2257 /* success since this is not a prefix route */
2258 return 1;
2259 }
2260 }
2261
2d7202bf
TG
2262 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2263 if (nlh == NULL)
26932566 2264 return -EMSGSIZE;
2d7202bf
TG
2265
2266 rtm = nlmsg_data(nlh);
1da177e4
LT
2267 rtm->rtm_family = AF_INET6;
2268 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2269 rtm->rtm_src_len = rt->rt6i_src.plen;
2270 rtm->rtm_tos = 0;
c71099ac 2271 if (rt->rt6i_table)
9e762a4a 2272 table = rt->rt6i_table->tb6_id;
c71099ac 2273 else
9e762a4a
PM
2274 table = RT6_TABLE_UNSPEC;
2275 rtm->rtm_table = table;
2d7202bf 2276 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2277 if (rt->rt6i_flags&RTF_REJECT)
2278 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2279 else if (rt->rt6i_flags&RTF_LOCAL)
2280 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2281 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2282 rtm->rtm_type = RTN_LOCAL;
2283 else
2284 rtm->rtm_type = RTN_UNICAST;
2285 rtm->rtm_flags = 0;
2286 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2287 rtm->rtm_protocol = rt->rt6i_protocol;
2288 if (rt->rt6i_flags&RTF_DYNAMIC)
2289 rtm->rtm_protocol = RTPROT_REDIRECT;
2290 else if (rt->rt6i_flags & RTF_ADDRCONF)
2291 rtm->rtm_protocol = RTPROT_KERNEL;
2292 else if (rt->rt6i_flags&RTF_DEFAULT)
2293 rtm->rtm_protocol = RTPROT_RA;
2294
2295 if (rt->rt6i_flags&RTF_CACHE)
2296 rtm->rtm_flags |= RTM_F_CLONED;
2297
2298 if (dst) {
2d7202bf 2299 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2300 rtm->rtm_dst_len = 128;
1da177e4 2301 } else if (rtm->rtm_dst_len)
2d7202bf 2302 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2303#ifdef CONFIG_IPV6_SUBTREES
2304 if (src) {
2d7202bf 2305 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2306 rtm->rtm_src_len = 128;
1da177e4 2307 } else if (rtm->rtm_src_len)
2d7202bf 2308 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2309#endif
7bc570c8
YH
2310 if (iif) {
2311#ifdef CONFIG_IPV6_MROUTE
2312 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2313 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2314 if (err <= 0) {
2315 if (!nowait) {
2316 if (err == 0)
2317 return 0;
2318 goto nla_put_failure;
2319 } else {
2320 if (err == -EMSGSIZE)
2321 goto nla_put_failure;
2322 }
2323 }
2324 } else
2325#endif
2326 NLA_PUT_U32(skb, RTA_IIF, iif);
2327 } else if (dst) {
d8d1f30b 2328 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
1da177e4 2329 struct in6_addr saddr_buf;
191cd582 2330 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
7cbca67c 2331 dst, 0, &saddr_buf) == 0)
2d7202bf 2332 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2333 }
2d7202bf 2334
defb3519 2335 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2336 goto nla_put_failure;
2337
d8d1f30b
CG
2338 if (rt->dst.neighbour)
2339 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2d7202bf 2340
d8d1f30b 2341 if (rt->dst.dev)
2d7202bf
TG
2342 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2343
2344 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2345
36e3deae
YH
2346 if (!(rt->rt6i_flags & RTF_EXPIRES))
2347 expires = 0;
2348 else if (rt->rt6i_expires - jiffies < INT_MAX)
2349 expires = rt->rt6i_expires - jiffies;
2350 else
2351 expires = INT_MAX;
69cdf8f9 2352
d8d1f30b
CG
2353 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2354 expires, rt->dst.error) < 0)
e3703b3d 2355 goto nla_put_failure;
2d7202bf
TG
2356
2357 return nlmsg_end(skb, nlh);
2358
2359nla_put_failure:
26932566
PM
2360 nlmsg_cancel(skb, nlh);
2361 return -EMSGSIZE;
1da177e4
LT
2362}
2363
1b43af54 2364int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2365{
2366 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2367 int prefix;
2368
2d7202bf
TG
2369 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2370 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2371 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2372 } else
2373 prefix = 0;
2374
191cd582
BH
2375 return rt6_fill_node(arg->net,
2376 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2377 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2378 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2379}
2380
c127ea2c 2381static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2382{
3b1e0a65 2383 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2384 struct nlattr *tb[RTA_MAX+1];
2385 struct rt6_info *rt;
1da177e4 2386 struct sk_buff *skb;
ab364a6f 2387 struct rtmsg *rtm;
4c9483b2 2388 struct flowi6 fl6;
ab364a6f 2389 int err, iif = 0;
1da177e4 2390
ab364a6f
TG
2391 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2392 if (err < 0)
2393 goto errout;
1da177e4 2394
ab364a6f 2395 err = -EINVAL;
4c9483b2 2396 memset(&fl6, 0, sizeof(fl6));
1da177e4 2397
ab364a6f
TG
2398 if (tb[RTA_SRC]) {
2399 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2400 goto errout;
2401
4c9483b2 2402 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
ab364a6f
TG
2403 }
2404
2405 if (tb[RTA_DST]) {
2406 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2407 goto errout;
2408
4c9483b2 2409 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
ab364a6f
TG
2410 }
2411
2412 if (tb[RTA_IIF])
2413 iif = nla_get_u32(tb[RTA_IIF]);
2414
2415 if (tb[RTA_OIF])
4c9483b2 2416 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2417
2418 if (iif) {
2419 struct net_device *dev;
5578689a 2420 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2421 if (!dev) {
2422 err = -ENODEV;
ab364a6f 2423 goto errout;
1da177e4
LT
2424 }
2425 }
2426
ab364a6f
TG
2427 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2428 if (skb == NULL) {
2429 err = -ENOBUFS;
2430 goto errout;
2431 }
1da177e4 2432
ab364a6f
TG
2433 /* Reserve room for dummy headers, this skb can pass
2434 through good chunk of routing engine.
2435 */
459a98ed 2436 skb_reset_mac_header(skb);
ab364a6f 2437 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2438
4c9483b2 2439 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
d8d1f30b 2440 skb_dst_set(skb, &rt->dst);
1da177e4 2441
4c9483b2 2442 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2443 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2444 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2445 if (err < 0) {
ab364a6f
TG
2446 kfree_skb(skb);
2447 goto errout;
1da177e4
LT
2448 }
2449
5578689a 2450 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2451errout:
1da177e4 2452 return err;
1da177e4
LT
2453}
2454
86872cb5 2455void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2456{
2457 struct sk_buff *skb;
5578689a 2458 struct net *net = info->nl_net;
528c4ceb
DL
2459 u32 seq;
2460 int err;
2461
2462 err = -ENOBUFS;
2463 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2464
339bf98f 2465 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2466 if (skb == NULL)
2467 goto errout;
2468
191cd582 2469 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2470 event, info->pid, seq, 0, 0, 0);
26932566
PM
2471 if (err < 0) {
2472 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2473 WARN_ON(err == -EMSGSIZE);
2474 kfree_skb(skb);
2475 goto errout;
2476 }
1ce85fe4
PNA
2477 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2478 info->nlh, gfp_any());
2479 return;
21713ebc
TG
2480errout:
2481 if (err < 0)
5578689a 2482 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2483}
2484
8ed67789
DL
2485static int ip6_route_dev_notify(struct notifier_block *this,
2486 unsigned long event, void *data)
2487{
2488 struct net_device *dev = (struct net_device *)data;
c346dca1 2489 struct net *net = dev_net(dev);
8ed67789
DL
2490
2491 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2492 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2493 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2494#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2495 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2496 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2497 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2498 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2499#endif
2500 }
2501
2502 return NOTIFY_OK;
2503}
2504
1da177e4
LT
2505/*
2506 * /proc
2507 */
2508
2509#ifdef CONFIG_PROC_FS
2510
1da177e4
LT
/*
 * Buffer-cursor bookkeeping for /proc output.
 * NOTE(review): no user of this struct is visible in this part of the
 * file; the seq_file based handlers below do not reference it.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2519
2520static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2521{
33120b30 2522 struct seq_file *m = p_arg;
1da177e4 2523
4b7a4274 2524 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2525
2526#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2527 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2528#else
33120b30 2529 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2530#endif
2531
2532 if (rt->rt6i_nexthop) {
4b7a4274 2533 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
1da177e4 2534 } else {
33120b30 2535 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2536 }
33120b30 2537 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2538 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2539 rt->dst.__use, rt->rt6i_flags,
33120b30 2540 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2541 return 0;
2542}
2543
33120b30 2544static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2545{
f3db4851
DL
2546 struct net *net = (struct net *)m->private;
2547 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2548 return 0;
2549}
1da177e4 2550
33120b30
AD
2551static int ipv6_route_open(struct inode *inode, struct file *file)
2552{
de05c557 2553 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2554}
2555
33120b30
AD
2556static const struct file_operations ipv6_route_proc_fops = {
2557 .owner = THIS_MODULE,
2558 .open = ipv6_route_open,
2559 .read = seq_read,
2560 .llseek = seq_lseek,
b6fcbdb4 2561 .release = single_release_net,
33120b30
AD
2562};
2563
1da177e4
LT
2564static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2565{
69ddb805 2566 struct net *net = (struct net *)seq->private;
1da177e4 2567 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2568 net->ipv6.rt6_stats->fib_nodes,
2569 net->ipv6.rt6_stats->fib_route_nodes,
2570 net->ipv6.rt6_stats->fib_rt_alloc,
2571 net->ipv6.rt6_stats->fib_rt_entries,
2572 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2573 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2574 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2575
2576 return 0;
2577}
2578
2579static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2580{
de05c557 2581 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2582}
2583
9a32144e 2584static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2585 .owner = THIS_MODULE,
2586 .open = rt6_stats_seq_open,
2587 .read = seq_read,
2588 .llseek = seq_lseek,
b6fcbdb4 2589 .release = single_release_net,
1da177e4
LT
2590};
2591#endif /* CONFIG_PROC_FS */
2592
2593#ifdef CONFIG_SYSCTL
2594
1da177e4 2595static
8d65af78 2596int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2597 void __user *buffer, size_t *lenp, loff_t *ppos)
2598{
c486da34
LAG
2599 struct net *net;
2600 int delay;
2601 if (!write)
1da177e4 2602 return -EINVAL;
c486da34
LAG
2603
2604 net = (struct net *)ctl->extra1;
2605 delay = net->ipv6.sysctl.flush_delay;
2606 proc_dointvec(ctl, write, buffer, lenp, ppos);
2607 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2608 return 0;
1da177e4
LT
2609}
2610
760f2d01 2611ctl_table ipv6_route_table_template[] = {
1ab1457c 2612 {
1da177e4 2613 .procname = "flush",
4990509f 2614 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2615 .maxlen = sizeof(int),
89c8b3a1 2616 .mode = 0200,
6d9f239a 2617 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2618 },
2619 {
1da177e4 2620 .procname = "gc_thresh",
9a7ec3a9 2621 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2622 .maxlen = sizeof(int),
2623 .mode = 0644,
6d9f239a 2624 .proc_handler = proc_dointvec,
1da177e4
LT
2625 },
2626 {
1da177e4 2627 .procname = "max_size",
4990509f 2628 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2629 .maxlen = sizeof(int),
2630 .mode = 0644,
6d9f239a 2631 .proc_handler = proc_dointvec,
1da177e4
LT
2632 },
2633 {
1da177e4 2634 .procname = "gc_min_interval",
4990509f 2635 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2636 .maxlen = sizeof(int),
2637 .mode = 0644,
6d9f239a 2638 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2639 },
2640 {
1da177e4 2641 .procname = "gc_timeout",
4990509f 2642 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2643 .maxlen = sizeof(int),
2644 .mode = 0644,
6d9f239a 2645 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2646 },
2647 {
1da177e4 2648 .procname = "gc_interval",
4990509f 2649 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2650 .maxlen = sizeof(int),
2651 .mode = 0644,
6d9f239a 2652 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2653 },
2654 {
1da177e4 2655 .procname = "gc_elasticity",
4990509f 2656 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2657 .maxlen = sizeof(int),
2658 .mode = 0644,
f3d3f616 2659 .proc_handler = proc_dointvec,
1da177e4
LT
2660 },
2661 {
1da177e4 2662 .procname = "mtu_expires",
4990509f 2663 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2664 .maxlen = sizeof(int),
2665 .mode = 0644,
6d9f239a 2666 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2667 },
2668 {
1da177e4 2669 .procname = "min_adv_mss",
4990509f 2670 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2671 .maxlen = sizeof(int),
2672 .mode = 0644,
f3d3f616 2673 .proc_handler = proc_dointvec,
1da177e4
LT
2674 },
2675 {
1da177e4 2676 .procname = "gc_min_interval_ms",
4990509f 2677 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2678 .maxlen = sizeof(int),
2679 .mode = 0644,
6d9f239a 2680 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2681 },
f8572d8f 2682 { }
1da177e4
LT
2683};
2684
2c8c1e72 2685struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2686{
2687 struct ctl_table *table;
2688
2689 table = kmemdup(ipv6_route_table_template,
2690 sizeof(ipv6_route_table_template),
2691 GFP_KERNEL);
5ee09105
YH
2692
2693 if (table) {
2694 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2695 table[0].extra1 = net;
86393e52 2696 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2697 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2698 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2699 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2700 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2701 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2702 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2703 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2704 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2705 }
2706
760f2d01
DL
2707 return table;
2708}
1da177e4
LT
2709#endif
2710
2c8c1e72 2711static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2712{
633d424b 2713 int ret = -ENOMEM;
8ed67789 2714
86393e52
AD
2715 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2716 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2717
fc66f95c
ED
2718 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2719 goto out_ip6_dst_ops;
2720
8ed67789
DL
2721 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2722 sizeof(*net->ipv6.ip6_null_entry),
2723 GFP_KERNEL);
2724 if (!net->ipv6.ip6_null_entry)
fc66f95c 2725 goto out_ip6_dst_entries;
d8d1f30b 2726 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2727 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2728 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2729 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2730 ip6_template_metrics, true);
8ed67789
DL
2731
2732#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2733 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2734 sizeof(*net->ipv6.ip6_prohibit_entry),
2735 GFP_KERNEL);
68fffc67
PZ
2736 if (!net->ipv6.ip6_prohibit_entry)
2737 goto out_ip6_null_entry;
d8d1f30b 2738 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2739 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2740 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2741 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2742 ip6_template_metrics, true);
8ed67789
DL
2743
2744 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2745 sizeof(*net->ipv6.ip6_blk_hole_entry),
2746 GFP_KERNEL);
68fffc67
PZ
2747 if (!net->ipv6.ip6_blk_hole_entry)
2748 goto out_ip6_prohibit_entry;
d8d1f30b 2749 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2750 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2751 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2752 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2753 ip6_template_metrics, true);
8ed67789
DL
2754#endif
2755
b339a47c
PZ
2756 net->ipv6.sysctl.flush_delay = 0;
2757 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2758 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2759 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2760 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2761 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2762 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2763 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2764
cdb18761
DL
2765#ifdef CONFIG_PROC_FS
2766 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2767 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2768#endif
6891a346
BT
2769 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2770
8ed67789
DL
2771 ret = 0;
2772out:
2773 return ret;
f2fc6a54 2774
68fffc67
PZ
2775#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2776out_ip6_prohibit_entry:
2777 kfree(net->ipv6.ip6_prohibit_entry);
2778out_ip6_null_entry:
2779 kfree(net->ipv6.ip6_null_entry);
2780#endif
fc66f95c
ED
2781out_ip6_dst_entries:
2782 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2783out_ip6_dst_ops:
f2fc6a54 2784 goto out;
cdb18761
DL
2785}
2786
2c8c1e72 2787static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2788{
2789#ifdef CONFIG_PROC_FS
2790 proc_net_remove(net, "ipv6_route");
2791 proc_net_remove(net, "rt6_stats");
2792#endif
8ed67789
DL
2793 kfree(net->ipv6.ip6_null_entry);
2794#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2795 kfree(net->ipv6.ip6_prohibit_entry);
2796 kfree(net->ipv6.ip6_blk_hole_entry);
2797#endif
41bb78b4 2798 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2799}
2800
2801static struct pernet_operations ip6_route_net_ops = {
2802 .init = ip6_route_net_init,
2803 .exit = ip6_route_net_exit,
2804};
2805
8ed67789
DL
2806static struct notifier_block ip6_route_dev_notifier = {
2807 .notifier_call = ip6_route_dev_notify,
2808 .priority = 0,
2809};
2810
433d49c3 2811int __init ip6_route_init(void)
1da177e4 2812{
433d49c3
DL
2813 int ret;
2814
9a7ec3a9
DL
2815 ret = -ENOMEM;
2816 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2817 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2818 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2819 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2820 goto out;
14e50e57 2821
fc66f95c 2822 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2823 if (ret)
bdb3289f 2824 goto out_kmem_cache;
bdb3289f 2825
fc66f95c
ED
2826 ret = register_pernet_subsys(&ip6_route_net_ops);
2827 if (ret)
2828 goto out_dst_entries;
2829
5dc121e9
AE
2830 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2831
8ed67789
DL
2832 /* Registering of the loopback is done before this portion of code,
2833 * the loopback reference in rt6_info will not be taken, do it
2834 * manually for init_net */
d8d1f30b 2835 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2836 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2837 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2838 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2839 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2840 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2841 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2842 #endif
433d49c3
DL
2843 ret = fib6_init();
2844 if (ret)
8ed67789 2845 goto out_register_subsys;
433d49c3 2846
433d49c3
DL
2847 ret = xfrm6_init();
2848 if (ret)
cdb18761 2849 goto out_fib6_init;
c35b7e72 2850
433d49c3
DL
2851 ret = fib6_rules_init();
2852 if (ret)
2853 goto xfrm6_init;
7e5449c2 2854
433d49c3
DL
2855 ret = -ENOBUFS;
2856 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2857 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2858 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2859 goto fib6_rules_init;
c127ea2c 2860
8ed67789 2861 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2862 if (ret)
2863 goto fib6_rules_init;
8ed67789 2864
433d49c3
DL
2865out:
2866 return ret;
2867
2868fib6_rules_init:
433d49c3
DL
2869 fib6_rules_cleanup();
2870xfrm6_init:
433d49c3 2871 xfrm6_fini();
433d49c3 2872out_fib6_init:
433d49c3 2873 fib6_gc_cleanup();
8ed67789
DL
2874out_register_subsys:
2875 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
2876out_dst_entries:
2877 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 2878out_kmem_cache:
f2fc6a54 2879 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2880 goto out;
1da177e4
LT
2881}
2882
2883void ip6_route_cleanup(void)
2884{
8ed67789 2885 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2886 fib6_rules_cleanup();
1da177e4 2887 xfrm6_fini();
1da177e4 2888 fib6_gc_cleanup();
8ed67789 2889 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 2890 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 2891 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2892}