xfrm: Kill XFRM_LOOKUP_WAIT flag.
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
5a0e3ad6 43#include <linux/slab.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  Set to 3 to get tracing: at that level
 * RDBG() and RT6_TRACE() expand to printk calls, otherwise they expand
 * to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif
74
1da177e4
LT
75static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 77static unsigned int ip6_default_advmss(const struct dst_entry *dst);
d33e4553 78static unsigned int ip6_default_mtu(const struct dst_entry *dst);
1da177e4
LT
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
70ceb4f5 90#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
91static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
efa2cea0
DL
95static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
97 struct in6_addr *gwaddr, int ifindex);
98#endif
99
06582540
DM
100static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
101{
102 struct rt6_info *rt = (struct rt6_info *) dst;
103 struct inet_peer *peer;
104 u32 *p = NULL;
105
106 if (!rt->rt6i_peer)
107 rt6_bind_peer(rt, 1);
108
109 peer = rt->rt6i_peer;
110 if (peer) {
111 u32 *old_p = __DST_METRICS_PTR(old);
112 unsigned long prev, new;
113
114 p = peer->metrics;
115 if (inet_metrics_new(peer))
116 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
117
118 new = (unsigned long) p;
119 prev = cmpxchg(&dst->_metrics, old, new);
120
121 if (prev != old) {
122 p = __DST_METRICS_PTR(prev);
123 if (prev & DST_METRICS_READ_ONLY)
124 p = NULL;
125 }
126 }
127 return p;
128}
129
9a7ec3a9 130static struct dst_ops ip6_dst_ops_template = {
1da177e4 131 .family = AF_INET6,
09640e63 132 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
133 .gc = ip6_dst_gc,
134 .gc_thresh = 1024,
135 .check = ip6_dst_check,
0dbaee3b 136 .default_advmss = ip6_default_advmss,
d33e4553 137 .default_mtu = ip6_default_mtu,
06582540 138 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
139 .destroy = ip6_dst_destroy,
140 .ifdown = ip6_dst_ifdown,
141 .negative_advice = ip6_negative_advice,
142 .link_failure = ip6_link_failure,
143 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 144 .local_out = __ip6_local_out,
1da177e4
LT
145};
146
ec831ea7
RD
/* default_mtu callback for blackhole dsts: always reports 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	const unsigned int blackhole_mtu = 0;

	return blackhole_mtu;
}
151
14e50e57
DM
152static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
153{
154}
155
156static struct dst_ops ip6_dst_blackhole_ops = {
157 .family = AF_INET6,
09640e63 158 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
159 .destroy = ip6_dst_destroy,
160 .check = ip6_dst_check,
ec831ea7 161 .default_mtu = ip6_blackhole_default_mtu,
214f45c9 162 .default_advmss = ip6_default_advmss,
14e50e57 163 .update_pmtu = ip6_rt_blackhole_update_pmtu,
14e50e57
DM
164};
165
62fa8a84
DM
166static const u32 ip6_template_metrics[RTAX_MAX] = {
167 [RTAX_HOPLIMIT - 1] = 255,
168};
169
bdb3289f 170static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
171 .dst = {
172 .__refcnt = ATOMIC_INIT(1),
173 .__use = 1,
174 .obsolete = -1,
175 .error = -ENETUNREACH,
d8d1f30b
CG
176 .input = ip6_pkt_discard,
177 .output = ip6_pkt_discard_out,
1da177e4
LT
178 },
179 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 180 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
181 .rt6i_metric = ~(u32) 0,
182 .rt6i_ref = ATOMIC_INIT(1),
183};
184
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route: a reject entry whose dst error is
 * -EACCES and whose handlers are the ip6_pkt_prohibit pair.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route: a reject entry whose dst error is
 * -EINVAL and whose handlers are both dst_discard.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
221
/*
 * Allocate a dst from @ops (passing 0 as dst_alloc()'s second argument)
 * and hand it back as an rt6_info.  Returns whatever dst_alloc() returns,
 * cast to the route type.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct dst_entry *entry = dst_alloc(ops, 0);

	return (struct rt6_info *)entry;
}
227
228static void ip6_dst_destroy(struct dst_entry *dst)
229{
230 struct rt6_info *rt = (struct rt6_info *)dst;
231 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 232 struct inet_peer *peer = rt->rt6i_peer;
1da177e4
LT
233
234 if (idev != NULL) {
235 rt->rt6i_idev = NULL;
236 in6_dev_put(idev);
1ab1457c 237 }
b3419363 238 if (peer) {
b3419363
DM
239 rt->rt6i_peer = NULL;
240 inet_putpeer(peer);
241 }
242}
243
6431cbc2
DM
244static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
245
246static u32 rt6_peer_genid(void)
247{
248 return atomic_read(&__rt6_peer_genid);
249}
250
b3419363
DM
251void rt6_bind_peer(struct rt6_info *rt, int create)
252{
253 struct inet_peer *peer;
254
b3419363
DM
255 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
256 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
257 inet_putpeer(peer);
6431cbc2
DM
258 else
259 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
260}
261
262static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
263 int how)
264{
265 struct rt6_info *rt = (struct rt6_info *)dst;
266 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 267 struct net_device *loopback_dev =
c346dca1 268 dev_net(dev)->loopback_dev;
1da177e4 269
5a3e55d6
DL
270 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
271 struct inet6_dev *loopback_idev =
272 in6_dev_get(loopback_dev);
1da177e4
LT
273 if (loopback_idev != NULL) {
274 rt->rt6i_idev = loopback_idev;
275 in6_dev_put(idev);
276 }
277 }
278}
279
280static __inline__ int rt6_check_expired(const struct rt6_info *rt)
281{
a02cec21
ED
282 return (rt->rt6i_flags & RTF_EXPIRES) &&
283 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
284}
285
c71099ac
TG
286static inline int rt6_need_strict(struct in6_addr *daddr)
287{
a02cec21
ED
288 return ipv6_addr_type(daddr) &
289 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
290}
291
1da177e4 292/*
c71099ac 293 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
294 */
295
8ed67789
DL
296static inline struct rt6_info *rt6_device_match(struct net *net,
297 struct rt6_info *rt,
dd3abc4e 298 struct in6_addr *saddr,
1da177e4 299 int oif,
d420895e 300 int flags)
1da177e4
LT
301{
302 struct rt6_info *local = NULL;
303 struct rt6_info *sprt;
304
dd3abc4e
YH
305 if (!oif && ipv6_addr_any(saddr))
306 goto out;
307
d8d1f30b 308 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
309 struct net_device *dev = sprt->rt6i_dev;
310
311 if (oif) {
1da177e4
LT
312 if (dev->ifindex == oif)
313 return sprt;
314 if (dev->flags & IFF_LOOPBACK) {
315 if (sprt->rt6i_idev == NULL ||
316 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 317 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 318 continue;
1ab1457c 319 if (local && (!oif ||
1da177e4
LT
320 local->rt6i_idev->dev->ifindex == oif))
321 continue;
322 }
323 local = sprt;
324 }
dd3abc4e
YH
325 } else {
326 if (ipv6_chk_addr(net, saddr, dev,
327 flags & RT6_LOOKUP_F_IFACE))
328 return sprt;
1da177e4 329 }
dd3abc4e 330 }
1da177e4 331
dd3abc4e 332 if (oif) {
1da177e4
LT
333 if (local)
334 return local;
335
d420895e 336 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 337 return net->ipv6.ip6_null_entry;
1da177e4 338 }
dd3abc4e 339out:
1da177e4
LT
340 return rt;
341}
342
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation to the
 * route's next hop when its neighbour entry is not in a NUD_VALID state.
 *
 * Okay, this does not seem to be appropriate for now, however, we need
 * to check if it is really so.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute; here the interval is the device's rtr_probe_interval setting,
 * measured from neigh->updated.
 *
 * neigh->updated is modified here, so the neighbour lock is taken for
 * writing.  That makes the "interval elapsed?" test and the timestamp
 * update one atomic step, so concurrent callers cannot both pass the
 * rate limit.  A NULL @rt, or a route without a next hop, is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Cheap unlocked pre-check; re-validated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies,
			neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* Solicit the next hop via its solicited-node multicast address. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
377
1da177e4 378/*
554cfb7e 379 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 380 */
b6f99a21 381static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
382{
383 struct net_device *dev = rt->rt6i_dev;
161980f4 384 if (!oif || dev->ifindex == oif)
554cfb7e 385 return 2;
161980f4
DM
386 if ((dev->flags & IFF_LOOPBACK) &&
387 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
388 return 1;
389 return 0;
554cfb7e 390}
1da177e4 391
b6f99a21 392static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 393{
554cfb7e 394 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 395 int m;
4d0c5911
YH
396 if (rt->rt6i_flags & RTF_NONEXTHOP ||
397 !(rt->rt6i_flags & RTF_GATEWAY))
398 m = 1;
399 else if (neigh) {
554cfb7e
YH
400 read_lock_bh(&neigh->lock);
401 if (neigh->nud_state & NUD_VALID)
4d0c5911 402 m = 2;
398bcbeb
YH
403#ifdef CONFIG_IPV6_ROUTER_PREF
404 else if (neigh->nud_state & NUD_FAILED)
405 m = 0;
406#endif
407 else
ea73ee23 408 m = 1;
554cfb7e 409 read_unlock_bh(&neigh->lock);
398bcbeb
YH
410 } else
411 m = 0;
554cfb7e 412 return m;
1da177e4
LT
413}
414
554cfb7e
YH
415static int rt6_score_route(struct rt6_info *rt, int oif,
416 int strict)
1da177e4 417{
4d0c5911 418 int m, n;
1ab1457c 419
4d0c5911 420 m = rt6_check_dev(rt, oif);
77d16f45 421 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 422 return -1;
ebacaaa0
YH
423#ifdef CONFIG_IPV6_ROUTER_PREF
424 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
425#endif
4d0c5911 426 n = rt6_check_neigh(rt);
557e92ef 427 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
428 return -1;
429 return m;
430}
431
f11e6659
DM
432static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
433 int *mpri, struct rt6_info *match)
554cfb7e 434{
f11e6659
DM
435 int m;
436
437 if (rt6_check_expired(rt))
438 goto out;
439
440 m = rt6_score_route(rt, oif, strict);
441 if (m < 0)
442 goto out;
443
444 if (m > *mpri) {
445 if (strict & RT6_LOOKUP_F_REACHABLE)
446 rt6_probe(match);
447 *mpri = m;
448 match = rt;
449 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
450 rt6_probe(rt);
451 }
452
453out:
454 return match;
455}
456
457static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
458 struct rt6_info *rr_head,
459 u32 metric, int oif, int strict)
460{
461 struct rt6_info *rt, *match;
554cfb7e 462 int mpri = -1;
1da177e4 463
f11e6659
DM
464 match = NULL;
465 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 466 rt = rt->dst.rt6_next)
f11e6659
DM
467 match = find_match(rt, oif, strict, &mpri, match);
468 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 469 rt = rt->dst.rt6_next)
f11e6659 470 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 471
f11e6659
DM
472 return match;
473}
1da177e4 474
f11e6659
DM
475static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
476{
477 struct rt6_info *match, *rt0;
8ed67789 478 struct net *net;
1da177e4 479
f11e6659 480 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 481 __func__, fn->leaf, oif);
554cfb7e 482
f11e6659
DM
483 rt0 = fn->rr_ptr;
484 if (!rt0)
485 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 486
f11e6659 487 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 488
554cfb7e 489 if (!match &&
f11e6659 490 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 491 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 492
554cfb7e 493 /* no entries matched; do round-robin */
f11e6659
DM
494 if (!next || next->rt6i_metric != rt0->rt6i_metric)
495 next = fn->leaf;
496
497 if (next != rt0)
498 fn->rr_ptr = next;
1da177e4 499 }
1da177e4 500
f11e6659 501 RT6_TRACE("%s() => %p\n",
0dc47877 502 __func__, match);
1da177e4 503
c346dca1 504 net = dev_net(rt0->rt6i_dev);
a02cec21 505 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
506}
507
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option (@opt, @len bytes) received on @dev
 * from the router @gwaddr.
 *
 * The option is validated (minimum size, option length versus prefix
 * length, route preference).  An existing route for the prefix is deleted
 * when the advertised lifetime is zero; a missing one is added when the
 * lifetime is non-zero; otherwise the existing route's preference bits
 * are refreshed.  The resulting route gets its expiry set from the
 * lifetime, or has RTF_EXPIRES cleared when the lifetime is not finite.
 *
 * Returns 0 on success, -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* Zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);

	if (!rt)
		return 0;

	if (!addrconf_finite_timeout(lifetime)) {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	} else {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
581
/*
 * BACKTRACK(__net, saddr): when the current result `rt` is the
 * namespace's null entry, walk from node `fn` towards the tree root
 * looking for another node that carries route info.
 *
 * The macro relies on the expanding function providing:
 *   - local variables named `rt` and `fn`;
 *   - a label `restart`, jumped to when a node with RTN_RTINFO is found;
 *   - a label `out`, jumped to when the top-level root is reached.
 * When the parent has a source subtree that is not the node we came
 * from, the lookup continues inside that subtree using @saddr.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
c71099ac 599
8ed67789
DL
600static struct rt6_info *ip6_pol_route_lookup(struct net *net,
601 struct fib6_table *table,
c71099ac 602 struct flowi *fl, int flags)
1da177e4
LT
603{
604 struct fib6_node *fn;
605 struct rt6_info *rt;
606
c71099ac
TG
607 read_lock_bh(&table->tb6_lock);
608 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
609restart:
610 rt = fn->leaf;
dd3abc4e 611 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
8ed67789 612 BACKTRACK(net, &fl->fl6_src);
c71099ac 613out:
d8d1f30b 614 dst_use(&rt->dst, jiffies);
c71099ac 615 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
616 return rt;
617
618}
619
9acd9f3a
YH
620struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
621 const struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
622{
623 struct flowi fl = {
624 .oif = oif,
5811662b 625 .fl6_dst = *daddr,
c71099ac
TG
626 };
627 struct dst_entry *dst;
77d16f45 628 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 629
adaa70bb
TG
630 if (saddr) {
631 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
632 flags |= RT6_LOOKUP_F_HAS_SADDR;
633 }
634
606a2b48 635 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
636 if (dst->error == 0)
637 return (struct rt6_info *) dst;
638
639 dst_release(dst);
640
1da177e4
LT
641 return NULL;
642}
643
7159039a
YH
644EXPORT_SYMBOL(rt6_lookup);
645
c71099ac 646/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
647 It takes new route entry, the addition fails by any reason the
648 route is freed. In any case, if caller does not hold it, it may
649 be destroyed.
650 */
651
86872cb5 652static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
653{
654 int err;
c71099ac 655 struct fib6_table *table;
1da177e4 656
c71099ac
TG
657 table = rt->rt6i_table;
658 write_lock_bh(&table->tb6_lock);
86872cb5 659 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 660 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
661
662 return err;
663}
664
40e22e8f
TG
665int ip6_ins_rt(struct rt6_info *rt)
666{
4d1169c1 667 struct nl_info info = {
c346dca1 668 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 669 };
528c4ceb 670 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
671}
672
95a9a5ba
YH
673static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
674 struct in6_addr *saddr)
1da177e4 675{
1da177e4
LT
676 struct rt6_info *rt;
677
678 /*
679 * Clone the route.
680 */
681
682 rt = ip6_rt_copy(ort);
683
684 if (rt) {
14deae41
DM
685 struct neighbour *neigh;
686 int attempts = !in_softirq();
687
58c4fb86
YH
688 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
689 if (rt->rt6i_dst.plen != 128 &&
690 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
691 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 692 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 693 }
1da177e4 694
58c4fb86 695 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
696 rt->rt6i_dst.plen = 128;
697 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 698 rt->dst.flags |= DST_HOST;
1da177e4
LT
699
700#ifdef CONFIG_IPV6_SUBTREES
701 if (rt->rt6i_src.plen && saddr) {
702 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
703 rt->rt6i_src.plen = 128;
704 }
705#endif
706
14deae41
DM
707 retry:
708 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
709 if (IS_ERR(neigh)) {
710 struct net *net = dev_net(rt->rt6i_dev);
711 int saved_rt_min_interval =
712 net->ipv6.sysctl.ip6_rt_gc_min_interval;
713 int saved_rt_elasticity =
714 net->ipv6.sysctl.ip6_rt_gc_elasticity;
715
716 if (attempts-- > 0) {
717 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
718 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
719
86393e52 720 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
721
722 net->ipv6.sysctl.ip6_rt_gc_elasticity =
723 saved_rt_elasticity;
724 net->ipv6.sysctl.ip6_rt_gc_min_interval =
725 saved_rt_min_interval;
726 goto retry;
727 }
728
729 if (net_ratelimit())
730 printk(KERN_WARNING
7e1b33e5 731 "ipv6: Neighbour table overflow.\n");
d8d1f30b 732 dst_free(&rt->dst);
14deae41
DM
733 return NULL;
734 }
735 rt->rt6i_nexthop = neigh;
1da177e4 736
95a9a5ba 737 }
1da177e4 738
95a9a5ba
YH
739 return rt;
740}
1da177e4 741
299d9939
YH
742static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
743{
744 struct rt6_info *rt = ip6_rt_copy(ort);
745 if (rt) {
746 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
747 rt->rt6i_dst.plen = 128;
748 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 749 rt->dst.flags |= DST_HOST;
299d9939
YH
750 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
751 }
752 return rt;
753}
754
8ed67789
DL
755static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
756 struct flowi *fl, int flags)
1da177e4
LT
757{
758 struct fib6_node *fn;
519fbd87 759 struct rt6_info *rt, *nrt;
c71099ac 760 int strict = 0;
1da177e4 761 int attempts = 3;
519fbd87 762 int err;
53b7997f 763 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 764
77d16f45 765 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
766
767relookup:
c71099ac 768 read_lock_bh(&table->tb6_lock);
1da177e4 769
8238dd06 770restart_2:
c71099ac 771 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
772
773restart:
4acad72d 774 rt = rt6_select(fn, oif, strict | reachable);
8ed67789
DL
775
776 BACKTRACK(net, &fl->fl6_src);
777 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 778 rt->rt6i_flags & RTF_CACHE)
1ddef044 779 goto out;
1da177e4 780
d8d1f30b 781 dst_hold(&rt->dst);
c71099ac 782 read_unlock_bh(&table->tb6_lock);
fb9de91e 783
519fbd87 784 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 785 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
d80bc0fd 786 else
c71099ac 787 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
e40cf353 788
d8d1f30b 789 dst_release(&rt->dst);
8ed67789 790 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 791
d8d1f30b 792 dst_hold(&rt->dst);
519fbd87 793 if (nrt) {
40e22e8f 794 err = ip6_ins_rt(nrt);
519fbd87 795 if (!err)
1da177e4 796 goto out2;
1da177e4 797 }
1da177e4 798
519fbd87
YH
799 if (--attempts <= 0)
800 goto out2;
801
802 /*
c71099ac 803 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
804 * released someone could insert this route. Relookup.
805 */
d8d1f30b 806 dst_release(&rt->dst);
519fbd87
YH
807 goto relookup;
808
809out:
8238dd06
YH
810 if (reachable) {
811 reachable = 0;
812 goto restart_2;
813 }
d8d1f30b 814 dst_hold(&rt->dst);
c71099ac 815 read_unlock_bh(&table->tb6_lock);
1da177e4 816out2:
d8d1f30b
CG
817 rt->dst.lastuse = jiffies;
818 rt->dst.__use++;
c71099ac
TG
819
820 return rt;
1da177e4
LT
821}
822
8ed67789 823static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4acad72d
PE
824 struct flowi *fl, int flags)
825{
8ed67789 826 return ip6_pol_route(net, table, fl->iif, fl, flags);
4acad72d
PE
827}
828
c71099ac
TG
829void ip6_route_input(struct sk_buff *skb)
830{
0660e03f 831 struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 832 struct net *net = dev_net(skb->dev);
adaa70bb 833 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
834 struct flowi fl = {
835 .iif = skb->dev->ifindex,
5811662b
CG
836 .fl6_dst = iph->daddr,
837 .fl6_src = iph->saddr,
838 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
1ab1457c 839 .mark = skb->mark,
c71099ac
TG
840 .proto = iph->nexthdr,
841 };
adaa70bb 842
1d6e55f1 843 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 844 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 845
adf30907 846 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
c71099ac
TG
847}
848
8ed67789 849static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
c71099ac 850 struct flowi *fl, int flags)
1da177e4 851{
8ed67789 852 return ip6_pol_route(net, table, fl->oif, fl, flags);
c71099ac
TG
853}
854
4591db4f
DL
855struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
856 struct flowi *fl)
c71099ac
TG
857{
858 int flags = 0;
859
6057fd78 860 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
77d16f45 861 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 862
adaa70bb
TG
863 if (!ipv6_addr_any(&fl->fl6_src))
864 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
865 else if (sk)
866 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 867
4591db4f 868 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
1da177e4
LT
869}
870
7159039a 871EXPORT_SYMBOL(ip6_route_output);
1da177e4 872
14e50e57
DM
873int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
874{
875 struct rt6_info *ort = (struct rt6_info *) *dstp;
876 struct rt6_info *rt = (struct rt6_info *)
3c7bd1a1 877 dst_alloc(&ip6_dst_blackhole_ops, 1);
14e50e57
DM
878 struct dst_entry *new = NULL;
879
880 if (rt) {
d8d1f30b 881 new = &rt->dst;
14e50e57 882
14e50e57 883 new->__use = 1;
352e512c
HX
884 new->input = dst_discard;
885 new->output = dst_discard;
14e50e57 886
defb3519 887 dst_copy_metrics(new, &ort->dst);
d8d1f30b 888 new->dev = ort->dst.dev;
14e50e57
DM
889 if (new->dev)
890 dev_hold(new->dev);
891 rt->rt6i_idev = ort->rt6i_idev;
892 if (rt->rt6i_idev)
893 in6_dev_hold(rt->rt6i_idev);
894 rt->rt6i_expires = 0;
895
896 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
897 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
898 rt->rt6i_metric = 0;
899
900 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
901#ifdef CONFIG_IPV6_SUBTREES
902 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
903#endif
904
905 dst_free(new);
906 }
907
908 dst_release(*dstp);
909 *dstp = new;
a02cec21 910 return new ? 0 : -ENOMEM;
14e50e57
DM
911}
912EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
913
1da177e4
LT
914/*
915 * Destination cache support functions
916 */
917
918static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
919{
920 struct rt6_info *rt;
921
922 rt = (struct rt6_info *) dst;
923
6431cbc2
DM
924 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
925 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
926 if (!rt->rt6i_peer)
927 rt6_bind_peer(rt, 0);
928 rt->rt6i_peer_genid = rt6_peer_genid();
929 }
1da177e4 930 return dst;
6431cbc2 931 }
1da177e4
LT
932 return NULL;
933}
934
935static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
936{
937 struct rt6_info *rt = (struct rt6_info *) dst;
938
939 if (rt) {
54c1a859
YH
940 if (rt->rt6i_flags & RTF_CACHE) {
941 if (rt6_check_expired(rt)) {
942 ip6_del_rt(rt);
943 dst = NULL;
944 }
945 } else {
1da177e4 946 dst_release(dst);
54c1a859
YH
947 dst = NULL;
948 }
1da177e4 949 }
54c1a859 950 return dst;
1da177e4
LT
951}
952
953static void ip6_link_failure(struct sk_buff *skb)
954{
955 struct rt6_info *rt;
956
3ffe533c 957 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 958
adf30907 959 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
960 if (rt) {
961 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 962 dst_set_expires(&rt->dst, 0);
1da177e4
LT
963 rt->rt6i_flags |= RTF_EXPIRES;
964 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
965 rt->rt6i_node->fn_sernum = -1;
966 }
967}
968
969static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
970{
971 struct rt6_info *rt6 = (struct rt6_info*)dst;
972
973 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
974 rt6->rt6i_flags |= RTF_MODIFIED;
975 if (mtu < IPV6_MIN_MTU) {
defb3519 976 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 977 mtu = IPV6_MIN_MTU;
defb3519
DM
978 features |= RTAX_FEATURE_ALLFRAG;
979 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 980 }
defb3519 981 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
982 }
983}
984
0dbaee3b 985static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 986{
0dbaee3b
DM
987 struct net_device *dev = dst->dev;
988 unsigned int mtu = dst_mtu(dst);
989 struct net *net = dev_net(dev);
990
1da177e4
LT
991 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
992
5578689a
DL
993 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
994 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
995
996 /*
1ab1457c
YH
997 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
998 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
999 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1000 * rely only on pmtu discovery"
1001 */
1002 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1003 mtu = IPV6_MAXPLEN;
1004 return mtu;
1005}
1006
d33e4553
DM
1007static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1008{
1009 unsigned int mtu = IPV6_MIN_MTU;
1010 struct inet6_dev *idev;
1011
1012 rcu_read_lock();
1013 idev = __in6_dev_get(dst->dev);
1014 if (idev)
1015 mtu = idev->cnf.mtu6;
1016 rcu_read_unlock();
1017
1018 return mtu;
1019}
1020
3b00944c
YH
/* Singly linked list (via dst->next) of dsts handed out by icmp6_dst_alloc(). */
static struct dst_entry *icmp6_dst_gc_list;
/* Protects icmp6_dst_gc_list. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1023
3b00944c 1024struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1025 struct neighbour *neigh,
9acd9f3a 1026 const struct in6_addr *addr)
1da177e4
LT
1027{
1028 struct rt6_info *rt;
1029 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1030 struct net *net = dev_net(dev);
1da177e4
LT
1031
1032 if (unlikely(idev == NULL))
1033 return NULL;
1034
86393e52 1035 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1036 if (unlikely(rt == NULL)) {
1037 in6_dev_put(idev);
1038 goto out;
1039 }
1040
1041 dev_hold(dev);
1042 if (neigh)
1043 neigh_hold(neigh);
14deae41 1044 else {
1da177e4 1045 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
1046 if (IS_ERR(neigh))
1047 neigh = NULL;
1048 }
1da177e4
LT
1049
1050 rt->rt6i_dev = dev;
1051 rt->rt6i_idev = idev;
1052 rt->rt6i_nexthop = neigh;
d8d1f30b 1053 atomic_set(&rt->dst.__refcnt, 1);
defb3519 1054 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
d8d1f30b 1055 rt->dst.output = ip6_output;
1da177e4
LT
1056
1057#if 0 /* there's no chance to use these for ndisc */
d8d1f30b 1058 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1ab1457c 1059 ? DST_HOST
1da177e4
LT
1060 : 0;
1061 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1062 rt->rt6i_dst.plen = 128;
1063#endif
1064
3b00944c 1065 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1066 rt->dst.next = icmp6_dst_gc_list;
1067 icmp6_dst_gc_list = &rt->dst;
3b00944c 1068 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1069
5578689a 1070 fib6_force_start_gc(net);
1da177e4
LT
1071
1072out:
d8d1f30b 1073 return &rt->dst;
1da177e4
LT
1074}
1075
3d0f24a7 1076int icmp6_dst_gc(void)
1da177e4 1077{
e9476e95 1078 struct dst_entry *dst, **pprev;
3d0f24a7 1079 int more = 0;
1da177e4 1080
3b00944c
YH
1081 spin_lock_bh(&icmp6_dst_lock);
1082 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1083
1da177e4
LT
1084 while ((dst = *pprev) != NULL) {
1085 if (!atomic_read(&dst->__refcnt)) {
1086 *pprev = dst->next;
1087 dst_free(dst);
1da177e4
LT
1088 } else {
1089 pprev = &dst->next;
3d0f24a7 1090 ++more;
1da177e4
LT
1091 }
1092 }
1093
3b00944c 1094 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1095
3d0f24a7 1096 return more;
1da177e4
LT
1097}
1098
1e493d19
DM
1099static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1100 void *arg)
1101{
1102 struct dst_entry *dst, **pprev;
1103
1104 spin_lock_bh(&icmp6_dst_lock);
1105 pprev = &icmp6_dst_gc_list;
1106 while ((dst = *pprev) != NULL) {
1107 struct rt6_info *rt = (struct rt6_info *) dst;
1108 if (func(rt, arg)) {
1109 *pprev = dst->next;
1110 dst_free(dst);
1111 } else {
1112 pprev = &dst->next;
1113 }
1114 }
1115 spin_unlock_bh(&icmp6_dst_lock);
1116}
1117
569d3645 1118static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1119{
1da177e4 1120 unsigned long now = jiffies;
86393e52 1121 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1122 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1123 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1124 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1125 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1126 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1127 int entries;
7019b78e 1128
fc66f95c 1129 entries = dst_entries_get_fast(ops);
7019b78e 1130 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1131 entries <= rt_max_size)
1da177e4
LT
1132 goto out;
1133
6891a346
BT
1134 net->ipv6.ip6_rt_gc_expire++;
1135 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1136 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1137 entries = dst_entries_get_slow(ops);
1138 if (entries < ops->gc_thresh)
7019b78e 1139 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1140out:
7019b78e 1141 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1142 return entries > rt_max_size;
1da177e4
LT
1143}
1144
1145/* Clean host part of a prefix. Not necessary in radix tree,
1146 but results in cleaner routing tables.
1147
1148 Remove it only when all the things will work!
1149 */
1150
6b75d090 1151int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1152{
5170ae82 1153 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1154 if (hoplimit == 0) {
6b75d090 1155 struct net_device *dev = dst->dev;
c68f24cc
ED
1156 struct inet6_dev *idev;
1157
1158 rcu_read_lock();
1159 idev = __in6_dev_get(dev);
1160 if (idev)
6b75d090 1161 hoplimit = idev->cnf.hop_limit;
c68f24cc 1162 else
53b7997f 1163 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1164 rcu_read_unlock();
1da177e4
LT
1165 }
1166 return hoplimit;
1167}
abbf46ae 1168EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1169
1170/*
1171 *
1172 */
1173
86872cb5 1174int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1175{
1176 int err;
5578689a 1177 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1178 struct rt6_info *rt = NULL;
1179 struct net_device *dev = NULL;
1180 struct inet6_dev *idev = NULL;
c71099ac 1181 struct fib6_table *table;
1da177e4
LT
1182 int addr_type;
1183
86872cb5 1184 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1185 return -EINVAL;
1186#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1187 if (cfg->fc_src_len)
1da177e4
LT
1188 return -EINVAL;
1189#endif
86872cb5 1190 if (cfg->fc_ifindex) {
1da177e4 1191 err = -ENODEV;
5578689a 1192 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1193 if (!dev)
1194 goto out;
1195 idev = in6_dev_get(dev);
1196 if (!idev)
1197 goto out;
1198 }
1199
86872cb5
TG
1200 if (cfg->fc_metric == 0)
1201 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1202
5578689a 1203 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1204 if (table == NULL) {
1205 err = -ENOBUFS;
1206 goto out;
1207 }
1208
86393e52 1209 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1210
1211 if (rt == NULL) {
1212 err = -ENOMEM;
1213 goto out;
1214 }
1215
d8d1f30b 1216 rt->dst.obsolete = -1;
6f704992
YH
1217 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1218 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1219 0;
1da177e4 1220
86872cb5
TG
1221 if (cfg->fc_protocol == RTPROT_UNSPEC)
1222 cfg->fc_protocol = RTPROT_BOOT;
1223 rt->rt6i_protocol = cfg->fc_protocol;
1224
1225 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1226
1227 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1228 rt->dst.input = ip6_mc_input;
ab79ad14
1229 else if (cfg->fc_flags & RTF_LOCAL)
1230 rt->dst.input = ip6_input;
1da177e4 1231 else
d8d1f30b 1232 rt->dst.input = ip6_forward;
1da177e4 1233
d8d1f30b 1234 rt->dst.output = ip6_output;
1da177e4 1235
86872cb5
TG
1236 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1237 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1238 if (rt->rt6i_dst.plen == 128)
d8d1f30b 1239 rt->dst.flags = DST_HOST;
1da177e4
LT
1240
1241#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1242 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1243 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1244#endif
1245
86872cb5 1246 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1247
1248 /* We cannot add true routes via loopback here,
1249 they would result in kernel looping; promote them to reject routes
1250 */
86872cb5 1251 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1252 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1253 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1254 /* hold loopback dev/idev if we haven't done so. */
5578689a 1255 if (dev != net->loopback_dev) {
1da177e4
LT
1256 if (dev) {
1257 dev_put(dev);
1258 in6_dev_put(idev);
1259 }
5578689a 1260 dev = net->loopback_dev;
1da177e4
LT
1261 dev_hold(dev);
1262 idev = in6_dev_get(dev);
1263 if (!idev) {
1264 err = -ENODEV;
1265 goto out;
1266 }
1267 }
d8d1f30b
CG
1268 rt->dst.output = ip6_pkt_discard_out;
1269 rt->dst.input = ip6_pkt_discard;
1270 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1271 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1272 goto install_route;
1273 }
1274
86872cb5 1275 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1276 struct in6_addr *gw_addr;
1277 int gwa_type;
1278
86872cb5
TG
1279 gw_addr = &cfg->fc_gateway;
1280 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1281 gwa_type = ipv6_addr_type(gw_addr);
1282
1283 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1284 struct rt6_info *grt;
1285
1286 /* IPv6 strictly inhibits using not link-local
1287 addresses as nexthop address.
1288 Otherwise, router will not able to send redirects.
1289 It is very good, but in some (rare!) circumstances
1290 (SIT, PtP, NBMA NOARP links) it is handy to allow
1291 some exceptions. --ANK
1292 */
1293 err = -EINVAL;
1294 if (!(gwa_type&IPV6_ADDR_UNICAST))
1295 goto out;
1296
5578689a 1297 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1298
1299 err = -EHOSTUNREACH;
1300 if (grt == NULL)
1301 goto out;
1302 if (dev) {
1303 if (dev != grt->rt6i_dev) {
d8d1f30b 1304 dst_release(&grt->dst);
1da177e4
LT
1305 goto out;
1306 }
1307 } else {
1308 dev = grt->rt6i_dev;
1309 idev = grt->rt6i_idev;
1310 dev_hold(dev);
1311 in6_dev_hold(grt->rt6i_idev);
1312 }
1313 if (!(grt->rt6i_flags&RTF_GATEWAY))
1314 err = 0;
d8d1f30b 1315 dst_release(&grt->dst);
1da177e4
LT
1316
1317 if (err)
1318 goto out;
1319 }
1320 err = -EINVAL;
1321 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1322 goto out;
1323 }
1324
1325 err = -ENODEV;
1326 if (dev == NULL)
1327 goto out;
1328
86872cb5 1329 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1330 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1331 if (IS_ERR(rt->rt6i_nexthop)) {
1332 err = PTR_ERR(rt->rt6i_nexthop);
1333 rt->rt6i_nexthop = NULL;
1334 goto out;
1335 }
1336 }
1337
86872cb5 1338 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1339
1340install_route:
86872cb5
TG
1341 if (cfg->fc_mx) {
1342 struct nlattr *nla;
1343 int remaining;
1344
1345 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1346 int type = nla_type(nla);
86872cb5
TG
1347
1348 if (type) {
1349 if (type > RTAX_MAX) {
1da177e4
LT
1350 err = -EINVAL;
1351 goto out;
1352 }
86872cb5 1353
defb3519 1354 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1355 }
1da177e4
LT
1356 }
1357 }
1358
d8d1f30b 1359 rt->dst.dev = dev;
1da177e4 1360 rt->rt6i_idev = idev;
c71099ac 1361 rt->rt6i_table = table;
63152fc0 1362
c346dca1 1363 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1364
86872cb5 1365 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1366
1367out:
1368 if (dev)
1369 dev_put(dev);
1370 if (idev)
1371 in6_dev_put(idev);
1372 if (rt)
d8d1f30b 1373 dst_free(&rt->dst);
1da177e4
LT
1374 return err;
1375}
1376
86872cb5 1377static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1378{
1379 int err;
c71099ac 1380 struct fib6_table *table;
c346dca1 1381 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1382
8ed67789 1383 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1384 return -ENOENT;
1385
c71099ac
TG
1386 table = rt->rt6i_table;
1387 write_lock_bh(&table->tb6_lock);
1da177e4 1388
86872cb5 1389 err = fib6_del(rt, info);
d8d1f30b 1390 dst_release(&rt->dst);
1da177e4 1391
c71099ac 1392 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1393
1394 return err;
1395}
1396
e0a1ad73
TG
1397int ip6_del_rt(struct rt6_info *rt)
1398{
4d1169c1 1399 struct nl_info info = {
c346dca1 1400 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1401 };
528c4ceb 1402 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1403}
1404
86872cb5 1405static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1406{
c71099ac 1407 struct fib6_table *table;
1da177e4
LT
1408 struct fib6_node *fn;
1409 struct rt6_info *rt;
1410 int err = -ESRCH;
1411
5578689a 1412 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1413 if (table == NULL)
1414 return err;
1415
1416 read_lock_bh(&table->tb6_lock);
1da177e4 1417
c71099ac 1418 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1419 &cfg->fc_dst, cfg->fc_dst_len,
1420 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1421
1da177e4 1422 if (fn) {
d8d1f30b 1423 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1424 if (cfg->fc_ifindex &&
1da177e4 1425 (rt->rt6i_dev == NULL ||
86872cb5 1426 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1427 continue;
86872cb5
TG
1428 if (cfg->fc_flags & RTF_GATEWAY &&
1429 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1430 continue;
86872cb5 1431 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1432 continue;
d8d1f30b 1433 dst_hold(&rt->dst);
c71099ac 1434 read_unlock_bh(&table->tb6_lock);
1da177e4 1435
86872cb5 1436 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1437 }
1438 }
c71099ac 1439 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1440
1441 return err;
1442}
1443
1444/*
1445 * Handle redirects
1446 */
a6279458
YH
1447struct ip6rd_flowi {
1448 struct flowi fl;
1449 struct in6_addr gateway;
1450};
1451
8ed67789
DL
1452static struct rt6_info *__ip6_route_redirect(struct net *net,
1453 struct fib6_table *table,
a6279458
YH
1454 struct flowi *fl,
1455 int flags)
1da177e4 1456{
a6279458
YH
1457 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1458 struct rt6_info *rt;
e843b9e1 1459 struct fib6_node *fn;
c71099ac 1460
1da177e4 1461 /*
e843b9e1
YH
1462 * Get the "current" route for this destination and
1463 * check if the redirect has come from approriate router.
1464 *
1465 * RFC 2461 specifies that redirects should only be
1466 * accepted if they come from the nexthop to the target.
1467 * Due to the way the routes are chosen, this notion
1468 * is a bit fuzzy and one might need to check all possible
1469 * routes.
1da177e4 1470 */
1da177e4 1471
c71099ac 1472 read_lock_bh(&table->tb6_lock);
a6279458 1473 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1474restart:
d8d1f30b 1475 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1476 /*
1477 * Current route is on-link; redirect is always invalid.
1478 *
1479 * Seems, previous statement is not true. It could
1480 * be node, which looks for us as on-link (f.e. proxy ndisc)
1481 * But then router serving it might decide, that we should
1482 * know truth 8)8) --ANK (980726).
1483 */
1484 if (rt6_check_expired(rt))
1485 continue;
1486 if (!(rt->rt6i_flags & RTF_GATEWAY))
1487 continue;
a6279458 1488 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1489 continue;
a6279458 1490 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1491 continue;
1492 break;
1493 }
a6279458 1494
cb15d9c2 1495 if (!rt)
8ed67789
DL
1496 rt = net->ipv6.ip6_null_entry;
1497 BACKTRACK(net, &fl->fl6_src);
cb15d9c2 1498out:
d8d1f30b 1499 dst_hold(&rt->dst);
a6279458 1500
c71099ac 1501 read_unlock_bh(&table->tb6_lock);
e843b9e1 1502
a6279458
YH
1503 return rt;
1504};
1505
1506static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1507 struct in6_addr *src,
1508 struct in6_addr *gateway,
1509 struct net_device *dev)
1510{
adaa70bb 1511 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1512 struct net *net = dev_net(dev);
a6279458
YH
1513 struct ip6rd_flowi rdfl = {
1514 .fl = {
1515 .oif = dev->ifindex,
5811662b
CG
1516 .fl6_dst = *dest,
1517 .fl6_src = *src,
a6279458 1518 },
a6279458 1519 };
adaa70bb 1520
86c36ce4
BH
1521 ipv6_addr_copy(&rdfl.gateway, gateway);
1522
adaa70bb
TG
1523 if (rt6_need_strict(dest))
1524 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1525
5578689a 1526 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
58f09b78 1527 flags, __ip6_route_redirect);
a6279458
YH
1528}
1529
1530void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1531 struct in6_addr *saddr,
1532 struct neighbour *neigh, u8 *lladdr, int on_link)
1533{
1534 struct rt6_info *rt, *nrt = NULL;
1535 struct netevent_redirect netevent;
c346dca1 1536 struct net *net = dev_net(neigh->dev);
a6279458
YH
1537
1538 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1539
8ed67789 1540 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1541 if (net_ratelimit())
1542 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1543 "for redirect target\n");
a6279458 1544 goto out;
1da177e4
LT
1545 }
1546
1da177e4
LT
1547 /*
1548 * We have finally decided to accept it.
1549 */
1550
1ab1457c 1551 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1552 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1553 NEIGH_UPDATE_F_OVERRIDE|
1554 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1555 NEIGH_UPDATE_F_ISROUTER))
1556 );
1557
1558 /*
1559 * Redirect received -> path was valid.
1560 * Look, redirects are sent only in response to data packets,
1561 * so that this nexthop apparently is reachable. --ANK
1562 */
d8d1f30b 1563 dst_confirm(&rt->dst);
1da177e4
LT
1564
1565 /* Duplicate redirect: silently ignore. */
d8d1f30b 1566 if (neigh == rt->dst.neighbour)
1da177e4
LT
1567 goto out;
1568
1569 nrt = ip6_rt_copy(rt);
1570 if (nrt == NULL)
1571 goto out;
1572
1573 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1574 if (on_link)
1575 nrt->rt6i_flags &= ~RTF_GATEWAY;
1576
1577 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1578 nrt->rt6i_dst.plen = 128;
d8d1f30b 1579 nrt->dst.flags |= DST_HOST;
1da177e4
LT
1580
1581 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1582 nrt->rt6i_nexthop = neigh_clone(neigh);
1da177e4 1583
40e22e8f 1584 if (ip6_ins_rt(nrt))
1da177e4
LT
1585 goto out;
1586
d8d1f30b
CG
1587 netevent.old = &rt->dst;
1588 netevent.new = &nrt->dst;
8d71740c
TT
1589 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1590
1da177e4 1591 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1592 ip6_del_rt(rt);
1da177e4
LT
1593 return;
1594 }
1595
1596out:
d8d1f30b 1597 dst_release(&rt->dst);
1da177e4
LT
1598}
1599
1600/*
1601 * Handle ICMP "packet too big" messages
1602 * i.e. Path MTU discovery
1603 */
1604
ae878ae2
1605static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1606 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1607{
1608 struct rt6_info *rt, *nrt;
1609 int allfrag = 0;
d3052b55 1610again:
ae878ae2 1611 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1612 if (rt == NULL)
1613 return;
1614
d3052b55
AV
1615 if (rt6_check_expired(rt)) {
1616 ip6_del_rt(rt);
1617 goto again;
1618 }
1619
d8d1f30b 1620 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1621 goto out;
1622
1623 if (pmtu < IPV6_MIN_MTU) {
1624 /*
1ab1457c 1625 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1626 * MTU (1280) and a fragment header should always be included
1627 * after a node receiving Too Big message reporting PMTU is
1628 * less than the IPv6 Minimum Link MTU.
1629 */
1630 pmtu = IPV6_MIN_MTU;
1631 allfrag = 1;
1632 }
1633
1634 /* New mtu received -> path was valid.
1635 They are sent only in response to data packets,
1636 so that this nexthop apparently is reachable. --ANK
1637 */
d8d1f30b 1638 dst_confirm(&rt->dst);
1da177e4
LT
1639
1640 /* Host route. If it is static, it would be better
1641 not to override it, but add new one, so that
1642 when cache entry will expire old pmtu
1643 would return automatically.
1644 */
1645 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1646 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1647 if (allfrag) {
1648 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1649 features |= RTAX_FEATURE_ALLFRAG;
1650 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1651 }
d8d1f30b 1652 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1653 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1654 goto out;
1655 }
1656
1657 /* Network route.
1658 Two cases are possible:
1659 1. It is connected route. Action: COW
1660 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1661 */
d5315b50 1662 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1663 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1664 else
1665 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1666
d5315b50 1667 if (nrt) {
defb3519
DM
1668 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1669 if (allfrag) {
1670 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1671 features |= RTAX_FEATURE_ALLFRAG;
1672 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1673 }
a1e78363
YH
1674
1675 /* According to RFC 1981, detecting PMTU increase shouldn't be
1676 * happened within 5 mins, the recommended timer is 10 mins.
1677 * Here this route expiration time is set to ip6_rt_mtu_expires
1678 * which is 10 mins. After 10 mins the decreased pmtu is expired
1679 * and detecting PMTU increase will be automatically happened.
1680 */
d8d1f30b 1681 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1682 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1683
40e22e8f 1684 ip6_ins_rt(nrt);
1da177e4 1685 }
1da177e4 1686out:
d8d1f30b 1687 dst_release(&rt->dst);
1da177e4
LT
1688}
1689
ae878ae2
1690void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1691 struct net_device *dev, u32 pmtu)
1692{
1693 struct net *net = dev_net(dev);
1694
1695 /*
1696 * RFC 1981 states that a node "MUST reduce the size of the packets it
1697 * is sending along the path" that caused the Packet Too Big message.
1698 * Since it's not possible in the general case to determine which
1699 * interface was used to send the original packet, we update the MTU
1700 * on the interface that will be used to send future packets. We also
1701 * update the MTU on the interface that received the Packet Too Big in
1702 * case the original packet was forced out that interface with
1703 * SO_BINDTODEVICE or similar. This is the next best thing to the
1704 * correct behaviour, which would be to update the MTU on all
1705 * interfaces.
1706 */
1707 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1708 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1709}
1710
1da177e4
LT
1711/*
1712 * Misc support functions
1713 */
1714
1715static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1716{
c346dca1 1717 struct net *net = dev_net(ort->rt6i_dev);
86393e52 1718 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1719
1720 if (rt) {
d8d1f30b
CG
1721 rt->dst.input = ort->dst.input;
1722 rt->dst.output = ort->dst.output;
1723
defb3519 1724 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b
CG
1725 rt->dst.error = ort->dst.error;
1726 rt->dst.dev = ort->dst.dev;
1727 if (rt->dst.dev)
1728 dev_hold(rt->dst.dev);
1da177e4
LT
1729 rt->rt6i_idev = ort->rt6i_idev;
1730 if (rt->rt6i_idev)
1731 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1732 rt->dst.lastuse = jiffies;
1da177e4
LT
1733 rt->rt6i_expires = 0;
1734
1735 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1736 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1737 rt->rt6i_metric = 0;
1738
1739 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1740#ifdef CONFIG_IPV6_SUBTREES
1741 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1742#endif
c71099ac 1743 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1744 }
1745 return rt;
1746}
1747
70ceb4f5 1748#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1749static struct rt6_info *rt6_get_route_info(struct net *net,
1750 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1751 struct in6_addr *gwaddr, int ifindex)
1752{
1753 struct fib6_node *fn;
1754 struct rt6_info *rt = NULL;
c71099ac
TG
1755 struct fib6_table *table;
1756
efa2cea0 1757 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1758 if (table == NULL)
1759 return NULL;
70ceb4f5 1760
c71099ac
TG
1761 write_lock_bh(&table->tb6_lock);
1762 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1763 if (!fn)
1764 goto out;
1765
d8d1f30b 1766 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1767 if (rt->rt6i_dev->ifindex != ifindex)
1768 continue;
1769 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1770 continue;
1771 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1772 continue;
d8d1f30b 1773 dst_hold(&rt->dst);
70ceb4f5
YH
1774 break;
1775 }
1776out:
c71099ac 1777 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1778 return rt;
1779}
1780
efa2cea0
DL
1781static struct rt6_info *rt6_add_route_info(struct net *net,
1782 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1783 struct in6_addr *gwaddr, int ifindex,
1784 unsigned pref)
1785{
86872cb5
TG
1786 struct fib6_config cfg = {
1787 .fc_table = RT6_TABLE_INFO,
238fc7ea 1788 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1789 .fc_ifindex = ifindex,
1790 .fc_dst_len = prefixlen,
1791 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1792 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1793 .fc_nlinfo.pid = 0,
1794 .fc_nlinfo.nlh = NULL,
1795 .fc_nlinfo.nl_net = net,
86872cb5
TG
1796 };
1797
1798 ipv6_addr_copy(&cfg.fc_dst, prefix);
1799 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1800
e317da96
YH
1801 /* We should treat it as a default route if prefix length is 0. */
1802 if (!prefixlen)
86872cb5 1803 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1804
86872cb5 1805 ip6_route_add(&cfg);
70ceb4f5 1806
efa2cea0 1807 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1808}
1809#endif
1810
1da177e4 1811struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1812{
1da177e4 1813 struct rt6_info *rt;
c71099ac 1814 struct fib6_table *table;
1da177e4 1815
c346dca1 1816 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1817 if (table == NULL)
1818 return NULL;
1da177e4 1819
c71099ac 1820 write_lock_bh(&table->tb6_lock);
d8d1f30b 1821 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1822 if (dev == rt->rt6i_dev &&
045927ff 1823 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1824 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1825 break;
1826 }
1827 if (rt)
d8d1f30b 1828 dst_hold(&rt->dst);
c71099ac 1829 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1830 return rt;
1831}
1832
1833struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1834 struct net_device *dev,
1835 unsigned int pref)
1da177e4 1836{
86872cb5
TG
1837 struct fib6_config cfg = {
1838 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1839 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1840 .fc_ifindex = dev->ifindex,
1841 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1842 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1843 .fc_nlinfo.pid = 0,
1844 .fc_nlinfo.nlh = NULL,
c346dca1 1845 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1846 };
1da177e4 1847
86872cb5 1848 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1849
86872cb5 1850 ip6_route_add(&cfg);
1da177e4 1851
1da177e4
LT
1852 return rt6_get_dflt_router(gwaddr, dev);
1853}
1854
7b4da532 1855void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1856{
1857 struct rt6_info *rt;
c71099ac
TG
1858 struct fib6_table *table;
1859
1860 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1861 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1862 if (table == NULL)
1863 return;
1da177e4
LT
1864
1865restart:
c71099ac 1866 read_lock_bh(&table->tb6_lock);
d8d1f30b 1867 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1868 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1869 dst_hold(&rt->dst);
c71099ac 1870 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1871 ip6_del_rt(rt);
1da177e4
LT
1872 goto restart;
1873 }
1874 }
c71099ac 1875 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1876}
1877
5578689a
DL
1878static void rtmsg_to_fib6_config(struct net *net,
1879 struct in6_rtmsg *rtmsg,
86872cb5
TG
1880 struct fib6_config *cfg)
1881{
1882 memset(cfg, 0, sizeof(*cfg));
1883
1884 cfg->fc_table = RT6_TABLE_MAIN;
1885 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1886 cfg->fc_metric = rtmsg->rtmsg_metric;
1887 cfg->fc_expires = rtmsg->rtmsg_info;
1888 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1889 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1890 cfg->fc_flags = rtmsg->rtmsg_flags;
1891
5578689a 1892 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1893
86872cb5
TG
1894 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1895 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1896 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1897}
1898
5578689a 1899int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1900{
86872cb5 1901 struct fib6_config cfg;
1da177e4
LT
1902 struct in6_rtmsg rtmsg;
1903 int err;
1904
1905 switch(cmd) {
1906 case SIOCADDRT: /* Add a route */
1907 case SIOCDELRT: /* Delete a route */
1908 if (!capable(CAP_NET_ADMIN))
1909 return -EPERM;
1910 err = copy_from_user(&rtmsg, arg,
1911 sizeof(struct in6_rtmsg));
1912 if (err)
1913 return -EFAULT;
86872cb5 1914
5578689a 1915 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1916
1da177e4
LT
1917 rtnl_lock();
1918 switch (cmd) {
1919 case SIOCADDRT:
86872cb5 1920 err = ip6_route_add(&cfg);
1da177e4
LT
1921 break;
1922 case SIOCDELRT:
86872cb5 1923 err = ip6_route_del(&cfg);
1da177e4
LT
1924 break;
1925 default:
1926 err = -EINVAL;
1927 }
1928 rtnl_unlock();
1929
1930 return err;
3ff50b79 1931 }
1da177e4
LT
1932
1933 return -EINVAL;
1934}
1935
1936/*
1937 * Drop the packet on the floor
1938 */
1939
d5fdd6ba 1940static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1941{
612f09e8 1942 int type;
adf30907 1943 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1944 switch (ipstats_mib_noroutes) {
1945 case IPSTATS_MIB_INNOROUTES:
0660e03f 1946 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1947 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1948 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1949 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1950 break;
1951 }
1952 /* FALLTHROUGH */
1953 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1954 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1955 ipstats_mib_noroutes);
612f09e8
YH
1956 break;
1957 }
3ffe533c 1958 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1959 kfree_skb(skb);
1960 return 0;
1961}
1962
9ce8ade0
TG
1963static int ip6_pkt_discard(struct sk_buff *skb)
1964{
612f09e8 1965 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1966}
1967
20380731 1968static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 1969{
adf30907 1970 skb->dev = skb_dst(skb)->dev;
612f09e8 1971 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1972}
1973
6723ab54
DM
1974#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1975
9ce8ade0
TG
1976static int ip6_pkt_prohibit(struct sk_buff *skb)
1977{
612f09e8 1978 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1979}
1980
1981static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1982{
adf30907 1983 skb->dev = skb_dst(skb)->dev;
612f09e8 1984 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1985}
1986
6723ab54
DM
1987#endif
1988
1da177e4
LT
1989/*
1990 * Allocate a dst for local (unicast / anycast) address.
1991 */
1992
1993struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1994 const struct in6_addr *addr,
1995 int anycast)
1996{
c346dca1 1997 struct net *net = dev_net(idev->dev);
86393e52 1998 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
14deae41 1999 struct neighbour *neigh;
1da177e4 2000
40385653
BG
2001 if (rt == NULL) {
2002 if (net_ratelimit())
2003 pr_warning("IPv6: Maximum number of routes reached,"
2004 " consider increasing route/max_size.\n");
1da177e4 2005 return ERR_PTR(-ENOMEM);
40385653 2006 }
1da177e4 2007
5578689a 2008 dev_hold(net->loopback_dev);
1da177e4
LT
2009 in6_dev_hold(idev);
2010
d8d1f30b
CG
2011 rt->dst.flags = DST_HOST;
2012 rt->dst.input = ip6_input;
2013 rt->dst.output = ip6_output;
5578689a 2014 rt->rt6i_dev = net->loopback_dev;
1da177e4 2015 rt->rt6i_idev = idev;
defb3519 2016 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
d8d1f30b 2017 rt->dst.obsolete = -1;
1da177e4
LT
2018
2019 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2020 if (anycast)
2021 rt->rt6i_flags |= RTF_ANYCAST;
2022 else
1da177e4 2023 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
2024 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2025 if (IS_ERR(neigh)) {
d8d1f30b 2026 dst_free(&rt->dst);
14deae41
DM
2027
2028 /* We are casting this because that is the return
2029 * value type. But an errno encoded pointer is the
2030 * same regardless of the underlying pointer type,
2031 * and that's what we are returning. So this is OK.
2032 */
2033 return (struct rt6_info *) neigh;
1da177e4 2034 }
14deae41 2035 rt->rt6i_nexthop = neigh;
1da177e4
LT
2036
2037 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2038 rt->rt6i_dst.plen = 128;
5578689a 2039 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2040
d8d1f30b 2041 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2042
2043 return rt;
2044}
2045
8ed67789
DL
/*
 * Argument bundle handed to fib6_ifdown(): the device being taken down
 * (NULL matches routes on any device) and the namespace whose null entry
 * must be left alone.
 */
struct arg_dev_net {
	struct net_device	*dev;
	struct net		*net;
};
2050
1da177e4
LT
2051static int fib6_ifdown(struct rt6_info *rt, void *arg)
2052{
bc3ef660 2053 const struct arg_dev_net *adn = arg;
2054 const struct net_device *dev = adn->dev;
8ed67789 2055
bc3ef660 2056 if ((rt->rt6i_dev == dev || dev == NULL) &&
2057 rt != adn->net->ipv6.ip6_null_entry) {
1da177e4
LT
2058 RT6_TRACE("deleted by ifdown %p\n", rt);
2059 return -1;
2060 }
2061 return 0;
2062}
2063
f3db4851 2064void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2065{
8ed67789
DL
2066 struct arg_dev_net adn = {
2067 .dev = dev,
2068 .net = net,
2069 };
2070
2071 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2072 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2073}
2074
/* Argument block passed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* the new MTU */
};
2080
2081static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2082{
2083 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2084 struct inet6_dev *idev;
2085
2086 /* In IPv6 pmtu discovery is not optional,
2087 so that RTAX_MTU lock cannot disable it.
2088 We still use this lock to block changes
2089 caused by addrconf/ndisc.
2090 */
2091
2092 idev = __in6_dev_get(arg->dev);
2093 if (idev == NULL)
2094 return 0;
2095
2096 /* For administrative MTU increase, there is no way to discover
2097 IPv6 PMTU increase, so PMTU increase should be updated here.
2098 Since RFC 1981 doesn't include administrative MTU increase
2099 update PMTU increase is a MUST. (i.e. jumbo frame)
2100 */
2101 /*
2102 If new MTU is less than route PMTU, this new MTU will be the
2103 lowest MTU in the path, update the route PMTU to reflect PMTU
2104 decreases; if new MTU is greater than route PMTU, and the
2105 old MTU is the lowest MTU in the path, update the route PMTU
2106 to reflect the increase. In this case if the other nodes' MTU
2107 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2108 PMTU discouvery.
2109 */
2110 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2111 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2112 (dst_mtu(&rt->dst) >= arg->mtu ||
2113 (dst_mtu(&rt->dst) < arg->mtu &&
2114 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2115 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2116 }
1da177e4
LT
2117 return 0;
2118}
2119
2120void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2121{
c71099ac
TG
2122 struct rt6_mtu_change_arg arg = {
2123 .dev = dev,
2124 .mtu = mtu,
2125 };
1da177e4 2126
c346dca1 2127 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2128}
2129
ef7c79ed 2130static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2131 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2132 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2133 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2134 [RTA_PRIORITY] = { .type = NLA_U32 },
2135 [RTA_METRICS] = { .type = NLA_NESTED },
2136};
2137
2138static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2139 struct fib6_config *cfg)
1da177e4 2140{
86872cb5
TG
2141 struct rtmsg *rtm;
2142 struct nlattr *tb[RTA_MAX+1];
2143 int err;
1da177e4 2144
86872cb5
TG
2145 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2146 if (err < 0)
2147 goto errout;
1da177e4 2148
86872cb5
TG
2149 err = -EINVAL;
2150 rtm = nlmsg_data(nlh);
2151 memset(cfg, 0, sizeof(*cfg));
2152
2153 cfg->fc_table = rtm->rtm_table;
2154 cfg->fc_dst_len = rtm->rtm_dst_len;
2155 cfg->fc_src_len = rtm->rtm_src_len;
2156 cfg->fc_flags = RTF_UP;
2157 cfg->fc_protocol = rtm->rtm_protocol;
2158
2159 if (rtm->rtm_type == RTN_UNREACHABLE)
2160 cfg->fc_flags |= RTF_REJECT;
2161
ab79ad14
2162 if (rtm->rtm_type == RTN_LOCAL)
2163 cfg->fc_flags |= RTF_LOCAL;
2164
86872cb5
TG
2165 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2166 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2167 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2168
2169 if (tb[RTA_GATEWAY]) {
2170 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2171 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2172 }
86872cb5
TG
2173
2174 if (tb[RTA_DST]) {
2175 int plen = (rtm->rtm_dst_len + 7) >> 3;
2176
2177 if (nla_len(tb[RTA_DST]) < plen)
2178 goto errout;
2179
2180 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2181 }
86872cb5
TG
2182
2183 if (tb[RTA_SRC]) {
2184 int plen = (rtm->rtm_src_len + 7) >> 3;
2185
2186 if (nla_len(tb[RTA_SRC]) < plen)
2187 goto errout;
2188
2189 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2190 }
86872cb5
TG
2191
2192 if (tb[RTA_OIF])
2193 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2194
2195 if (tb[RTA_PRIORITY])
2196 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2197
2198 if (tb[RTA_METRICS]) {
2199 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2200 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2201 }
86872cb5
TG
2202
2203 if (tb[RTA_TABLE])
2204 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2205
2206 err = 0;
2207errout:
2208 return err;
1da177e4
LT
2209}
2210
c127ea2c 2211static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2212{
86872cb5
TG
2213 struct fib6_config cfg;
2214 int err;
1da177e4 2215
86872cb5
TG
2216 err = rtm_to_fib6_config(skb, nlh, &cfg);
2217 if (err < 0)
2218 return err;
2219
2220 return ip6_route_del(&cfg);
1da177e4
LT
2221}
2222
c127ea2c 2223static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2224{
86872cb5
TG
2225 struct fib6_config cfg;
2226 int err;
1da177e4 2227
86872cb5
TG
2228 err = rtm_to_fib6_config(skb, nlh, &cfg);
2229 if (err < 0)
2230 return err;
2231
2232 return ip6_route_add(&cfg);
1da177e4
LT
2233}
2234
339bf98f
TG
2235static inline size_t rt6_nlmsg_size(void)
2236{
2237 return NLMSG_ALIGN(sizeof(struct rtmsg))
2238 + nla_total_size(16) /* RTA_SRC */
2239 + nla_total_size(16) /* RTA_DST */
2240 + nla_total_size(16) /* RTA_GATEWAY */
2241 + nla_total_size(16) /* RTA_PREFSRC */
2242 + nla_total_size(4) /* RTA_TABLE */
2243 + nla_total_size(4) /* RTA_IIF */
2244 + nla_total_size(4) /* RTA_OIF */
2245 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2246 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2247 + nla_total_size(sizeof(struct rta_cacheinfo));
2248}
2249
191cd582
BH
/*
 * rt6_fill_node - append one route message describing @rt to @skb.
 *
 * @dst/@src: when non-NULL they are emitted as RTA_DST/RTA_SRC with a
 *            reported prefix length of 128 instead of the route's own
 *            prefix (RTA_SRC only with CONFIG_IPV6_SUBTREES).
 * @iif:      when non-zero, emitted as RTA_IIF (or, for multicast
 *            destinations with CONFIG_IPV6_MROUTE, handed to
 *            ip6mr_get_route()).
 * @prefix:   when non-zero, routes without RTF_PREFIX_RT are skipped.
 *
 * Returns 1 without touching @skb for a route skipped because of @prefix,
 * -EMSGSIZE if the header cannot be added or the nla_put_failure label is
 * reached (the partial message is cancelled there), 0 from the !nowait
 * multicast path when ip6mr_get_route() returned 0, and otherwise the
 * result of nlmsg_end().
 *
 * NOTE(review): NLA_PUT()/NLA_PUT_U32() are macros defined outside this
 * file; presumably they jump to nla_put_failure when the attribute does
 * not fit - confirm against include/net/netlink.h.
 */
2250static int rt6_fill_node(struct net *net,
2251 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2252 struct in6_addr *dst, struct in6_addr *src,
2253 int iif, int type, u32 pid, u32 seq,
7bc570c8 2254 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2255{
2256 struct rtmsg *rtm;
2d7202bf 2257 struct nlmsghdr *nlh;
e3703b3d 2258 long expires;
9e762a4a 2259 u32 table;
1da177e4
LT
2260
2261 if (prefix) { /* user wants prefix routes only */
2262 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2263 /* success since this is not a prefix route */
2264 return 1;
2265 }
2266 }
2267
2d7202bf
TG
2268 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2269 if (nlh == NULL)
26932566 2270 return -EMSGSIZE;
2d7202bf
TG
2271
2272 rtm = nlmsg_data(nlh);
1da177e4
LT
2273 rtm->rtm_family = AF_INET6;
2274 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2275 rtm->rtm_src_len = rt->rt6i_src.plen;
2276 rtm->rtm_tos = 0;
/* The table id is reported both in the header and as RTA_TABLE. */
c71099ac 2277 if (rt->rt6i_table)
9e762a4a 2278 table = rt->rt6i_table->tb6_id;
c71099ac 2279 else
9e762a4a
PM
2280 table = RT6_TABLE_UNSPEC;
2281 rtm->rtm_table = table;
2d7202bf 2282 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
/* Route type: reject flag first, then local flag, then loopback device. */
2283 if (rt->rt6i_flags&RTF_REJECT)
2284 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2285 else if (rt->rt6i_flags&RTF_LOCAL)
2286 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2287 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2288 rtm->rtm_type = RTN_LOCAL;
2289 else
2290 rtm->rtm_type = RTN_UNICAST;
2291 rtm->rtm_flags = 0;
2292 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
/* The stored protocol is overridden when one of the flags below is set. */
2293 rtm->rtm_protocol = rt->rt6i_protocol;
2294 if (rt->rt6i_flags&RTF_DYNAMIC)
2295 rtm->rtm_protocol = RTPROT_REDIRECT;
2296 else if (rt->rt6i_flags & RTF_ADDRCONF)
2297 rtm->rtm_protocol = RTPROT_KERNEL;
2298 else if (rt->rt6i_flags&RTF_DEFAULT)
2299 rtm->rtm_protocol = RTPROT_RA;
2300
2301 if (rt->rt6i_flags&RTF_CACHE)
2302 rtm->rtm_flags |= RTM_F_CLONED;
2303
/* A caller-supplied destination is reported as a /128. */
2304 if (dst) {
2d7202bf 2305 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2306 rtm->rtm_dst_len = 128;
1da177e4 2307 } else if (rtm->rtm_dst_len)
2d7202bf 2308 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2309#ifdef CONFIG_IPV6_SUBTREES
2310 if (src) {
2d7202bf 2311 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2312 rtm->rtm_src_len = 128;
1da177e4 2313 } else if (rtm->rtm_src_len)
2d7202bf 2314 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2315#endif
7bc570c8
YH
/*
 * With CONFIG_IPV6_MROUTE the "else" below binds the NLA_PUT_U32() after
 * the #endif to the multicast test; without it the put is unconditional
 * inside this branch.
 */
2316 if (iif) {
2317#ifdef CONFIG_IPV6_MROUTE
2318 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2319 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2320 if (err <= 0) {
2321 if (!nowait) {
2322 if (err == 0)
2323 return 0;
2324 goto nla_put_failure;
2325 } else {
2326 if (err == -EMSGSIZE)
2327 goto nla_put_failure;
2328 }
2329 }
2330 } else
2331#endif
2332 NLA_PUT_U32(skb, RTA_IIF, iif);
2333 } else if (dst) {
/* No input interface: report a source address chosen for @dst, if any. */
d8d1f30b 2334 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
1da177e4 2335 struct in6_addr saddr_buf;
191cd582 2336 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
7cbca67c 2337 dst, 0, &saddr_buf) == 0)
2d7202bf 2338 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2339 }
2d7202bf 2340
defb3519 2341 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2342 goto nla_put_failure;
2343
d8d1f30b
CG
/* The gateway is taken from the neighbour entry's primary key. */
2344 if (rt->dst.neighbour)
2345 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2d7202bf 2346
d8d1f30b 2347 if (rt->dst.dev)
2d7202bf
TG
2348 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2349
2350 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2351
36e3deae
YH
/*
 * Remaining lifetime in jiffies: 0 when RTF_EXPIRES is not set, otherwise
 * rt6i_expires - jiffies, replaced by INT_MAX when that difference is not
 * below INT_MAX.
 */
2352 if (!(rt->rt6i_flags & RTF_EXPIRES))
2353 expires = 0;
2354 else if (rt->rt6i_expires - jiffies < INT_MAX)
2355 expires = rt->rt6i_expires - jiffies;
2356 else
2357 expires = INT_MAX;
69cdf8f9 2358
d8d1f30b
CG
2359 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2360 expires, rt->dst.error) < 0)
e3703b3d 2361 goto nla_put_failure;
2d7202bf
TG
2362
2363 return nlmsg_end(skb, nlh);
2364
/* Drop the partially built message and report lack of space. */
2365nla_put_failure:
26932566
PM
2366 nlmsg_cancel(skb, nlh);
2367 return -EMSGSIZE;
1da177e4
LT
2368}
2369
1b43af54 2370int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2371{
2372 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2373 int prefix;
2374
2d7202bf
TG
2375 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2376 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2377 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2378 } else
2379 prefix = 0;
2380
191cd582
BH
2381 return rt6_fill_node(arg->net,
2382 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2383 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2384 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2385}
2386
c127ea2c 2387static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2388{
3b1e0a65 2389 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2390 struct nlattr *tb[RTA_MAX+1];
2391 struct rt6_info *rt;
1da177e4 2392 struct sk_buff *skb;
ab364a6f 2393 struct rtmsg *rtm;
1da177e4 2394 struct flowi fl;
ab364a6f 2395 int err, iif = 0;
1da177e4 2396
ab364a6f
TG
2397 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2398 if (err < 0)
2399 goto errout;
1da177e4 2400
ab364a6f 2401 err = -EINVAL;
1da177e4 2402 memset(&fl, 0, sizeof(fl));
1da177e4 2403
ab364a6f
TG
2404 if (tb[RTA_SRC]) {
2405 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2406 goto errout;
2407
2408 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2409 }
2410
2411 if (tb[RTA_DST]) {
2412 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2413 goto errout;
2414
2415 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2416 }
2417
2418 if (tb[RTA_IIF])
2419 iif = nla_get_u32(tb[RTA_IIF]);
2420
2421 if (tb[RTA_OIF])
2422 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2423
2424 if (iif) {
2425 struct net_device *dev;
5578689a 2426 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2427 if (!dev) {
2428 err = -ENODEV;
ab364a6f 2429 goto errout;
1da177e4
LT
2430 }
2431 }
2432
ab364a6f
TG
2433 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2434 if (skb == NULL) {
2435 err = -ENOBUFS;
2436 goto errout;
2437 }
1da177e4 2438
ab364a6f
TG
2439 /* Reserve room for dummy headers, this skb can pass
2440 through good chunk of routing engine.
2441 */
459a98ed 2442 skb_reset_mac_header(skb);
ab364a6f 2443 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2444
8a3edd80 2445 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
d8d1f30b 2446 skb_dst_set(skb, &rt->dst);
1da177e4 2447
191cd582 2448 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2449 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2450 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2451 if (err < 0) {
ab364a6f
TG
2452 kfree_skb(skb);
2453 goto errout;
1da177e4
LT
2454 }
2455
5578689a 2456 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2457errout:
1da177e4 2458 return err;
1da177e4
LT
2459}
2460
86872cb5 2461void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2462{
2463 struct sk_buff *skb;
5578689a 2464 struct net *net = info->nl_net;
528c4ceb
DL
2465 u32 seq;
2466 int err;
2467
2468 err = -ENOBUFS;
2469 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2470
339bf98f 2471 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2472 if (skb == NULL)
2473 goto errout;
2474
191cd582 2475 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2476 event, info->pid, seq, 0, 0, 0);
26932566
PM
2477 if (err < 0) {
2478 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2479 WARN_ON(err == -EMSGSIZE);
2480 kfree_skb(skb);
2481 goto errout;
2482 }
1ce85fe4
PNA
2483 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2484 info->nlh, gfp_any());
2485 return;
21713ebc
TG
2486errout:
2487 if (err < 0)
5578689a 2488 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2489}
2490
8ed67789
DL
2491static int ip6_route_dev_notify(struct notifier_block *this,
2492 unsigned long event, void *data)
2493{
2494 struct net_device *dev = (struct net_device *)data;
c346dca1 2495 struct net *net = dev_net(dev);
8ed67789
DL
2496
2497 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2498 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2499 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2500#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2501 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2502 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2503 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2504 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2505#endif
2506 }
2507
2508 return NOTIFY_OK;
2509}
2510
1da177e4
LT
2511/*
2512 * /proc
2513 */
2514
2515#ifdef CONFIG_PROC_FS
2516
1da177e4
LT
/*
 * Buffer-style /proc output state.
 * NOTE(review): nothing in the visible part of this file references this
 * structure; it may be a leftover - confirm before removing.
 */
struct rt6_proc_arg {
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2525
2526static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2527{
33120b30 2528 struct seq_file *m = p_arg;
1da177e4 2529
4b7a4274 2530 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2531
2532#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2533 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2534#else
33120b30 2535 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2536#endif
2537
2538 if (rt->rt6i_nexthop) {
4b7a4274 2539 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
1da177e4 2540 } else {
33120b30 2541 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2542 }
33120b30 2543 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2544 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2545 rt->dst.__use, rt->rt6i_flags,
33120b30 2546 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2547 return 0;
2548}
2549
33120b30 2550static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2551{
f3db4851
DL
2552 struct net *net = (struct net *)m->private;
2553 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2554 return 0;
2555}
1da177e4 2556
33120b30
AD
2557static int ipv6_route_open(struct inode *inode, struct file *file)
2558{
de05c557 2559 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2560}
2561
33120b30
AD
2562static const struct file_operations ipv6_route_proc_fops = {
2563 .owner = THIS_MODULE,
2564 .open = ipv6_route_open,
2565 .read = seq_read,
2566 .llseek = seq_lseek,
b6fcbdb4 2567 .release = single_release_net,
33120b30
AD
2568};
2569
1da177e4
LT
2570static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2571{
69ddb805 2572 struct net *net = (struct net *)seq->private;
1da177e4 2573 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2574 net->ipv6.rt6_stats->fib_nodes,
2575 net->ipv6.rt6_stats->fib_route_nodes,
2576 net->ipv6.rt6_stats->fib_rt_alloc,
2577 net->ipv6.rt6_stats->fib_rt_entries,
2578 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2579 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2580 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2581
2582 return 0;
2583}
2584
2585static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2586{
de05c557 2587 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2588}
2589
9a32144e 2590static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2591 .owner = THIS_MODULE,
2592 .open = rt6_stats_seq_open,
2593 .read = seq_read,
2594 .llseek = seq_lseek,
b6fcbdb4 2595 .release = single_release_net,
1da177e4
LT
2596};
2597#endif /* CONFIG_PROC_FS */
2598
2599#ifdef CONFIG_SYSCTL
2600
1da177e4 2601static
8d65af78 2602int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2603 void __user *buffer, size_t *lenp, loff_t *ppos)
2604{
5b7c931d
DL
2605 struct net *net = current->nsproxy->net_ns;
2606 int delay = net->ipv6.sysctl.flush_delay;
1da177e4 2607 if (write) {
8d65af78 2608 proc_dointvec(ctl, write, buffer, lenp, ppos);
5b7c931d 2609 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2610 return 0;
2611 } else
2612 return -EINVAL;
2613}
2614
760f2d01 2615ctl_table ipv6_route_table_template[] = {
1ab1457c 2616 {
1da177e4 2617 .procname = "flush",
4990509f 2618 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2619 .maxlen = sizeof(int),
89c8b3a1 2620 .mode = 0200,
6d9f239a 2621 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2622 },
2623 {
1da177e4 2624 .procname = "gc_thresh",
9a7ec3a9 2625 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2626 .maxlen = sizeof(int),
2627 .mode = 0644,
6d9f239a 2628 .proc_handler = proc_dointvec,
1da177e4
LT
2629 },
2630 {
1da177e4 2631 .procname = "max_size",
4990509f 2632 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2633 .maxlen = sizeof(int),
2634 .mode = 0644,
6d9f239a 2635 .proc_handler = proc_dointvec,
1da177e4
LT
2636 },
2637 {
1da177e4 2638 .procname = "gc_min_interval",
4990509f 2639 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2640 .maxlen = sizeof(int),
2641 .mode = 0644,
6d9f239a 2642 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2643 },
2644 {
1da177e4 2645 .procname = "gc_timeout",
4990509f 2646 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2647 .maxlen = sizeof(int),
2648 .mode = 0644,
6d9f239a 2649 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2650 },
2651 {
1da177e4 2652 .procname = "gc_interval",
4990509f 2653 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2654 .maxlen = sizeof(int),
2655 .mode = 0644,
6d9f239a 2656 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2657 },
2658 {
1da177e4 2659 .procname = "gc_elasticity",
4990509f 2660 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2661 .maxlen = sizeof(int),
2662 .mode = 0644,
f3d3f616 2663 .proc_handler = proc_dointvec,
1da177e4
LT
2664 },
2665 {
1da177e4 2666 .procname = "mtu_expires",
4990509f 2667 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2668 .maxlen = sizeof(int),
2669 .mode = 0644,
6d9f239a 2670 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2671 },
2672 {
1da177e4 2673 .procname = "min_adv_mss",
4990509f 2674 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2675 .maxlen = sizeof(int),
2676 .mode = 0644,
f3d3f616 2677 .proc_handler = proc_dointvec,
1da177e4
LT
2678 },
2679 {
1da177e4 2680 .procname = "gc_min_interval_ms",
4990509f 2681 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2682 .maxlen = sizeof(int),
2683 .mode = 0644,
6d9f239a 2684 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2685 },
f8572d8f 2686 { }
1da177e4
LT
2687};
2688
2c8c1e72 2689struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2690{
2691 struct ctl_table *table;
2692
2693 table = kmemdup(ipv6_route_table_template,
2694 sizeof(ipv6_route_table_template),
2695 GFP_KERNEL);
5ee09105
YH
2696
2697 if (table) {
2698 table[0].data = &net->ipv6.sysctl.flush_delay;
86393e52 2699 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2700 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2701 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2702 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2703 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2704 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2705 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2706 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2707 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2708 }
2709
760f2d01
DL
2710 return table;
2711}
1da177e4
LT
2712#endif
2713
2c8c1e72 2714static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2715{
633d424b 2716 int ret = -ENOMEM;
8ed67789 2717
86393e52
AD
2718 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2719 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2720
fc66f95c
ED
2721 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2722 goto out_ip6_dst_ops;
2723
8ed67789
DL
2724 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2725 sizeof(*net->ipv6.ip6_null_entry),
2726 GFP_KERNEL);
2727 if (!net->ipv6.ip6_null_entry)
fc66f95c 2728 goto out_ip6_dst_entries;
d8d1f30b 2729 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2730 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2731 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2732 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2733 ip6_template_metrics, true);
8ed67789
DL
2734
2735#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2736 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2737 sizeof(*net->ipv6.ip6_prohibit_entry),
2738 GFP_KERNEL);
68fffc67
PZ
2739 if (!net->ipv6.ip6_prohibit_entry)
2740 goto out_ip6_null_entry;
d8d1f30b 2741 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2742 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2743 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2744 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2745 ip6_template_metrics, true);
8ed67789
DL
2746
2747 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2748 sizeof(*net->ipv6.ip6_blk_hole_entry),
2749 GFP_KERNEL);
68fffc67
PZ
2750 if (!net->ipv6.ip6_blk_hole_entry)
2751 goto out_ip6_prohibit_entry;
d8d1f30b 2752 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2753 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2754 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2755 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2756 ip6_template_metrics, true);
8ed67789
DL
2757#endif
2758
b339a47c
PZ
2759 net->ipv6.sysctl.flush_delay = 0;
2760 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2761 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2762 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2763 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2764 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2765 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2766 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2767
cdb18761
DL
2768#ifdef CONFIG_PROC_FS
2769 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2770 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2771#endif
6891a346
BT
2772 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2773
8ed67789
DL
2774 ret = 0;
2775out:
2776 return ret;
f2fc6a54 2777
68fffc67
PZ
2778#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2779out_ip6_prohibit_entry:
2780 kfree(net->ipv6.ip6_prohibit_entry);
2781out_ip6_null_entry:
2782 kfree(net->ipv6.ip6_null_entry);
2783#endif
fc66f95c
ED
2784out_ip6_dst_entries:
2785 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2786out_ip6_dst_ops:
f2fc6a54 2787 goto out;
cdb18761
DL
2788}
2789
2c8c1e72 2790static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2791{
2792#ifdef CONFIG_PROC_FS
2793 proc_net_remove(net, "ipv6_route");
2794 proc_net_remove(net, "rt6_stats");
2795#endif
8ed67789
DL
2796 kfree(net->ipv6.ip6_null_entry);
2797#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2798 kfree(net->ipv6.ip6_prohibit_entry);
2799 kfree(net->ipv6.ip6_blk_hole_entry);
2800#endif
41bb78b4 2801 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2802}
2803
2804static struct pernet_operations ip6_route_net_ops = {
2805 .init = ip6_route_net_init,
2806 .exit = ip6_route_net_exit,
2807};
2808
8ed67789
DL
2809static struct notifier_block ip6_route_dev_notifier = {
2810 .notifier_call = ip6_route_dev_notify,
2811 .priority = 0,
2812};
2813
433d49c3 2814int __init ip6_route_init(void)
1da177e4 2815{
433d49c3
DL
2816 int ret;
2817
9a7ec3a9
DL
2818 ret = -ENOMEM;
2819 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2820 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2821 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2822 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2823 goto out;
14e50e57 2824
fc66f95c 2825 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2826 if (ret)
bdb3289f 2827 goto out_kmem_cache;
bdb3289f 2828
fc66f95c
ED
2829 ret = register_pernet_subsys(&ip6_route_net_ops);
2830 if (ret)
2831 goto out_dst_entries;
2832
5dc121e9
AE
2833 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2834
8ed67789
DL
2835 /* Registering of the loopback is done before this portion of code,
2836 * the loopback reference in rt6_info will not be taken, do it
2837 * manually for init_net */
d8d1f30b 2838 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2839 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2840 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2841 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2842 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2843 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2844 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2845 #endif
433d49c3
DL
2846 ret = fib6_init();
2847 if (ret)
8ed67789 2848 goto out_register_subsys;
433d49c3 2849
433d49c3
DL
2850 ret = xfrm6_init();
2851 if (ret)
cdb18761 2852 goto out_fib6_init;
c35b7e72 2853
433d49c3
DL
2854 ret = fib6_rules_init();
2855 if (ret)
2856 goto xfrm6_init;
7e5449c2 2857
433d49c3
DL
2858 ret = -ENOBUFS;
2859 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2860 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2861 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2862 goto fib6_rules_init;
c127ea2c 2863
8ed67789 2864 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2865 if (ret)
2866 goto fib6_rules_init;
8ed67789 2867
433d49c3
DL
2868out:
2869 return ret;
2870
2871fib6_rules_init:
433d49c3
DL
2872 fib6_rules_cleanup();
2873xfrm6_init:
433d49c3 2874 xfrm6_fini();
433d49c3 2875out_fib6_init:
433d49c3 2876 fib6_gc_cleanup();
8ed67789
DL
2877out_register_subsys:
2878 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
2879out_dst_entries:
2880 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 2881out_kmem_cache:
f2fc6a54 2882 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2883 goto out;
1da177e4
LT
2884}
2885
2886void ip6_route_cleanup(void)
2887{
8ed67789 2888 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2889 fib6_rules_cleanup();
1da177e4 2890 xfrm6_fini();
1da177e4 2891 fib6_gc_cleanup();
8ed67789 2892 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 2893 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 2894 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2895}