ipv6: Normalize arguments to ip6_dst_blackhole().
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
5a0e3ad6 43#include <linux/slab.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  Raise RT6_DEBUG to 3 to compile the
 * RDBG()/RT6_TRACE() tracing helpers in.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing compiled out: both helpers expand to nothing. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
1da177e4
LT
75static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 77static unsigned int ip6_default_advmss(const struct dst_entry *dst);
d33e4553 78static unsigned int ip6_default_mtu(const struct dst_entry *dst);
1da177e4
LT
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Helpers used by rt6_route_rcv(); defined later in this file. */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex);
static struct rt6_info *rt6_add_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex,
					   unsigned pref);
#endif
99
06582540
DM
100static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
101{
102 struct rt6_info *rt = (struct rt6_info *) dst;
103 struct inet_peer *peer;
104 u32 *p = NULL;
105
106 if (!rt->rt6i_peer)
107 rt6_bind_peer(rt, 1);
108
109 peer = rt->rt6i_peer;
110 if (peer) {
111 u32 *old_p = __DST_METRICS_PTR(old);
112 unsigned long prev, new;
113
114 p = peer->metrics;
115 if (inet_metrics_new(peer))
116 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
117
118 new = (unsigned long) p;
119 prev = cmpxchg(&dst->_metrics, old, new);
120
121 if (prev != old) {
122 p = __DST_METRICS_PTR(prev);
123 if (prev & DST_METRICS_READ_ONLY)
124 p = NULL;
125 }
126 }
127 return p;
128}
129
9a7ec3a9 130static struct dst_ops ip6_dst_ops_template = {
1da177e4 131 .family = AF_INET6,
09640e63 132 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
133 .gc = ip6_dst_gc,
134 .gc_thresh = 1024,
135 .check = ip6_dst_check,
0dbaee3b 136 .default_advmss = ip6_default_advmss,
d33e4553 137 .default_mtu = ip6_default_mtu,
06582540 138 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
139 .destroy = ip6_dst_destroy,
140 .ifdown = ip6_dst_ifdown,
141 .negative_advice = ip6_negative_advice,
142 .link_failure = ip6_link_failure,
143 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 144 .local_out = __ip6_local_out,
1da177e4
LT
145};
146
ec831ea7
RD
/*
 * default_mtu callback for blackhole dsts: always reports 0, regardless
 * of @dst (which is not examined).
 */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	const unsigned int blackhole_mtu = 0;

	return blackhole_mtu;
}
151
14e50e57
DM
152static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
153{
154}
155
156static struct dst_ops ip6_dst_blackhole_ops = {
157 .family = AF_INET6,
09640e63 158 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
159 .destroy = ip6_dst_destroy,
160 .check = ip6_dst_check,
ec831ea7 161 .default_mtu = ip6_blackhole_default_mtu,
214f45c9 162 .default_advmss = ip6_default_advmss,
14e50e57 163 .update_pmtu = ip6_rt_blackhole_update_pmtu,
14e50e57
DM
164};
165
62fa8a84
DM
166static const u32 ip6_template_metrics[RTAX_MAX] = {
167 [RTAX_HOPLIMIT - 1] = 255,
168};
169
bdb3289f 170static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
171 .dst = {
172 .__refcnt = ATOMIC_INIT(1),
173 .__use = 1,
174 .obsolete = -1,
175 .error = -ENETUNREACH,
d8d1f30b
CG
176 .input = ip6_pkt_discard,
177 .output = ip6_pkt_discard_out,
1da177e4
LT
178 },
179 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 180 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
181 .rt6i_metric = ~(u32) 0,
182 .rt6i_ref = ATOMIC_INIT(1),
183};
184
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Handlers for the prohibit route; defined later in this file. */
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* Template for the "prohibit" route: reject route with dst error -EACCES. */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route: reject route with dst error -EINVAL
 * whose input and output handlers are both dst_discard.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
221
/*
 * Allocate a dst from @ops via dst_alloc(ops, 0) and hand it back typed as
 * an rt6_info.  The result is whatever dst_alloc() returned.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *)dst_alloc(ops, 0);
	return rt;
}
227
228static void ip6_dst_destroy(struct dst_entry *dst)
229{
230 struct rt6_info *rt = (struct rt6_info *)dst;
231 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 232 struct inet_peer *peer = rt->rt6i_peer;
1da177e4
LT
233
234 if (idev != NULL) {
235 rt->rt6i_idev = NULL;
236 in6_dev_put(idev);
1ab1457c 237 }
b3419363 238 if (peer) {
b3419363
DM
239 rt->rt6i_peer = NULL;
240 inet_putpeer(peer);
241 }
242}
243
6431cbc2
DM
244static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
245
246static u32 rt6_peer_genid(void)
247{
248 return atomic_read(&__rt6_peer_genid);
249}
250
b3419363
DM
251void rt6_bind_peer(struct rt6_info *rt, int create)
252{
253 struct inet_peer *peer;
254
b3419363
DM
255 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
256 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
257 inet_putpeer(peer);
6431cbc2
DM
258 else
259 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
260}
261
262static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
263 int how)
264{
265 struct rt6_info *rt = (struct rt6_info *)dst;
266 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 267 struct net_device *loopback_dev =
c346dca1 268 dev_net(dev)->loopback_dev;
1da177e4 269
5a3e55d6
DL
270 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
271 struct inet6_dev *loopback_idev =
272 in6_dev_get(loopback_dev);
1da177e4
LT
273 if (loopback_idev != NULL) {
274 rt->rt6i_idev = loopback_idev;
275 in6_dev_put(idev);
276 }
277 }
278}
279
280static __inline__ int rt6_check_expired(const struct rt6_info *rt)
281{
a02cec21
ED
282 return (rt->rt6i_flags & RTF_EXPIRES) &&
283 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
284}
285
c71099ac
TG
286static inline int rt6_need_strict(struct in6_addr *daddr)
287{
a02cec21
ED
288 return ipv6_addr_type(daddr) &
289 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
290}
291
1da177e4 292/*
c71099ac 293 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
294 */
295
8ed67789
DL
296static inline struct rt6_info *rt6_device_match(struct net *net,
297 struct rt6_info *rt,
dd3abc4e 298 struct in6_addr *saddr,
1da177e4 299 int oif,
d420895e 300 int flags)
1da177e4
LT
301{
302 struct rt6_info *local = NULL;
303 struct rt6_info *sprt;
304
dd3abc4e
YH
305 if (!oif && ipv6_addr_any(saddr))
306 goto out;
307
d8d1f30b 308 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
309 struct net_device *dev = sprt->rt6i_dev;
310
311 if (oif) {
1da177e4
LT
312 if (dev->ifindex == oif)
313 return sprt;
314 if (dev->flags & IFF_LOOPBACK) {
315 if (sprt->rt6i_idev == NULL ||
316 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 317 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 318 continue;
1ab1457c 319 if (local && (!oif ||
1da177e4
LT
320 local->rt6i_idev->dev->ifindex == oif))
321 continue;
322 }
323 local = sprt;
324 }
dd3abc4e
YH
325 } else {
326 if (ipv6_chk_addr(net, saddr, dev,
327 flags & RT6_LOOKUP_F_IFACE))
328 return sprt;
1da177e4 329 }
dd3abc4e 330 }
1da177e4 331
dd3abc4e 332 if (oif) {
1da177e4
LT
333 if (local)
334 return local;
335
d420895e 336 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 337 return net->ipv6.ip6_null_entry;
1da177e4 338 }
dd3abc4e 339out:
1da177e4
LT
340 return rt;
341}
342
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * rt6_probe - Router Reachability Probing for @rt's next hop.
 * @rt: route to probe; may be NULL, in which case nothing happens
 *
 * Okay, this does not seem to be appropriate for now, however, we need to
 * check if it is really so; aka Router Reachability Probing.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute.  The limit is enforced through neigh->updated: a Neighbour
 * Solicitation is sent only when the neighbour is not NUD_VALID and more
 * than the device's rtr_probe_interval has passed since neigh->updated.
 *
 * neigh->updated is modified here, so the check-and-update is done under
 * the write side of neigh->lock.  Holding only the read side would let
 * concurrent lookups pass the time check together and each send a probe.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Cheap unlocked pre-check; re-tested under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* Solicit the neighbour at its solicited-node multicast address. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
377
1da177e4 378/*
554cfb7e 379 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 380 */
b6f99a21 381static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
382{
383 struct net_device *dev = rt->rt6i_dev;
161980f4 384 if (!oif || dev->ifindex == oif)
554cfb7e 385 return 2;
161980f4
DM
386 if ((dev->flags & IFF_LOOPBACK) &&
387 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
388 return 1;
389 return 0;
554cfb7e 390}
1da177e4 391
b6f99a21 392static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 393{
554cfb7e 394 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 395 int m;
4d0c5911
YH
396 if (rt->rt6i_flags & RTF_NONEXTHOP ||
397 !(rt->rt6i_flags & RTF_GATEWAY))
398 m = 1;
399 else if (neigh) {
554cfb7e
YH
400 read_lock_bh(&neigh->lock);
401 if (neigh->nud_state & NUD_VALID)
4d0c5911 402 m = 2;
398bcbeb
YH
403#ifdef CONFIG_IPV6_ROUTER_PREF
404 else if (neigh->nud_state & NUD_FAILED)
405 m = 0;
406#endif
407 else
ea73ee23 408 m = 1;
554cfb7e 409 read_unlock_bh(&neigh->lock);
398bcbeb
YH
410 } else
411 m = 0;
554cfb7e 412 return m;
1da177e4
LT
413}
414
554cfb7e
YH
415static int rt6_score_route(struct rt6_info *rt, int oif,
416 int strict)
1da177e4 417{
4d0c5911 418 int m, n;
1ab1457c 419
4d0c5911 420 m = rt6_check_dev(rt, oif);
77d16f45 421 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 422 return -1;
ebacaaa0
YH
423#ifdef CONFIG_IPV6_ROUTER_PREF
424 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
425#endif
4d0c5911 426 n = rt6_check_neigh(rt);
557e92ef 427 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
428 return -1;
429 return m;
430}
431
f11e6659
DM
432static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
433 int *mpri, struct rt6_info *match)
554cfb7e 434{
f11e6659
DM
435 int m;
436
437 if (rt6_check_expired(rt))
438 goto out;
439
440 m = rt6_score_route(rt, oif, strict);
441 if (m < 0)
442 goto out;
443
444 if (m > *mpri) {
445 if (strict & RT6_LOOKUP_F_REACHABLE)
446 rt6_probe(match);
447 *mpri = m;
448 match = rt;
449 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
450 rt6_probe(rt);
451 }
452
453out:
454 return match;
455}
456
457static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
458 struct rt6_info *rr_head,
459 u32 metric, int oif, int strict)
460{
461 struct rt6_info *rt, *match;
554cfb7e 462 int mpri = -1;
1da177e4 463
f11e6659
DM
464 match = NULL;
465 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 466 rt = rt->dst.rt6_next)
f11e6659
DM
467 match = find_match(rt, oif, strict, &mpri, match);
468 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 469 rt = rt->dst.rt6_next)
f11e6659 470 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 471
f11e6659
DM
472 return match;
473}
1da177e4 474
f11e6659
DM
475static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
476{
477 struct rt6_info *match, *rt0;
8ed67789 478 struct net *net;
1da177e4 479
f11e6659 480 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 481 __func__, fn->leaf, oif);
554cfb7e 482
f11e6659
DM
483 rt0 = fn->rr_ptr;
484 if (!rt0)
485 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 486
f11e6659 487 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 488
554cfb7e 489 if (!match &&
f11e6659 490 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 491 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 492
554cfb7e 493 /* no entries matched; do round-robin */
f11e6659
DM
494 if (!next || next->rt6i_metric != rt0->rt6i_metric)
495 next = fn->leaf;
496
497 if (next != rt0)
498 fn->rr_ptr = next;
1da177e4 499 }
1da177e4 500
f11e6659 501 RT6_TRACE("%s() => %p\n",
0dc47877 502 __func__, match);
1da177e4 503
c346dca1 504 net = dev_net(rt0->rt6i_dev);
a02cec21 505 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
506}
507
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_route_rcv - process a received Route Information option.
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Validates the option, then looks up the matching route-info route:
 *  - an existing route with a zero lifetime is deleted;
 *  - a missing route with a non-zero lifetime is added;
 *  - otherwise an existing route has its preference bits refreshed.
 * The surviving route then gets its expiry set (or cleared when the
 * lifetime is not finite) and the reference obtained here is released.
 *
 * Returns 0 on success, -EINVAL for a malformed option or an invalid
 * route preference.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct net *net = dev_net(dev);
	struct in6_addr masked_prefix;
	struct in6_addr *prefix;
	unsigned long lifetime;
	unsigned int pref;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* full 128-bit prefix is present in the option */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&masked_prefix,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &masked_prefix;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
581
/*
 * BACKTRACK() - climb the fib tree when the current lookup yielded the
 * null entry.
 *
 * The expansion relies on the enclosing function providing local variables
 * 'rt' and 'fn' and the labels 'out' and 'restart'.  When rt is the null
 * entry it walks towards the root: at a tree root it jumps to 'out'; when
 * the parent has a subtree other than fn, it looks @saddr up in that
 * subtree, otherwise it moves to the parent; on reaching a node carrying
 * RTN_RTINFO it jumps to 'restart'.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			fn = (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) ? \
				fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr) : \
				pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
c71099ac 599
8ed67789
DL
600static struct rt6_info *ip6_pol_route_lookup(struct net *net,
601 struct fib6_table *table,
c71099ac 602 struct flowi *fl, int flags)
1da177e4
LT
603{
604 struct fib6_node *fn;
605 struct rt6_info *rt;
606
c71099ac
TG
607 read_lock_bh(&table->tb6_lock);
608 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
609restart:
610 rt = fn->leaf;
dd3abc4e 611 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
8ed67789 612 BACKTRACK(net, &fl->fl6_src);
c71099ac 613out:
d8d1f30b 614 dst_use(&rt->dst, jiffies);
c71099ac 615 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
616 return rt;
617
618}
619
9acd9f3a
YH
620struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
621 const struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
622{
623 struct flowi fl = {
624 .oif = oif,
5811662b 625 .fl6_dst = *daddr,
c71099ac
TG
626 };
627 struct dst_entry *dst;
77d16f45 628 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 629
adaa70bb
TG
630 if (saddr) {
631 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
632 flags |= RT6_LOOKUP_F_HAS_SADDR;
633 }
634
606a2b48 635 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
636 if (dst->error == 0)
637 return (struct rt6_info *) dst;
638
639 dst_release(dst);
640
1da177e4
LT
641 return NULL;
642}
643
7159039a
YH
644EXPORT_SYMBOL(rt6_lookup);
645
c71099ac 646/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
647 It takes new route entry, the addition fails by any reason the
648 route is freed. In any case, if caller does not hold it, it may
649 be destroyed.
650 */
651
86872cb5 652static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
653{
654 int err;
c71099ac 655 struct fib6_table *table;
1da177e4 656
c71099ac
TG
657 table = rt->rt6i_table;
658 write_lock_bh(&table->tb6_lock);
86872cb5 659 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 660 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
661
662 return err;
663}
664
40e22e8f
TG
665int ip6_ins_rt(struct rt6_info *rt)
666{
4d1169c1 667 struct nl_info info = {
c346dca1 668 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 669 };
528c4ceb 670 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
671}
672
95a9a5ba
YH
673static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
674 struct in6_addr *saddr)
1da177e4 675{
1da177e4
LT
676 struct rt6_info *rt;
677
678 /*
679 * Clone the route.
680 */
681
682 rt = ip6_rt_copy(ort);
683
684 if (rt) {
14deae41
DM
685 struct neighbour *neigh;
686 int attempts = !in_softirq();
687
58c4fb86
YH
688 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
689 if (rt->rt6i_dst.plen != 128 &&
690 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
691 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 692 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 693 }
1da177e4 694
58c4fb86 695 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
696 rt->rt6i_dst.plen = 128;
697 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 698 rt->dst.flags |= DST_HOST;
1da177e4
LT
699
700#ifdef CONFIG_IPV6_SUBTREES
701 if (rt->rt6i_src.plen && saddr) {
702 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
703 rt->rt6i_src.plen = 128;
704 }
705#endif
706
14deae41
DM
707 retry:
708 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
709 if (IS_ERR(neigh)) {
710 struct net *net = dev_net(rt->rt6i_dev);
711 int saved_rt_min_interval =
712 net->ipv6.sysctl.ip6_rt_gc_min_interval;
713 int saved_rt_elasticity =
714 net->ipv6.sysctl.ip6_rt_gc_elasticity;
715
716 if (attempts-- > 0) {
717 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
718 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
719
86393e52 720 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
721
722 net->ipv6.sysctl.ip6_rt_gc_elasticity =
723 saved_rt_elasticity;
724 net->ipv6.sysctl.ip6_rt_gc_min_interval =
725 saved_rt_min_interval;
726 goto retry;
727 }
728
729 if (net_ratelimit())
730 printk(KERN_WARNING
7e1b33e5 731 "ipv6: Neighbour table overflow.\n");
d8d1f30b 732 dst_free(&rt->dst);
14deae41
DM
733 return NULL;
734 }
735 rt->rt6i_nexthop = neigh;
1da177e4 736
95a9a5ba 737 }
1da177e4 738
95a9a5ba
YH
739 return rt;
740}
1da177e4 741
299d9939
YH
742static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
743{
744 struct rt6_info *rt = ip6_rt_copy(ort);
745 if (rt) {
746 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
747 rt->rt6i_dst.plen = 128;
748 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 749 rt->dst.flags |= DST_HOST;
299d9939
YH
750 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
751 }
752 return rt;
753}
754
8ed67789
DL
755static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
756 struct flowi *fl, int flags)
1da177e4
LT
757{
758 struct fib6_node *fn;
519fbd87 759 struct rt6_info *rt, *nrt;
c71099ac 760 int strict = 0;
1da177e4 761 int attempts = 3;
519fbd87 762 int err;
53b7997f 763 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 764
77d16f45 765 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
766
767relookup:
c71099ac 768 read_lock_bh(&table->tb6_lock);
1da177e4 769
8238dd06 770restart_2:
c71099ac 771 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
772
773restart:
4acad72d 774 rt = rt6_select(fn, oif, strict | reachable);
8ed67789
DL
775
776 BACKTRACK(net, &fl->fl6_src);
777 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 778 rt->rt6i_flags & RTF_CACHE)
1ddef044 779 goto out;
1da177e4 780
d8d1f30b 781 dst_hold(&rt->dst);
c71099ac 782 read_unlock_bh(&table->tb6_lock);
fb9de91e 783
519fbd87 784 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 785 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
d80bc0fd 786 else
c71099ac 787 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
e40cf353 788
d8d1f30b 789 dst_release(&rt->dst);
8ed67789 790 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 791
d8d1f30b 792 dst_hold(&rt->dst);
519fbd87 793 if (nrt) {
40e22e8f 794 err = ip6_ins_rt(nrt);
519fbd87 795 if (!err)
1da177e4 796 goto out2;
1da177e4 797 }
1da177e4 798
519fbd87
YH
799 if (--attempts <= 0)
800 goto out2;
801
802 /*
c71099ac 803 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
804 * released someone could insert this route. Relookup.
805 */
d8d1f30b 806 dst_release(&rt->dst);
519fbd87
YH
807 goto relookup;
808
809out:
8238dd06
YH
810 if (reachable) {
811 reachable = 0;
812 goto restart_2;
813 }
d8d1f30b 814 dst_hold(&rt->dst);
c71099ac 815 read_unlock_bh(&table->tb6_lock);
1da177e4 816out2:
d8d1f30b
CG
817 rt->dst.lastuse = jiffies;
818 rt->dst.__use++;
c71099ac
TG
819
820 return rt;
1da177e4
LT
821}
822
8ed67789 823static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4acad72d
PE
824 struct flowi *fl, int flags)
825{
8ed67789 826 return ip6_pol_route(net, table, fl->iif, fl, flags);
4acad72d
PE
827}
828
c71099ac
TG
829void ip6_route_input(struct sk_buff *skb)
830{
0660e03f 831 struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 832 struct net *net = dev_net(skb->dev);
adaa70bb 833 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
834 struct flowi fl = {
835 .iif = skb->dev->ifindex,
5811662b
CG
836 .fl6_dst = iph->daddr,
837 .fl6_src = iph->saddr,
838 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
1ab1457c 839 .mark = skb->mark,
c71099ac
TG
840 .proto = iph->nexthdr,
841 };
adaa70bb 842
1d6e55f1 843 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 844 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 845
adf30907 846 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
c71099ac
TG
847}
848
8ed67789 849static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
c71099ac 850 struct flowi *fl, int flags)
1da177e4 851{
8ed67789 852 return ip6_pol_route(net, table, fl->oif, fl, flags);
c71099ac
TG
853}
854
4591db4f
DL
855struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
856 struct flowi *fl)
c71099ac
TG
857{
858 int flags = 0;
859
6057fd78 860 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
77d16f45 861 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 862
adaa70bb
TG
863 if (!ipv6_addr_any(&fl->fl6_src))
864 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
865 else if (sk)
866 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 867
4591db4f 868 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
1da177e4
LT
869}
870
7159039a 871EXPORT_SYMBOL(ip6_route_output);
1da177e4 872
69ead7af 873struct dst_entry *ip6_dst_blackhole(struct net *net, struct dst_entry *dst_orig)
14e50e57 874{
69ead7af
DM
875 struct rt6_info *rt = dst_alloc(&ip6_dst_blackhole_ops, 1);
876 struct rt6_info *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
877 struct dst_entry *new = NULL;
878
879 if (rt) {
d8d1f30b 880 new = &rt->dst;
14e50e57 881
14e50e57 882 new->__use = 1;
352e512c
HX
883 new->input = dst_discard;
884 new->output = dst_discard;
14e50e57 885
defb3519 886 dst_copy_metrics(new, &ort->dst);
d8d1f30b 887 new->dev = ort->dst.dev;
14e50e57
DM
888 if (new->dev)
889 dev_hold(new->dev);
890 rt->rt6i_idev = ort->rt6i_idev;
891 if (rt->rt6i_idev)
892 in6_dev_hold(rt->rt6i_idev);
893 rt->rt6i_expires = 0;
894
895 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
896 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
897 rt->rt6i_metric = 0;
898
899 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
900#ifdef CONFIG_IPV6_SUBTREES
901 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
902#endif
903
904 dst_free(new);
905 }
906
69ead7af
DM
907 dst_release(dst_orig);
908 return new ? new : ERR_PTR(-ENOMEM);
14e50e57
DM
909}
910EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
911
1da177e4
LT
912/*
913 * Destination cache support functions
914 */
915
916static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
917{
918 struct rt6_info *rt;
919
920 rt = (struct rt6_info *) dst;
921
6431cbc2
DM
922 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
923 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
924 if (!rt->rt6i_peer)
925 rt6_bind_peer(rt, 0);
926 rt->rt6i_peer_genid = rt6_peer_genid();
927 }
1da177e4 928 return dst;
6431cbc2 929 }
1da177e4
LT
930 return NULL;
931}
932
933static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
934{
935 struct rt6_info *rt = (struct rt6_info *) dst;
936
937 if (rt) {
54c1a859
YH
938 if (rt->rt6i_flags & RTF_CACHE) {
939 if (rt6_check_expired(rt)) {
940 ip6_del_rt(rt);
941 dst = NULL;
942 }
943 } else {
1da177e4 944 dst_release(dst);
54c1a859
YH
945 dst = NULL;
946 }
1da177e4 947 }
54c1a859 948 return dst;
1da177e4
LT
949}
950
951static void ip6_link_failure(struct sk_buff *skb)
952{
953 struct rt6_info *rt;
954
3ffe533c 955 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 956
adf30907 957 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
958 if (rt) {
959 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 960 dst_set_expires(&rt->dst, 0);
1da177e4
LT
961 rt->rt6i_flags |= RTF_EXPIRES;
962 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
963 rt->rt6i_node->fn_sernum = -1;
964 }
965}
966
967static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
968{
969 struct rt6_info *rt6 = (struct rt6_info*)dst;
970
971 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
972 rt6->rt6i_flags |= RTF_MODIFIED;
973 if (mtu < IPV6_MIN_MTU) {
defb3519 974 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 975 mtu = IPV6_MIN_MTU;
defb3519
DM
976 features |= RTAX_FEATURE_ALLFRAG;
977 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 978 }
defb3519 979 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
980 }
981}
982
0dbaee3b 983static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 984{
0dbaee3b
DM
985 struct net_device *dev = dst->dev;
986 unsigned int mtu = dst_mtu(dst);
987 struct net *net = dev_net(dev);
988
1da177e4
LT
989 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
990
5578689a
DL
991 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
992 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
993
994 /*
1ab1457c
YH
995 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
996 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
997 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
998 * rely only on pmtu discovery"
999 */
1000 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1001 mtu = IPV6_MAXPLEN;
1002 return mtu;
1003}
1004
d33e4553
DM
1005static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1006{
1007 unsigned int mtu = IPV6_MIN_MTU;
1008 struct inet6_dev *idev;
1009
1010 rcu_read_lock();
1011 idev = __in6_dev_get(dst->dev);
1012 if (idev)
1013 mtu = idev->cnf.mtu6;
1014 rcu_read_unlock();
1015
1016 return mtu;
1017}
1018
3b00944c
YH
1019static struct dst_entry *icmp6_dst_gc_list;
1020static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1021
3b00944c 1022struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1023 struct neighbour *neigh,
9acd9f3a 1024 const struct in6_addr *addr)
1da177e4
LT
1025{
1026 struct rt6_info *rt;
1027 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1028 struct net *net = dev_net(dev);
1da177e4
LT
1029
1030 if (unlikely(idev == NULL))
1031 return NULL;
1032
86393e52 1033 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1034 if (unlikely(rt == NULL)) {
1035 in6_dev_put(idev);
1036 goto out;
1037 }
1038
1039 dev_hold(dev);
1040 if (neigh)
1041 neigh_hold(neigh);
14deae41 1042 else {
1da177e4 1043 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
1044 if (IS_ERR(neigh))
1045 neigh = NULL;
1046 }
1da177e4
LT
1047
1048 rt->rt6i_dev = dev;
1049 rt->rt6i_idev = idev;
1050 rt->rt6i_nexthop = neigh;
d8d1f30b 1051 atomic_set(&rt->dst.__refcnt, 1);
defb3519 1052 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
d8d1f30b 1053 rt->dst.output = ip6_output;
1da177e4
LT
1054
1055#if 0 /* there's no chance to use these for ndisc */
d8d1f30b 1056 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1ab1457c 1057 ? DST_HOST
1da177e4
LT
1058 : 0;
1059 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1060 rt->rt6i_dst.plen = 128;
1061#endif
1062
3b00944c 1063 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1064 rt->dst.next = icmp6_dst_gc_list;
1065 icmp6_dst_gc_list = &rt->dst;
3b00944c 1066 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1067
5578689a 1068 fib6_force_start_gc(net);
1da177e4
LT
1069
1070out:
d8d1f30b 1071 return &rt->dst;
1da177e4
LT
1072}
1073
3d0f24a7 1074int icmp6_dst_gc(void)
1da177e4 1075{
e9476e95 1076 struct dst_entry *dst, **pprev;
3d0f24a7 1077 int more = 0;
1da177e4 1078
3b00944c
YH
1079 spin_lock_bh(&icmp6_dst_lock);
1080 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1081
1da177e4
LT
1082 while ((dst = *pprev) != NULL) {
1083 if (!atomic_read(&dst->__refcnt)) {
1084 *pprev = dst->next;
1085 dst_free(dst);
1da177e4
LT
1086 } else {
1087 pprev = &dst->next;
3d0f24a7 1088 ++more;
1da177e4
LT
1089 }
1090 }
1091
3b00944c 1092 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1093
3d0f24a7 1094 return more;
1da177e4
LT
1095}
1096
1e493d19
DM
1097static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1098 void *arg)
1099{
1100 struct dst_entry *dst, **pprev;
1101
1102 spin_lock_bh(&icmp6_dst_lock);
1103 pprev = &icmp6_dst_gc_list;
1104 while ((dst = *pprev) != NULL) {
1105 struct rt6_info *rt = (struct rt6_info *) dst;
1106 if (func(rt, arg)) {
1107 *pprev = dst->next;
1108 dst_free(dst);
1109 } else {
1110 pprev = &dst->next;
1111 }
1112 }
1113 spin_unlock_bh(&icmp6_dst_lock);
1114}
1115
569d3645 1116static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1117{
1da177e4 1118 unsigned long now = jiffies;
86393e52 1119 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1120 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1121 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1122 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1123 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1124 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1125 int entries;
7019b78e 1126
fc66f95c 1127 entries = dst_entries_get_fast(ops);
7019b78e 1128 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1129 entries <= rt_max_size)
1da177e4
LT
1130 goto out;
1131
6891a346
BT
1132 net->ipv6.ip6_rt_gc_expire++;
1133 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1134 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1135 entries = dst_entries_get_slow(ops);
1136 if (entries < ops->gc_thresh)
7019b78e 1137 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1138out:
7019b78e 1139 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1140 return entries > rt_max_size;
1da177e4
LT
1141}
1142
1143/* Clean host part of a prefix. Not necessary in radix tree,
1144 but results in cleaner routing tables.
1145
1146 Remove it only when all the things will work!
1147 */
1148
6b75d090 1149int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1150{
5170ae82 1151 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1152 if (hoplimit == 0) {
6b75d090 1153 struct net_device *dev = dst->dev;
c68f24cc
ED
1154 struct inet6_dev *idev;
1155
1156 rcu_read_lock();
1157 idev = __in6_dev_get(dev);
1158 if (idev)
6b75d090 1159 hoplimit = idev->cnf.hop_limit;
c68f24cc 1160 else
53b7997f 1161 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1162 rcu_read_unlock();
1da177e4
LT
1163 }
1164 return hoplimit;
1165}
abbf46ae 1166EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1167
1168/*
1169 *
1170 */
1171
86872cb5 1172int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1173{
1174 int err;
5578689a 1175 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1176 struct rt6_info *rt = NULL;
1177 struct net_device *dev = NULL;
1178 struct inet6_dev *idev = NULL;
c71099ac 1179 struct fib6_table *table;
1da177e4
LT
1180 int addr_type;
1181
86872cb5 1182 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1183 return -EINVAL;
1184#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1185 if (cfg->fc_src_len)
1da177e4
LT
1186 return -EINVAL;
1187#endif
86872cb5 1188 if (cfg->fc_ifindex) {
1da177e4 1189 err = -ENODEV;
5578689a 1190 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1191 if (!dev)
1192 goto out;
1193 idev = in6_dev_get(dev);
1194 if (!idev)
1195 goto out;
1196 }
1197
86872cb5
TG
1198 if (cfg->fc_metric == 0)
1199 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1200
5578689a 1201 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1202 if (table == NULL) {
1203 err = -ENOBUFS;
1204 goto out;
1205 }
1206
86393e52 1207 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1208
1209 if (rt == NULL) {
1210 err = -ENOMEM;
1211 goto out;
1212 }
1213
d8d1f30b 1214 rt->dst.obsolete = -1;
6f704992
YH
1215 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1216 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1217 0;
1da177e4 1218
86872cb5
TG
1219 if (cfg->fc_protocol == RTPROT_UNSPEC)
1220 cfg->fc_protocol = RTPROT_BOOT;
1221 rt->rt6i_protocol = cfg->fc_protocol;
1222
1223 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1224
1225 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1226 rt->dst.input = ip6_mc_input;
ab79ad14
1227 else if (cfg->fc_flags & RTF_LOCAL)
1228 rt->dst.input = ip6_input;
1da177e4 1229 else
d8d1f30b 1230 rt->dst.input = ip6_forward;
1da177e4 1231
d8d1f30b 1232 rt->dst.output = ip6_output;
1da177e4 1233
86872cb5
TG
1234 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1235 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1236 if (rt->rt6i_dst.plen == 128)
d8d1f30b 1237 rt->dst.flags = DST_HOST;
1da177e4
LT
1238
1239#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1240 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1241 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1242#endif
1243
86872cb5 1244 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1245
1246 /* We cannot add true routes via loopback here,
1247 they would result in kernel looping; promote them to reject routes
1248 */
86872cb5 1249 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1250 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1251 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1252 /* hold loopback dev/idev if we haven't done so. */
5578689a 1253 if (dev != net->loopback_dev) {
1da177e4
LT
1254 if (dev) {
1255 dev_put(dev);
1256 in6_dev_put(idev);
1257 }
5578689a 1258 dev = net->loopback_dev;
1da177e4
LT
1259 dev_hold(dev);
1260 idev = in6_dev_get(dev);
1261 if (!idev) {
1262 err = -ENODEV;
1263 goto out;
1264 }
1265 }
d8d1f30b
CG
1266 rt->dst.output = ip6_pkt_discard_out;
1267 rt->dst.input = ip6_pkt_discard;
1268 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1269 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1270 goto install_route;
1271 }
1272
86872cb5 1273 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1274 struct in6_addr *gw_addr;
1275 int gwa_type;
1276
86872cb5
TG
1277 gw_addr = &cfg->fc_gateway;
1278 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1279 gwa_type = ipv6_addr_type(gw_addr);
1280
1281 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1282 struct rt6_info *grt;
1283
1284 /* IPv6 strictly inhibits using not link-local
1285 addresses as nexthop address.
1286 Otherwise, router will not able to send redirects.
1287 It is very good, but in some (rare!) circumstances
1288 (SIT, PtP, NBMA NOARP links) it is handy to allow
1289 some exceptions. --ANK
1290 */
1291 err = -EINVAL;
1292 if (!(gwa_type&IPV6_ADDR_UNICAST))
1293 goto out;
1294
5578689a 1295 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1296
1297 err = -EHOSTUNREACH;
1298 if (grt == NULL)
1299 goto out;
1300 if (dev) {
1301 if (dev != grt->rt6i_dev) {
d8d1f30b 1302 dst_release(&grt->dst);
1da177e4
LT
1303 goto out;
1304 }
1305 } else {
1306 dev = grt->rt6i_dev;
1307 idev = grt->rt6i_idev;
1308 dev_hold(dev);
1309 in6_dev_hold(grt->rt6i_idev);
1310 }
1311 if (!(grt->rt6i_flags&RTF_GATEWAY))
1312 err = 0;
d8d1f30b 1313 dst_release(&grt->dst);
1da177e4
LT
1314
1315 if (err)
1316 goto out;
1317 }
1318 err = -EINVAL;
1319 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1320 goto out;
1321 }
1322
1323 err = -ENODEV;
1324 if (dev == NULL)
1325 goto out;
1326
86872cb5 1327 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1328 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1329 if (IS_ERR(rt->rt6i_nexthop)) {
1330 err = PTR_ERR(rt->rt6i_nexthop);
1331 rt->rt6i_nexthop = NULL;
1332 goto out;
1333 }
1334 }
1335
86872cb5 1336 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1337
1338install_route:
86872cb5
TG
1339 if (cfg->fc_mx) {
1340 struct nlattr *nla;
1341 int remaining;
1342
1343 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1344 int type = nla_type(nla);
86872cb5
TG
1345
1346 if (type) {
1347 if (type > RTAX_MAX) {
1da177e4
LT
1348 err = -EINVAL;
1349 goto out;
1350 }
86872cb5 1351
defb3519 1352 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1353 }
1da177e4
LT
1354 }
1355 }
1356
d8d1f30b 1357 rt->dst.dev = dev;
1da177e4 1358 rt->rt6i_idev = idev;
c71099ac 1359 rt->rt6i_table = table;
63152fc0 1360
c346dca1 1361 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1362
86872cb5 1363 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1364
1365out:
1366 if (dev)
1367 dev_put(dev);
1368 if (idev)
1369 in6_dev_put(idev);
1370 if (rt)
d8d1f30b 1371 dst_free(&rt->dst);
1da177e4
LT
1372 return err;
1373}
1374
86872cb5 1375static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1376{
1377 int err;
c71099ac 1378 struct fib6_table *table;
c346dca1 1379 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1380
8ed67789 1381 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1382 return -ENOENT;
1383
c71099ac
TG
1384 table = rt->rt6i_table;
1385 write_lock_bh(&table->tb6_lock);
1da177e4 1386
86872cb5 1387 err = fib6_del(rt, info);
d8d1f30b 1388 dst_release(&rt->dst);
1da177e4 1389
c71099ac 1390 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1391
1392 return err;
1393}
1394
e0a1ad73
TG
1395int ip6_del_rt(struct rt6_info *rt)
1396{
4d1169c1 1397 struct nl_info info = {
c346dca1 1398 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1399 };
528c4ceb 1400 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1401}
1402
86872cb5 1403static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1404{
c71099ac 1405 struct fib6_table *table;
1da177e4
LT
1406 struct fib6_node *fn;
1407 struct rt6_info *rt;
1408 int err = -ESRCH;
1409
5578689a 1410 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1411 if (table == NULL)
1412 return err;
1413
1414 read_lock_bh(&table->tb6_lock);
1da177e4 1415
c71099ac 1416 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1417 &cfg->fc_dst, cfg->fc_dst_len,
1418 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1419
1da177e4 1420 if (fn) {
d8d1f30b 1421 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1422 if (cfg->fc_ifindex &&
1da177e4 1423 (rt->rt6i_dev == NULL ||
86872cb5 1424 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1425 continue;
86872cb5
TG
1426 if (cfg->fc_flags & RTF_GATEWAY &&
1427 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1428 continue;
86872cb5 1429 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1430 continue;
d8d1f30b 1431 dst_hold(&rt->dst);
c71099ac 1432 read_unlock_bh(&table->tb6_lock);
1da177e4 1433
86872cb5 1434 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1435 }
1436 }
c71099ac 1437 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1438
1439 return err;
1440}
1441
1442/*
1443 * Handle redirects
1444 */
a6279458
YH
1445struct ip6rd_flowi {
1446 struct flowi fl;
1447 struct in6_addr gateway;
1448};
1449
8ed67789
DL
1450static struct rt6_info *__ip6_route_redirect(struct net *net,
1451 struct fib6_table *table,
a6279458
YH
1452 struct flowi *fl,
1453 int flags)
1da177e4 1454{
a6279458
YH
1455 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1456 struct rt6_info *rt;
e843b9e1 1457 struct fib6_node *fn;
c71099ac 1458
1da177e4 1459 /*
e843b9e1
YH
1460 * Get the "current" route for this destination and
1461 * check if the redirect has come from approriate router.
1462 *
1463 * RFC 2461 specifies that redirects should only be
1464 * accepted if they come from the nexthop to the target.
1465 * Due to the way the routes are chosen, this notion
1466 * is a bit fuzzy and one might need to check all possible
1467 * routes.
1da177e4 1468 */
1da177e4 1469
c71099ac 1470 read_lock_bh(&table->tb6_lock);
a6279458 1471 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1472restart:
d8d1f30b 1473 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1474 /*
1475 * Current route is on-link; redirect is always invalid.
1476 *
1477 * Seems, previous statement is not true. It could
1478 * be node, which looks for us as on-link (f.e. proxy ndisc)
1479 * But then router serving it might decide, that we should
1480 * know truth 8)8) --ANK (980726).
1481 */
1482 if (rt6_check_expired(rt))
1483 continue;
1484 if (!(rt->rt6i_flags & RTF_GATEWAY))
1485 continue;
a6279458 1486 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1487 continue;
a6279458 1488 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1489 continue;
1490 break;
1491 }
a6279458 1492
cb15d9c2 1493 if (!rt)
8ed67789
DL
1494 rt = net->ipv6.ip6_null_entry;
1495 BACKTRACK(net, &fl->fl6_src);
cb15d9c2 1496out:
d8d1f30b 1497 dst_hold(&rt->dst);
a6279458 1498
c71099ac 1499 read_unlock_bh(&table->tb6_lock);
e843b9e1 1500
a6279458
YH
1501 return rt;
1502};
1503
1504static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1505 struct in6_addr *src,
1506 struct in6_addr *gateway,
1507 struct net_device *dev)
1508{
adaa70bb 1509 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1510 struct net *net = dev_net(dev);
a6279458
YH
1511 struct ip6rd_flowi rdfl = {
1512 .fl = {
1513 .oif = dev->ifindex,
5811662b
CG
1514 .fl6_dst = *dest,
1515 .fl6_src = *src,
a6279458 1516 },
a6279458 1517 };
adaa70bb 1518
86c36ce4
BH
1519 ipv6_addr_copy(&rdfl.gateway, gateway);
1520
adaa70bb
TG
1521 if (rt6_need_strict(dest))
1522 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1523
5578689a 1524 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
58f09b78 1525 flags, __ip6_route_redirect);
a6279458
YH
1526}
1527
1528void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1529 struct in6_addr *saddr,
1530 struct neighbour *neigh, u8 *lladdr, int on_link)
1531{
1532 struct rt6_info *rt, *nrt = NULL;
1533 struct netevent_redirect netevent;
c346dca1 1534 struct net *net = dev_net(neigh->dev);
a6279458
YH
1535
1536 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1537
8ed67789 1538 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1539 if (net_ratelimit())
1540 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1541 "for redirect target\n");
a6279458 1542 goto out;
1da177e4
LT
1543 }
1544
1da177e4
LT
1545 /*
1546 * We have finally decided to accept it.
1547 */
1548
1ab1457c 1549 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1550 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1551 NEIGH_UPDATE_F_OVERRIDE|
1552 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1553 NEIGH_UPDATE_F_ISROUTER))
1554 );
1555
1556 /*
1557 * Redirect received -> path was valid.
1558 * Look, redirects are sent only in response to data packets,
1559 * so that this nexthop apparently is reachable. --ANK
1560 */
d8d1f30b 1561 dst_confirm(&rt->dst);
1da177e4
LT
1562
1563 /* Duplicate redirect: silently ignore. */
d8d1f30b 1564 if (neigh == rt->dst.neighbour)
1da177e4
LT
1565 goto out;
1566
1567 nrt = ip6_rt_copy(rt);
1568 if (nrt == NULL)
1569 goto out;
1570
1571 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1572 if (on_link)
1573 nrt->rt6i_flags &= ~RTF_GATEWAY;
1574
1575 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1576 nrt->rt6i_dst.plen = 128;
d8d1f30b 1577 nrt->dst.flags |= DST_HOST;
1da177e4
LT
1578
1579 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1580 nrt->rt6i_nexthop = neigh_clone(neigh);
1da177e4 1581
40e22e8f 1582 if (ip6_ins_rt(nrt))
1da177e4
LT
1583 goto out;
1584
d8d1f30b
CG
1585 netevent.old = &rt->dst;
1586 netevent.new = &nrt->dst;
8d71740c
TT
1587 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1588
1da177e4 1589 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1590 ip6_del_rt(rt);
1da177e4
LT
1591 return;
1592 }
1593
1594out:
d8d1f30b 1595 dst_release(&rt->dst);
1da177e4
LT
1596}
1597
1598/*
1599 * Handle ICMP "packet too big" messages
1600 * i.e. Path MTU discovery
1601 */
1602
ae878ae2
1603static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1604 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1605{
1606 struct rt6_info *rt, *nrt;
1607 int allfrag = 0;
d3052b55 1608again:
ae878ae2 1609 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1610 if (rt == NULL)
1611 return;
1612
d3052b55
AV
1613 if (rt6_check_expired(rt)) {
1614 ip6_del_rt(rt);
1615 goto again;
1616 }
1617
d8d1f30b 1618 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1619 goto out;
1620
1621 if (pmtu < IPV6_MIN_MTU) {
1622 /*
1ab1457c 1623 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1624 * MTU (1280) and a fragment header should always be included
1625 * after a node receiving Too Big message reporting PMTU is
1626 * less than the IPv6 Minimum Link MTU.
1627 */
1628 pmtu = IPV6_MIN_MTU;
1629 allfrag = 1;
1630 }
1631
1632 /* New mtu received -> path was valid.
1633 They are sent only in response to data packets,
1634 so that this nexthop apparently is reachable. --ANK
1635 */
d8d1f30b 1636 dst_confirm(&rt->dst);
1da177e4
LT
1637
1638 /* Host route. If it is static, it would be better
1639 not to override it, but add new one, so that
1640 when cache entry will expire old pmtu
1641 would return automatically.
1642 */
1643 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1644 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1645 if (allfrag) {
1646 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1647 features |= RTAX_FEATURE_ALLFRAG;
1648 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1649 }
d8d1f30b 1650 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1651 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1652 goto out;
1653 }
1654
1655 /* Network route.
1656 Two cases are possible:
1657 1. It is connected route. Action: COW
1658 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1659 */
d5315b50 1660 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1661 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1662 else
1663 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1664
d5315b50 1665 if (nrt) {
defb3519
DM
1666 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1667 if (allfrag) {
1668 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1669 features |= RTAX_FEATURE_ALLFRAG;
1670 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1671 }
a1e78363
YH
1672
1673 /* According to RFC 1981, detecting PMTU increase shouldn't be
1674 * happened within 5 mins, the recommended timer is 10 mins.
1675 * Here this route expiration time is set to ip6_rt_mtu_expires
1676 * which is 10 mins. After 10 mins the decreased pmtu is expired
1677 * and detecting PMTU increase will be automatically happened.
1678 */
d8d1f30b 1679 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1680 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1681
40e22e8f 1682 ip6_ins_rt(nrt);
1da177e4 1683 }
1da177e4 1684out:
d8d1f30b 1685 dst_release(&rt->dst);
1da177e4
LT
1686}
1687
ae878ae2
1688void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1689 struct net_device *dev, u32 pmtu)
1690{
1691 struct net *net = dev_net(dev);
1692
1693 /*
1694 * RFC 1981 states that a node "MUST reduce the size of the packets it
1695 * is sending along the path" that caused the Packet Too Big message.
1696 * Since it's not possible in the general case to determine which
1697 * interface was used to send the original packet, we update the MTU
1698 * on the interface that will be used to send future packets. We also
1699 * update the MTU on the interface that received the Packet Too Big in
1700 * case the original packet was forced out that interface with
1701 * SO_BINDTODEVICE or similar. This is the next best thing to the
1702 * correct behaviour, which would be to update the MTU on all
1703 * interfaces.
1704 */
1705 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1706 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1707}
1708
1da177e4
LT
1709/*
1710 * Misc support functions
1711 */
1712
1713static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1714{
c346dca1 1715 struct net *net = dev_net(ort->rt6i_dev);
86393e52 1716 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1717
1718 if (rt) {
d8d1f30b
CG
1719 rt->dst.input = ort->dst.input;
1720 rt->dst.output = ort->dst.output;
1721
defb3519 1722 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b
CG
1723 rt->dst.error = ort->dst.error;
1724 rt->dst.dev = ort->dst.dev;
1725 if (rt->dst.dev)
1726 dev_hold(rt->dst.dev);
1da177e4
LT
1727 rt->rt6i_idev = ort->rt6i_idev;
1728 if (rt->rt6i_idev)
1729 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1730 rt->dst.lastuse = jiffies;
1da177e4
LT
1731 rt->rt6i_expires = 0;
1732
1733 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1734 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1735 rt->rt6i_metric = 0;
1736
1737 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1738#ifdef CONFIG_IPV6_SUBTREES
1739 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1740#endif
c71099ac 1741 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1742 }
1743 return rt;
1744}
1745
70ceb4f5 1746#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1747static struct rt6_info *rt6_get_route_info(struct net *net,
1748 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1749 struct in6_addr *gwaddr, int ifindex)
1750{
1751 struct fib6_node *fn;
1752 struct rt6_info *rt = NULL;
c71099ac
TG
1753 struct fib6_table *table;
1754
efa2cea0 1755 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1756 if (table == NULL)
1757 return NULL;
70ceb4f5 1758
c71099ac
TG
1759 write_lock_bh(&table->tb6_lock);
1760 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1761 if (!fn)
1762 goto out;
1763
d8d1f30b 1764 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1765 if (rt->rt6i_dev->ifindex != ifindex)
1766 continue;
1767 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1768 continue;
1769 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1770 continue;
d8d1f30b 1771 dst_hold(&rt->dst);
70ceb4f5
YH
1772 break;
1773 }
1774out:
c71099ac 1775 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1776 return rt;
1777}
1778
efa2cea0
DL
1779static struct rt6_info *rt6_add_route_info(struct net *net,
1780 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1781 struct in6_addr *gwaddr, int ifindex,
1782 unsigned pref)
1783{
86872cb5
TG
1784 struct fib6_config cfg = {
1785 .fc_table = RT6_TABLE_INFO,
238fc7ea 1786 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1787 .fc_ifindex = ifindex,
1788 .fc_dst_len = prefixlen,
1789 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1790 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1791 .fc_nlinfo.pid = 0,
1792 .fc_nlinfo.nlh = NULL,
1793 .fc_nlinfo.nl_net = net,
86872cb5
TG
1794 };
1795
1796 ipv6_addr_copy(&cfg.fc_dst, prefix);
1797 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1798
e317da96
YH
1799 /* We should treat it as a default route if prefix length is 0. */
1800 if (!prefixlen)
86872cb5 1801 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1802
86872cb5 1803 ip6_route_add(&cfg);
70ceb4f5 1804
efa2cea0 1805 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1806}
1807#endif
1808
1da177e4 1809struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1810{
1da177e4 1811 struct rt6_info *rt;
c71099ac 1812 struct fib6_table *table;
1da177e4 1813
c346dca1 1814 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1815 if (table == NULL)
1816 return NULL;
1da177e4 1817
c71099ac 1818 write_lock_bh(&table->tb6_lock);
d8d1f30b 1819 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1820 if (dev == rt->rt6i_dev &&
045927ff 1821 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1822 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1823 break;
1824 }
1825 if (rt)
d8d1f30b 1826 dst_hold(&rt->dst);
c71099ac 1827 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1828 return rt;
1829}
1830
1831struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1832 struct net_device *dev,
1833 unsigned int pref)
1da177e4 1834{
86872cb5
TG
1835 struct fib6_config cfg = {
1836 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1837 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1838 .fc_ifindex = dev->ifindex,
1839 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1840 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1841 .fc_nlinfo.pid = 0,
1842 .fc_nlinfo.nlh = NULL,
c346dca1 1843 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1844 };
1da177e4 1845
86872cb5 1846 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1847
86872cb5 1848 ip6_route_add(&cfg);
1da177e4 1849
1da177e4
LT
1850 return rt6_get_dflt_router(gwaddr, dev);
1851}
1852
7b4da532 1853void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1854{
1855 struct rt6_info *rt;
c71099ac
TG
1856 struct fib6_table *table;
1857
1858 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1859 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1860 if (table == NULL)
1861 return;
1da177e4
LT
1862
1863restart:
c71099ac 1864 read_lock_bh(&table->tb6_lock);
d8d1f30b 1865 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1866 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1867 dst_hold(&rt->dst);
c71099ac 1868 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1869 ip6_del_rt(rt);
1da177e4
LT
1870 goto restart;
1871 }
1872 }
c71099ac 1873 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1874}
1875
5578689a
DL
1876static void rtmsg_to_fib6_config(struct net *net,
1877 struct in6_rtmsg *rtmsg,
86872cb5
TG
1878 struct fib6_config *cfg)
1879{
1880 memset(cfg, 0, sizeof(*cfg));
1881
1882 cfg->fc_table = RT6_TABLE_MAIN;
1883 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1884 cfg->fc_metric = rtmsg->rtmsg_metric;
1885 cfg->fc_expires = rtmsg->rtmsg_info;
1886 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1887 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1888 cfg->fc_flags = rtmsg->rtmsg_flags;
1889
5578689a 1890 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1891
86872cb5
TG
1892 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1893 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1894 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1895}
1896
5578689a 1897int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1898{
86872cb5 1899 struct fib6_config cfg;
1da177e4
LT
1900 struct in6_rtmsg rtmsg;
1901 int err;
1902
1903 switch(cmd) {
1904 case SIOCADDRT: /* Add a route */
1905 case SIOCDELRT: /* Delete a route */
1906 if (!capable(CAP_NET_ADMIN))
1907 return -EPERM;
1908 err = copy_from_user(&rtmsg, arg,
1909 sizeof(struct in6_rtmsg));
1910 if (err)
1911 return -EFAULT;
86872cb5 1912
5578689a 1913 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1914
1da177e4
LT
1915 rtnl_lock();
1916 switch (cmd) {
1917 case SIOCADDRT:
86872cb5 1918 err = ip6_route_add(&cfg);
1da177e4
LT
1919 break;
1920 case SIOCDELRT:
86872cb5 1921 err = ip6_route_del(&cfg);
1da177e4
LT
1922 break;
1923 default:
1924 err = -EINVAL;
1925 }
1926 rtnl_unlock();
1927
1928 return err;
3ff50b79 1929 }
1da177e4
LT
1930
1931 return -EINVAL;
1932}
1933
1934/*
1935 * Drop the packet on the floor
1936 */
1937
d5fdd6ba 1938static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1939{
612f09e8 1940 int type;
adf30907 1941 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1942 switch (ipstats_mib_noroutes) {
1943 case IPSTATS_MIB_INNOROUTES:
0660e03f 1944 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1945 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1946 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1947 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1948 break;
1949 }
1950 /* FALLTHROUGH */
1951 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1952 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1953 ipstats_mib_noroutes);
612f09e8
YH
1954 break;
1955 }
3ffe533c 1956 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1957 kfree_skb(skb);
1958 return 0;
1959}
1960
9ce8ade0
TG
1961static int ip6_pkt_discard(struct sk_buff *skb)
1962{
612f09e8 1963 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1964}
1965
20380731 1966static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 1967{
adf30907 1968 skb->dev = skb_dst(skb)->dev;
612f09e8 1969 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1970}
1971
6723ab54
DM
1972#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1973
9ce8ade0
TG
1974static int ip6_pkt_prohibit(struct sk_buff *skb)
1975{
612f09e8 1976 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1977}
1978
1979static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1980{
adf30907 1981 skb->dev = skb_dst(skb)->dev;
612f09e8 1982 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1983}
1984
6723ab54
DM
1985#endif
1986
1da177e4
LT
1987/*
1988 * Allocate a dst for local (unicast / anycast) address.
1989 */
1990
1991struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1992 const struct in6_addr *addr,
1993 int anycast)
1994{
c346dca1 1995 struct net *net = dev_net(idev->dev);
86393e52 1996 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
14deae41 1997 struct neighbour *neigh;
1da177e4 1998
40385653
BG
1999 if (rt == NULL) {
2000 if (net_ratelimit())
2001 pr_warning("IPv6: Maximum number of routes reached,"
2002 " consider increasing route/max_size.\n");
1da177e4 2003 return ERR_PTR(-ENOMEM);
40385653 2004 }
1da177e4 2005
5578689a 2006 dev_hold(net->loopback_dev);
1da177e4
LT
2007 in6_dev_hold(idev);
2008
d8d1f30b
CG
2009 rt->dst.flags = DST_HOST;
2010 rt->dst.input = ip6_input;
2011 rt->dst.output = ip6_output;
5578689a 2012 rt->rt6i_dev = net->loopback_dev;
1da177e4 2013 rt->rt6i_idev = idev;
defb3519 2014 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
d8d1f30b 2015 rt->dst.obsolete = -1;
1da177e4
LT
2016
2017 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2018 if (anycast)
2019 rt->rt6i_flags |= RTF_ANYCAST;
2020 else
1da177e4 2021 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
2022 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2023 if (IS_ERR(neigh)) {
d8d1f30b 2024 dst_free(&rt->dst);
14deae41
DM
2025
2026 /* We are casting this because that is the return
2027 * value type. But an errno encoded pointer is the
2028 * same regardless of the underlying pointer type,
2029 * and that's what we are returning. So this is OK.
2030 */
2031 return (struct rt6_info *) neigh;
1da177e4 2032 }
14deae41 2033 rt->rt6i_nexthop = neigh;
1da177e4
LT
2034
2035 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2036 rt->rt6i_dst.plen = 128;
5578689a 2037 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2038
d8d1f30b 2039 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2040
2041 return rt;
2042}
2043
8ed67789
DL
2044struct arg_dev_net {
2045 struct net_device *dev;
2046 struct net *net;
2047};
2048
1da177e4
LT
2049static int fib6_ifdown(struct rt6_info *rt, void *arg)
2050{
bc3ef660 2051 const struct arg_dev_net *adn = arg;
2052 const struct net_device *dev = adn->dev;
8ed67789 2053
bc3ef660 2054 if ((rt->rt6i_dev == dev || dev == NULL) &&
2055 rt != adn->net->ipv6.ip6_null_entry) {
1da177e4
LT
2056 RT6_TRACE("deleted by ifdown %p\n", rt);
2057 return -1;
2058 }
2059 return 0;
2060}
2061
f3db4851 2062void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2063{
8ed67789
DL
2064 struct arg_dev_net adn = {
2065 .dev = dev,
2066 .net = net,
2067 };
2068
2069 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2070 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2071}
2072
/* Context handed to rt6_mtu_change_route() through the walker's void *arg. */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* the new MTU value */
};
2078
2079static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2080{
2081 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2082 struct inet6_dev *idev;
2083
2084 /* In IPv6 pmtu discovery is not optional,
2085 so that RTAX_MTU lock cannot disable it.
2086 We still use this lock to block changes
2087 caused by addrconf/ndisc.
2088 */
2089
2090 idev = __in6_dev_get(arg->dev);
2091 if (idev == NULL)
2092 return 0;
2093
2094 /* For administrative MTU increase, there is no way to discover
2095 IPv6 PMTU increase, so PMTU increase should be updated here.
2096 Since RFC 1981 doesn't include administrative MTU increase
2097 update PMTU increase is a MUST. (i.e. jumbo frame)
2098 */
2099 /*
2100 If new MTU is less than route PMTU, this new MTU will be the
2101 lowest MTU in the path, update the route PMTU to reflect PMTU
2102 decreases; if new MTU is greater than route PMTU, and the
2103 old MTU is the lowest MTU in the path, update the route PMTU
2104 to reflect the increase. In this case if the other nodes' MTU
2105 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2106 PMTU discouvery.
2107 */
2108 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2109 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2110 (dst_mtu(&rt->dst) >= arg->mtu ||
2111 (dst_mtu(&rt->dst) < arg->mtu &&
2112 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2113 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2114 }
1da177e4
LT
2115 return 0;
2116}
2117
2118void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2119{
c71099ac
TG
2120 struct rt6_mtu_change_arg arg = {
2121 .dev = dev,
2122 .mtu = mtu,
2123 };
1da177e4 2124
c346dca1 2125 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2126}
2127
ef7c79ed 2128static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2129 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2130 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2131 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2132 [RTA_PRIORITY] = { .type = NLA_U32 },
2133 [RTA_METRICS] = { .type = NLA_NESTED },
2134};
2135
2136static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2137 struct fib6_config *cfg)
1da177e4 2138{
86872cb5
TG
2139 struct rtmsg *rtm;
2140 struct nlattr *tb[RTA_MAX+1];
2141 int err;
1da177e4 2142
86872cb5
TG
2143 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2144 if (err < 0)
2145 goto errout;
1da177e4 2146
86872cb5
TG
2147 err = -EINVAL;
2148 rtm = nlmsg_data(nlh);
2149 memset(cfg, 0, sizeof(*cfg));
2150
2151 cfg->fc_table = rtm->rtm_table;
2152 cfg->fc_dst_len = rtm->rtm_dst_len;
2153 cfg->fc_src_len = rtm->rtm_src_len;
2154 cfg->fc_flags = RTF_UP;
2155 cfg->fc_protocol = rtm->rtm_protocol;
2156
2157 if (rtm->rtm_type == RTN_UNREACHABLE)
2158 cfg->fc_flags |= RTF_REJECT;
2159
ab79ad14
2160 if (rtm->rtm_type == RTN_LOCAL)
2161 cfg->fc_flags |= RTF_LOCAL;
2162
86872cb5
TG
2163 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2164 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2165 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2166
2167 if (tb[RTA_GATEWAY]) {
2168 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2169 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2170 }
86872cb5
TG
2171
2172 if (tb[RTA_DST]) {
2173 int plen = (rtm->rtm_dst_len + 7) >> 3;
2174
2175 if (nla_len(tb[RTA_DST]) < plen)
2176 goto errout;
2177
2178 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2179 }
86872cb5
TG
2180
2181 if (tb[RTA_SRC]) {
2182 int plen = (rtm->rtm_src_len + 7) >> 3;
2183
2184 if (nla_len(tb[RTA_SRC]) < plen)
2185 goto errout;
2186
2187 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2188 }
86872cb5
TG
2189
2190 if (tb[RTA_OIF])
2191 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2192
2193 if (tb[RTA_PRIORITY])
2194 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2195
2196 if (tb[RTA_METRICS]) {
2197 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2198 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2199 }
86872cb5
TG
2200
2201 if (tb[RTA_TABLE])
2202 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2203
2204 err = 0;
2205errout:
2206 return err;
1da177e4
LT
2207}
2208
c127ea2c 2209static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2210{
86872cb5
TG
2211 struct fib6_config cfg;
2212 int err;
1da177e4 2213
86872cb5
TG
2214 err = rtm_to_fib6_config(skb, nlh, &cfg);
2215 if (err < 0)
2216 return err;
2217
2218 return ip6_route_del(&cfg);
1da177e4
LT
2219}
2220
c127ea2c 2221static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2222{
86872cb5
TG
2223 struct fib6_config cfg;
2224 int err;
1da177e4 2225
86872cb5
TG
2226 err = rtm_to_fib6_config(skb, nlh, &cfg);
2227 if (err < 0)
2228 return err;
2229
2230 return ip6_route_add(&cfg);
1da177e4
LT
2231}
2232
339bf98f
TG
2233static inline size_t rt6_nlmsg_size(void)
2234{
2235 return NLMSG_ALIGN(sizeof(struct rtmsg))
2236 + nla_total_size(16) /* RTA_SRC */
2237 + nla_total_size(16) /* RTA_DST */
2238 + nla_total_size(16) /* RTA_GATEWAY */
2239 + nla_total_size(16) /* RTA_PREFSRC */
2240 + nla_total_size(4) /* RTA_TABLE */
2241 + nla_total_size(4) /* RTA_IIF */
2242 + nla_total_size(4) /* RTA_OIF */
2243 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2244 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2245 + nla_total_size(sizeof(struct rta_cacheinfo));
2246}
2247
191cd582
BH
2248static int rt6_fill_node(struct net *net,
2249 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2250 struct in6_addr *dst, struct in6_addr *src,
2251 int iif, int type, u32 pid, u32 seq,
7bc570c8 2252 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2253{
2254 struct rtmsg *rtm;
2d7202bf 2255 struct nlmsghdr *nlh;
e3703b3d 2256 long expires;
9e762a4a 2257 u32 table;
1da177e4
LT
2258
2259 if (prefix) { /* user wants prefix routes only */
2260 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2261 /* success since this is not a prefix route */
2262 return 1;
2263 }
2264 }
2265
2d7202bf
TG
2266 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2267 if (nlh == NULL)
26932566 2268 return -EMSGSIZE;
2d7202bf
TG
2269
2270 rtm = nlmsg_data(nlh);
1da177e4
LT
2271 rtm->rtm_family = AF_INET6;
2272 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2273 rtm->rtm_src_len = rt->rt6i_src.plen;
2274 rtm->rtm_tos = 0;
c71099ac 2275 if (rt->rt6i_table)
9e762a4a 2276 table = rt->rt6i_table->tb6_id;
c71099ac 2277 else
9e762a4a
PM
2278 table = RT6_TABLE_UNSPEC;
2279 rtm->rtm_table = table;
2d7202bf 2280 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2281 if (rt->rt6i_flags&RTF_REJECT)
2282 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2283 else if (rt->rt6i_flags&RTF_LOCAL)
2284 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2285 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2286 rtm->rtm_type = RTN_LOCAL;
2287 else
2288 rtm->rtm_type = RTN_UNICAST;
2289 rtm->rtm_flags = 0;
2290 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2291 rtm->rtm_protocol = rt->rt6i_protocol;
2292 if (rt->rt6i_flags&RTF_DYNAMIC)
2293 rtm->rtm_protocol = RTPROT_REDIRECT;
2294 else if (rt->rt6i_flags & RTF_ADDRCONF)
2295 rtm->rtm_protocol = RTPROT_KERNEL;
2296 else if (rt->rt6i_flags&RTF_DEFAULT)
2297 rtm->rtm_protocol = RTPROT_RA;
2298
2299 if (rt->rt6i_flags&RTF_CACHE)
2300 rtm->rtm_flags |= RTM_F_CLONED;
2301
2302 if (dst) {
2d7202bf 2303 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2304 rtm->rtm_dst_len = 128;
1da177e4 2305 } else if (rtm->rtm_dst_len)
2d7202bf 2306 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2307#ifdef CONFIG_IPV6_SUBTREES
2308 if (src) {
2d7202bf 2309 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2310 rtm->rtm_src_len = 128;
1da177e4 2311 } else if (rtm->rtm_src_len)
2d7202bf 2312 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2313#endif
7bc570c8
YH
2314 if (iif) {
2315#ifdef CONFIG_IPV6_MROUTE
2316 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2317 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2318 if (err <= 0) {
2319 if (!nowait) {
2320 if (err == 0)
2321 return 0;
2322 goto nla_put_failure;
2323 } else {
2324 if (err == -EMSGSIZE)
2325 goto nla_put_failure;
2326 }
2327 }
2328 } else
2329#endif
2330 NLA_PUT_U32(skb, RTA_IIF, iif);
2331 } else if (dst) {
d8d1f30b 2332 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
1da177e4 2333 struct in6_addr saddr_buf;
191cd582 2334 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
7cbca67c 2335 dst, 0, &saddr_buf) == 0)
2d7202bf 2336 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2337 }
2d7202bf 2338
defb3519 2339 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2340 goto nla_put_failure;
2341
d8d1f30b
CG
2342 if (rt->dst.neighbour)
2343 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2d7202bf 2344
d8d1f30b 2345 if (rt->dst.dev)
2d7202bf
TG
2346 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2347
2348 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2349
36e3deae
YH
2350 if (!(rt->rt6i_flags & RTF_EXPIRES))
2351 expires = 0;
2352 else if (rt->rt6i_expires - jiffies < INT_MAX)
2353 expires = rt->rt6i_expires - jiffies;
2354 else
2355 expires = INT_MAX;
69cdf8f9 2356
d8d1f30b
CG
2357 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2358 expires, rt->dst.error) < 0)
e3703b3d 2359 goto nla_put_failure;
2d7202bf
TG
2360
2361 return nlmsg_end(skb, nlh);
2362
2363nla_put_failure:
26932566
PM
2364 nlmsg_cancel(skb, nlh);
2365 return -EMSGSIZE;
1da177e4
LT
2366}
2367
1b43af54 2368int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2369{
2370 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2371 int prefix;
2372
2d7202bf
TG
2373 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2374 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2375 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2376 } else
2377 prefix = 0;
2378
191cd582
BH
2379 return rt6_fill_node(arg->net,
2380 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2381 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2382 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2383}
2384
c127ea2c 2385static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2386{
3b1e0a65 2387 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2388 struct nlattr *tb[RTA_MAX+1];
2389 struct rt6_info *rt;
1da177e4 2390 struct sk_buff *skb;
ab364a6f 2391 struct rtmsg *rtm;
1da177e4 2392 struct flowi fl;
ab364a6f 2393 int err, iif = 0;
1da177e4 2394
ab364a6f
TG
2395 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2396 if (err < 0)
2397 goto errout;
1da177e4 2398
ab364a6f 2399 err = -EINVAL;
1da177e4 2400 memset(&fl, 0, sizeof(fl));
1da177e4 2401
ab364a6f
TG
2402 if (tb[RTA_SRC]) {
2403 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2404 goto errout;
2405
2406 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2407 }
2408
2409 if (tb[RTA_DST]) {
2410 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2411 goto errout;
2412
2413 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2414 }
2415
2416 if (tb[RTA_IIF])
2417 iif = nla_get_u32(tb[RTA_IIF]);
2418
2419 if (tb[RTA_OIF])
2420 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2421
2422 if (iif) {
2423 struct net_device *dev;
5578689a 2424 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2425 if (!dev) {
2426 err = -ENODEV;
ab364a6f 2427 goto errout;
1da177e4
LT
2428 }
2429 }
2430
ab364a6f
TG
2431 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2432 if (skb == NULL) {
2433 err = -ENOBUFS;
2434 goto errout;
2435 }
1da177e4 2436
ab364a6f
TG
2437 /* Reserve room for dummy headers, this skb can pass
2438 through good chunk of routing engine.
2439 */
459a98ed 2440 skb_reset_mac_header(skb);
ab364a6f 2441 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2442
8a3edd80 2443 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
d8d1f30b 2444 skb_dst_set(skb, &rt->dst);
1da177e4 2445
191cd582 2446 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2447 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2448 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2449 if (err < 0) {
ab364a6f
TG
2450 kfree_skb(skb);
2451 goto errout;
1da177e4
LT
2452 }
2453
5578689a 2454 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2455errout:
1da177e4 2456 return err;
1da177e4
LT
2457}
2458
86872cb5 2459void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2460{
2461 struct sk_buff *skb;
5578689a 2462 struct net *net = info->nl_net;
528c4ceb
DL
2463 u32 seq;
2464 int err;
2465
2466 err = -ENOBUFS;
2467 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2468
339bf98f 2469 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2470 if (skb == NULL)
2471 goto errout;
2472
191cd582 2473 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2474 event, info->pid, seq, 0, 0, 0);
26932566
PM
2475 if (err < 0) {
2476 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2477 WARN_ON(err == -EMSGSIZE);
2478 kfree_skb(skb);
2479 goto errout;
2480 }
1ce85fe4
PNA
2481 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2482 info->nlh, gfp_any());
2483 return;
21713ebc
TG
2484errout:
2485 if (err < 0)
5578689a 2486 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2487}
2488
8ed67789
DL
2489static int ip6_route_dev_notify(struct notifier_block *this,
2490 unsigned long event, void *data)
2491{
2492 struct net_device *dev = (struct net_device *)data;
c346dca1 2493 struct net *net = dev_net(dev);
8ed67789
DL
2494
2495 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2496 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2497 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2498#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2499 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2500 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2501 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2502 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2503#endif
2504 }
2505
2506 return NOTIFY_OK;
2507}
2508
1da177e4
LT
2509/*
2510 * /proc
2511 */
2512
2513#ifdef CONFIG_PROC_FS
2514
1da177e4
LT
/*
 * Buffer-window bookkeeping for /proc output.
 * NOTE(review): nothing in this part of the file references this
 * struct; the seq_file based handlers below do not use it - confirm
 * before removing.
 */
struct rt6_proc_arg {
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2523
2524static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2525{
33120b30 2526 struct seq_file *m = p_arg;
1da177e4 2527
4b7a4274 2528 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2529
2530#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2531 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2532#else
33120b30 2533 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2534#endif
2535
2536 if (rt->rt6i_nexthop) {
4b7a4274 2537 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
1da177e4 2538 } else {
33120b30 2539 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2540 }
33120b30 2541 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2542 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2543 rt->dst.__use, rt->rt6i_flags,
33120b30 2544 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2545 return 0;
2546}
2547
33120b30 2548static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2549{
f3db4851
DL
2550 struct net *net = (struct net *)m->private;
2551 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2552 return 0;
2553}
1da177e4 2554
33120b30
AD
2555static int ipv6_route_open(struct inode *inode, struct file *file)
2556{
de05c557 2557 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2558}
2559
33120b30
AD
2560static const struct file_operations ipv6_route_proc_fops = {
2561 .owner = THIS_MODULE,
2562 .open = ipv6_route_open,
2563 .read = seq_read,
2564 .llseek = seq_lseek,
b6fcbdb4 2565 .release = single_release_net,
33120b30
AD
2566};
2567
1da177e4
LT
2568static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2569{
69ddb805 2570 struct net *net = (struct net *)seq->private;
1da177e4 2571 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2572 net->ipv6.rt6_stats->fib_nodes,
2573 net->ipv6.rt6_stats->fib_route_nodes,
2574 net->ipv6.rt6_stats->fib_rt_alloc,
2575 net->ipv6.rt6_stats->fib_rt_entries,
2576 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2577 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2578 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2579
2580 return 0;
2581}
2582
2583static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2584{
de05c557 2585 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2586}
2587
9a32144e 2588static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2589 .owner = THIS_MODULE,
2590 .open = rt6_stats_seq_open,
2591 .read = seq_read,
2592 .llseek = seq_lseek,
b6fcbdb4 2593 .release = single_release_net,
1da177e4
LT
2594};
2595#endif /* CONFIG_PROC_FS */
2596
2597#ifdef CONFIG_SYSCTL
2598
1da177e4 2599static
8d65af78 2600int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2601 void __user *buffer, size_t *lenp, loff_t *ppos)
2602{
5b7c931d
DL
2603 struct net *net = current->nsproxy->net_ns;
2604 int delay = net->ipv6.sysctl.flush_delay;
1da177e4 2605 if (write) {
8d65af78 2606 proc_dointvec(ctl, write, buffer, lenp, ppos);
5b7c931d 2607 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2608 return 0;
2609 } else
2610 return -EINVAL;
2611}
2612
760f2d01 2613ctl_table ipv6_route_table_template[] = {
1ab1457c 2614 {
1da177e4 2615 .procname = "flush",
4990509f 2616 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2617 .maxlen = sizeof(int),
89c8b3a1 2618 .mode = 0200,
6d9f239a 2619 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2620 },
2621 {
1da177e4 2622 .procname = "gc_thresh",
9a7ec3a9 2623 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2624 .maxlen = sizeof(int),
2625 .mode = 0644,
6d9f239a 2626 .proc_handler = proc_dointvec,
1da177e4
LT
2627 },
2628 {
1da177e4 2629 .procname = "max_size",
4990509f 2630 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2631 .maxlen = sizeof(int),
2632 .mode = 0644,
6d9f239a 2633 .proc_handler = proc_dointvec,
1da177e4
LT
2634 },
2635 {
1da177e4 2636 .procname = "gc_min_interval",
4990509f 2637 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2638 .maxlen = sizeof(int),
2639 .mode = 0644,
6d9f239a 2640 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2641 },
2642 {
1da177e4 2643 .procname = "gc_timeout",
4990509f 2644 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2645 .maxlen = sizeof(int),
2646 .mode = 0644,
6d9f239a 2647 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2648 },
2649 {
1da177e4 2650 .procname = "gc_interval",
4990509f 2651 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2652 .maxlen = sizeof(int),
2653 .mode = 0644,
6d9f239a 2654 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2655 },
2656 {
1da177e4 2657 .procname = "gc_elasticity",
4990509f 2658 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2659 .maxlen = sizeof(int),
2660 .mode = 0644,
f3d3f616 2661 .proc_handler = proc_dointvec,
1da177e4
LT
2662 },
2663 {
1da177e4 2664 .procname = "mtu_expires",
4990509f 2665 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2666 .maxlen = sizeof(int),
2667 .mode = 0644,
6d9f239a 2668 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2669 },
2670 {
1da177e4 2671 .procname = "min_adv_mss",
4990509f 2672 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2673 .maxlen = sizeof(int),
2674 .mode = 0644,
f3d3f616 2675 .proc_handler = proc_dointvec,
1da177e4
LT
2676 },
2677 {
1da177e4 2678 .procname = "gc_min_interval_ms",
4990509f 2679 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2680 .maxlen = sizeof(int),
2681 .mode = 0644,
6d9f239a 2682 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2683 },
f8572d8f 2684 { }
1da177e4
LT
2685};
2686
2c8c1e72 2687struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2688{
2689 struct ctl_table *table;
2690
2691 table = kmemdup(ipv6_route_table_template,
2692 sizeof(ipv6_route_table_template),
2693 GFP_KERNEL);
5ee09105
YH
2694
2695 if (table) {
2696 table[0].data = &net->ipv6.sysctl.flush_delay;
86393e52 2697 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2698 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2699 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2700 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2701 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2702 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2703 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2704 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2705 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2706 }
2707
760f2d01
DL
2708 return table;
2709}
1da177e4
LT
2710#endif
2711
2c8c1e72 2712static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2713{
633d424b 2714 int ret = -ENOMEM;
8ed67789 2715
86393e52
AD
2716 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2717 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2718
fc66f95c
ED
2719 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2720 goto out_ip6_dst_ops;
2721
8ed67789
DL
2722 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2723 sizeof(*net->ipv6.ip6_null_entry),
2724 GFP_KERNEL);
2725 if (!net->ipv6.ip6_null_entry)
fc66f95c 2726 goto out_ip6_dst_entries;
d8d1f30b 2727 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2728 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2729 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2730 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2731 ip6_template_metrics, true);
8ed67789
DL
2732
2733#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2734 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2735 sizeof(*net->ipv6.ip6_prohibit_entry),
2736 GFP_KERNEL);
68fffc67
PZ
2737 if (!net->ipv6.ip6_prohibit_entry)
2738 goto out_ip6_null_entry;
d8d1f30b 2739 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2740 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2741 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2742 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2743 ip6_template_metrics, true);
8ed67789
DL
2744
2745 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2746 sizeof(*net->ipv6.ip6_blk_hole_entry),
2747 GFP_KERNEL);
68fffc67
PZ
2748 if (!net->ipv6.ip6_blk_hole_entry)
2749 goto out_ip6_prohibit_entry;
d8d1f30b 2750 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2751 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2752 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2753 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2754 ip6_template_metrics, true);
8ed67789
DL
2755#endif
2756
b339a47c
PZ
2757 net->ipv6.sysctl.flush_delay = 0;
2758 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2759 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2760 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2761 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2762 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2763 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2764 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2765
cdb18761
DL
2766#ifdef CONFIG_PROC_FS
2767 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2768 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2769#endif
6891a346
BT
2770 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2771
8ed67789
DL
2772 ret = 0;
2773out:
2774 return ret;
f2fc6a54 2775
68fffc67
PZ
2776#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2777out_ip6_prohibit_entry:
2778 kfree(net->ipv6.ip6_prohibit_entry);
2779out_ip6_null_entry:
2780 kfree(net->ipv6.ip6_null_entry);
2781#endif
fc66f95c
ED
2782out_ip6_dst_entries:
2783 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2784out_ip6_dst_ops:
f2fc6a54 2785 goto out;
cdb18761
DL
2786}
2787
2c8c1e72 2788static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2789{
2790#ifdef CONFIG_PROC_FS
2791 proc_net_remove(net, "ipv6_route");
2792 proc_net_remove(net, "rt6_stats");
2793#endif
8ed67789
DL
2794 kfree(net->ipv6.ip6_null_entry);
2795#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2796 kfree(net->ipv6.ip6_prohibit_entry);
2797 kfree(net->ipv6.ip6_blk_hole_entry);
2798#endif
41bb78b4 2799 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2800}
2801
2802static struct pernet_operations ip6_route_net_ops = {
2803 .init = ip6_route_net_init,
2804 .exit = ip6_route_net_exit,
2805};
2806
8ed67789
DL
2807static struct notifier_block ip6_route_dev_notifier = {
2808 .notifier_call = ip6_route_dev_notify,
2809 .priority = 0,
2810};
2811
433d49c3 2812int __init ip6_route_init(void)
1da177e4 2813{
433d49c3
DL
2814 int ret;
2815
9a7ec3a9
DL
2816 ret = -ENOMEM;
2817 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2818 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2819 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2820 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2821 goto out;
14e50e57 2822
fc66f95c 2823 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2824 if (ret)
bdb3289f 2825 goto out_kmem_cache;
bdb3289f 2826
fc66f95c
ED
2827 ret = register_pernet_subsys(&ip6_route_net_ops);
2828 if (ret)
2829 goto out_dst_entries;
2830
5dc121e9
AE
2831 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2832
8ed67789
DL
2833 /* Registering of the loopback is done before this portion of code,
2834 * the loopback reference in rt6_info will not be taken, do it
2835 * manually for init_net */
d8d1f30b 2836 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2837 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2838 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2839 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2840 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2841 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2842 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2843 #endif
433d49c3
DL
2844 ret = fib6_init();
2845 if (ret)
8ed67789 2846 goto out_register_subsys;
433d49c3 2847
433d49c3
DL
2848 ret = xfrm6_init();
2849 if (ret)
cdb18761 2850 goto out_fib6_init;
c35b7e72 2851
433d49c3
DL
2852 ret = fib6_rules_init();
2853 if (ret)
2854 goto xfrm6_init;
7e5449c2 2855
433d49c3
DL
2856 ret = -ENOBUFS;
2857 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2858 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2859 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2860 goto fib6_rules_init;
c127ea2c 2861
8ed67789 2862 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2863 if (ret)
2864 goto fib6_rules_init;
8ed67789 2865
433d49c3
DL
2866out:
2867 return ret;
2868
2869fib6_rules_init:
433d49c3
DL
2870 fib6_rules_cleanup();
2871xfrm6_init:
433d49c3 2872 xfrm6_fini();
433d49c3 2873out_fib6_init:
433d49c3 2874 fib6_gc_cleanup();
8ed67789
DL
2875out_register_subsys:
2876 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
2877out_dst_entries:
2878 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 2879out_kmem_cache:
f2fc6a54 2880 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2881 goto out;
1da177e4
LT
2882}
2883
2884void ip6_route_cleanup(void)
2885{
8ed67789 2886 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2887 fib6_rules_cleanup();
1da177e4 2888 xfrm6_fini();
1da177e4 2889 fib6_gc_cleanup();
8ed67789 2890 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 2891 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 2892 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2893}