ipv6: hash is calculated but not used afterwards
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
5a0e3ad6 43#include <linux/slab.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
64/* Set to 3 to get tracing. */
65#define RT6_DEBUG 2
66
67#if RT6_DEBUG >= 3
68#define RDBG(x) printk x
69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
70#else
71#define RDBG(x)
72#define RT6_TRACE(x...) do { ; } while (0)
73#endif
74
1da177e4
LT
75static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 77static unsigned int ip6_default_advmss(const struct dst_entry *dst);
d33e4553 78static unsigned int ip6_default_mtu(const struct dst_entry *dst);
1da177e4
LT
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
70ceb4f5 90#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
91static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
efa2cea0
DL
95static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
97 struct in6_addr *gwaddr, int ifindex);
98#endif
99
06582540
DM
100static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
101{
102 struct rt6_info *rt = (struct rt6_info *) dst;
103 struct inet_peer *peer;
104 u32 *p = NULL;
105
106 if (!rt->rt6i_peer)
107 rt6_bind_peer(rt, 1);
108
109 peer = rt->rt6i_peer;
110 if (peer) {
111 u32 *old_p = __DST_METRICS_PTR(old);
112 unsigned long prev, new;
113
114 p = peer->metrics;
115 if (inet_metrics_new(peer))
116 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
117
118 new = (unsigned long) p;
119 prev = cmpxchg(&dst->_metrics, old, new);
120
121 if (prev != old) {
122 p = __DST_METRICS_PTR(prev);
123 if (prev & DST_METRICS_READ_ONLY)
124 p = NULL;
125 }
126 }
127 return p;
128}
129
9a7ec3a9 130static struct dst_ops ip6_dst_ops_template = {
1da177e4 131 .family = AF_INET6,
09640e63 132 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
133 .gc = ip6_dst_gc,
134 .gc_thresh = 1024,
135 .check = ip6_dst_check,
0dbaee3b 136 .default_advmss = ip6_default_advmss,
d33e4553 137 .default_mtu = ip6_default_mtu,
06582540 138 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
139 .destroy = ip6_dst_destroy,
140 .ifdown = ip6_dst_ifdown,
141 .negative_advice = ip6_negative_advice,
142 .link_failure = ip6_link_failure,
143 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 144 .local_out = __ip6_local_out,
1da177e4
LT
145};
146
ec831ea7
RD
/* default_mtu handler for blackhole dsts: always reports an MTU of 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	return 0U;
}
151
14e50e57
DM
152static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
153{
154}
155
156static struct dst_ops ip6_dst_blackhole_ops = {
157 .family = AF_INET6,
09640e63 158 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
159 .destroy = ip6_dst_destroy,
160 .check = ip6_dst_check,
ec831ea7 161 .default_mtu = ip6_blackhole_default_mtu,
214f45c9 162 .default_advmss = ip6_default_advmss,
14e50e57 163 .update_pmtu = ip6_rt_blackhole_update_pmtu,
14e50e57
DM
164};
165
62fa8a84
DM
166static const u32 ip6_template_metrics[RTAX_MAX] = {
167 [RTAX_HOPLIMIT - 1] = 255,
168};
169
bdb3289f 170static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
171 .dst = {
172 .__refcnt = ATOMIC_INIT(1),
173 .__use = 1,
174 .obsolete = -1,
175 .error = -ENETUNREACH,
d8d1f30b
CG
176 .input = ip6_pkt_discard,
177 .output = ip6_pkt_discard_out,
1da177e4
LT
178 },
179 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 180 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
181 .rt6i_metric = ~(u32) 0,
182 .rt6i_ref = ATOMIC_INIT(1),
183};
184
101367c2
TG
185#ifdef CONFIG_IPV6_MULTIPLE_TABLES
186
6723ab54
DM
187static int ip6_pkt_prohibit(struct sk_buff *skb);
188static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 189
280a34c8 190static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
191 .dst = {
192 .__refcnt = ATOMIC_INIT(1),
193 .__use = 1,
194 .obsolete = -1,
195 .error = -EACCES,
d8d1f30b
CG
196 .input = ip6_pkt_prohibit,
197 .output = ip6_pkt_prohibit_out,
101367c2
TG
198 },
199 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 200 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
201 .rt6i_metric = ~(u32) 0,
202 .rt6i_ref = ATOMIC_INIT(1),
203};
204
bdb3289f 205static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
206 .dst = {
207 .__refcnt = ATOMIC_INIT(1),
208 .__use = 1,
209 .obsolete = -1,
210 .error = -EINVAL,
d8d1f30b
CG
211 .input = dst_discard,
212 .output = dst_discard,
101367c2
TG
213 },
214 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 215 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
216 .rt6i_metric = ~(u32) 0,
217 .rt6i_ref = ATOMIC_INIT(1),
218};
219
220#endif
221
/*
 * Allocate a dst from @ops via dst_alloc(ops, 0) and hand it back typed
 * as an rt6_info. Returns whatever dst_alloc() returned, cast.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct dst_entry *dst = dst_alloc(ops, 0);

	return (struct rt6_info *)dst;
}
227
228static void ip6_dst_destroy(struct dst_entry *dst)
229{
230 struct rt6_info *rt = (struct rt6_info *)dst;
231 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 232 struct inet_peer *peer = rt->rt6i_peer;
1da177e4
LT
233
234 if (idev != NULL) {
235 rt->rt6i_idev = NULL;
236 in6_dev_put(idev);
1ab1457c 237 }
b3419363 238 if (peer) {
b3419363
DM
239 rt->rt6i_peer = NULL;
240 inet_putpeer(peer);
241 }
242}
243
6431cbc2
DM
244static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
245
246static u32 rt6_peer_genid(void)
247{
248 return atomic_read(&__rt6_peer_genid);
249}
250
b3419363
DM
251void rt6_bind_peer(struct rt6_info *rt, int create)
252{
253 struct inet_peer *peer;
254
b3419363
DM
255 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
256 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
257 inet_putpeer(peer);
6431cbc2
DM
258 else
259 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
260}
261
262static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
263 int how)
264{
265 struct rt6_info *rt = (struct rt6_info *)dst;
266 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 267 struct net_device *loopback_dev =
c346dca1 268 dev_net(dev)->loopback_dev;
1da177e4 269
5a3e55d6
DL
270 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
271 struct inet6_dev *loopback_idev =
272 in6_dev_get(loopback_dev);
1da177e4
LT
273 if (loopback_idev != NULL) {
274 rt->rt6i_idev = loopback_idev;
275 in6_dev_put(idev);
276 }
277 }
278}
279
280static __inline__ int rt6_check_expired(const struct rt6_info *rt)
281{
a02cec21
ED
282 return (rt->rt6i_flags & RTF_EXPIRES) &&
283 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
284}
285
c71099ac
TG
286static inline int rt6_need_strict(struct in6_addr *daddr)
287{
a02cec21
ED
288 return ipv6_addr_type(daddr) &
289 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
290}
291
1da177e4 292/*
c71099ac 293 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
294 */
295
8ed67789
DL
296static inline struct rt6_info *rt6_device_match(struct net *net,
297 struct rt6_info *rt,
dd3abc4e 298 struct in6_addr *saddr,
1da177e4 299 int oif,
d420895e 300 int flags)
1da177e4
LT
301{
302 struct rt6_info *local = NULL;
303 struct rt6_info *sprt;
304
dd3abc4e
YH
305 if (!oif && ipv6_addr_any(saddr))
306 goto out;
307
d8d1f30b 308 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
309 struct net_device *dev = sprt->rt6i_dev;
310
311 if (oif) {
1da177e4
LT
312 if (dev->ifindex == oif)
313 return sprt;
314 if (dev->flags & IFF_LOOPBACK) {
315 if (sprt->rt6i_idev == NULL ||
316 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 317 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 318 continue;
1ab1457c 319 if (local && (!oif ||
1da177e4
LT
320 local->rt6i_idev->dev->ifindex == oif))
321 continue;
322 }
323 local = sprt;
324 }
dd3abc4e
YH
325 } else {
326 if (ipv6_chk_addr(net, saddr, dev,
327 flags & RT6_LOOKUP_F_IFACE))
328 return sprt;
1da177e4 329 }
dd3abc4e 330 }
1da177e4 331
dd3abc4e 332 if (oif) {
1da177e4
LT
333 if (local)
334 return local;
335
d420895e 336 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 337 return net->ipv6.ip6_null_entry;
1da177e4 338 }
dd3abc4e 339out:
1da177e4
LT
340 return rt;
341}
342
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * Okay, this does not seem to be appropriate for now, however, we need
 * to check if it is really so.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute; here the limit is the inet6_dev's cnf.rtr_probe_interval,
 * measured from neigh->updated.
 *
 * Does nothing when @rt is NULL, has no next hop, or the next hop is in
 * a NUD_VALID state. Otherwise, if the probe interval has elapsed, a
 * neighbour solicitation for the next hop's primary key is sent to the
 * matching solicited-node multicast address.
 *
 * NOTE(review): neigh->updated is written while only the read side of
 * neigh->lock is held - confirm this is intended.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
377
1da177e4 378/*
554cfb7e 379 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 380 */
b6f99a21 381static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
382{
383 struct net_device *dev = rt->rt6i_dev;
161980f4 384 if (!oif || dev->ifindex == oif)
554cfb7e 385 return 2;
161980f4
DM
386 if ((dev->flags & IFF_LOOPBACK) &&
387 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
388 return 1;
389 return 0;
554cfb7e 390}
1da177e4 391
b6f99a21 392static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 393{
554cfb7e 394 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 395 int m;
4d0c5911
YH
396 if (rt->rt6i_flags & RTF_NONEXTHOP ||
397 !(rt->rt6i_flags & RTF_GATEWAY))
398 m = 1;
399 else if (neigh) {
554cfb7e
YH
400 read_lock_bh(&neigh->lock);
401 if (neigh->nud_state & NUD_VALID)
4d0c5911 402 m = 2;
398bcbeb
YH
403#ifdef CONFIG_IPV6_ROUTER_PREF
404 else if (neigh->nud_state & NUD_FAILED)
405 m = 0;
406#endif
407 else
ea73ee23 408 m = 1;
554cfb7e 409 read_unlock_bh(&neigh->lock);
398bcbeb
YH
410 } else
411 m = 0;
554cfb7e 412 return m;
1da177e4
LT
413}
414
554cfb7e
YH
415static int rt6_score_route(struct rt6_info *rt, int oif,
416 int strict)
1da177e4 417{
4d0c5911 418 int m, n;
1ab1457c 419
4d0c5911 420 m = rt6_check_dev(rt, oif);
77d16f45 421 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 422 return -1;
ebacaaa0
YH
423#ifdef CONFIG_IPV6_ROUTER_PREF
424 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
425#endif
4d0c5911 426 n = rt6_check_neigh(rt);
557e92ef 427 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
428 return -1;
429 return m;
430}
431
f11e6659
DM
432static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
433 int *mpri, struct rt6_info *match)
554cfb7e 434{
f11e6659
DM
435 int m;
436
437 if (rt6_check_expired(rt))
438 goto out;
439
440 m = rt6_score_route(rt, oif, strict);
441 if (m < 0)
442 goto out;
443
444 if (m > *mpri) {
445 if (strict & RT6_LOOKUP_F_REACHABLE)
446 rt6_probe(match);
447 *mpri = m;
448 match = rt;
449 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
450 rt6_probe(rt);
451 }
452
453out:
454 return match;
455}
456
457static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
458 struct rt6_info *rr_head,
459 u32 metric, int oif, int strict)
460{
461 struct rt6_info *rt, *match;
554cfb7e 462 int mpri = -1;
1da177e4 463
f11e6659
DM
464 match = NULL;
465 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 466 rt = rt->dst.rt6_next)
f11e6659
DM
467 match = find_match(rt, oif, strict, &mpri, match);
468 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 469 rt = rt->dst.rt6_next)
f11e6659 470 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 471
f11e6659
DM
472 return match;
473}
1da177e4 474
f11e6659
DM
475static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
476{
477 struct rt6_info *match, *rt0;
8ed67789 478 struct net *net;
1da177e4 479
f11e6659 480 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 481 __func__, fn->leaf, oif);
554cfb7e 482
f11e6659
DM
483 rt0 = fn->rr_ptr;
484 if (!rt0)
485 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 486
f11e6659 487 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 488
554cfb7e 489 if (!match &&
f11e6659 490 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 491 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 492
554cfb7e 493 /* no entries matched; do round-robin */
f11e6659
DM
494 if (!next || next->rt6i_metric != rt0->rt6i_metric)
495 next = fn->leaf;
496
497 if (next != rt0)
498 fn->rr_ptr = next;
1da177e4 499 }
1da177e4 500
f11e6659 501 RT6_TRACE("%s() => %p\n",
0dc47877 502 __func__, match);
1da177e4 503
c346dca1 504 net = dev_net(rt0->rt6i_dev);
a02cec21 505 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
506}
507
70ceb4f5
YH
508#ifdef CONFIG_IPV6_ROUTE_INFO
509int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
510 struct in6_addr *gwaddr)
511{
c346dca1 512 struct net *net = dev_net(dev);
70ceb4f5
YH
513 struct route_info *rinfo = (struct route_info *) opt;
514 struct in6_addr prefix_buf, *prefix;
515 unsigned int pref;
4bed72e4 516 unsigned long lifetime;
70ceb4f5
YH
517 struct rt6_info *rt;
518
519 if (len < sizeof(struct route_info)) {
520 return -EINVAL;
521 }
522
523 /* Sanity check for prefix_len and length */
524 if (rinfo->length > 3) {
525 return -EINVAL;
526 } else if (rinfo->prefix_len > 128) {
527 return -EINVAL;
528 } else if (rinfo->prefix_len > 64) {
529 if (rinfo->length < 2) {
530 return -EINVAL;
531 }
532 } else if (rinfo->prefix_len > 0) {
533 if (rinfo->length < 1) {
534 return -EINVAL;
535 }
536 }
537
538 pref = rinfo->route_pref;
539 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 540 return -EINVAL;
70ceb4f5 541
4bed72e4 542 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
543
544 if (rinfo->length == 3)
545 prefix = (struct in6_addr *)rinfo->prefix;
546 else {
547 /* this function is safe */
548 ipv6_addr_prefix(&prefix_buf,
549 (struct in6_addr *)rinfo->prefix,
550 rinfo->prefix_len);
551 prefix = &prefix_buf;
552 }
553
efa2cea0
DL
554 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
555 dev->ifindex);
70ceb4f5
YH
556
557 if (rt && !lifetime) {
e0a1ad73 558 ip6_del_rt(rt);
70ceb4f5
YH
559 rt = NULL;
560 }
561
562 if (!rt && lifetime)
efa2cea0 563 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
564 pref);
565 else if (rt)
566 rt->rt6i_flags = RTF_ROUTEINFO |
567 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
568
569 if (rt) {
4bed72e4 570 if (!addrconf_finite_timeout(lifetime)) {
70ceb4f5
YH
571 rt->rt6i_flags &= ~RTF_EXPIRES;
572 } else {
573 rt->rt6i_expires = jiffies + HZ * lifetime;
574 rt->rt6i_flags |= RTF_EXPIRES;
575 }
d8d1f30b 576 dst_release(&rt->dst);
70ceb4f5
YH
577 }
578 return 0;
579}
580#endif
581
/*
 * Backtracking step shared by the policy lookup functions.
 *
 * Expands in a function that has `rt` and `fn` in scope and defines the
 * labels `out` and `restart`. When `rt` is the namespace's null entry,
 * walk up from `fn`: stop via `goto out` on a node flagged RTN_TL_ROOT;
 * otherwise move to the parent, or - when the parent has a subtree that
 * is not the node we came from - to the result of a fib6_lookup() on
 * @saddr in that subtree. As soon as the node reached is flagged
 * RTN_RTINFO, retry the selection via `goto restart`.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, saddr);		\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 599
8ed67789
DL
600static struct rt6_info *ip6_pol_route_lookup(struct net *net,
601 struct fib6_table *table,
c71099ac 602 struct flowi *fl, int flags)
1da177e4
LT
603{
604 struct fib6_node *fn;
605 struct rt6_info *rt;
606
c71099ac
TG
607 read_lock_bh(&table->tb6_lock);
608 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
609restart:
610 rt = fn->leaf;
dd3abc4e 611 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
8ed67789 612 BACKTRACK(net, &fl->fl6_src);
c71099ac 613out:
d8d1f30b 614 dst_use(&rt->dst, jiffies);
c71099ac 615 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
616 return rt;
617
618}
619
9acd9f3a
YH
620struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
621 const struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
622{
623 struct flowi fl = {
624 .oif = oif,
5811662b 625 .fl6_dst = *daddr,
c71099ac
TG
626 };
627 struct dst_entry *dst;
77d16f45 628 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 629
adaa70bb
TG
630 if (saddr) {
631 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
632 flags |= RT6_LOOKUP_F_HAS_SADDR;
633 }
634
606a2b48 635 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
636 if (dst->error == 0)
637 return (struct rt6_info *) dst;
638
639 dst_release(dst);
640
1da177e4
LT
641 return NULL;
642}
643
7159039a
YH
644EXPORT_SYMBOL(rt6_lookup);
645
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold it, it may
   be destroyed.
 */
651
86872cb5 652static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
653{
654 int err;
c71099ac 655 struct fib6_table *table;
1da177e4 656
c71099ac
TG
657 table = rt->rt6i_table;
658 write_lock_bh(&table->tb6_lock);
86872cb5 659 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 660 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
661
662 return err;
663}
664
40e22e8f
TG
665int ip6_ins_rt(struct rt6_info *rt)
666{
4d1169c1 667 struct nl_info info = {
c346dca1 668 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 669 };
528c4ceb 670 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
671}
672
95a9a5ba
YH
673static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
674 struct in6_addr *saddr)
1da177e4 675{
1da177e4
LT
676 struct rt6_info *rt;
677
678 /*
679 * Clone the route.
680 */
681
682 rt = ip6_rt_copy(ort);
683
684 if (rt) {
14deae41
DM
685 struct neighbour *neigh;
686 int attempts = !in_softirq();
687
58c4fb86
YH
688 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
689 if (rt->rt6i_dst.plen != 128 &&
690 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
691 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 692 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 693 }
1da177e4 694
58c4fb86 695 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
696 rt->rt6i_dst.plen = 128;
697 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 698 rt->dst.flags |= DST_HOST;
1da177e4
LT
699
700#ifdef CONFIG_IPV6_SUBTREES
701 if (rt->rt6i_src.plen && saddr) {
702 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
703 rt->rt6i_src.plen = 128;
704 }
705#endif
706
14deae41
DM
707 retry:
708 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
709 if (IS_ERR(neigh)) {
710 struct net *net = dev_net(rt->rt6i_dev);
711 int saved_rt_min_interval =
712 net->ipv6.sysctl.ip6_rt_gc_min_interval;
713 int saved_rt_elasticity =
714 net->ipv6.sysctl.ip6_rt_gc_elasticity;
715
716 if (attempts-- > 0) {
717 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
718 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
719
86393e52 720 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
721
722 net->ipv6.sysctl.ip6_rt_gc_elasticity =
723 saved_rt_elasticity;
724 net->ipv6.sysctl.ip6_rt_gc_min_interval =
725 saved_rt_min_interval;
726 goto retry;
727 }
728
729 if (net_ratelimit())
730 printk(KERN_WARNING
7e1b33e5 731 "ipv6: Neighbour table overflow.\n");
d8d1f30b 732 dst_free(&rt->dst);
14deae41
DM
733 return NULL;
734 }
735 rt->rt6i_nexthop = neigh;
1da177e4 736
95a9a5ba 737 }
1da177e4 738
95a9a5ba
YH
739 return rt;
740}
1da177e4 741
299d9939
YH
742static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
743{
744 struct rt6_info *rt = ip6_rt_copy(ort);
745 if (rt) {
746 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
747 rt->rt6i_dst.plen = 128;
748 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 749 rt->dst.flags |= DST_HOST;
299d9939
YH
750 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
751 }
752 return rt;
753}
754
8ed67789
DL
755static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
756 struct flowi *fl, int flags)
1da177e4
LT
757{
758 struct fib6_node *fn;
519fbd87 759 struct rt6_info *rt, *nrt;
c71099ac 760 int strict = 0;
1da177e4 761 int attempts = 3;
519fbd87 762 int err;
53b7997f 763 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 764
77d16f45 765 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
766
767relookup:
c71099ac 768 read_lock_bh(&table->tb6_lock);
1da177e4 769
8238dd06 770restart_2:
c71099ac 771 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
772
773restart:
4acad72d 774 rt = rt6_select(fn, oif, strict | reachable);
8ed67789
DL
775
776 BACKTRACK(net, &fl->fl6_src);
777 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 778 rt->rt6i_flags & RTF_CACHE)
1ddef044 779 goto out;
1da177e4 780
d8d1f30b 781 dst_hold(&rt->dst);
c71099ac 782 read_unlock_bh(&table->tb6_lock);
fb9de91e 783
519fbd87 784 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 785 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
d80bc0fd 786 else
c71099ac 787 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
e40cf353 788
d8d1f30b 789 dst_release(&rt->dst);
8ed67789 790 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 791
d8d1f30b 792 dst_hold(&rt->dst);
519fbd87 793 if (nrt) {
40e22e8f 794 err = ip6_ins_rt(nrt);
519fbd87 795 if (!err)
1da177e4 796 goto out2;
1da177e4 797 }
1da177e4 798
519fbd87
YH
799 if (--attempts <= 0)
800 goto out2;
801
802 /*
c71099ac 803 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
804 * released someone could insert this route. Relookup.
805 */
d8d1f30b 806 dst_release(&rt->dst);
519fbd87
YH
807 goto relookup;
808
809out:
8238dd06
YH
810 if (reachable) {
811 reachable = 0;
812 goto restart_2;
813 }
d8d1f30b 814 dst_hold(&rt->dst);
c71099ac 815 read_unlock_bh(&table->tb6_lock);
1da177e4 816out2:
d8d1f30b
CG
817 rt->dst.lastuse = jiffies;
818 rt->dst.__use++;
c71099ac
TG
819
820 return rt;
1da177e4
LT
821}
822
8ed67789 823static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4acad72d
PE
824 struct flowi *fl, int flags)
825{
8ed67789 826 return ip6_pol_route(net, table, fl->iif, fl, flags);
4acad72d
PE
827}
828
c71099ac
TG
829void ip6_route_input(struct sk_buff *skb)
830{
0660e03f 831 struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 832 struct net *net = dev_net(skb->dev);
adaa70bb 833 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
834 struct flowi fl = {
835 .iif = skb->dev->ifindex,
5811662b
CG
836 .fl6_dst = iph->daddr,
837 .fl6_src = iph->saddr,
838 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
1ab1457c 839 .mark = skb->mark,
c71099ac
TG
840 .proto = iph->nexthdr,
841 };
adaa70bb 842
1d6e55f1 843 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 844 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 845
adf30907 846 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
c71099ac
TG
847}
848
8ed67789 849static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
c71099ac 850 struct flowi *fl, int flags)
1da177e4 851{
8ed67789 852 return ip6_pol_route(net, table, fl->oif, fl, flags);
c71099ac
TG
853}
854
4591db4f
DL
855struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
856 struct flowi *fl)
c71099ac
TG
857{
858 int flags = 0;
859
6057fd78 860 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
77d16f45 861 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 862
adaa70bb
TG
863 if (!ipv6_addr_any(&fl->fl6_src))
864 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
865 else if (sk)
866 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 867
4591db4f 868 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
1da177e4
LT
869}
870
7159039a 871EXPORT_SYMBOL(ip6_route_output);
1da177e4 872
14e50e57
DM
873int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
874{
875 struct rt6_info *ort = (struct rt6_info *) *dstp;
876 struct rt6_info *rt = (struct rt6_info *)
3c7bd1a1 877 dst_alloc(&ip6_dst_blackhole_ops, 1);
14e50e57
DM
878 struct dst_entry *new = NULL;
879
880 if (rt) {
d8d1f30b 881 new = &rt->dst;
14e50e57 882
14e50e57 883 new->__use = 1;
352e512c
HX
884 new->input = dst_discard;
885 new->output = dst_discard;
14e50e57 886
defb3519 887 dst_copy_metrics(new, &ort->dst);
d8d1f30b 888 new->dev = ort->dst.dev;
14e50e57
DM
889 if (new->dev)
890 dev_hold(new->dev);
891 rt->rt6i_idev = ort->rt6i_idev;
892 if (rt->rt6i_idev)
893 in6_dev_hold(rt->rt6i_idev);
894 rt->rt6i_expires = 0;
895
896 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
897 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
898 rt->rt6i_metric = 0;
899
900 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
901#ifdef CONFIG_IPV6_SUBTREES
902 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
903#endif
904
905 dst_free(new);
906 }
907
908 dst_release(*dstp);
909 *dstp = new;
a02cec21 910 return new ? 0 : -ENOMEM;
14e50e57
DM
911}
912EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
913
1da177e4
LT
914/*
915 * Destination cache support functions
916 */
917
918static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
919{
920 struct rt6_info *rt;
921
922 rt = (struct rt6_info *) dst;
923
6431cbc2
DM
924 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
925 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
926 if (!rt->rt6i_peer)
927 rt6_bind_peer(rt, 0);
928 rt->rt6i_peer_genid = rt6_peer_genid();
929 }
1da177e4 930 return dst;
6431cbc2 931 }
1da177e4
LT
932 return NULL;
933}
934
935static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
936{
937 struct rt6_info *rt = (struct rt6_info *) dst;
938
939 if (rt) {
54c1a859
YH
940 if (rt->rt6i_flags & RTF_CACHE) {
941 if (rt6_check_expired(rt)) {
942 ip6_del_rt(rt);
943 dst = NULL;
944 }
945 } else {
1da177e4 946 dst_release(dst);
54c1a859
YH
947 dst = NULL;
948 }
1da177e4 949 }
54c1a859 950 return dst;
1da177e4
LT
951}
952
953static void ip6_link_failure(struct sk_buff *skb)
954{
955 struct rt6_info *rt;
956
3ffe533c 957 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 958
adf30907 959 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
960 if (rt) {
961 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 962 dst_set_expires(&rt->dst, 0);
1da177e4
LT
963 rt->rt6i_flags |= RTF_EXPIRES;
964 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
965 rt->rt6i_node->fn_sernum = -1;
966 }
967}
968
969static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
970{
971 struct rt6_info *rt6 = (struct rt6_info*)dst;
972
973 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
974 rt6->rt6i_flags |= RTF_MODIFIED;
975 if (mtu < IPV6_MIN_MTU) {
defb3519 976 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 977 mtu = IPV6_MIN_MTU;
defb3519
DM
978 features |= RTAX_FEATURE_ALLFRAG;
979 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 980 }
defb3519 981 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
982 }
983}
984
0dbaee3b 985static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 986{
0dbaee3b
DM
987 struct net_device *dev = dst->dev;
988 unsigned int mtu = dst_mtu(dst);
989 struct net *net = dev_net(dev);
990
1da177e4
LT
991 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
992
5578689a
DL
993 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
994 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
995
996 /*
1ab1457c
YH
997 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
998 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
999 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1000 * rely only on pmtu discovery"
1001 */
1002 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1003 mtu = IPV6_MAXPLEN;
1004 return mtu;
1005}
1006
d33e4553
DM
1007static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1008{
1009 unsigned int mtu = IPV6_MIN_MTU;
1010 struct inet6_dev *idev;
1011
1012 rcu_read_lock();
1013 idev = __in6_dev_get(dst->dev);
1014 if (idev)
1015 mtu = idev->cnf.mtu6;
1016 rcu_read_unlock();
1017
1018 return mtu;
1019}
1020
3b00944c
YH
1021static struct dst_entry *icmp6_dst_gc_list;
1022static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1023
3b00944c 1024struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1025 struct neighbour *neigh,
9acd9f3a 1026 const struct in6_addr *addr)
1da177e4
LT
1027{
1028 struct rt6_info *rt;
1029 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1030 struct net *net = dev_net(dev);
1da177e4
LT
1031
1032 if (unlikely(idev == NULL))
1033 return NULL;
1034
86393e52 1035 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1036 if (unlikely(rt == NULL)) {
1037 in6_dev_put(idev);
1038 goto out;
1039 }
1040
1041 dev_hold(dev);
1042 if (neigh)
1043 neigh_hold(neigh);
14deae41 1044 else {
1da177e4 1045 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
1046 if (IS_ERR(neigh))
1047 neigh = NULL;
1048 }
1da177e4
LT
1049
1050 rt->rt6i_dev = dev;
1051 rt->rt6i_idev = idev;
1052 rt->rt6i_nexthop = neigh;
d8d1f30b 1053 atomic_set(&rt->dst.__refcnt, 1);
defb3519 1054 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
d8d1f30b 1055 rt->dst.output = ip6_output;
1da177e4
LT
1056
1057#if 0 /* there's no chance to use these for ndisc */
d8d1f30b 1058 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1ab1457c 1059 ? DST_HOST
1da177e4
LT
1060 : 0;
1061 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1062 rt->rt6i_dst.plen = 128;
1063#endif
1064
3b00944c 1065 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1066 rt->dst.next = icmp6_dst_gc_list;
1067 icmp6_dst_gc_list = &rt->dst;
3b00944c 1068 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1069
5578689a 1070 fib6_force_start_gc(net);
1da177e4
LT
1071
1072out:
d8d1f30b 1073 return &rt->dst;
1da177e4
LT
1074}
1075
3d0f24a7 1076int icmp6_dst_gc(void)
1da177e4
LT
1077{
1078 struct dst_entry *dst, *next, **pprev;
3d0f24a7 1079 int more = 0;
1da177e4
LT
1080
1081 next = NULL;
5d0bbeeb 1082
3b00944c
YH
1083 spin_lock_bh(&icmp6_dst_lock);
1084 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1085
1da177e4
LT
1086 while ((dst = *pprev) != NULL) {
1087 if (!atomic_read(&dst->__refcnt)) {
1088 *pprev = dst->next;
1089 dst_free(dst);
1da177e4
LT
1090 } else {
1091 pprev = &dst->next;
3d0f24a7 1092 ++more;
1da177e4
LT
1093 }
1094 }
1095
3b00944c 1096 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1097
3d0f24a7 1098 return more;
1da177e4
LT
1099}
1100
1e493d19
DM
1101static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1102 void *arg)
1103{
1104 struct dst_entry *dst, **pprev;
1105
1106 spin_lock_bh(&icmp6_dst_lock);
1107 pprev = &icmp6_dst_gc_list;
1108 while ((dst = *pprev) != NULL) {
1109 struct rt6_info *rt = (struct rt6_info *) dst;
1110 if (func(rt, arg)) {
1111 *pprev = dst->next;
1112 dst_free(dst);
1113 } else {
1114 pprev = &dst->next;
1115 }
1116 }
1117 spin_unlock_bh(&icmp6_dst_lock);
1118}
1119
569d3645 1120static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1121{
1da177e4 1122 unsigned long now = jiffies;
86393e52 1123 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1124 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1125 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1126 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1127 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1128 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1129 int entries;
7019b78e 1130
fc66f95c 1131 entries = dst_entries_get_fast(ops);
7019b78e 1132 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1133 entries <= rt_max_size)
1da177e4
LT
1134 goto out;
1135
6891a346
BT
1136 net->ipv6.ip6_rt_gc_expire++;
1137 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1138 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1139 entries = dst_entries_get_slow(ops);
1140 if (entries < ops->gc_thresh)
7019b78e 1141 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1142out:
7019b78e 1143 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1144 return entries > rt_max_size;
1da177e4
LT
1145}
1146
1147/* Clean host part of a prefix. Not necessary in radix tree,
1148 but results in cleaner routing tables.
1149
1150 Remove it only when all the things will work!
1151 */
1152
6b75d090 1153int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1154{
5170ae82 1155 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1156 if (hoplimit == 0) {
6b75d090 1157 struct net_device *dev = dst->dev;
c68f24cc
ED
1158 struct inet6_dev *idev;
1159
1160 rcu_read_lock();
1161 idev = __in6_dev_get(dev);
1162 if (idev)
6b75d090 1163 hoplimit = idev->cnf.hop_limit;
c68f24cc 1164 else
53b7997f 1165 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1166 rcu_read_unlock();
1da177e4
LT
1167 }
1168 return hoplimit;
1169}
abbf46ae 1170EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1171
1172/*
1173 *
1174 */
1175
86872cb5 1176int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1177{
1178 int err;
5578689a 1179 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1180 struct rt6_info *rt = NULL;
1181 struct net_device *dev = NULL;
1182 struct inet6_dev *idev = NULL;
c71099ac 1183 struct fib6_table *table;
1da177e4
LT
1184 int addr_type;
1185
86872cb5 1186 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1187 return -EINVAL;
1188#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1189 if (cfg->fc_src_len)
1da177e4
LT
1190 return -EINVAL;
1191#endif
86872cb5 1192 if (cfg->fc_ifindex) {
1da177e4 1193 err = -ENODEV;
5578689a 1194 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1195 if (!dev)
1196 goto out;
1197 idev = in6_dev_get(dev);
1198 if (!idev)
1199 goto out;
1200 }
1201
86872cb5
TG
1202 if (cfg->fc_metric == 0)
1203 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1204
5578689a 1205 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1206 if (table == NULL) {
1207 err = -ENOBUFS;
1208 goto out;
1209 }
1210
86393e52 1211 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1212
1213 if (rt == NULL) {
1214 err = -ENOMEM;
1215 goto out;
1216 }
1217
d8d1f30b 1218 rt->dst.obsolete = -1;
6f704992
YH
1219 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1220 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1221 0;
1da177e4 1222
86872cb5
TG
1223 if (cfg->fc_protocol == RTPROT_UNSPEC)
1224 cfg->fc_protocol = RTPROT_BOOT;
1225 rt->rt6i_protocol = cfg->fc_protocol;
1226
1227 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1228
1229 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1230 rt->dst.input = ip6_mc_input;
ab79ad14
1231 else if (cfg->fc_flags & RTF_LOCAL)
1232 rt->dst.input = ip6_input;
1da177e4 1233 else
d8d1f30b 1234 rt->dst.input = ip6_forward;
1da177e4 1235
d8d1f30b 1236 rt->dst.output = ip6_output;
1da177e4 1237
86872cb5
TG
1238 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1239 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1240 if (rt->rt6i_dst.plen == 128)
d8d1f30b 1241 rt->dst.flags = DST_HOST;
1da177e4
LT
1242
1243#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1244 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1245 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1246#endif
1247
86872cb5 1248 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1249
1250 /* We cannot add true routes via loopback here,
1251 they would result in kernel looping; promote them to reject routes
1252 */
86872cb5 1253 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1254 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1255 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1256 /* hold loopback dev/idev if we haven't done so. */
5578689a 1257 if (dev != net->loopback_dev) {
1da177e4
LT
1258 if (dev) {
1259 dev_put(dev);
1260 in6_dev_put(idev);
1261 }
5578689a 1262 dev = net->loopback_dev;
1da177e4
LT
1263 dev_hold(dev);
1264 idev = in6_dev_get(dev);
1265 if (!idev) {
1266 err = -ENODEV;
1267 goto out;
1268 }
1269 }
d8d1f30b
CG
1270 rt->dst.output = ip6_pkt_discard_out;
1271 rt->dst.input = ip6_pkt_discard;
1272 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1273 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1274 goto install_route;
1275 }
1276
86872cb5 1277 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1278 struct in6_addr *gw_addr;
1279 int gwa_type;
1280
86872cb5
TG
1281 gw_addr = &cfg->fc_gateway;
1282 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1283 gwa_type = ipv6_addr_type(gw_addr);
1284
1285 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1286 struct rt6_info *grt;
1287
1288 /* IPv6 strictly inhibits using not link-local
1289 addresses as nexthop address.
1290 Otherwise, router will not able to send redirects.
1291 It is very good, but in some (rare!) circumstances
1292 (SIT, PtP, NBMA NOARP links) it is handy to allow
1293 some exceptions. --ANK
1294 */
1295 err = -EINVAL;
1296 if (!(gwa_type&IPV6_ADDR_UNICAST))
1297 goto out;
1298
5578689a 1299 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1300
1301 err = -EHOSTUNREACH;
1302 if (grt == NULL)
1303 goto out;
1304 if (dev) {
1305 if (dev != grt->rt6i_dev) {
d8d1f30b 1306 dst_release(&grt->dst);
1da177e4
LT
1307 goto out;
1308 }
1309 } else {
1310 dev = grt->rt6i_dev;
1311 idev = grt->rt6i_idev;
1312 dev_hold(dev);
1313 in6_dev_hold(grt->rt6i_idev);
1314 }
1315 if (!(grt->rt6i_flags&RTF_GATEWAY))
1316 err = 0;
d8d1f30b 1317 dst_release(&grt->dst);
1da177e4
LT
1318
1319 if (err)
1320 goto out;
1321 }
1322 err = -EINVAL;
1323 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1324 goto out;
1325 }
1326
1327 err = -ENODEV;
1328 if (dev == NULL)
1329 goto out;
1330
86872cb5 1331 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1332 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1333 if (IS_ERR(rt->rt6i_nexthop)) {
1334 err = PTR_ERR(rt->rt6i_nexthop);
1335 rt->rt6i_nexthop = NULL;
1336 goto out;
1337 }
1338 }
1339
86872cb5 1340 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1341
1342install_route:
86872cb5
TG
1343 if (cfg->fc_mx) {
1344 struct nlattr *nla;
1345 int remaining;
1346
1347 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1348 int type = nla_type(nla);
86872cb5
TG
1349
1350 if (type) {
1351 if (type > RTAX_MAX) {
1da177e4
LT
1352 err = -EINVAL;
1353 goto out;
1354 }
86872cb5 1355
defb3519 1356 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1357 }
1da177e4
LT
1358 }
1359 }
1360
d8d1f30b 1361 rt->dst.dev = dev;
1da177e4 1362 rt->rt6i_idev = idev;
c71099ac 1363 rt->rt6i_table = table;
63152fc0 1364
c346dca1 1365 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1366
86872cb5 1367 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1368
1369out:
1370 if (dev)
1371 dev_put(dev);
1372 if (idev)
1373 in6_dev_put(idev);
1374 if (rt)
d8d1f30b 1375 dst_free(&rt->dst);
1da177e4
LT
1376 return err;
1377}
1378
86872cb5 1379static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1380{
1381 int err;
c71099ac 1382 struct fib6_table *table;
c346dca1 1383 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1384
8ed67789 1385 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1386 return -ENOENT;
1387
c71099ac
TG
1388 table = rt->rt6i_table;
1389 write_lock_bh(&table->tb6_lock);
1da177e4 1390
86872cb5 1391 err = fib6_del(rt, info);
d8d1f30b 1392 dst_release(&rt->dst);
1da177e4 1393
c71099ac 1394 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1395
1396 return err;
1397}
1398
e0a1ad73
TG
1399int ip6_del_rt(struct rt6_info *rt)
1400{
4d1169c1 1401 struct nl_info info = {
c346dca1 1402 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1403 };
528c4ceb 1404 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1405}
1406
86872cb5 1407static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1408{
c71099ac 1409 struct fib6_table *table;
1da177e4
LT
1410 struct fib6_node *fn;
1411 struct rt6_info *rt;
1412 int err = -ESRCH;
1413
5578689a 1414 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1415 if (table == NULL)
1416 return err;
1417
1418 read_lock_bh(&table->tb6_lock);
1da177e4 1419
c71099ac 1420 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1421 &cfg->fc_dst, cfg->fc_dst_len,
1422 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1423
1da177e4 1424 if (fn) {
d8d1f30b 1425 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1426 if (cfg->fc_ifindex &&
1da177e4 1427 (rt->rt6i_dev == NULL ||
86872cb5 1428 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1429 continue;
86872cb5
TG
1430 if (cfg->fc_flags & RTF_GATEWAY &&
1431 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1432 continue;
86872cb5 1433 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1434 continue;
d8d1f30b 1435 dst_hold(&rt->dst);
c71099ac 1436 read_unlock_bh(&table->tb6_lock);
1da177e4 1437
86872cb5 1438 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1439 }
1440 }
c71099ac 1441 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1442
1443 return err;
1444}
1445
1446/*
1447 * Handle redirects
1448 */
a6279458
YH
/*
 * Flow key used for redirect validation: a regular flowi extended with
 * the address of the router that sent the redirect.
 *
 * ip6_route_redirect() passes a pointer to this struct cast to
 * struct flowi *, and __ip6_route_redirect() casts it back, so @fl has
 * to remain the first member.
 */
1449struct ip6rd_flowi {
/* Ordinary lookup key (oif, destination, source). */
1450 struct flowi fl;
/* Compared against each candidate route's rt6i_gateway. */
1451 struct in6_addr gateway;
1452};
1453
8ed67789
DL
1454static struct rt6_info *__ip6_route_redirect(struct net *net,
1455 struct fib6_table *table,
a6279458
YH
1456 struct flowi *fl,
1457 int flags)
1da177e4 1458{
a6279458
YH
1459 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1460 struct rt6_info *rt;
e843b9e1 1461 struct fib6_node *fn;
c71099ac 1462
1da177e4 1463 /*
e843b9e1
YH
1464 * Get the "current" route for this destination and
1465 * check if the redirect has come from approriate router.
1466 *
1467 * RFC 2461 specifies that redirects should only be
1468 * accepted if they come from the nexthop to the target.
1469 * Due to the way the routes are chosen, this notion
1470 * is a bit fuzzy and one might need to check all possible
1471 * routes.
1da177e4 1472 */
1da177e4 1473
c71099ac 1474 read_lock_bh(&table->tb6_lock);
a6279458 1475 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1476restart:
d8d1f30b 1477 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1478 /*
1479 * Current route is on-link; redirect is always invalid.
1480 *
1481 * Seems, previous statement is not true. It could
1482 * be node, which looks for us as on-link (f.e. proxy ndisc)
1483 * But then router serving it might decide, that we should
1484 * know truth 8)8) --ANK (980726).
1485 */
1486 if (rt6_check_expired(rt))
1487 continue;
1488 if (!(rt->rt6i_flags & RTF_GATEWAY))
1489 continue;
a6279458 1490 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1491 continue;
a6279458 1492 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1493 continue;
1494 break;
1495 }
a6279458 1496
cb15d9c2 1497 if (!rt)
8ed67789
DL
1498 rt = net->ipv6.ip6_null_entry;
1499 BACKTRACK(net, &fl->fl6_src);
cb15d9c2 1500out:
d8d1f30b 1501 dst_hold(&rt->dst);
a6279458 1502
c71099ac 1503 read_unlock_bh(&table->tb6_lock);
e843b9e1 1504
a6279458
YH
1505 return rt;
1506};
1507
1508static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1509 struct in6_addr *src,
1510 struct in6_addr *gateway,
1511 struct net_device *dev)
1512{
adaa70bb 1513 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1514 struct net *net = dev_net(dev);
a6279458
YH
1515 struct ip6rd_flowi rdfl = {
1516 .fl = {
1517 .oif = dev->ifindex,
5811662b
CG
1518 .fl6_dst = *dest,
1519 .fl6_src = *src,
a6279458 1520 },
a6279458 1521 };
adaa70bb 1522
86c36ce4
BH
1523 ipv6_addr_copy(&rdfl.gateway, gateway);
1524
adaa70bb
TG
1525 if (rt6_need_strict(dest))
1526 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1527
5578689a 1528 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
58f09b78 1529 flags, __ip6_route_redirect);
a6279458
YH
1530}
1531
1532void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1533 struct in6_addr *saddr,
1534 struct neighbour *neigh, u8 *lladdr, int on_link)
1535{
1536 struct rt6_info *rt, *nrt = NULL;
1537 struct netevent_redirect netevent;
c346dca1 1538 struct net *net = dev_net(neigh->dev);
a6279458
YH
1539
1540 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1541
8ed67789 1542 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1543 if (net_ratelimit())
1544 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1545 "for redirect target\n");
a6279458 1546 goto out;
1da177e4
LT
1547 }
1548
1da177e4
LT
1549 /*
1550 * We have finally decided to accept it.
1551 */
1552
1ab1457c 1553 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1554 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1555 NEIGH_UPDATE_F_OVERRIDE|
1556 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1557 NEIGH_UPDATE_F_ISROUTER))
1558 );
1559
1560 /*
1561 * Redirect received -> path was valid.
1562 * Look, redirects are sent only in response to data packets,
1563 * so that this nexthop apparently is reachable. --ANK
1564 */
d8d1f30b 1565 dst_confirm(&rt->dst);
1da177e4
LT
1566
1567 /* Duplicate redirect: silently ignore. */
d8d1f30b 1568 if (neigh == rt->dst.neighbour)
1da177e4
LT
1569 goto out;
1570
1571 nrt = ip6_rt_copy(rt);
1572 if (nrt == NULL)
1573 goto out;
1574
1575 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1576 if (on_link)
1577 nrt->rt6i_flags &= ~RTF_GATEWAY;
1578
1579 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1580 nrt->rt6i_dst.plen = 128;
d8d1f30b 1581 nrt->dst.flags |= DST_HOST;
1da177e4
LT
1582
1583 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1584 nrt->rt6i_nexthop = neigh_clone(neigh);
1da177e4 1585
40e22e8f 1586 if (ip6_ins_rt(nrt))
1da177e4
LT
1587 goto out;
1588
d8d1f30b
CG
1589 netevent.old = &rt->dst;
1590 netevent.new = &nrt->dst;
8d71740c
TT
1591 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1592
1da177e4 1593 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1594 ip6_del_rt(rt);
1da177e4
LT
1595 return;
1596 }
1597
1598out:
d8d1f30b 1599 dst_release(&rt->dst);
1da177e4
LT
1600}
1601
1602/*
1603 * Handle ICMP "packet too big" messages
1604 * i.e. Path MTU discovery
1605 */
1606
ae878ae2
1607static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1608 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1609{
1610 struct rt6_info *rt, *nrt;
1611 int allfrag = 0;
d3052b55 1612again:
ae878ae2 1613 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1614 if (rt == NULL)
1615 return;
1616
d3052b55
AV
1617 if (rt6_check_expired(rt)) {
1618 ip6_del_rt(rt);
1619 goto again;
1620 }
1621
d8d1f30b 1622 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1623 goto out;
1624
1625 if (pmtu < IPV6_MIN_MTU) {
1626 /*
1ab1457c 1627 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1628 * MTU (1280) and a fragment header should always be included
1629 * after a node receiving Too Big message reporting PMTU is
1630 * less than the IPv6 Minimum Link MTU.
1631 */
1632 pmtu = IPV6_MIN_MTU;
1633 allfrag = 1;
1634 }
1635
1636 /* New mtu received -> path was valid.
1637 They are sent only in response to data packets,
1638 so that this nexthop apparently is reachable. --ANK
1639 */
d8d1f30b 1640 dst_confirm(&rt->dst);
1da177e4
LT
1641
1642 /* Host route. If it is static, it would be better
1643 not to override it, but add new one, so that
1644 when cache entry will expire old pmtu
1645 would return automatically.
1646 */
1647 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1648 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1649 if (allfrag) {
1650 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1651 features |= RTAX_FEATURE_ALLFRAG;
1652 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1653 }
d8d1f30b 1654 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1655 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1656 goto out;
1657 }
1658
1659 /* Network route.
1660 Two cases are possible:
1661 1. It is connected route. Action: COW
1662 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1663 */
d5315b50 1664 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1665 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1666 else
1667 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1668
d5315b50 1669 if (nrt) {
defb3519
DM
1670 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1671 if (allfrag) {
1672 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1673 features |= RTAX_FEATURE_ALLFRAG;
1674 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1675 }
a1e78363
YH
1676
1677 /* According to RFC 1981, detecting PMTU increase shouldn't be
1678 * happened within 5 mins, the recommended timer is 10 mins.
1679 * Here this route expiration time is set to ip6_rt_mtu_expires
1680 * which is 10 mins. After 10 mins the decreased pmtu is expired
1681 * and detecting PMTU increase will be automatically happened.
1682 */
d8d1f30b 1683 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1684 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1685
40e22e8f 1686 ip6_ins_rt(nrt);
1da177e4 1687 }
1da177e4 1688out:
d8d1f30b 1689 dst_release(&rt->dst);
1da177e4
LT
1690}
1691
ae878ae2
1692void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1693 struct net_device *dev, u32 pmtu)
1694{
1695 struct net *net = dev_net(dev);
1696
1697 /*
1698 * RFC 1981 states that a node "MUST reduce the size of the packets it
1699 * is sending along the path" that caused the Packet Too Big message.
1700 * Since it's not possible in the general case to determine which
1701 * interface was used to send the original packet, we update the MTU
1702 * on the interface that will be used to send future packets. We also
1703 * update the MTU on the interface that received the Packet Too Big in
1704 * case the original packet was forced out that interface with
1705 * SO_BINDTODEVICE or similar. This is the next best thing to the
1706 * correct behaviour, which would be to update the MTU on all
1707 * interfaces.
1708 */
1709 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1710 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1711}
1712
1da177e4
LT
1713/*
1714 * Misc support functions
1715 */
1716
1717static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1718{
c346dca1 1719 struct net *net = dev_net(ort->rt6i_dev);
86393e52 1720 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1721
1722 if (rt) {
d8d1f30b
CG
1723 rt->dst.input = ort->dst.input;
1724 rt->dst.output = ort->dst.output;
1725
defb3519 1726 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b
CG
1727 rt->dst.error = ort->dst.error;
1728 rt->dst.dev = ort->dst.dev;
1729 if (rt->dst.dev)
1730 dev_hold(rt->dst.dev);
1da177e4
LT
1731 rt->rt6i_idev = ort->rt6i_idev;
1732 if (rt->rt6i_idev)
1733 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1734 rt->dst.lastuse = jiffies;
1da177e4
LT
1735 rt->rt6i_expires = 0;
1736
1737 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1738 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1739 rt->rt6i_metric = 0;
1740
1741 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1742#ifdef CONFIG_IPV6_SUBTREES
1743 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1744#endif
c71099ac 1745 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1746 }
1747 return rt;
1748}
1749
70ceb4f5 1750#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1751static struct rt6_info *rt6_get_route_info(struct net *net,
1752 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1753 struct in6_addr *gwaddr, int ifindex)
1754{
1755 struct fib6_node *fn;
1756 struct rt6_info *rt = NULL;
c71099ac
TG
1757 struct fib6_table *table;
1758
efa2cea0 1759 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1760 if (table == NULL)
1761 return NULL;
70ceb4f5 1762
c71099ac
TG
1763 write_lock_bh(&table->tb6_lock);
1764 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1765 if (!fn)
1766 goto out;
1767
d8d1f30b 1768 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1769 if (rt->rt6i_dev->ifindex != ifindex)
1770 continue;
1771 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1772 continue;
1773 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1774 continue;
d8d1f30b 1775 dst_hold(&rt->dst);
70ceb4f5
YH
1776 break;
1777 }
1778out:
c71099ac 1779 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1780 return rt;
1781}
1782
efa2cea0
DL
1783static struct rt6_info *rt6_add_route_info(struct net *net,
1784 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1785 struct in6_addr *gwaddr, int ifindex,
1786 unsigned pref)
1787{
86872cb5
TG
1788 struct fib6_config cfg = {
1789 .fc_table = RT6_TABLE_INFO,
238fc7ea 1790 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1791 .fc_ifindex = ifindex,
1792 .fc_dst_len = prefixlen,
1793 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1794 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1795 .fc_nlinfo.pid = 0,
1796 .fc_nlinfo.nlh = NULL,
1797 .fc_nlinfo.nl_net = net,
86872cb5
TG
1798 };
1799
1800 ipv6_addr_copy(&cfg.fc_dst, prefix);
1801 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1802
e317da96
YH
1803 /* We should treat it as a default route if prefix length is 0. */
1804 if (!prefixlen)
86872cb5 1805 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1806
86872cb5 1807 ip6_route_add(&cfg);
70ceb4f5 1808
efa2cea0 1809 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1810}
1811#endif
1812
1da177e4 1813struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1814{
1da177e4 1815 struct rt6_info *rt;
c71099ac 1816 struct fib6_table *table;
1da177e4 1817
c346dca1 1818 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1819 if (table == NULL)
1820 return NULL;
1da177e4 1821
c71099ac 1822 write_lock_bh(&table->tb6_lock);
d8d1f30b 1823 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1824 if (dev == rt->rt6i_dev &&
045927ff 1825 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1826 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1827 break;
1828 }
1829 if (rt)
d8d1f30b 1830 dst_hold(&rt->dst);
c71099ac 1831 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1832 return rt;
1833}
1834
1835struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1836 struct net_device *dev,
1837 unsigned int pref)
1da177e4 1838{
86872cb5
TG
1839 struct fib6_config cfg = {
1840 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1841 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1842 .fc_ifindex = dev->ifindex,
1843 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1844 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1845 .fc_nlinfo.pid = 0,
1846 .fc_nlinfo.nlh = NULL,
c346dca1 1847 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1848 };
1da177e4 1849
86872cb5 1850 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1851
86872cb5 1852 ip6_route_add(&cfg);
1da177e4 1853
1da177e4
LT
1854 return rt6_get_dflt_router(gwaddr, dev);
1855}
1856
7b4da532 1857void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1858{
1859 struct rt6_info *rt;
c71099ac
TG
1860 struct fib6_table *table;
1861
1862 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1863 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1864 if (table == NULL)
1865 return;
1da177e4
LT
1866
1867restart:
c71099ac 1868 read_lock_bh(&table->tb6_lock);
d8d1f30b 1869 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1870 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1871 dst_hold(&rt->dst);
c71099ac 1872 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1873 ip6_del_rt(rt);
1da177e4
LT
1874 goto restart;
1875 }
1876 }
c71099ac 1877 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1878}
1879
5578689a
DL
1880static void rtmsg_to_fib6_config(struct net *net,
1881 struct in6_rtmsg *rtmsg,
86872cb5
TG
1882 struct fib6_config *cfg)
1883{
1884 memset(cfg, 0, sizeof(*cfg));
1885
1886 cfg->fc_table = RT6_TABLE_MAIN;
1887 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1888 cfg->fc_metric = rtmsg->rtmsg_metric;
1889 cfg->fc_expires = rtmsg->rtmsg_info;
1890 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1891 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1892 cfg->fc_flags = rtmsg->rtmsg_flags;
1893
5578689a 1894 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1895
86872cb5
TG
1896 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1897 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1898 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1899}
1900
5578689a 1901int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1902{
86872cb5 1903 struct fib6_config cfg;
1da177e4
LT
1904 struct in6_rtmsg rtmsg;
1905 int err;
1906
1907 switch(cmd) {
1908 case SIOCADDRT: /* Add a route */
1909 case SIOCDELRT: /* Delete a route */
1910 if (!capable(CAP_NET_ADMIN))
1911 return -EPERM;
1912 err = copy_from_user(&rtmsg, arg,
1913 sizeof(struct in6_rtmsg));
1914 if (err)
1915 return -EFAULT;
86872cb5 1916
5578689a 1917 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1918
1da177e4
LT
1919 rtnl_lock();
1920 switch (cmd) {
1921 case SIOCADDRT:
86872cb5 1922 err = ip6_route_add(&cfg);
1da177e4
LT
1923 break;
1924 case SIOCDELRT:
86872cb5 1925 err = ip6_route_del(&cfg);
1da177e4
LT
1926 break;
1927 default:
1928 err = -EINVAL;
1929 }
1930 rtnl_unlock();
1931
1932 return err;
3ff50b79 1933 }
1da177e4
LT
1934
1935 return -EINVAL;
1936}
1937
1938/*
1939 * Drop the packet on the floor
1940 */
1941
d5fdd6ba 1942static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1943{
612f09e8 1944 int type;
adf30907 1945 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1946 switch (ipstats_mib_noroutes) {
1947 case IPSTATS_MIB_INNOROUTES:
0660e03f 1948 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1949 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1950 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1951 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1952 break;
1953 }
1954 /* FALLTHROUGH */
1955 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1956 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1957 ipstats_mib_noroutes);
612f09e8
YH
1958 break;
1959 }
3ffe533c 1960 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1961 kfree_skb(skb);
1962 return 0;
1963}
1964
9ce8ade0
TG
1965static int ip6_pkt_discard(struct sk_buff *skb)
1966{
612f09e8 1967 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1968}
1969
20380731 1970static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 1971{
adf30907 1972 skb->dev = skb_dst(skb)->dev;
612f09e8 1973 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1974}
1975
6723ab54
DM
1976#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1977
9ce8ade0
TG
1978static int ip6_pkt_prohibit(struct sk_buff *skb)
1979{
612f09e8 1980 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1981}
1982
1983static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1984{
adf30907 1985 skb->dev = skb_dst(skb)->dev;
612f09e8 1986 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1987}
1988
6723ab54
DM
1989#endif
1990
1da177e4
LT
1991/*
1992 * Allocate a dst for local (unicast / anycast) address.
1993 */
1994
1995struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1996 const struct in6_addr *addr,
1997 int anycast)
1998{
c346dca1 1999 struct net *net = dev_net(idev->dev);
86393e52 2000 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
14deae41 2001 struct neighbour *neigh;
1da177e4 2002
40385653
BG
2003 if (rt == NULL) {
2004 if (net_ratelimit())
2005 pr_warning("IPv6: Maximum number of routes reached,"
2006 " consider increasing route/max_size.\n");
1da177e4 2007 return ERR_PTR(-ENOMEM);
40385653 2008 }
1da177e4 2009
5578689a 2010 dev_hold(net->loopback_dev);
1da177e4
LT
2011 in6_dev_hold(idev);
2012
d8d1f30b
CG
2013 rt->dst.flags = DST_HOST;
2014 rt->dst.input = ip6_input;
2015 rt->dst.output = ip6_output;
5578689a 2016 rt->rt6i_dev = net->loopback_dev;
1da177e4 2017 rt->rt6i_idev = idev;
defb3519 2018 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
d8d1f30b 2019 rt->dst.obsolete = -1;
1da177e4
LT
2020
2021 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2022 if (anycast)
2023 rt->rt6i_flags |= RTF_ANYCAST;
2024 else
1da177e4 2025 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
2026 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2027 if (IS_ERR(neigh)) {
d8d1f30b 2028 dst_free(&rt->dst);
14deae41
DM
2029
2030 /* We are casting this because that is the return
2031 * value type. But an errno encoded pointer is the
2032 * same regardless of the underlying pointer type,
2033 * and that's what we are returning. So this is OK.
2034 */
2035 return (struct rt6_info *) neigh;
1da177e4 2036 }
14deae41 2037 rt->rt6i_nexthop = neigh;
1da177e4
LT
2038
2039 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2040 rt->rt6i_dst.plen = 128;
5578689a 2041 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2042
d8d1f30b 2043 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2044
2045 return rt;
2046}
2047
8ed67789
DL
/*
 * Cookie handed to fib6_ifdown() by rt6_ifdown(): which device is going
 * away and in which namespace.
 */
2048struct arg_dev_net {
/* Device to match; NULL makes fib6_ifdown() match every device. */
2049 struct net_device *dev;
/* Namespace whose ip6_null_entry must never be deleted. */
2050 struct net *net;
2051};
2052
1da177e4
LT
2053static int fib6_ifdown(struct rt6_info *rt, void *arg)
2054{
bc3ef660 2055 const struct arg_dev_net *adn = arg;
2056 const struct net_device *dev = adn->dev;
8ed67789 2057
bc3ef660 2058 if ((rt->rt6i_dev == dev || dev == NULL) &&
2059 rt != adn->net->ipv6.ip6_null_entry) {
1da177e4
LT
2060 RT6_TRACE("deleted by ifdown %p\n", rt);
2061 return -1;
2062 }
2063 return 0;
2064}
2065
f3db4851 2066void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2067{
8ed67789
DL
2068 struct arg_dev_net adn = {
2069 .dev = dev,
2070 .net = net,
2071 };
2072
2073 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2074 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2075}
2076
/*
 * Argument bundle for rt6_mtu_change_route(): the device whose MTU
 * changed and the new MTU value.
 */
struct rt6_mtu_change_arg
{
	struct net_device	*dev;	/* device whose MTU was changed */
	unsigned		mtu;	/* new MTU of that device */
};
2082
2083static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2084{
2085 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2086 struct inet6_dev *idev;
2087
2088 /* In IPv6 pmtu discovery is not optional,
2089 so that RTAX_MTU lock cannot disable it.
2090 We still use this lock to block changes
2091 caused by addrconf/ndisc.
2092 */
2093
2094 idev = __in6_dev_get(arg->dev);
2095 if (idev == NULL)
2096 return 0;
2097
2098 /* For administrative MTU increase, there is no way to discover
2099 IPv6 PMTU increase, so PMTU increase should be updated here.
2100 Since RFC 1981 doesn't include administrative MTU increase
2101 update PMTU increase is a MUST. (i.e. jumbo frame)
2102 */
2103 /*
2104 If new MTU is less than route PMTU, this new MTU will be the
2105 lowest MTU in the path, update the route PMTU to reflect PMTU
2106 decreases; if new MTU is greater than route PMTU, and the
2107 old MTU is the lowest MTU in the path, update the route PMTU
2108 to reflect the increase. In this case if the other nodes' MTU
2109 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2110 PMTU discouvery.
2111 */
2112 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2113 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2114 (dst_mtu(&rt->dst) >= arg->mtu ||
2115 (dst_mtu(&rt->dst) < arg->mtu &&
2116 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2117 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2118 }
1da177e4
LT
2119 return 0;
2120}
2121
2122void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2123{
c71099ac
TG
2124 struct rt6_mtu_change_arg arg = {
2125 .dev = dev,
2126 .mtu = mtu,
2127 };
1da177e4 2128
c346dca1 2129 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2130}
2131
ef7c79ed 2132static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2133 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2134 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2135 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2136 [RTA_PRIORITY] = { .type = NLA_U32 },
2137 [RTA_METRICS] = { .type = NLA_NESTED },
2138};
2139
2140static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2141 struct fib6_config *cfg)
1da177e4 2142{
86872cb5
TG
2143 struct rtmsg *rtm;
2144 struct nlattr *tb[RTA_MAX+1];
2145 int err;
1da177e4 2146
86872cb5
TG
2147 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2148 if (err < 0)
2149 goto errout;
1da177e4 2150
86872cb5
TG
2151 err = -EINVAL;
2152 rtm = nlmsg_data(nlh);
2153 memset(cfg, 0, sizeof(*cfg));
2154
2155 cfg->fc_table = rtm->rtm_table;
2156 cfg->fc_dst_len = rtm->rtm_dst_len;
2157 cfg->fc_src_len = rtm->rtm_src_len;
2158 cfg->fc_flags = RTF_UP;
2159 cfg->fc_protocol = rtm->rtm_protocol;
2160
2161 if (rtm->rtm_type == RTN_UNREACHABLE)
2162 cfg->fc_flags |= RTF_REJECT;
2163
ab79ad14
2164 if (rtm->rtm_type == RTN_LOCAL)
2165 cfg->fc_flags |= RTF_LOCAL;
2166
86872cb5
TG
2167 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2168 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2169 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2170
2171 if (tb[RTA_GATEWAY]) {
2172 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2173 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2174 }
86872cb5
TG
2175
2176 if (tb[RTA_DST]) {
2177 int plen = (rtm->rtm_dst_len + 7) >> 3;
2178
2179 if (nla_len(tb[RTA_DST]) < plen)
2180 goto errout;
2181
2182 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2183 }
86872cb5
TG
2184
2185 if (tb[RTA_SRC]) {
2186 int plen = (rtm->rtm_src_len + 7) >> 3;
2187
2188 if (nla_len(tb[RTA_SRC]) < plen)
2189 goto errout;
2190
2191 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2192 }
86872cb5
TG
2193
2194 if (tb[RTA_OIF])
2195 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2196
2197 if (tb[RTA_PRIORITY])
2198 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2199
2200 if (tb[RTA_METRICS]) {
2201 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2202 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2203 }
86872cb5
TG
2204
2205 if (tb[RTA_TABLE])
2206 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2207
2208 err = 0;
2209errout:
2210 return err;
1da177e4
LT
2211}
2212
c127ea2c 2213static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2214{
86872cb5
TG
2215 struct fib6_config cfg;
2216 int err;
1da177e4 2217
86872cb5
TG
2218 err = rtm_to_fib6_config(skb, nlh, &cfg);
2219 if (err < 0)
2220 return err;
2221
2222 return ip6_route_del(&cfg);
1da177e4
LT
2223}
2224
c127ea2c 2225static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2226{
86872cb5
TG
2227 struct fib6_config cfg;
2228 int err;
1da177e4 2229
86872cb5
TG
2230 err = rtm_to_fib6_config(skb, nlh, &cfg);
2231 if (err < 0)
2232 return err;
2233
2234 return ip6_route_add(&cfg);
1da177e4
LT
2235}
2236
339bf98f
TG
2237static inline size_t rt6_nlmsg_size(void)
2238{
2239 return NLMSG_ALIGN(sizeof(struct rtmsg))
2240 + nla_total_size(16) /* RTA_SRC */
2241 + nla_total_size(16) /* RTA_DST */
2242 + nla_total_size(16) /* RTA_GATEWAY */
2243 + nla_total_size(16) /* RTA_PREFSRC */
2244 + nla_total_size(4) /* RTA_TABLE */
2245 + nla_total_size(4) /* RTA_IIF */
2246 + nla_total_size(4) /* RTA_OIF */
2247 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2248 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2249 + nla_total_size(sizeof(struct rta_cacheinfo));
2250}
2251
191cd582
BH
2252static int rt6_fill_node(struct net *net,
2253 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2254 struct in6_addr *dst, struct in6_addr *src,
2255 int iif, int type, u32 pid, u32 seq,
7bc570c8 2256 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2257{
2258 struct rtmsg *rtm;
2d7202bf 2259 struct nlmsghdr *nlh;
e3703b3d 2260 long expires;
9e762a4a 2261 u32 table;
1da177e4
LT
2262
2263 if (prefix) { /* user wants prefix routes only */
2264 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2265 /* success since this is not a prefix route */
2266 return 1;
2267 }
2268 }
2269
2d7202bf
TG
2270 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2271 if (nlh == NULL)
26932566 2272 return -EMSGSIZE;
2d7202bf
TG
2273
2274 rtm = nlmsg_data(nlh);
1da177e4
LT
2275 rtm->rtm_family = AF_INET6;
2276 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2277 rtm->rtm_src_len = rt->rt6i_src.plen;
2278 rtm->rtm_tos = 0;
c71099ac 2279 if (rt->rt6i_table)
9e762a4a 2280 table = rt->rt6i_table->tb6_id;
c71099ac 2281 else
9e762a4a
PM
2282 table = RT6_TABLE_UNSPEC;
2283 rtm->rtm_table = table;
2d7202bf 2284 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2285 if (rt->rt6i_flags&RTF_REJECT)
2286 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2287 else if (rt->rt6i_flags&RTF_LOCAL)
2288 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2289 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2290 rtm->rtm_type = RTN_LOCAL;
2291 else
2292 rtm->rtm_type = RTN_UNICAST;
2293 rtm->rtm_flags = 0;
2294 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2295 rtm->rtm_protocol = rt->rt6i_protocol;
2296 if (rt->rt6i_flags&RTF_DYNAMIC)
2297 rtm->rtm_protocol = RTPROT_REDIRECT;
2298 else if (rt->rt6i_flags & RTF_ADDRCONF)
2299 rtm->rtm_protocol = RTPROT_KERNEL;
2300 else if (rt->rt6i_flags&RTF_DEFAULT)
2301 rtm->rtm_protocol = RTPROT_RA;
2302
2303 if (rt->rt6i_flags&RTF_CACHE)
2304 rtm->rtm_flags |= RTM_F_CLONED;
2305
2306 if (dst) {
2d7202bf 2307 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2308 rtm->rtm_dst_len = 128;
1da177e4 2309 } else if (rtm->rtm_dst_len)
2d7202bf 2310 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2311#ifdef CONFIG_IPV6_SUBTREES
2312 if (src) {
2d7202bf 2313 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2314 rtm->rtm_src_len = 128;
1da177e4 2315 } else if (rtm->rtm_src_len)
2d7202bf 2316 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2317#endif
7bc570c8
YH
2318 if (iif) {
2319#ifdef CONFIG_IPV6_MROUTE
2320 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2321 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2322 if (err <= 0) {
2323 if (!nowait) {
2324 if (err == 0)
2325 return 0;
2326 goto nla_put_failure;
2327 } else {
2328 if (err == -EMSGSIZE)
2329 goto nla_put_failure;
2330 }
2331 }
2332 } else
2333#endif
2334 NLA_PUT_U32(skb, RTA_IIF, iif);
2335 } else if (dst) {
d8d1f30b 2336 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
1da177e4 2337 struct in6_addr saddr_buf;
191cd582 2338 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
7cbca67c 2339 dst, 0, &saddr_buf) == 0)
2d7202bf 2340 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2341 }
2d7202bf 2342
defb3519 2343 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2344 goto nla_put_failure;
2345
d8d1f30b
CG
2346 if (rt->dst.neighbour)
2347 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2d7202bf 2348
d8d1f30b 2349 if (rt->dst.dev)
2d7202bf
TG
2350 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2351
2352 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2353
36e3deae
YH
2354 if (!(rt->rt6i_flags & RTF_EXPIRES))
2355 expires = 0;
2356 else if (rt->rt6i_expires - jiffies < INT_MAX)
2357 expires = rt->rt6i_expires - jiffies;
2358 else
2359 expires = INT_MAX;
69cdf8f9 2360
d8d1f30b
CG
2361 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2362 expires, rt->dst.error) < 0)
e3703b3d 2363 goto nla_put_failure;
2d7202bf
TG
2364
2365 return nlmsg_end(skb, nlh);
2366
2367nla_put_failure:
26932566
PM
2368 nlmsg_cancel(skb, nlh);
2369 return -EMSGSIZE;
1da177e4
LT
2370}
2371
1b43af54 2372int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2373{
2374 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2375 int prefix;
2376
2d7202bf
TG
2377 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2378 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2379 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2380 } else
2381 prefix = 0;
2382
191cd582
BH
2383 return rt6_fill_node(arg->net,
2384 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2385 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2386 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2387}
2388
c127ea2c 2389static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2390{
3b1e0a65 2391 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2392 struct nlattr *tb[RTA_MAX+1];
2393 struct rt6_info *rt;
1da177e4 2394 struct sk_buff *skb;
ab364a6f 2395 struct rtmsg *rtm;
1da177e4 2396 struct flowi fl;
ab364a6f 2397 int err, iif = 0;
1da177e4 2398
ab364a6f
TG
2399 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2400 if (err < 0)
2401 goto errout;
1da177e4 2402
ab364a6f 2403 err = -EINVAL;
1da177e4 2404 memset(&fl, 0, sizeof(fl));
1da177e4 2405
ab364a6f
TG
2406 if (tb[RTA_SRC]) {
2407 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2408 goto errout;
2409
2410 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2411 }
2412
2413 if (tb[RTA_DST]) {
2414 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2415 goto errout;
2416
2417 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2418 }
2419
2420 if (tb[RTA_IIF])
2421 iif = nla_get_u32(tb[RTA_IIF]);
2422
2423 if (tb[RTA_OIF])
2424 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2425
2426 if (iif) {
2427 struct net_device *dev;
5578689a 2428 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2429 if (!dev) {
2430 err = -ENODEV;
ab364a6f 2431 goto errout;
1da177e4
LT
2432 }
2433 }
2434
ab364a6f
TG
2435 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2436 if (skb == NULL) {
2437 err = -ENOBUFS;
2438 goto errout;
2439 }
1da177e4 2440
ab364a6f
TG
2441 /* Reserve room for dummy headers, this skb can pass
2442 through good chunk of routing engine.
2443 */
459a98ed 2444 skb_reset_mac_header(skb);
ab364a6f 2445 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2446
8a3edd80 2447 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
d8d1f30b 2448 skb_dst_set(skb, &rt->dst);
1da177e4 2449
191cd582 2450 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2451 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2452 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2453 if (err < 0) {
ab364a6f
TG
2454 kfree_skb(skb);
2455 goto errout;
1da177e4
LT
2456 }
2457
5578689a 2458 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2459errout:
1da177e4 2460 return err;
1da177e4
LT
2461}
2462
86872cb5 2463void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2464{
2465 struct sk_buff *skb;
5578689a 2466 struct net *net = info->nl_net;
528c4ceb
DL
2467 u32 seq;
2468 int err;
2469
2470 err = -ENOBUFS;
2471 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2472
339bf98f 2473 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2474 if (skb == NULL)
2475 goto errout;
2476
191cd582 2477 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2478 event, info->pid, seq, 0, 0, 0);
26932566
PM
2479 if (err < 0) {
2480 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2481 WARN_ON(err == -EMSGSIZE);
2482 kfree_skb(skb);
2483 goto errout;
2484 }
1ce85fe4
PNA
2485 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2486 info->nlh, gfp_any());
2487 return;
21713ebc
TG
2488errout:
2489 if (err < 0)
5578689a 2490 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2491}
2492
8ed67789
DL
2493static int ip6_route_dev_notify(struct notifier_block *this,
2494 unsigned long event, void *data)
2495{
2496 struct net_device *dev = (struct net_device *)data;
c346dca1 2497 struct net *net = dev_net(dev);
8ed67789
DL
2498
2499 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2500 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2501 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2502#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2503 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2504 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2505 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2506 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2507#endif
2508 }
2509
2510 return NOTIFY_OK;
2511}
2512
1da177e4
LT
2513/*
2514 * /proc
2515 */
2516
2517#ifdef CONFIG_PROC_FS
2518
1da177e4
LT
/*
 * Buffer bookkeeping for a /proc read.
 * NOTE(review): nothing in the code visible here references this
 * structure; it may be a leftover from a pre-seq_file implementation.
 */
struct rt6_proc_arg
{
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2527
2528static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2529{
33120b30 2530 struct seq_file *m = p_arg;
1da177e4 2531
4b7a4274 2532 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2533
2534#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2535 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2536#else
33120b30 2537 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2538#endif
2539
2540 if (rt->rt6i_nexthop) {
4b7a4274 2541 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
1da177e4 2542 } else {
33120b30 2543 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2544 }
33120b30 2545 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2546 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2547 rt->dst.__use, rt->rt6i_flags,
33120b30 2548 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2549 return 0;
2550}
2551
33120b30 2552static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2553{
f3db4851
DL
2554 struct net *net = (struct net *)m->private;
2555 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2556 return 0;
2557}
1da177e4 2558
33120b30
AD
2559static int ipv6_route_open(struct inode *inode, struct file *file)
2560{
de05c557 2561 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2562}
2563
33120b30
AD
2564static const struct file_operations ipv6_route_proc_fops = {
2565 .owner = THIS_MODULE,
2566 .open = ipv6_route_open,
2567 .read = seq_read,
2568 .llseek = seq_lseek,
b6fcbdb4 2569 .release = single_release_net,
33120b30
AD
2570};
2571
1da177e4
LT
2572static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2573{
69ddb805 2574 struct net *net = (struct net *)seq->private;
1da177e4 2575 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2576 net->ipv6.rt6_stats->fib_nodes,
2577 net->ipv6.rt6_stats->fib_route_nodes,
2578 net->ipv6.rt6_stats->fib_rt_alloc,
2579 net->ipv6.rt6_stats->fib_rt_entries,
2580 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2581 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2582 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2583
2584 return 0;
2585}
2586
2587static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2588{
de05c557 2589 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2590}
2591
9a32144e 2592static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2593 .owner = THIS_MODULE,
2594 .open = rt6_stats_seq_open,
2595 .read = seq_read,
2596 .llseek = seq_lseek,
b6fcbdb4 2597 .release = single_release_net,
1da177e4
LT
2598};
2599#endif /* CONFIG_PROC_FS */
2600
2601#ifdef CONFIG_SYSCTL
2602
1da177e4 2603static
8d65af78 2604int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2605 void __user *buffer, size_t *lenp, loff_t *ppos)
2606{
5b7c931d
DL
2607 struct net *net = current->nsproxy->net_ns;
2608 int delay = net->ipv6.sysctl.flush_delay;
1da177e4 2609 if (write) {
8d65af78 2610 proc_dointvec(ctl, write, buffer, lenp, ppos);
5b7c931d 2611 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2612 return 0;
2613 } else
2614 return -EINVAL;
2615}
2616
760f2d01 2617ctl_table ipv6_route_table_template[] = {
1ab1457c 2618 {
1da177e4 2619 .procname = "flush",
4990509f 2620 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2621 .maxlen = sizeof(int),
89c8b3a1 2622 .mode = 0200,
6d9f239a 2623 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2624 },
2625 {
1da177e4 2626 .procname = "gc_thresh",
9a7ec3a9 2627 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2628 .maxlen = sizeof(int),
2629 .mode = 0644,
6d9f239a 2630 .proc_handler = proc_dointvec,
1da177e4
LT
2631 },
2632 {
1da177e4 2633 .procname = "max_size",
4990509f 2634 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2635 .maxlen = sizeof(int),
2636 .mode = 0644,
6d9f239a 2637 .proc_handler = proc_dointvec,
1da177e4
LT
2638 },
2639 {
1da177e4 2640 .procname = "gc_min_interval",
4990509f 2641 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2642 .maxlen = sizeof(int),
2643 .mode = 0644,
6d9f239a 2644 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2645 },
2646 {
1da177e4 2647 .procname = "gc_timeout",
4990509f 2648 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2649 .maxlen = sizeof(int),
2650 .mode = 0644,
6d9f239a 2651 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2652 },
2653 {
1da177e4 2654 .procname = "gc_interval",
4990509f 2655 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2656 .maxlen = sizeof(int),
2657 .mode = 0644,
6d9f239a 2658 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2659 },
2660 {
1da177e4 2661 .procname = "gc_elasticity",
4990509f 2662 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2663 .maxlen = sizeof(int),
2664 .mode = 0644,
f3d3f616 2665 .proc_handler = proc_dointvec,
1da177e4
LT
2666 },
2667 {
1da177e4 2668 .procname = "mtu_expires",
4990509f 2669 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2670 .maxlen = sizeof(int),
2671 .mode = 0644,
6d9f239a 2672 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2673 },
2674 {
1da177e4 2675 .procname = "min_adv_mss",
4990509f 2676 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2677 .maxlen = sizeof(int),
2678 .mode = 0644,
f3d3f616 2679 .proc_handler = proc_dointvec,
1da177e4
LT
2680 },
2681 {
1da177e4 2682 .procname = "gc_min_interval_ms",
4990509f 2683 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2684 .maxlen = sizeof(int),
2685 .mode = 0644,
6d9f239a 2686 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2687 },
f8572d8f 2688 { }
1da177e4
LT
2689};
2690
2c8c1e72 2691struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2692{
2693 struct ctl_table *table;
2694
2695 table = kmemdup(ipv6_route_table_template,
2696 sizeof(ipv6_route_table_template),
2697 GFP_KERNEL);
5ee09105
YH
2698
2699 if (table) {
2700 table[0].data = &net->ipv6.sysctl.flush_delay;
86393e52 2701 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2702 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2703 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2704 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2705 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2706 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2707 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2708 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2709 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2710 }
2711
760f2d01
DL
2712 return table;
2713}
1da177e4
LT
2714#endif
2715
2c8c1e72 2716static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2717{
633d424b 2718 int ret = -ENOMEM;
8ed67789 2719
86393e52
AD
2720 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2721 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2722
fc66f95c
ED
2723 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2724 goto out_ip6_dst_ops;
2725
8ed67789
DL
2726 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2727 sizeof(*net->ipv6.ip6_null_entry),
2728 GFP_KERNEL);
2729 if (!net->ipv6.ip6_null_entry)
fc66f95c 2730 goto out_ip6_dst_entries;
d8d1f30b 2731 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2732 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2733 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2734 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2735 ip6_template_metrics, true);
8ed67789
DL
2736
2737#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2738 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2739 sizeof(*net->ipv6.ip6_prohibit_entry),
2740 GFP_KERNEL);
68fffc67
PZ
2741 if (!net->ipv6.ip6_prohibit_entry)
2742 goto out_ip6_null_entry;
d8d1f30b 2743 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2744 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2745 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2746 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2747 ip6_template_metrics, true);
8ed67789
DL
2748
2749 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2750 sizeof(*net->ipv6.ip6_blk_hole_entry),
2751 GFP_KERNEL);
68fffc67
PZ
2752 if (!net->ipv6.ip6_blk_hole_entry)
2753 goto out_ip6_prohibit_entry;
d8d1f30b 2754 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2755 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2756 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2757 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2758 ip6_template_metrics, true);
8ed67789
DL
2759#endif
2760
b339a47c
PZ
2761 net->ipv6.sysctl.flush_delay = 0;
2762 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2763 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2764 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2765 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2766 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2767 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2768 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2769
cdb18761
DL
2770#ifdef CONFIG_PROC_FS
2771 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2772 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2773#endif
6891a346
BT
2774 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2775
8ed67789
DL
2776 ret = 0;
2777out:
2778 return ret;
f2fc6a54 2779
68fffc67
PZ
2780#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2781out_ip6_prohibit_entry:
2782 kfree(net->ipv6.ip6_prohibit_entry);
2783out_ip6_null_entry:
2784 kfree(net->ipv6.ip6_null_entry);
2785#endif
fc66f95c
ED
2786out_ip6_dst_entries:
2787 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2788out_ip6_dst_ops:
f2fc6a54 2789 goto out;
cdb18761
DL
2790}
2791
2c8c1e72 2792static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2793{
2794#ifdef CONFIG_PROC_FS
2795 proc_net_remove(net, "ipv6_route");
2796 proc_net_remove(net, "rt6_stats");
2797#endif
8ed67789
DL
2798 kfree(net->ipv6.ip6_null_entry);
2799#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2800 kfree(net->ipv6.ip6_prohibit_entry);
2801 kfree(net->ipv6.ip6_blk_hole_entry);
2802#endif
41bb78b4 2803 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2804}
2805
2806static struct pernet_operations ip6_route_net_ops = {
2807 .init = ip6_route_net_init,
2808 .exit = ip6_route_net_exit,
2809};
2810
8ed67789
DL
2811static struct notifier_block ip6_route_dev_notifier = {
2812 .notifier_call = ip6_route_dev_notify,
2813 .priority = 0,
2814};
2815
433d49c3 2816int __init ip6_route_init(void)
1da177e4 2817{
433d49c3
DL
2818 int ret;
2819
9a7ec3a9
DL
2820 ret = -ENOMEM;
2821 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2822 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2823 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2824 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2825 goto out;
14e50e57 2826
fc66f95c 2827 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2828 if (ret)
bdb3289f 2829 goto out_kmem_cache;
bdb3289f 2830
fc66f95c
ED
2831 ret = register_pernet_subsys(&ip6_route_net_ops);
2832 if (ret)
2833 goto out_dst_entries;
2834
5dc121e9
AE
2835 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2836
8ed67789
DL
2837 /* Registering of the loopback is done before this portion of code,
2838 * the loopback reference in rt6_info will not be taken, do it
2839 * manually for init_net */
d8d1f30b 2840 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2841 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2842 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2843 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2844 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2845 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2846 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2847 #endif
433d49c3
DL
2848 ret = fib6_init();
2849 if (ret)
8ed67789 2850 goto out_register_subsys;
433d49c3 2851
433d49c3
DL
2852 ret = xfrm6_init();
2853 if (ret)
cdb18761 2854 goto out_fib6_init;
c35b7e72 2855
433d49c3
DL
2856 ret = fib6_rules_init();
2857 if (ret)
2858 goto xfrm6_init;
7e5449c2 2859
433d49c3
DL
2860 ret = -ENOBUFS;
2861 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2862 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2863 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2864 goto fib6_rules_init;
c127ea2c 2865
8ed67789 2866 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2867 if (ret)
2868 goto fib6_rules_init;
8ed67789 2869
433d49c3
DL
2870out:
2871 return ret;
2872
2873fib6_rules_init:
433d49c3
DL
2874 fib6_rules_cleanup();
2875xfrm6_init:
433d49c3 2876 xfrm6_fini();
433d49c3 2877out_fib6_init:
433d49c3 2878 fib6_gc_cleanup();
8ed67789
DL
2879out_register_subsys:
2880 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
2881out_dst_entries:
2882 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 2883out_kmem_cache:
f2fc6a54 2884 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2885 goto out;
1da177e4
LT
2886}
2887
2888void ip6_route_cleanup(void)
2889{
8ed67789 2890 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2891 fib6_rules_cleanup();
1da177e4 2892 xfrm6_fini();
1da177e4 2893 fib6_gc_cleanup();
8ed67789 2894 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 2895 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 2896 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2897}