bnx2x: Clear MDIO access warning during first driver load
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
5a0e3ad6 43#include <linux/slab.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG() and
 * RT6_TRACE() expand to printk() calls; below that they expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
21efcfa0
ED
75static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
76 const struct in6_addr *dest);
1da177e4 77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 78static unsigned int ip6_default_advmss(const struct dst_entry *dst);
d33e4553 79static unsigned int ip6_default_mtu(const struct dst_entry *dst);
1da177e4
LT
80static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81static void ip6_dst_destroy(struct dst_entry *);
82static void ip6_dst_ifdown(struct dst_entry *,
83 struct net_device *dev, int how);
569d3645 84static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
85
86static int ip6_pkt_discard(struct sk_buff *skb);
87static int ip6_pkt_discard_out(struct sk_buff *skb);
88static void ip6_link_failure(struct sk_buff *skb);
89static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90
70ceb4f5 91#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 92static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
93 const struct in6_addr *prefix, int prefixlen,
94 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5 95 unsigned pref);
efa2cea0 96static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
97 const struct in6_addr *prefix, int prefixlen,
98 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
99#endif
100
06582540
DM
101static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
102{
103 struct rt6_info *rt = (struct rt6_info *) dst;
104 struct inet_peer *peer;
105 u32 *p = NULL;
106
107 if (!rt->rt6i_peer)
108 rt6_bind_peer(rt, 1);
109
110 peer = rt->rt6i_peer;
111 if (peer) {
112 u32 *old_p = __DST_METRICS_PTR(old);
113 unsigned long prev, new;
114
115 p = peer->metrics;
116 if (inet_metrics_new(peer))
117 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
118
119 new = (unsigned long) p;
120 prev = cmpxchg(&dst->_metrics, old, new);
121
122 if (prev != old) {
123 p = __DST_METRICS_PTR(prev);
124 if (prev & DST_METRICS_READ_ONLY)
125 p = NULL;
126 }
127 }
128 return p;
129}
130
d3aaeb38
DM
131static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
132{
133 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
134}
135
9a7ec3a9 136static struct dst_ops ip6_dst_ops_template = {
1da177e4 137 .family = AF_INET6,
09640e63 138 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
139 .gc = ip6_dst_gc,
140 .gc_thresh = 1024,
141 .check = ip6_dst_check,
0dbaee3b 142 .default_advmss = ip6_default_advmss,
d33e4553 143 .default_mtu = ip6_default_mtu,
06582540 144 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
145 .destroy = ip6_dst_destroy,
146 .ifdown = ip6_dst_ifdown,
147 .negative_advice = ip6_negative_advice,
148 .link_failure = ip6_link_failure,
149 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 150 .local_out = __ip6_local_out,
d3aaeb38 151 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
152};
153
ec831ea7
RD
/* default_mtu hook for blackhole routes: always reports 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	const unsigned int no_mtu = 0;

	return no_mtu;
}
158
14e50e57
DM
159static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
160{
161}
162
0972ddb2
HB
163static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
164 unsigned long old)
165{
166 return NULL;
167}
168
14e50e57
DM
169static struct dst_ops ip6_dst_blackhole_ops = {
170 .family = AF_INET6,
09640e63 171 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
172 .destroy = ip6_dst_destroy,
173 .check = ip6_dst_check,
ec831ea7 174 .default_mtu = ip6_blackhole_default_mtu,
214f45c9 175 .default_advmss = ip6_default_advmss,
14e50e57 176 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 177 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 178 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
179};
180
62fa8a84
DM
181static const u32 ip6_template_metrics[RTAX_MAX] = {
182 [RTAX_HOPLIMIT - 1] = 255,
183};
184
bdb3289f 185static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
186 .dst = {
187 .__refcnt = ATOMIC_INIT(1),
188 .__use = 1,
189 .obsolete = -1,
190 .error = -ENETUNREACH,
d8d1f30b
CG
191 .input = ip6_pkt_discard,
192 .output = ip6_pkt_discard_out,
1da177e4
LT
193 },
194 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 195 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
196 .rt6i_metric = ~(u32) 0,
197 .rt6i_ref = ATOMIC_INIT(1),
198};
199
101367c2
TG
200#ifdef CONFIG_IPV6_MULTIPLE_TABLES
201
6723ab54
DM
202static int ip6_pkt_prohibit(struct sk_buff *skb);
203static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 204
280a34c8 205static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
206 .dst = {
207 .__refcnt = ATOMIC_INIT(1),
208 .__use = 1,
209 .obsolete = -1,
210 .error = -EACCES,
d8d1f30b
CG
211 .input = ip6_pkt_prohibit,
212 .output = ip6_pkt_prohibit_out,
101367c2
TG
213 },
214 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 215 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
216 .rt6i_metric = ~(u32) 0,
217 .rt6i_ref = ATOMIC_INIT(1),
218};
219
bdb3289f 220static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
221 .dst = {
222 .__refcnt = ATOMIC_INIT(1),
223 .__use = 1,
224 .obsolete = -1,
225 .error = -EINVAL,
d8d1f30b
CG
226 .input = dst_discard,
227 .output = dst_discard,
101367c2
TG
228 },
229 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 230 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
231 .rt6i_metric = ~(u32) 0,
232 .rt6i_ref = ATOMIC_INIT(1),
233};
234
235#endif
236
1da177e4 237/* allocate dst with ip6_dst_ops */
5c1e6aa3 238static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
957c665f
DM
239 struct net_device *dev,
240 int flags)
1da177e4 241{
957c665f 242 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
cf911662
DM
243
244 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
245
246 return rt;
1da177e4
LT
247}
248
249static void ip6_dst_destroy(struct dst_entry *dst)
250{
251 struct rt6_info *rt = (struct rt6_info *)dst;
252 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 253 struct inet_peer *peer = rt->rt6i_peer;
1da177e4
LT
254
255 if (idev != NULL) {
256 rt->rt6i_idev = NULL;
257 in6_dev_put(idev);
1ab1457c 258 }
b3419363 259 if (peer) {
b3419363
DM
260 rt->rt6i_peer = NULL;
261 inet_putpeer(peer);
262 }
263}
264
6431cbc2
DM
265static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
266
267static u32 rt6_peer_genid(void)
268{
269 return atomic_read(&__rt6_peer_genid);
270}
271
b3419363
DM
272void rt6_bind_peer(struct rt6_info *rt, int create)
273{
274 struct inet_peer *peer;
275
b3419363
DM
276 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
277 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
278 inet_putpeer(peer);
6431cbc2
DM
279 else
280 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
281}
282
283static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
284 int how)
285{
286 struct rt6_info *rt = (struct rt6_info *)dst;
287 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 288 struct net_device *loopback_dev =
c346dca1 289 dev_net(dev)->loopback_dev;
1da177e4 290
5a3e55d6
DL
291 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
292 struct inet6_dev *loopback_idev =
293 in6_dev_get(loopback_dev);
1da177e4
LT
294 if (loopback_idev != NULL) {
295 rt->rt6i_idev = loopback_idev;
296 in6_dev_put(idev);
297 }
298 }
299}
300
301static __inline__ int rt6_check_expired(const struct rt6_info *rt)
302{
a02cec21
ED
303 return (rt->rt6i_flags & RTF_EXPIRES) &&
304 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
305}
306
b71d1d42 307static inline int rt6_need_strict(const struct in6_addr *daddr)
c71099ac 308{
a02cec21
ED
309 return ipv6_addr_type(daddr) &
310 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
311}
312
1da177e4 313/*
c71099ac 314 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
315 */
316
8ed67789
DL
317static inline struct rt6_info *rt6_device_match(struct net *net,
318 struct rt6_info *rt,
b71d1d42 319 const struct in6_addr *saddr,
1da177e4 320 int oif,
d420895e 321 int flags)
1da177e4
LT
322{
323 struct rt6_info *local = NULL;
324 struct rt6_info *sprt;
325
dd3abc4e
YH
326 if (!oif && ipv6_addr_any(saddr))
327 goto out;
328
d8d1f30b 329 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
330 struct net_device *dev = sprt->rt6i_dev;
331
332 if (oif) {
1da177e4
LT
333 if (dev->ifindex == oif)
334 return sprt;
335 if (dev->flags & IFF_LOOPBACK) {
336 if (sprt->rt6i_idev == NULL ||
337 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 338 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 339 continue;
1ab1457c 340 if (local && (!oif ||
1da177e4
LT
341 local->rt6i_idev->dev->ifindex == oif))
342 continue;
343 }
344 local = sprt;
345 }
dd3abc4e
YH
346 } else {
347 if (ipv6_chk_addr(net, saddr, dev,
348 flags & RT6_LOOKUP_F_IFACE))
349 return sprt;
1da177e4 350 }
dd3abc4e 351 }
1da177e4 352
dd3abc4e 353 if (oif) {
1da177e4
LT
354 if (local)
355 return local;
356
d420895e 357 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 358 return net->ipv6.ip6_null_entry;
1da177e4 359 }
dd3abc4e 360out:
1da177e4
LT
361 return rt;
362}
363
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the route's neighbour entry is not in a
 * NUD_VALID state and more than rtr_probe_interval has passed since the
 * entry was last updated, stamp the entry and send a neighbour
 * solicitation to the solicited-node multicast address of the neighbour.
 *
 * The original note on this code: Router Reachability Probe MUST be
 * rate-limited to no more than one per minute.
 *
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/* Re-check the state and the rate limit with the neighbour locked. */
	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
398
1da177e4 399/*
554cfb7e 400 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 401 */
b6f99a21 402static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
403{
404 struct net_device *dev = rt->rt6i_dev;
161980f4 405 if (!oif || dev->ifindex == oif)
554cfb7e 406 return 2;
161980f4
DM
407 if ((dev->flags & IFF_LOOPBACK) &&
408 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
409 return 1;
410 return 0;
554cfb7e 411}
1da177e4 412
b6f99a21 413static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 414{
69cce1d1 415 struct neighbour *neigh = dst_get_neighbour(&rt->dst);
398bcbeb 416 int m;
4d0c5911
YH
417 if (rt->rt6i_flags & RTF_NONEXTHOP ||
418 !(rt->rt6i_flags & RTF_GATEWAY))
419 m = 1;
420 else if (neigh) {
554cfb7e
YH
421 read_lock_bh(&neigh->lock);
422 if (neigh->nud_state & NUD_VALID)
4d0c5911 423 m = 2;
398bcbeb
YH
424#ifdef CONFIG_IPV6_ROUTER_PREF
425 else if (neigh->nud_state & NUD_FAILED)
426 m = 0;
427#endif
428 else
ea73ee23 429 m = 1;
554cfb7e 430 read_unlock_bh(&neigh->lock);
398bcbeb
YH
431 } else
432 m = 0;
554cfb7e 433 return m;
1da177e4
LT
434}
435
554cfb7e
YH
436static int rt6_score_route(struct rt6_info *rt, int oif,
437 int strict)
1da177e4 438{
4d0c5911 439 int m, n;
1ab1457c 440
4d0c5911 441 m = rt6_check_dev(rt, oif);
77d16f45 442 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 443 return -1;
ebacaaa0
YH
444#ifdef CONFIG_IPV6_ROUTER_PREF
445 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
446#endif
4d0c5911 447 n = rt6_check_neigh(rt);
557e92ef 448 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
449 return -1;
450 return m;
451}
452
f11e6659
DM
453static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
454 int *mpri, struct rt6_info *match)
554cfb7e 455{
f11e6659
DM
456 int m;
457
458 if (rt6_check_expired(rt))
459 goto out;
460
461 m = rt6_score_route(rt, oif, strict);
462 if (m < 0)
463 goto out;
464
465 if (m > *mpri) {
466 if (strict & RT6_LOOKUP_F_REACHABLE)
467 rt6_probe(match);
468 *mpri = m;
469 match = rt;
470 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
471 rt6_probe(rt);
472 }
473
474out:
475 return match;
476}
477
478static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
479 struct rt6_info *rr_head,
480 u32 metric, int oif, int strict)
481{
482 struct rt6_info *rt, *match;
554cfb7e 483 int mpri = -1;
1da177e4 484
f11e6659
DM
485 match = NULL;
486 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 487 rt = rt->dst.rt6_next)
f11e6659
DM
488 match = find_match(rt, oif, strict, &mpri, match);
489 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 490 rt = rt->dst.rt6_next)
f11e6659 491 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 492
f11e6659
DM
493 return match;
494}
1da177e4 495
f11e6659
DM
496static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
497{
498 struct rt6_info *match, *rt0;
8ed67789 499 struct net *net;
1da177e4 500
f11e6659 501 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 502 __func__, fn->leaf, oif);
554cfb7e 503
f11e6659
DM
504 rt0 = fn->rr_ptr;
505 if (!rt0)
506 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 507
f11e6659 508 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 509
554cfb7e 510 if (!match &&
f11e6659 511 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 512 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 513
554cfb7e 514 /* no entries matched; do round-robin */
f11e6659
DM
515 if (!next || next->rt6i_metric != rt0->rt6i_metric)
516 next = fn->leaf;
517
518 if (next != rt0)
519 fn->rr_ptr = next;
1da177e4 520 }
1da177e4 521
f11e6659 522 RT6_TRACE("%s() => %p\n",
0dc47877 523 __func__, match);
1da177e4 524
c346dca1 525 net = dev_net(rt0->rt6i_dev);
a02cec21 526 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
527}
528
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received route information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Returns -EINVAL for a short option, an inconsistent length/prefix_len
 * pair or an invalid preference, otherwise 0.  A zero lifetime deletes an
 * existing route; a non-zero lifetime adds the route if missing, or
 * refreshes the preference bits of an existing one.  The expiry flag and
 * time are then set from the lifetime.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;
	int min_length;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;

	if (rinfo->prefix_len > 64)
		min_length = 2;
	else if (rinfo->prefix_len > 0)
		min_length = 1;
	else
		min_length = 0;

	if (rinfo->length < min_length)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length != 3) {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	} else {
		prefix = (struct in6_addr *)rinfo->prefix;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
602
/*
 * BACKTRACK - climb the fib6 tree after a lookup produced the null entry.
 *
 * Expects the locals `rt` and `fn` and the labels `out` and `restart` in
 * the enclosing function.  While `rt` is the null entry it moves `fn` to
 * its parent, or into the parent's subtree via fib6_lookup() on @saddr
 * when there is one that is not `fn` itself.  It jumps to `restart` at
 * the first node flagged RTN_RTINFO and to `out` at the tree root.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (!FIB6_SUBTREE(pn) || FIB6_SUBTREE(pn) == fn) \
				fn = pn; \
			else \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
c71099ac 620
8ed67789
DL
621static struct rt6_info *ip6_pol_route_lookup(struct net *net,
622 struct fib6_table *table,
4c9483b2 623 struct flowi6 *fl6, int flags)
1da177e4
LT
624{
625 struct fib6_node *fn;
626 struct rt6_info *rt;
627
c71099ac 628 read_lock_bh(&table->tb6_lock);
4c9483b2 629 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
630restart:
631 rt = fn->leaf;
4c9483b2
DM
632 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
633 BACKTRACK(net, &fl6->saddr);
c71099ac 634out:
d8d1f30b 635 dst_use(&rt->dst, jiffies);
c71099ac 636 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
637 return rt;
638
639}
640
9acd9f3a
YH
641struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
642 const struct in6_addr *saddr, int oif, int strict)
c71099ac 643{
4c9483b2
DM
644 struct flowi6 fl6 = {
645 .flowi6_oif = oif,
646 .daddr = *daddr,
c71099ac
TG
647 };
648 struct dst_entry *dst;
77d16f45 649 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 650
adaa70bb 651 if (saddr) {
4c9483b2 652 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
653 flags |= RT6_LOOKUP_F_HAS_SADDR;
654 }
655
4c9483b2 656 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
657 if (dst->error == 0)
658 return (struct rt6_info *) dst;
659
660 dst_release(dst);
661
1da177e4
LT
662 return NULL;
663}
664
7159039a
YH
665EXPORT_SYMBOL(rt6_lookup);
666
c71099ac 667/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
668 It takes new route entry, the addition fails by any reason the
669 route is freed. In any case, if caller does not hold it, it may
670 be destroyed.
671 */
672
86872cb5 673static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
674{
675 int err;
c71099ac 676 struct fib6_table *table;
1da177e4 677
c71099ac
TG
678 table = rt->rt6i_table;
679 write_lock_bh(&table->tb6_lock);
86872cb5 680 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 681 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
682
683 return err;
684}
685
40e22e8f
TG
686int ip6_ins_rt(struct rt6_info *rt)
687{
4d1169c1 688 struct nl_info info = {
c346dca1 689 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 690 };
528c4ceb 691 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
692}
693
21efcfa0
ED
694static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
695 const struct in6_addr *daddr,
b71d1d42 696 const struct in6_addr *saddr)
1da177e4 697{
1da177e4
LT
698 struct rt6_info *rt;
699
700 /*
701 * Clone the route.
702 */
703
21efcfa0 704 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
705
706 if (rt) {
14deae41
DM
707 struct neighbour *neigh;
708 int attempts = !in_softirq();
709
58c4fb86
YH
710 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
711 if (rt->rt6i_dst.plen != 128 &&
21efcfa0 712 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 713 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 714 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 715 }
1da177e4
LT
716
717 rt->rt6i_dst.plen = 128;
718 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 719 rt->dst.flags |= DST_HOST;
1da177e4
LT
720
721#ifdef CONFIG_IPV6_SUBTREES
722 if (rt->rt6i_src.plen && saddr) {
723 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
724 rt->rt6i_src.plen = 128;
725 }
726#endif
727
14deae41
DM
728 retry:
729 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
730 if (IS_ERR(neigh)) {
731 struct net *net = dev_net(rt->rt6i_dev);
732 int saved_rt_min_interval =
733 net->ipv6.sysctl.ip6_rt_gc_min_interval;
734 int saved_rt_elasticity =
735 net->ipv6.sysctl.ip6_rt_gc_elasticity;
736
737 if (attempts-- > 0) {
738 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
739 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
740
86393e52 741 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
742
743 net->ipv6.sysctl.ip6_rt_gc_elasticity =
744 saved_rt_elasticity;
745 net->ipv6.sysctl.ip6_rt_gc_min_interval =
746 saved_rt_min_interval;
747 goto retry;
748 }
749
750 if (net_ratelimit())
751 printk(KERN_WARNING
7e1b33e5 752 "ipv6: Neighbour table overflow.\n");
d8d1f30b 753 dst_free(&rt->dst);
14deae41
DM
754 return NULL;
755 }
69cce1d1 756 dst_set_neighbour(&rt->dst, neigh);
1da177e4 757
95a9a5ba 758 }
1da177e4 759
95a9a5ba
YH
760 return rt;
761}
1da177e4 762
21efcfa0
ED
763static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
764 const struct in6_addr *daddr)
299d9939 765{
21efcfa0
ED
766 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
767
299d9939 768 if (rt) {
299d9939
YH
769 rt->rt6i_dst.plen = 128;
770 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 771 rt->dst.flags |= DST_HOST;
69cce1d1 772 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour(&ort->dst)));
299d9939
YH
773 }
774 return rt;
775}
776
8ed67789 777static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 778 struct flowi6 *fl6, int flags)
1da177e4
LT
779{
780 struct fib6_node *fn;
519fbd87 781 struct rt6_info *rt, *nrt;
c71099ac 782 int strict = 0;
1da177e4 783 int attempts = 3;
519fbd87 784 int err;
53b7997f 785 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 786
77d16f45 787 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
788
789relookup:
c71099ac 790 read_lock_bh(&table->tb6_lock);
1da177e4 791
8238dd06 792restart_2:
4c9483b2 793 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
794
795restart:
4acad72d 796 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 797
4c9483b2 798 BACKTRACK(net, &fl6->saddr);
8ed67789 799 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 800 rt->rt6i_flags & RTF_CACHE)
1ddef044 801 goto out;
1da177e4 802
d8d1f30b 803 dst_hold(&rt->dst);
c71099ac 804 read_unlock_bh(&table->tb6_lock);
fb9de91e 805
69cce1d1 806 if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 807 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 808 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 809 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
810 else
811 goto out2;
e40cf353 812
d8d1f30b 813 dst_release(&rt->dst);
8ed67789 814 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 815
d8d1f30b 816 dst_hold(&rt->dst);
519fbd87 817 if (nrt) {
40e22e8f 818 err = ip6_ins_rt(nrt);
519fbd87 819 if (!err)
1da177e4 820 goto out2;
1da177e4 821 }
1da177e4 822
519fbd87
YH
823 if (--attempts <= 0)
824 goto out2;
825
826 /*
c71099ac 827 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
828 * released someone could insert this route. Relookup.
829 */
d8d1f30b 830 dst_release(&rt->dst);
519fbd87
YH
831 goto relookup;
832
833out:
8238dd06
YH
834 if (reachable) {
835 reachable = 0;
836 goto restart_2;
837 }
d8d1f30b 838 dst_hold(&rt->dst);
c71099ac 839 read_unlock_bh(&table->tb6_lock);
1da177e4 840out2:
d8d1f30b
CG
841 rt->dst.lastuse = jiffies;
842 rt->dst.__use++;
c71099ac
TG
843
844 return rt;
1da177e4
LT
845}
846
8ed67789 847static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 848 struct flowi6 *fl6, int flags)
4acad72d 849{
4c9483b2 850 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
851}
852
c71099ac
TG
853void ip6_route_input(struct sk_buff *skb)
854{
b71d1d42 855 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 856 struct net *net = dev_net(skb->dev);
adaa70bb 857 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
858 struct flowi6 fl6 = {
859 .flowi6_iif = skb->dev->ifindex,
860 .daddr = iph->daddr,
861 .saddr = iph->saddr,
862 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
863 .flowi6_mark = skb->mark,
864 .flowi6_proto = iph->nexthdr,
c71099ac 865 };
adaa70bb 866
1d6e55f1 867 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 868 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 869
4c9483b2 870 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
c71099ac
TG
871}
872
8ed67789 873static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 874 struct flowi6 *fl6, int flags)
1da177e4 875{
4c9483b2 876 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
877}
878
9c7a4f9c 879struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 880 struct flowi6 *fl6)
c71099ac
TG
881{
882 int flags = 0;
883
4c9483b2 884 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 885 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 886
4c9483b2 887 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 888 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
889 else if (sk)
890 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 891
4c9483b2 892 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
893}
894
7159039a 895EXPORT_SYMBOL(ip6_route_output);
1da177e4 896
2774c131 897struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 898{
5c1e6aa3 899 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
900 struct dst_entry *new = NULL;
901
5c1e6aa3 902 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 903 if (rt) {
cf911662
DM
904 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
905
d8d1f30b 906 new = &rt->dst;
14e50e57 907
14e50e57 908 new->__use = 1;
352e512c
HX
909 new->input = dst_discard;
910 new->output = dst_discard;
14e50e57 911
21efcfa0
ED
912 if (dst_metrics_read_only(&ort->dst))
913 new->_metrics = ort->dst._metrics;
914 else
915 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
916 rt->rt6i_idev = ort->rt6i_idev;
917 if (rt->rt6i_idev)
918 in6_dev_hold(rt->rt6i_idev);
919 rt->rt6i_expires = 0;
920
921 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
922 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
923 rt->rt6i_metric = 0;
924
925 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
926#ifdef CONFIG_IPV6_SUBTREES
927 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
928#endif
929
930 dst_free(new);
931 }
932
69ead7af
DM
933 dst_release(dst_orig);
934 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 935}
14e50e57 936
1da177e4
LT
937/*
938 * Destination cache support functions
939 */
940
941static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
942{
943 struct rt6_info *rt;
944
945 rt = (struct rt6_info *) dst;
946
6431cbc2
DM
947 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
948 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
949 if (!rt->rt6i_peer)
950 rt6_bind_peer(rt, 0);
951 rt->rt6i_peer_genid = rt6_peer_genid();
952 }
1da177e4 953 return dst;
6431cbc2 954 }
1da177e4
LT
955 return NULL;
956}
957
958static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
959{
960 struct rt6_info *rt = (struct rt6_info *) dst;
961
962 if (rt) {
54c1a859
YH
963 if (rt->rt6i_flags & RTF_CACHE) {
964 if (rt6_check_expired(rt)) {
965 ip6_del_rt(rt);
966 dst = NULL;
967 }
968 } else {
1da177e4 969 dst_release(dst);
54c1a859
YH
970 dst = NULL;
971 }
1da177e4 972 }
54c1a859 973 return dst;
1da177e4
LT
974}
975
976static void ip6_link_failure(struct sk_buff *skb)
977{
978 struct rt6_info *rt;
979
3ffe533c 980 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 981
adf30907 982 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
983 if (rt) {
984 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 985 dst_set_expires(&rt->dst, 0);
1da177e4
LT
986 rt->rt6i_flags |= RTF_EXPIRES;
987 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
988 rt->rt6i_node->fn_sernum = -1;
989 }
990}
991
992static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
993{
994 struct rt6_info *rt6 = (struct rt6_info*)dst;
995
996 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
997 rt6->rt6i_flags |= RTF_MODIFIED;
998 if (mtu < IPV6_MIN_MTU) {
defb3519 999 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1000 mtu = IPV6_MIN_MTU;
defb3519
DM
1001 features |= RTAX_FEATURE_ALLFRAG;
1002 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1003 }
defb3519 1004 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
1005 }
1006}
1007
0dbaee3b 1008static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1009{
0dbaee3b
DM
1010 struct net_device *dev = dst->dev;
1011 unsigned int mtu = dst_mtu(dst);
1012 struct net *net = dev_net(dev);
1013
1da177e4
LT
1014 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1015
5578689a
DL
1016 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1017 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1018
1019 /*
1ab1457c
YH
1020 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1021 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1022 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1023 * rely only on pmtu discovery"
1024 */
1025 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1026 mtu = IPV6_MAXPLEN;
1027 return mtu;
1028}
1029
d33e4553
DM
1030static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1031{
1032 unsigned int mtu = IPV6_MIN_MTU;
1033 struct inet6_dev *idev;
1034
1035 rcu_read_lock();
1036 idev = __in6_dev_get(dst->dev);
1037 if (idev)
1038 mtu = idev->cnf.mtu6;
1039 rcu_read_unlock();
1040
1041 return mtu;
1042}
1043
3b00944c
YH
1044static struct dst_entry *icmp6_dst_gc_list;
1045static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1046
3b00944c 1047struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1048 struct neighbour *neigh,
9acd9f3a 1049 const struct in6_addr *addr)
1da177e4
LT
1050{
1051 struct rt6_info *rt;
1052 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1053 struct net *net = dev_net(dev);
1da177e4
LT
1054
1055 if (unlikely(idev == NULL))
1056 return NULL;
1057
957c665f 1058 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1da177e4
LT
1059 if (unlikely(rt == NULL)) {
1060 in6_dev_put(idev);
1061 goto out;
1062 }
1063
1da177e4
LT
1064 if (neigh)
1065 neigh_hold(neigh);
14deae41 1066 else {
1da177e4 1067 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
1068 if (IS_ERR(neigh))
1069 neigh = NULL;
1070 }
1da177e4 1071
1da177e4 1072 rt->rt6i_idev = idev;
69cce1d1 1073 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1074 atomic_set(&rt->dst.__refcnt, 1);
21efcfa0 1075 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
defb3519 1076 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
d8d1f30b 1077 rt->dst.output = ip6_output;
1da177e4 1078
3b00944c 1079 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1080 rt->dst.next = icmp6_dst_gc_list;
1081 icmp6_dst_gc_list = &rt->dst;
3b00944c 1082 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1083
5578689a 1084 fib6_force_start_gc(net);
1da177e4
LT
1085
1086out:
d8d1f30b 1087 return &rt->dst;
1da177e4
LT
1088}
1089
3d0f24a7 1090int icmp6_dst_gc(void)
1da177e4 1091{
e9476e95 1092 struct dst_entry *dst, **pprev;
3d0f24a7 1093 int more = 0;
1da177e4 1094
3b00944c
YH
1095 spin_lock_bh(&icmp6_dst_lock);
1096 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1097
1da177e4
LT
1098 while ((dst = *pprev) != NULL) {
1099 if (!atomic_read(&dst->__refcnt)) {
1100 *pprev = dst->next;
1101 dst_free(dst);
1da177e4
LT
1102 } else {
1103 pprev = &dst->next;
3d0f24a7 1104 ++more;
1da177e4
LT
1105 }
1106 }
1107
3b00944c 1108 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1109
3d0f24a7 1110 return more;
1da177e4
LT
1111}
1112
1e493d19
DM
1113static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1114 void *arg)
1115{
1116 struct dst_entry *dst, **pprev;
1117
1118 spin_lock_bh(&icmp6_dst_lock);
1119 pprev = &icmp6_dst_gc_list;
1120 while ((dst = *pprev) != NULL) {
1121 struct rt6_info *rt = (struct rt6_info *) dst;
1122 if (func(rt, arg)) {
1123 *pprev = dst->next;
1124 dst_free(dst);
1125 } else {
1126 pprev = &dst->next;
1127 }
1128 }
1129 spin_unlock_bh(&icmp6_dst_lock);
1130}
1131
569d3645 1132static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1133{
1da177e4 1134 unsigned long now = jiffies;
86393e52 1135 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1136 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1137 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1138 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1139 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1140 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1141 int entries;
7019b78e 1142
fc66f95c 1143 entries = dst_entries_get_fast(ops);
7019b78e 1144 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1145 entries <= rt_max_size)
1da177e4
LT
1146 goto out;
1147
6891a346
BT
1148 net->ipv6.ip6_rt_gc_expire++;
1149 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1150 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1151 entries = dst_entries_get_slow(ops);
1152 if (entries < ops->gc_thresh)
7019b78e 1153 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1154out:
7019b78e 1155 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1156 return entries > rt_max_size;
1da177e4
LT
1157}
1158
1159/* Clean host part of a prefix. Not necessary in radix tree,
1160 but results in cleaner routing tables.
1161
1162 Remove it only when all the things will work!
1163 */
1164
6b75d090 1165int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1166{
5170ae82 1167 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1168 if (hoplimit == 0) {
6b75d090 1169 struct net_device *dev = dst->dev;
c68f24cc
ED
1170 struct inet6_dev *idev;
1171
1172 rcu_read_lock();
1173 idev = __in6_dev_get(dev);
1174 if (idev)
6b75d090 1175 hoplimit = idev->cnf.hop_limit;
c68f24cc 1176 else
53b7997f 1177 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1178 rcu_read_unlock();
1da177e4
LT
1179 }
1180 return hoplimit;
1181}
abbf46ae 1182EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1183
1184/*
1185 *
1186 */
1187
86872cb5 1188int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1189{
1190 int err;
5578689a 1191 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1192 struct rt6_info *rt = NULL;
1193 struct net_device *dev = NULL;
1194 struct inet6_dev *idev = NULL;
c71099ac 1195 struct fib6_table *table;
1da177e4
LT
1196 int addr_type;
1197
86872cb5 1198 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1199 return -EINVAL;
1200#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1201 if (cfg->fc_src_len)
1da177e4
LT
1202 return -EINVAL;
1203#endif
86872cb5 1204 if (cfg->fc_ifindex) {
1da177e4 1205 err = -ENODEV;
5578689a 1206 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1207 if (!dev)
1208 goto out;
1209 idev = in6_dev_get(dev);
1210 if (!idev)
1211 goto out;
1212 }
1213
86872cb5
TG
1214 if (cfg->fc_metric == 0)
1215 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1216
5578689a 1217 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1218 if (table == NULL) {
1219 err = -ENOBUFS;
1220 goto out;
1221 }
1222
957c665f 1223 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1da177e4
LT
1224
1225 if (rt == NULL) {
1226 err = -ENOMEM;
1227 goto out;
1228 }
1229
d8d1f30b 1230 rt->dst.obsolete = -1;
6f704992
YH
1231 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1232 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1233 0;
1da177e4 1234
86872cb5
TG
1235 if (cfg->fc_protocol == RTPROT_UNSPEC)
1236 cfg->fc_protocol = RTPROT_BOOT;
1237 rt->rt6i_protocol = cfg->fc_protocol;
1238
1239 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1240
1241 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1242 rt->dst.input = ip6_mc_input;
ab79ad14
1243 else if (cfg->fc_flags & RTF_LOCAL)
1244 rt->dst.input = ip6_input;
1da177e4 1245 else
d8d1f30b 1246 rt->dst.input = ip6_forward;
1da177e4 1247
d8d1f30b 1248 rt->dst.output = ip6_output;
1da177e4 1249
86872cb5
TG
1250 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1251 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1252 if (rt->rt6i_dst.plen == 128)
11d53b49 1253 rt->dst.flags |= DST_HOST;
1da177e4
LT
1254
1255#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1256 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1257 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1258#endif
1259
86872cb5 1260 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1261
1262 /* We cannot add true routes via loopback here,
1263 they would result in kernel looping; promote them to reject routes
1264 */
86872cb5 1265 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1266 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1267 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1268 /* hold loopback dev/idev if we haven't done so. */
5578689a 1269 if (dev != net->loopback_dev) {
1da177e4
LT
1270 if (dev) {
1271 dev_put(dev);
1272 in6_dev_put(idev);
1273 }
5578689a 1274 dev = net->loopback_dev;
1da177e4
LT
1275 dev_hold(dev);
1276 idev = in6_dev_get(dev);
1277 if (!idev) {
1278 err = -ENODEV;
1279 goto out;
1280 }
1281 }
d8d1f30b
CG
1282 rt->dst.output = ip6_pkt_discard_out;
1283 rt->dst.input = ip6_pkt_discard;
1284 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1285 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1286 goto install_route;
1287 }
1288
86872cb5 1289 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1290 const struct in6_addr *gw_addr;
1da177e4
LT
1291 int gwa_type;
1292
86872cb5
TG
1293 gw_addr = &cfg->fc_gateway;
1294 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1295 gwa_type = ipv6_addr_type(gw_addr);
1296
1297 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1298 struct rt6_info *grt;
1299
1300 /* IPv6 strictly inhibits using not link-local
1301 addresses as nexthop address.
1302 Otherwise, router will not able to send redirects.
1303 It is very good, but in some (rare!) circumstances
1304 (SIT, PtP, NBMA NOARP links) it is handy to allow
1305 some exceptions. --ANK
1306 */
1307 err = -EINVAL;
1308 if (!(gwa_type&IPV6_ADDR_UNICAST))
1309 goto out;
1310
5578689a 1311 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1312
1313 err = -EHOSTUNREACH;
1314 if (grt == NULL)
1315 goto out;
1316 if (dev) {
1317 if (dev != grt->rt6i_dev) {
d8d1f30b 1318 dst_release(&grt->dst);
1da177e4
LT
1319 goto out;
1320 }
1321 } else {
1322 dev = grt->rt6i_dev;
1323 idev = grt->rt6i_idev;
1324 dev_hold(dev);
1325 in6_dev_hold(grt->rt6i_idev);
1326 }
1327 if (!(grt->rt6i_flags&RTF_GATEWAY))
1328 err = 0;
d8d1f30b 1329 dst_release(&grt->dst);
1da177e4
LT
1330
1331 if (err)
1332 goto out;
1333 }
1334 err = -EINVAL;
1335 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1336 goto out;
1337 }
1338
1339 err = -ENODEV;
1340 if (dev == NULL)
1341 goto out;
1342
c3968a85
DW
1343 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1344 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1345 err = -EINVAL;
1346 goto out;
1347 }
1348 ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1349 rt->rt6i_prefsrc.plen = 128;
1350 } else
1351 rt->rt6i_prefsrc.plen = 0;
1352
86872cb5 1353 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
69cce1d1
DM
1354 struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1355 if (IS_ERR(n)) {
1356 err = PTR_ERR(n);
1da177e4
LT
1357 goto out;
1358 }
69cce1d1 1359 dst_set_neighbour(&rt->dst, n);
1da177e4
LT
1360 }
1361
86872cb5 1362 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1363
1364install_route:
86872cb5
TG
1365 if (cfg->fc_mx) {
1366 struct nlattr *nla;
1367 int remaining;
1368
1369 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1370 int type = nla_type(nla);
86872cb5
TG
1371
1372 if (type) {
1373 if (type > RTAX_MAX) {
1da177e4
LT
1374 err = -EINVAL;
1375 goto out;
1376 }
86872cb5 1377
defb3519 1378 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1379 }
1da177e4
LT
1380 }
1381 }
1382
d8d1f30b 1383 rt->dst.dev = dev;
1da177e4 1384 rt->rt6i_idev = idev;
c71099ac 1385 rt->rt6i_table = table;
63152fc0 1386
c346dca1 1387 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1388
86872cb5 1389 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1390
1391out:
1392 if (dev)
1393 dev_put(dev);
1394 if (idev)
1395 in6_dev_put(idev);
1396 if (rt)
d8d1f30b 1397 dst_free(&rt->dst);
1da177e4
LT
1398 return err;
1399}
1400
86872cb5 1401static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1402{
1403 int err;
c71099ac 1404 struct fib6_table *table;
c346dca1 1405 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1406
8ed67789 1407 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1408 return -ENOENT;
1409
c71099ac
TG
1410 table = rt->rt6i_table;
1411 write_lock_bh(&table->tb6_lock);
1da177e4 1412
86872cb5 1413 err = fib6_del(rt, info);
d8d1f30b 1414 dst_release(&rt->dst);
1da177e4 1415
c71099ac 1416 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1417
1418 return err;
1419}
1420
e0a1ad73
TG
1421int ip6_del_rt(struct rt6_info *rt)
1422{
4d1169c1 1423 struct nl_info info = {
c346dca1 1424 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1425 };
528c4ceb 1426 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1427}
1428
86872cb5 1429static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1430{
c71099ac 1431 struct fib6_table *table;
1da177e4
LT
1432 struct fib6_node *fn;
1433 struct rt6_info *rt;
1434 int err = -ESRCH;
1435
5578689a 1436 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1437 if (table == NULL)
1438 return err;
1439
1440 read_lock_bh(&table->tb6_lock);
1da177e4 1441
c71099ac 1442 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1443 &cfg->fc_dst, cfg->fc_dst_len,
1444 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1445
1da177e4 1446 if (fn) {
d8d1f30b 1447 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1448 if (cfg->fc_ifindex &&
1da177e4 1449 (rt->rt6i_dev == NULL ||
86872cb5 1450 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1451 continue;
86872cb5
TG
1452 if (cfg->fc_flags & RTF_GATEWAY &&
1453 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1454 continue;
86872cb5 1455 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1456 continue;
d8d1f30b 1457 dst_hold(&rt->dst);
c71099ac 1458 read_unlock_bh(&table->tb6_lock);
1da177e4 1459
86872cb5 1460 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1461 }
1462 }
c71099ac 1463 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1464
1465 return err;
1466}
1467
1468/*
1469 * Handle redirects
1470 */
a6279458 1471struct ip6rd_flowi {
4c9483b2 1472 struct flowi6 fl6;
a6279458
YH
1473 struct in6_addr gateway;
1474};
1475
8ed67789
DL
1476static struct rt6_info *__ip6_route_redirect(struct net *net,
1477 struct fib6_table *table,
4c9483b2 1478 struct flowi6 *fl6,
a6279458 1479 int flags)
1da177e4 1480{
4c9483b2 1481 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1482 struct rt6_info *rt;
e843b9e1 1483 struct fib6_node *fn;
c71099ac 1484
1da177e4 1485 /*
e843b9e1
YH
1486 * Get the "current" route for this destination and
1487 * check if the redirect has come from approriate router.
1488 *
1489 * RFC 2461 specifies that redirects should only be
1490 * accepted if they come from the nexthop to the target.
1491 * Due to the way the routes are chosen, this notion
1492 * is a bit fuzzy and one might need to check all possible
1493 * routes.
1da177e4 1494 */
1da177e4 1495
c71099ac 1496 read_lock_bh(&table->tb6_lock);
4c9483b2 1497 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1498restart:
d8d1f30b 1499 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1500 /*
1501 * Current route is on-link; redirect is always invalid.
1502 *
1503 * Seems, previous statement is not true. It could
1504 * be node, which looks for us as on-link (f.e. proxy ndisc)
1505 * But then router serving it might decide, that we should
1506 * know truth 8)8) --ANK (980726).
1507 */
1508 if (rt6_check_expired(rt))
1509 continue;
1510 if (!(rt->rt6i_flags & RTF_GATEWAY))
1511 continue;
4c9483b2 1512 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
e843b9e1 1513 continue;
a6279458 1514 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1515 continue;
1516 break;
1517 }
a6279458 1518
cb15d9c2 1519 if (!rt)
8ed67789 1520 rt = net->ipv6.ip6_null_entry;
4c9483b2 1521 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1522out:
d8d1f30b 1523 dst_hold(&rt->dst);
a6279458 1524
c71099ac 1525 read_unlock_bh(&table->tb6_lock);
e843b9e1 1526
a6279458
YH
1527 return rt;
1528};
1529
b71d1d42
ED
1530static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1531 const struct in6_addr *src,
1532 const struct in6_addr *gateway,
a6279458
YH
1533 struct net_device *dev)
1534{
adaa70bb 1535 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1536 struct net *net = dev_net(dev);
a6279458 1537 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1538 .fl6 = {
1539 .flowi6_oif = dev->ifindex,
1540 .daddr = *dest,
1541 .saddr = *src,
a6279458 1542 },
a6279458 1543 };
adaa70bb 1544
86c36ce4
BH
1545 ipv6_addr_copy(&rdfl.gateway, gateway);
1546
adaa70bb
TG
1547 if (rt6_need_strict(dest))
1548 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1549
4c9483b2 1550 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1551 flags, __ip6_route_redirect);
a6279458
YH
1552}
1553
b71d1d42
ED
1554void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1555 const struct in6_addr *saddr,
a6279458
YH
1556 struct neighbour *neigh, u8 *lladdr, int on_link)
1557{
1558 struct rt6_info *rt, *nrt = NULL;
1559 struct netevent_redirect netevent;
c346dca1 1560 struct net *net = dev_net(neigh->dev);
a6279458
YH
1561
1562 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1563
8ed67789 1564 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1565 if (net_ratelimit())
1566 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1567 "for redirect target\n");
a6279458 1568 goto out;
1da177e4
LT
1569 }
1570
1da177e4
LT
1571 /*
1572 * We have finally decided to accept it.
1573 */
1574
1ab1457c 1575 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1576 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1577 NEIGH_UPDATE_F_OVERRIDE|
1578 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1579 NEIGH_UPDATE_F_ISROUTER))
1580 );
1581
1582 /*
1583 * Redirect received -> path was valid.
1584 * Look, redirects are sent only in response to data packets,
1585 * so that this nexthop apparently is reachable. --ANK
1586 */
d8d1f30b 1587 dst_confirm(&rt->dst);
1da177e4
LT
1588
1589 /* Duplicate redirect: silently ignore. */
69cce1d1 1590 if (neigh == dst_get_neighbour(&rt->dst))
1da177e4
LT
1591 goto out;
1592
21efcfa0 1593 nrt = ip6_rt_copy(rt, dest);
1da177e4
LT
1594 if (nrt == NULL)
1595 goto out;
1596
1597 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1598 if (on_link)
1599 nrt->rt6i_flags &= ~RTF_GATEWAY;
1600
1da177e4 1601 nrt->rt6i_dst.plen = 128;
d8d1f30b 1602 nrt->dst.flags |= DST_HOST;
1da177e4
LT
1603
1604 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
69cce1d1 1605 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1606
40e22e8f 1607 if (ip6_ins_rt(nrt))
1da177e4
LT
1608 goto out;
1609
d8d1f30b
CG
1610 netevent.old = &rt->dst;
1611 netevent.new = &nrt->dst;
8d71740c
TT
1612 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1613
1da177e4 1614 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1615 ip6_del_rt(rt);
1da177e4
LT
1616 return;
1617 }
1618
1619out:
d8d1f30b 1620 dst_release(&rt->dst);
1da177e4
LT
1621}
1622
1623/*
1624 * Handle ICMP "packet too big" messages
1625 * i.e. Path MTU discovery
1626 */
1627
b71d1d42 1628static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1629 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1630{
1631 struct rt6_info *rt, *nrt;
1632 int allfrag = 0;
d3052b55 1633again:
ae878ae2 1634 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1635 if (rt == NULL)
1636 return;
1637
d3052b55
AV
1638 if (rt6_check_expired(rt)) {
1639 ip6_del_rt(rt);
1640 goto again;
1641 }
1642
d8d1f30b 1643 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1644 goto out;
1645
1646 if (pmtu < IPV6_MIN_MTU) {
1647 /*
1ab1457c 1648 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1649 * MTU (1280) and a fragment header should always be included
1650 * after a node receiving Too Big message reporting PMTU is
1651 * less than the IPv6 Minimum Link MTU.
1652 */
1653 pmtu = IPV6_MIN_MTU;
1654 allfrag = 1;
1655 }
1656
1657 /* New mtu received -> path was valid.
1658 They are sent only in response to data packets,
1659 so that this nexthop apparently is reachable. --ANK
1660 */
d8d1f30b 1661 dst_confirm(&rt->dst);
1da177e4
LT
1662
1663 /* Host route. If it is static, it would be better
1664 not to override it, but add new one, so that
1665 when cache entry will expire old pmtu
1666 would return automatically.
1667 */
1668 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1669 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1670 if (allfrag) {
1671 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1672 features |= RTAX_FEATURE_ALLFRAG;
1673 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1674 }
d8d1f30b 1675 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1676 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1677 goto out;
1678 }
1679
1680 /* Network route.
1681 Two cases are possible:
1682 1. It is connected route. Action: COW
1683 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1684 */
69cce1d1 1685 if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1686 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1687 else
1688 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1689
d5315b50 1690 if (nrt) {
defb3519
DM
1691 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1692 if (allfrag) {
1693 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1694 features |= RTAX_FEATURE_ALLFRAG;
1695 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1696 }
a1e78363
YH
1697
1698 /* According to RFC 1981, detecting PMTU increase shouldn't be
1699 * happened within 5 mins, the recommended timer is 10 mins.
1700 * Here this route expiration time is set to ip6_rt_mtu_expires
1701 * which is 10 mins. After 10 mins the decreased pmtu is expired
1702 * and detecting PMTU increase will be automatically happened.
1703 */
d8d1f30b 1704 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1705 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1706
40e22e8f 1707 ip6_ins_rt(nrt);
1da177e4 1708 }
1da177e4 1709out:
d8d1f30b 1710 dst_release(&rt->dst);
1da177e4
LT
1711}
1712
b71d1d42 1713void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1714 struct net_device *dev, u32 pmtu)
1715{
1716 struct net *net = dev_net(dev);
1717
1718 /*
1719 * RFC 1981 states that a node "MUST reduce the size of the packets it
1720 * is sending along the path" that caused the Packet Too Big message.
1721 * Since it's not possible in the general case to determine which
1722 * interface was used to send the original packet, we update the MTU
1723 * on the interface that will be used to send future packets. We also
1724 * update the MTU on the interface that received the Packet Too Big in
1725 * case the original packet was forced out that interface with
1726 * SO_BINDTODEVICE or similar. This is the next best thing to the
1727 * correct behaviour, which would be to update the MTU on all
1728 * interfaces.
1729 */
1730 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1731 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1732}
1733
1da177e4
LT
1734/*
1735 * Misc support functions
1736 */
1737
21efcfa0
ED
1738static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1739 const struct in6_addr *dest)
1da177e4 1740{
c346dca1 1741 struct net *net = dev_net(ort->rt6i_dev);
5c1e6aa3 1742 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 1743 ort->dst.dev, 0);
1da177e4
LT
1744
1745 if (rt) {
d8d1f30b
CG
1746 rt->dst.input = ort->dst.input;
1747 rt->dst.output = ort->dst.output;
1748
21efcfa0
ED
1749 ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
1750 rt->rt6i_dst.plen = ort->rt6i_dst.plen;
defb3519 1751 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1752 rt->dst.error = ort->dst.error;
1da177e4
LT
1753 rt->rt6i_idev = ort->rt6i_idev;
1754 if (rt->rt6i_idev)
1755 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1756 rt->dst.lastuse = jiffies;
1da177e4
LT
1757 rt->rt6i_expires = 0;
1758
1759 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1760 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1761 rt->rt6i_metric = 0;
1762
1da177e4
LT
1763#ifdef CONFIG_IPV6_SUBTREES
1764 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1765#endif
0f6c6392 1766 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1767 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1768 }
1769 return rt;
1770}
1771
70ceb4f5 1772#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1773static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1774 const struct in6_addr *prefix, int prefixlen,
1775 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1776{
1777 struct fib6_node *fn;
1778 struct rt6_info *rt = NULL;
c71099ac
TG
1779 struct fib6_table *table;
1780
efa2cea0 1781 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1782 if (table == NULL)
1783 return NULL;
70ceb4f5 1784
c71099ac
TG
1785 write_lock_bh(&table->tb6_lock);
1786 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1787 if (!fn)
1788 goto out;
1789
d8d1f30b 1790 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1791 if (rt->rt6i_dev->ifindex != ifindex)
1792 continue;
1793 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1794 continue;
1795 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1796 continue;
d8d1f30b 1797 dst_hold(&rt->dst);
70ceb4f5
YH
1798 break;
1799 }
1800out:
c71099ac 1801 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1802 return rt;
1803}
1804
efa2cea0 1805static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1806 const struct in6_addr *prefix, int prefixlen,
1807 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5
YH
1808 unsigned pref)
1809{
86872cb5
TG
1810 struct fib6_config cfg = {
1811 .fc_table = RT6_TABLE_INFO,
238fc7ea 1812 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1813 .fc_ifindex = ifindex,
1814 .fc_dst_len = prefixlen,
1815 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1816 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1817 .fc_nlinfo.pid = 0,
1818 .fc_nlinfo.nlh = NULL,
1819 .fc_nlinfo.nl_net = net,
86872cb5
TG
1820 };
1821
1822 ipv6_addr_copy(&cfg.fc_dst, prefix);
1823 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1824
e317da96
YH
1825 /* We should treat it as a default route if prefix length is 0. */
1826 if (!prefixlen)
86872cb5 1827 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1828
86872cb5 1829 ip6_route_add(&cfg);
70ceb4f5 1830
efa2cea0 1831 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1832}
1833#endif
1834
b71d1d42 1835struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1836{
1da177e4 1837 struct rt6_info *rt;
c71099ac 1838 struct fib6_table *table;
1da177e4 1839
c346dca1 1840 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1841 if (table == NULL)
1842 return NULL;
1da177e4 1843
c71099ac 1844 write_lock_bh(&table->tb6_lock);
d8d1f30b 1845 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1846 if (dev == rt->rt6i_dev &&
045927ff 1847 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1848 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1849 break;
1850 }
1851 if (rt)
d8d1f30b 1852 dst_hold(&rt->dst);
c71099ac 1853 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1854 return rt;
1855}
1856
b71d1d42 1857struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1858 struct net_device *dev,
1859 unsigned int pref)
1da177e4 1860{
86872cb5
TG
1861 struct fib6_config cfg = {
1862 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1863 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1864 .fc_ifindex = dev->ifindex,
1865 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1866 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1867 .fc_nlinfo.pid = 0,
1868 .fc_nlinfo.nlh = NULL,
c346dca1 1869 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1870 };
1da177e4 1871
86872cb5 1872 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1873
86872cb5 1874 ip6_route_add(&cfg);
1da177e4 1875
1da177e4
LT
1876 return rt6_get_dflt_router(gwaddr, dev);
1877}
1878
7b4da532 1879void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1880{
1881 struct rt6_info *rt;
c71099ac
TG
1882 struct fib6_table *table;
1883
1884 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1885 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1886 if (table == NULL)
1887 return;
1da177e4
LT
1888
1889restart:
c71099ac 1890 read_lock_bh(&table->tb6_lock);
d8d1f30b 1891 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1892 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1893 dst_hold(&rt->dst);
c71099ac 1894 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1895 ip6_del_rt(rt);
1da177e4
LT
1896 goto restart;
1897 }
1898 }
c71099ac 1899 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1900}
1901
5578689a
DL
1902static void rtmsg_to_fib6_config(struct net *net,
1903 struct in6_rtmsg *rtmsg,
86872cb5
TG
1904 struct fib6_config *cfg)
1905{
1906 memset(cfg, 0, sizeof(*cfg));
1907
1908 cfg->fc_table = RT6_TABLE_MAIN;
1909 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1910 cfg->fc_metric = rtmsg->rtmsg_metric;
1911 cfg->fc_expires = rtmsg->rtmsg_info;
1912 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1913 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1914 cfg->fc_flags = rtmsg->rtmsg_flags;
1915
5578689a 1916 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1917
86872cb5
TG
1918 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1919 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1920 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1921}
1922
5578689a 1923int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1924{
86872cb5 1925 struct fib6_config cfg;
1da177e4
LT
1926 struct in6_rtmsg rtmsg;
1927 int err;
1928
1929 switch(cmd) {
1930 case SIOCADDRT: /* Add a route */
1931 case SIOCDELRT: /* Delete a route */
1932 if (!capable(CAP_NET_ADMIN))
1933 return -EPERM;
1934 err = copy_from_user(&rtmsg, arg,
1935 sizeof(struct in6_rtmsg));
1936 if (err)
1937 return -EFAULT;
86872cb5 1938
5578689a 1939 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1940
1da177e4
LT
1941 rtnl_lock();
1942 switch (cmd) {
1943 case SIOCADDRT:
86872cb5 1944 err = ip6_route_add(&cfg);
1da177e4
LT
1945 break;
1946 case SIOCDELRT:
86872cb5 1947 err = ip6_route_del(&cfg);
1da177e4
LT
1948 break;
1949 default:
1950 err = -EINVAL;
1951 }
1952 rtnl_unlock();
1953
1954 return err;
3ff50b79 1955 }
1da177e4
LT
1956
1957 return -EINVAL;
1958}
1959
1960/*
1961 * Drop the packet on the floor
1962 */
1963
d5fdd6ba 1964static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1965{
612f09e8 1966 int type;
adf30907 1967 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1968 switch (ipstats_mib_noroutes) {
1969 case IPSTATS_MIB_INNOROUTES:
0660e03f 1970 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1971 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1972 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1973 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1974 break;
1975 }
1976 /* FALLTHROUGH */
1977 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1978 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1979 ipstats_mib_noroutes);
612f09e8
YH
1980 break;
1981 }
3ffe533c 1982 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1983 kfree_skb(skb);
1984 return 0;
1985}
1986
9ce8ade0
TG
1987static int ip6_pkt_discard(struct sk_buff *skb)
1988{
612f09e8 1989 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1990}
1991
20380731 1992static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 1993{
adf30907 1994 skb->dev = skb_dst(skb)->dev;
612f09e8 1995 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1996}
1997
6723ab54
DM
1998#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1999
9ce8ade0
TG
2000static int ip6_pkt_prohibit(struct sk_buff *skb)
2001{
612f09e8 2002 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2003}
2004
2005static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2006{
adf30907 2007 skb->dev = skb_dst(skb)->dev;
612f09e8 2008 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2009}
2010
6723ab54
DM
2011#endif
2012
1da177e4
LT
2013/*
2014 * Allocate a dst for local (unicast / anycast) address.
2015 */
2016
2017struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2018 const struct in6_addr *addr,
2019 int anycast)
2020{
c346dca1 2021 struct net *net = dev_net(idev->dev);
5c1e6aa3 2022 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 2023 net->loopback_dev, 0);
14deae41 2024 struct neighbour *neigh;
1da177e4 2025
40385653
BG
2026 if (rt == NULL) {
2027 if (net_ratelimit())
2028 pr_warning("IPv6: Maximum number of routes reached,"
2029 " consider increasing route/max_size.\n");
1da177e4 2030 return ERR_PTR(-ENOMEM);
40385653 2031 }
1da177e4 2032
1da177e4
LT
2033 in6_dev_hold(idev);
2034
11d53b49 2035 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2036 rt->dst.input = ip6_input;
2037 rt->dst.output = ip6_output;
1da177e4 2038 rt->rt6i_idev = idev;
d8d1f30b 2039 rt->dst.obsolete = -1;
1da177e4
LT
2040
2041 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2042 if (anycast)
2043 rt->rt6i_flags |= RTF_ANYCAST;
2044 else
1da177e4 2045 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
2046 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2047 if (IS_ERR(neigh)) {
d8d1f30b 2048 dst_free(&rt->dst);
14deae41 2049
29546a64 2050 return ERR_CAST(neigh);
1da177e4 2051 }
69cce1d1 2052 dst_set_neighbour(&rt->dst, neigh);
1da177e4
LT
2053
2054 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2055 rt->rt6i_dst.plen = 128;
5578689a 2056 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2057
d8d1f30b 2058 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2059
2060 return rt;
2061}
2062
c3968a85
DW
2063int ip6_route_get_saddr(struct net *net,
2064 struct rt6_info *rt,
b71d1d42 2065 const struct in6_addr *daddr,
c3968a85
DW
2066 unsigned int prefs,
2067 struct in6_addr *saddr)
2068{
2069 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2070 int err = 0;
2071 if (rt->rt6i_prefsrc.plen)
2072 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2073 else
2074 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2075 daddr, prefs, saddr);
2076 return err;
2077}
2078
2079/* remove deleted ip from prefsrc entries */
2080struct arg_dev_net_ip {
2081 struct net_device *dev;
2082 struct net *net;
2083 struct in6_addr *addr;
2084};
2085
2086static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2087{
2088 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2089 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2090 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2091
2092 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2093 rt != net->ipv6.ip6_null_entry &&
2094 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2095 /* remove prefsrc entry */
2096 rt->rt6i_prefsrc.plen = 0;
2097 }
2098 return 0;
2099}
2100
2101void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2102{
2103 struct net *net = dev_net(ifp->idev->dev);
2104 struct arg_dev_net_ip adni = {
2105 .dev = ifp->idev->dev,
2106 .net = net,
2107 .addr = &ifp->addr,
2108 };
2109 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2110}
2111
8ed67789
DL
2112struct arg_dev_net {
2113 struct net_device *dev;
2114 struct net *net;
2115};
2116
1da177e4
LT
2117static int fib6_ifdown(struct rt6_info *rt, void *arg)
2118{
bc3ef660 2119 const struct arg_dev_net *adn = arg;
2120 const struct net_device *dev = adn->dev;
8ed67789 2121
bc3ef660 2122 if ((rt->rt6i_dev == dev || dev == NULL) &&
2123 rt != adn->net->ipv6.ip6_null_entry) {
1da177e4
LT
2124 RT6_TRACE("deleted by ifdown %p\n", rt);
2125 return -1;
2126 }
2127 return 0;
2128}
2129
f3db4851 2130void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2131{
8ed67789
DL
2132 struct arg_dev_net adn = {
2133 .dev = dev,
2134 .net = net,
2135 };
2136
2137 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2138 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2139}
2140
2141struct rt6_mtu_change_arg
2142{
2143 struct net_device *dev;
2144 unsigned mtu;
2145};
2146
2147static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2148{
2149 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2150 struct inet6_dev *idev;
2151
2152 /* In IPv6 pmtu discovery is not optional,
2153 so that RTAX_MTU lock cannot disable it.
2154 We still use this lock to block changes
2155 caused by addrconf/ndisc.
2156 */
2157
2158 idev = __in6_dev_get(arg->dev);
2159 if (idev == NULL)
2160 return 0;
2161
2162 /* For administrative MTU increase, there is no way to discover
2163 IPv6 PMTU increase, so PMTU increase should be updated here.
2164 Since RFC 1981 doesn't include administrative MTU increase
2165 update PMTU increase is a MUST. (i.e. jumbo frame)
2166 */
2167 /*
2168 If new MTU is less than route PMTU, this new MTU will be the
2169 lowest MTU in the path, update the route PMTU to reflect PMTU
2170 decreases; if new MTU is greater than route PMTU, and the
2171 old MTU is the lowest MTU in the path, update the route PMTU
2172 to reflect the increase. In this case if the other nodes' MTU
2173 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2174 PMTU discouvery.
2175 */
2176 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2177 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2178 (dst_mtu(&rt->dst) >= arg->mtu ||
2179 (dst_mtu(&rt->dst) < arg->mtu &&
2180 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2181 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2182 }
1da177e4
LT
2183 return 0;
2184}
2185
2186void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2187{
c71099ac
TG
2188 struct rt6_mtu_change_arg arg = {
2189 .dev = dev,
2190 .mtu = mtu,
2191 };
1da177e4 2192
c346dca1 2193 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2194}
2195
ef7c79ed 2196static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2197 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2198 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2199 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2200 [RTA_PRIORITY] = { .type = NLA_U32 },
2201 [RTA_METRICS] = { .type = NLA_NESTED },
2202};
2203
2204static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2205 struct fib6_config *cfg)
1da177e4 2206{
86872cb5
TG
2207 struct rtmsg *rtm;
2208 struct nlattr *tb[RTA_MAX+1];
2209 int err;
1da177e4 2210
86872cb5
TG
2211 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2212 if (err < 0)
2213 goto errout;
1da177e4 2214
86872cb5
TG
2215 err = -EINVAL;
2216 rtm = nlmsg_data(nlh);
2217 memset(cfg, 0, sizeof(*cfg));
2218
2219 cfg->fc_table = rtm->rtm_table;
2220 cfg->fc_dst_len = rtm->rtm_dst_len;
2221 cfg->fc_src_len = rtm->rtm_src_len;
2222 cfg->fc_flags = RTF_UP;
2223 cfg->fc_protocol = rtm->rtm_protocol;
2224
2225 if (rtm->rtm_type == RTN_UNREACHABLE)
2226 cfg->fc_flags |= RTF_REJECT;
2227
ab79ad14
2228 if (rtm->rtm_type == RTN_LOCAL)
2229 cfg->fc_flags |= RTF_LOCAL;
2230
86872cb5
TG
2231 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2232 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2233 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2234
2235 if (tb[RTA_GATEWAY]) {
2236 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2237 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2238 }
86872cb5
TG
2239
2240 if (tb[RTA_DST]) {
2241 int plen = (rtm->rtm_dst_len + 7) >> 3;
2242
2243 if (nla_len(tb[RTA_DST]) < plen)
2244 goto errout;
2245
2246 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2247 }
86872cb5
TG
2248
2249 if (tb[RTA_SRC]) {
2250 int plen = (rtm->rtm_src_len + 7) >> 3;
2251
2252 if (nla_len(tb[RTA_SRC]) < plen)
2253 goto errout;
2254
2255 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2256 }
86872cb5 2257
c3968a85
DW
2258 if (tb[RTA_PREFSRC])
2259 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2260
86872cb5
TG
2261 if (tb[RTA_OIF])
2262 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2263
2264 if (tb[RTA_PRIORITY])
2265 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2266
2267 if (tb[RTA_METRICS]) {
2268 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2269 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2270 }
86872cb5
TG
2271
2272 if (tb[RTA_TABLE])
2273 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2274
2275 err = 0;
2276errout:
2277 return err;
1da177e4
LT
2278}
2279
c127ea2c 2280static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2281{
86872cb5
TG
2282 struct fib6_config cfg;
2283 int err;
1da177e4 2284
86872cb5
TG
2285 err = rtm_to_fib6_config(skb, nlh, &cfg);
2286 if (err < 0)
2287 return err;
2288
2289 return ip6_route_del(&cfg);
1da177e4
LT
2290}
2291
c127ea2c 2292static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2293{
86872cb5
TG
2294 struct fib6_config cfg;
2295 int err;
1da177e4 2296
86872cb5
TG
2297 err = rtm_to_fib6_config(skb, nlh, &cfg);
2298 if (err < 0)
2299 return err;
2300
2301 return ip6_route_add(&cfg);
1da177e4
LT
2302}
2303
339bf98f
TG
2304static inline size_t rt6_nlmsg_size(void)
2305{
2306 return NLMSG_ALIGN(sizeof(struct rtmsg))
2307 + nla_total_size(16) /* RTA_SRC */
2308 + nla_total_size(16) /* RTA_DST */
2309 + nla_total_size(16) /* RTA_GATEWAY */
2310 + nla_total_size(16) /* RTA_PREFSRC */
2311 + nla_total_size(4) /* RTA_TABLE */
2312 + nla_total_size(4) /* RTA_IIF */
2313 + nla_total_size(4) /* RTA_OIF */
2314 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2315 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2316 + nla_total_size(sizeof(struct rta_cacheinfo));
2317}
2318
191cd582
BH
2319static int rt6_fill_node(struct net *net,
2320 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2321 struct in6_addr *dst, struct in6_addr *src,
2322 int iif, int type, u32 pid, u32 seq,
7bc570c8 2323 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2324{
2325 struct rtmsg *rtm;
2d7202bf 2326 struct nlmsghdr *nlh;
e3703b3d 2327 long expires;
9e762a4a 2328 u32 table;
1da177e4
LT
2329
2330 if (prefix) { /* user wants prefix routes only */
2331 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2332 /* success since this is not a prefix route */
2333 return 1;
2334 }
2335 }
2336
2d7202bf
TG
2337 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2338 if (nlh == NULL)
26932566 2339 return -EMSGSIZE;
2d7202bf
TG
2340
2341 rtm = nlmsg_data(nlh);
1da177e4
LT
2342 rtm->rtm_family = AF_INET6;
2343 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2344 rtm->rtm_src_len = rt->rt6i_src.plen;
2345 rtm->rtm_tos = 0;
c71099ac 2346 if (rt->rt6i_table)
9e762a4a 2347 table = rt->rt6i_table->tb6_id;
c71099ac 2348 else
9e762a4a
PM
2349 table = RT6_TABLE_UNSPEC;
2350 rtm->rtm_table = table;
2d7202bf 2351 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2352 if (rt->rt6i_flags&RTF_REJECT)
2353 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2354 else if (rt->rt6i_flags&RTF_LOCAL)
2355 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2356 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2357 rtm->rtm_type = RTN_LOCAL;
2358 else
2359 rtm->rtm_type = RTN_UNICAST;
2360 rtm->rtm_flags = 0;
2361 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2362 rtm->rtm_protocol = rt->rt6i_protocol;
2363 if (rt->rt6i_flags&RTF_DYNAMIC)
2364 rtm->rtm_protocol = RTPROT_REDIRECT;
2365 else if (rt->rt6i_flags & RTF_ADDRCONF)
2366 rtm->rtm_protocol = RTPROT_KERNEL;
2367 else if (rt->rt6i_flags&RTF_DEFAULT)
2368 rtm->rtm_protocol = RTPROT_RA;
2369
2370 if (rt->rt6i_flags&RTF_CACHE)
2371 rtm->rtm_flags |= RTM_F_CLONED;
2372
2373 if (dst) {
2d7202bf 2374 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2375 rtm->rtm_dst_len = 128;
1da177e4 2376 } else if (rtm->rtm_dst_len)
2d7202bf 2377 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2378#ifdef CONFIG_IPV6_SUBTREES
2379 if (src) {
2d7202bf 2380 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2381 rtm->rtm_src_len = 128;
1da177e4 2382 } else if (rtm->rtm_src_len)
2d7202bf 2383 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2384#endif
7bc570c8
YH
2385 if (iif) {
2386#ifdef CONFIG_IPV6_MROUTE
2387 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2388 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2389 if (err <= 0) {
2390 if (!nowait) {
2391 if (err == 0)
2392 return 0;
2393 goto nla_put_failure;
2394 } else {
2395 if (err == -EMSGSIZE)
2396 goto nla_put_failure;
2397 }
2398 }
2399 } else
2400#endif
2401 NLA_PUT_U32(skb, RTA_IIF, iif);
2402 } else if (dst) {
1da177e4 2403 struct in6_addr saddr_buf;
c3968a85 2404 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2d7202bf 2405 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2406 }
2d7202bf 2407
c3968a85
DW
2408 if (rt->rt6i_prefsrc.plen) {
2409 struct in6_addr saddr_buf;
2410 ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2411 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2412 }
2413
defb3519 2414 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2415 goto nla_put_failure;
2416
69cce1d1
DM
2417 if (dst_get_neighbour(&rt->dst))
2418 NLA_PUT(skb, RTA_GATEWAY, 16, &dst_get_neighbour(&rt->dst)->primary_key);
2d7202bf 2419
d8d1f30b 2420 if (rt->dst.dev)
2d7202bf
TG
2421 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2422
2423 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2424
36e3deae
YH
2425 if (!(rt->rt6i_flags & RTF_EXPIRES))
2426 expires = 0;
2427 else if (rt->rt6i_expires - jiffies < INT_MAX)
2428 expires = rt->rt6i_expires - jiffies;
2429 else
2430 expires = INT_MAX;
69cdf8f9 2431
d8d1f30b
CG
2432 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2433 expires, rt->dst.error) < 0)
e3703b3d 2434 goto nla_put_failure;
2d7202bf
TG
2435
2436 return nlmsg_end(skb, nlh);
2437
2438nla_put_failure:
26932566
PM
2439 nlmsg_cancel(skb, nlh);
2440 return -EMSGSIZE;
1da177e4
LT
2441}
2442
1b43af54 2443int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2444{
2445 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2446 int prefix;
2447
2d7202bf
TG
2448 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2449 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2450 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2451 } else
2452 prefix = 0;
2453
191cd582
BH
2454 return rt6_fill_node(arg->net,
2455 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2456 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2457 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2458}
2459
c127ea2c 2460static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2461{
3b1e0a65 2462 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2463 struct nlattr *tb[RTA_MAX+1];
2464 struct rt6_info *rt;
1da177e4 2465 struct sk_buff *skb;
ab364a6f 2466 struct rtmsg *rtm;
4c9483b2 2467 struct flowi6 fl6;
ab364a6f 2468 int err, iif = 0;
1da177e4 2469
ab364a6f
TG
2470 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2471 if (err < 0)
2472 goto errout;
1da177e4 2473
ab364a6f 2474 err = -EINVAL;
4c9483b2 2475 memset(&fl6, 0, sizeof(fl6));
1da177e4 2476
ab364a6f
TG
2477 if (tb[RTA_SRC]) {
2478 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2479 goto errout;
2480
4c9483b2 2481 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
ab364a6f
TG
2482 }
2483
2484 if (tb[RTA_DST]) {
2485 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2486 goto errout;
2487
4c9483b2 2488 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
ab364a6f
TG
2489 }
2490
2491 if (tb[RTA_IIF])
2492 iif = nla_get_u32(tb[RTA_IIF]);
2493
2494 if (tb[RTA_OIF])
4c9483b2 2495 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2496
2497 if (iif) {
2498 struct net_device *dev;
5578689a 2499 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2500 if (!dev) {
2501 err = -ENODEV;
ab364a6f 2502 goto errout;
1da177e4
LT
2503 }
2504 }
2505
ab364a6f
TG
2506 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2507 if (skb == NULL) {
2508 err = -ENOBUFS;
2509 goto errout;
2510 }
1da177e4 2511
ab364a6f
TG
2512 /* Reserve room for dummy headers, this skb can pass
2513 through good chunk of routing engine.
2514 */
459a98ed 2515 skb_reset_mac_header(skb);
ab364a6f 2516 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2517
4c9483b2 2518 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
d8d1f30b 2519 skb_dst_set(skb, &rt->dst);
1da177e4 2520
4c9483b2 2521 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2522 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2523 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2524 if (err < 0) {
ab364a6f
TG
2525 kfree_skb(skb);
2526 goto errout;
1da177e4
LT
2527 }
2528
5578689a 2529 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2530errout:
1da177e4 2531 return err;
1da177e4
LT
2532}
2533
86872cb5 2534void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2535{
2536 struct sk_buff *skb;
5578689a 2537 struct net *net = info->nl_net;
528c4ceb
DL
2538 u32 seq;
2539 int err;
2540
2541 err = -ENOBUFS;
2542 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2543
339bf98f 2544 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2545 if (skb == NULL)
2546 goto errout;
2547
191cd582 2548 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2549 event, info->pid, seq, 0, 0, 0);
26932566
PM
2550 if (err < 0) {
2551 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2552 WARN_ON(err == -EMSGSIZE);
2553 kfree_skb(skb);
2554 goto errout;
2555 }
1ce85fe4
PNA
2556 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2557 info->nlh, gfp_any());
2558 return;
21713ebc
TG
2559errout:
2560 if (err < 0)
5578689a 2561 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2562}
2563
8ed67789
DL
2564static int ip6_route_dev_notify(struct notifier_block *this,
2565 unsigned long event, void *data)
2566{
2567 struct net_device *dev = (struct net_device *)data;
c346dca1 2568 struct net *net = dev_net(dev);
8ed67789
DL
2569
2570 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2571 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2572 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2573#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2574 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2575 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2576 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2577 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2578#endif
2579 }
2580
2581 return NOTIFY_OK;
2582}
2583
1da177e4
LT
2584/*
2585 * /proc
2586 */
2587
2588#ifdef CONFIG_PROC_FS
2589
1da177e4
LT
/*
 * Buffer bookkeeping record for /proc output.
 * NOTE(review): no user of this struct is visible in this part of the
 * file -- confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2598
2599static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2600{
33120b30 2601 struct seq_file *m = p_arg;
69cce1d1 2602 struct neighbour *n;
1da177e4 2603
4b7a4274 2604 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2605
2606#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2607 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2608#else
33120b30 2609 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2610#endif
69cce1d1
DM
2611 n = dst_get_neighbour(&rt->dst);
2612 if (n) {
2613 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2614 } else {
33120b30 2615 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2616 }
33120b30 2617 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2618 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2619 rt->dst.__use, rt->rt6i_flags,
33120b30 2620 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2621 return 0;
2622}
2623
33120b30 2624static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2625{
f3db4851
DL
2626 struct net *net = (struct net *)m->private;
2627 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2628 return 0;
2629}
1da177e4 2630
33120b30
AD
2631static int ipv6_route_open(struct inode *inode, struct file *file)
2632{
de05c557 2633 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2634}
2635
33120b30
AD
2636static const struct file_operations ipv6_route_proc_fops = {
2637 .owner = THIS_MODULE,
2638 .open = ipv6_route_open,
2639 .read = seq_read,
2640 .llseek = seq_lseek,
b6fcbdb4 2641 .release = single_release_net,
33120b30
AD
2642};
2643
1da177e4
LT
2644static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2645{
69ddb805 2646 struct net *net = (struct net *)seq->private;
1da177e4 2647 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2648 net->ipv6.rt6_stats->fib_nodes,
2649 net->ipv6.rt6_stats->fib_route_nodes,
2650 net->ipv6.rt6_stats->fib_rt_alloc,
2651 net->ipv6.rt6_stats->fib_rt_entries,
2652 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2653 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2654 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2655
2656 return 0;
2657}
2658
2659static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2660{
de05c557 2661 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2662}
2663
9a32144e 2664static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2665 .owner = THIS_MODULE,
2666 .open = rt6_stats_seq_open,
2667 .read = seq_read,
2668 .llseek = seq_lseek,
b6fcbdb4 2669 .release = single_release_net,
1da177e4
LT
2670};
2671#endif /* CONFIG_PROC_FS */
2672
2673#ifdef CONFIG_SYSCTL
2674
1da177e4 2675static
8d65af78 2676int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2677 void __user *buffer, size_t *lenp, loff_t *ppos)
2678{
c486da34
LAG
2679 struct net *net;
2680 int delay;
2681 if (!write)
1da177e4 2682 return -EINVAL;
c486da34
LAG
2683
2684 net = (struct net *)ctl->extra1;
2685 delay = net->ipv6.sysctl.flush_delay;
2686 proc_dointvec(ctl, write, buffer, lenp, ppos);
2687 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2688 return 0;
1da177e4
LT
2689}
2690
760f2d01 2691ctl_table ipv6_route_table_template[] = {
1ab1457c 2692 {
1da177e4 2693 .procname = "flush",
4990509f 2694 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2695 .maxlen = sizeof(int),
89c8b3a1 2696 .mode = 0200,
6d9f239a 2697 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2698 },
2699 {
1da177e4 2700 .procname = "gc_thresh",
9a7ec3a9 2701 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2702 .maxlen = sizeof(int),
2703 .mode = 0644,
6d9f239a 2704 .proc_handler = proc_dointvec,
1da177e4
LT
2705 },
2706 {
1da177e4 2707 .procname = "max_size",
4990509f 2708 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2709 .maxlen = sizeof(int),
2710 .mode = 0644,
6d9f239a 2711 .proc_handler = proc_dointvec,
1da177e4
LT
2712 },
2713 {
1da177e4 2714 .procname = "gc_min_interval",
4990509f 2715 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2716 .maxlen = sizeof(int),
2717 .mode = 0644,
6d9f239a 2718 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2719 },
2720 {
1da177e4 2721 .procname = "gc_timeout",
4990509f 2722 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2723 .maxlen = sizeof(int),
2724 .mode = 0644,
6d9f239a 2725 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2726 },
2727 {
1da177e4 2728 .procname = "gc_interval",
4990509f 2729 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2730 .maxlen = sizeof(int),
2731 .mode = 0644,
6d9f239a 2732 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2733 },
2734 {
1da177e4 2735 .procname = "gc_elasticity",
4990509f 2736 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2737 .maxlen = sizeof(int),
2738 .mode = 0644,
f3d3f616 2739 .proc_handler = proc_dointvec,
1da177e4
LT
2740 },
2741 {
1da177e4 2742 .procname = "mtu_expires",
4990509f 2743 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2744 .maxlen = sizeof(int),
2745 .mode = 0644,
6d9f239a 2746 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2747 },
2748 {
1da177e4 2749 .procname = "min_adv_mss",
4990509f 2750 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2751 .maxlen = sizeof(int),
2752 .mode = 0644,
f3d3f616 2753 .proc_handler = proc_dointvec,
1da177e4
LT
2754 },
2755 {
1da177e4 2756 .procname = "gc_min_interval_ms",
4990509f 2757 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2758 .maxlen = sizeof(int),
2759 .mode = 0644,
6d9f239a 2760 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2761 },
f8572d8f 2762 { }
1da177e4
LT
2763};
2764
2c8c1e72 2765struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2766{
2767 struct ctl_table *table;
2768
2769 table = kmemdup(ipv6_route_table_template,
2770 sizeof(ipv6_route_table_template),
2771 GFP_KERNEL);
5ee09105
YH
2772
2773 if (table) {
2774 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2775 table[0].extra1 = net;
86393e52 2776 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2777 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2778 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2779 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2780 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2781 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2782 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2783 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2784 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2785 }
2786
760f2d01
DL
2787 return table;
2788}
1da177e4
LT
2789#endif
2790
2c8c1e72 2791static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2792{
633d424b 2793 int ret = -ENOMEM;
8ed67789 2794
86393e52
AD
2795 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2796 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2797
fc66f95c
ED
2798 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2799 goto out_ip6_dst_ops;
2800
8ed67789
DL
2801 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2802 sizeof(*net->ipv6.ip6_null_entry),
2803 GFP_KERNEL);
2804 if (!net->ipv6.ip6_null_entry)
fc66f95c 2805 goto out_ip6_dst_entries;
d8d1f30b 2806 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2807 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2808 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2809 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2810 ip6_template_metrics, true);
8ed67789
DL
2811
2812#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2813 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2814 sizeof(*net->ipv6.ip6_prohibit_entry),
2815 GFP_KERNEL);
68fffc67
PZ
2816 if (!net->ipv6.ip6_prohibit_entry)
2817 goto out_ip6_null_entry;
d8d1f30b 2818 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2819 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2820 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2821 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2822 ip6_template_metrics, true);
8ed67789
DL
2823
2824 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2825 sizeof(*net->ipv6.ip6_blk_hole_entry),
2826 GFP_KERNEL);
68fffc67
PZ
2827 if (!net->ipv6.ip6_blk_hole_entry)
2828 goto out_ip6_prohibit_entry;
d8d1f30b 2829 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2830 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2831 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2832 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2833 ip6_template_metrics, true);
8ed67789
DL
2834#endif
2835
b339a47c
PZ
2836 net->ipv6.sysctl.flush_delay = 0;
2837 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2838 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2839 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2840 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2841 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2842 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2843 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2844
cdb18761
DL
2845#ifdef CONFIG_PROC_FS
2846 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2847 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2848#endif
6891a346
BT
2849 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2850
8ed67789
DL
2851 ret = 0;
2852out:
2853 return ret;
f2fc6a54 2854
68fffc67
PZ
2855#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2856out_ip6_prohibit_entry:
2857 kfree(net->ipv6.ip6_prohibit_entry);
2858out_ip6_null_entry:
2859 kfree(net->ipv6.ip6_null_entry);
2860#endif
fc66f95c
ED
2861out_ip6_dst_entries:
2862 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2863out_ip6_dst_ops:
f2fc6a54 2864 goto out;
cdb18761
DL
2865}
2866
2c8c1e72 2867static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2868{
2869#ifdef CONFIG_PROC_FS
2870 proc_net_remove(net, "ipv6_route");
2871 proc_net_remove(net, "rt6_stats");
2872#endif
8ed67789
DL
2873 kfree(net->ipv6.ip6_null_entry);
2874#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2875 kfree(net->ipv6.ip6_prohibit_entry);
2876 kfree(net->ipv6.ip6_blk_hole_entry);
2877#endif
41bb78b4 2878 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2879}
2880
2881static struct pernet_operations ip6_route_net_ops = {
2882 .init = ip6_route_net_init,
2883 .exit = ip6_route_net_exit,
2884};
2885
8ed67789
DL
2886static struct notifier_block ip6_route_dev_notifier = {
2887 .notifier_call = ip6_route_dev_notify,
2888 .priority = 0,
2889};
2890
433d49c3 2891int __init ip6_route_init(void)
1da177e4 2892{
433d49c3
DL
2893 int ret;
2894
9a7ec3a9
DL
2895 ret = -ENOMEM;
2896 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2897 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2898 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2899 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2900 goto out;
14e50e57 2901
fc66f95c 2902 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2903 if (ret)
bdb3289f 2904 goto out_kmem_cache;
bdb3289f 2905
fc66f95c
ED
2906 ret = register_pernet_subsys(&ip6_route_net_ops);
2907 if (ret)
2908 goto out_dst_entries;
2909
5dc121e9
AE
2910 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2911
8ed67789
DL
2912 /* Registering of the loopback is done before this portion of code,
2913 * the loopback reference in rt6_info will not be taken, do it
2914 * manually for init_net */
d8d1f30b 2915 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2916 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2917 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2918 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2919 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2920 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2921 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2922 #endif
433d49c3
DL
2923 ret = fib6_init();
2924 if (ret)
8ed67789 2925 goto out_register_subsys;
433d49c3 2926
433d49c3
DL
2927 ret = xfrm6_init();
2928 if (ret)
cdb18761 2929 goto out_fib6_init;
c35b7e72 2930
433d49c3
DL
2931 ret = fib6_rules_init();
2932 if (ret)
2933 goto xfrm6_init;
7e5449c2 2934
433d49c3 2935 ret = -ENOBUFS;
c7ac8679
GR
2936 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2937 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2938 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 2939 goto fib6_rules_init;
c127ea2c 2940
8ed67789 2941 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2942 if (ret)
2943 goto fib6_rules_init;
8ed67789 2944
433d49c3
DL
2945out:
2946 return ret;
2947
2948fib6_rules_init:
433d49c3
DL
2949 fib6_rules_cleanup();
2950xfrm6_init:
433d49c3 2951 xfrm6_fini();
433d49c3 2952out_fib6_init:
433d49c3 2953 fib6_gc_cleanup();
8ed67789
DL
2954out_register_subsys:
2955 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
2956out_dst_entries:
2957 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 2958out_kmem_cache:
f2fc6a54 2959 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2960 goto out;
1da177e4
LT
2961}
2962
2963void ip6_route_cleanup(void)
2964{
8ed67789 2965 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2966 fib6_rules_cleanup();
1da177e4 2967 xfrm6_fini();
1da177e4 2968 fib6_gc_cleanup();
8ed67789 2969 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 2970 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 2971 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2972}