net: Add net_ratelimited_function and net_<level>_ratelimited macros
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4 28#include <linux/errno.h>
bc3b2d7f 29#include <linux/export.h>
1da177e4
LT
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
7bc570c8 38#include <linux/mroute6.h>
1da177e4 39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
5b7c931d 43#include <linux/nsproxy.h>
5a0e3ad6 44#include <linux/slab.h>
457c4cbc 45#include <net/net_namespace.h>
1da177e4
LT
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
8d71740c 56#include <net/netevent.h>
21713ebc 57#include <net/netlink.h>
1da177e4
LT
58
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
1716a961 65static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 66 const struct in6_addr *dest);
1da177e4 67static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 68static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 69static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
70static struct dst_entry *ip6_negative_advice(struct dst_entry *);
71static void ip6_dst_destroy(struct dst_entry *);
72static void ip6_dst_ifdown(struct dst_entry *,
73 struct net_device *dev, int how);
569d3645 74static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
75
76static int ip6_pkt_discard(struct sk_buff *skb);
77static int ip6_pkt_discard_out(struct sk_buff *skb);
78static void ip6_link_failure(struct sk_buff *skb);
79static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
80
70ceb4f5 81#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 82static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
83 const struct in6_addr *prefix, int prefixlen,
84 const struct in6_addr *gwaddr, int ifindex,
95c96174 85 unsigned int pref);
efa2cea0 86static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
87 const struct in6_addr *prefix, int prefixlen,
88 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
89#endif
90
06582540
DM
91static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
92{
93 struct rt6_info *rt = (struct rt6_info *) dst;
94 struct inet_peer *peer;
95 u32 *p = NULL;
96
8e2ec639
YZ
97 if (!(rt->dst.flags & DST_HOST))
98 return NULL;
99
06582540
DM
100 if (!rt->rt6i_peer)
101 rt6_bind_peer(rt, 1);
102
103 peer = rt->rt6i_peer;
104 if (peer) {
105 u32 *old_p = __DST_METRICS_PTR(old);
106 unsigned long prev, new;
107
108 p = peer->metrics;
109 if (inet_metrics_new(peer))
110 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111
112 new = (unsigned long) p;
113 prev = cmpxchg(&dst->_metrics, old, new);
114
115 if (prev != old) {
116 p = __DST_METRICS_PTR(prev);
117 if (prev & DST_METRICS_READ_ONLY)
118 p = NULL;
119 }
120 }
121 return p;
122}
123
39232973
DM
124static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
125{
126 struct in6_addr *p = &rt->rt6i_gateway;
127
a7563f34 128 if (!ipv6_addr_any(p))
39232973
DM
129 return (const void *) p;
130 return daddr;
131}
132
d3aaeb38
DM
133static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
134{
39232973
DM
135 struct rt6_info *rt = (struct rt6_info *) dst;
136 struct neighbour *n;
137
138 daddr = choose_neigh_daddr(rt, daddr);
139 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
140 if (n)
141 return n;
142 return neigh_create(&nd_tbl, daddr, dst->dev);
143}
144
8ade06c6 145static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 146{
8ade06c6
DM
147 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
148 if (!n) {
149 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
150 if (IS_ERR(n))
151 return PTR_ERR(n);
152 }
f83c7790
DM
153 dst_set_neighbour(&rt->dst, n);
154
155 return 0;
d3aaeb38
DM
156}
157
9a7ec3a9 158static struct dst_ops ip6_dst_ops_template = {
1da177e4 159 .family = AF_INET6,
09640e63 160 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
161 .gc = ip6_dst_gc,
162 .gc_thresh = 1024,
163 .check = ip6_dst_check,
0dbaee3b 164 .default_advmss = ip6_default_advmss,
ebb762f2 165 .mtu = ip6_mtu,
06582540 166 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
167 .destroy = ip6_dst_destroy,
168 .ifdown = ip6_dst_ifdown,
169 .negative_advice = ip6_negative_advice,
170 .link_failure = ip6_link_failure,
171 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 172 .local_out = __ip6_local_out,
d3aaeb38 173 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
174};
175
ebb762f2 176static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 177{
618f9bc7
SK
178 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
179
180 return mtu ? : dst->dev->mtu;
ec831ea7
RD
181}
182
14e50e57
DM
183static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
184{
185}
186
0972ddb2
HB
187static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
188 unsigned long old)
189{
190 return NULL;
191}
192
14e50e57
DM
193static struct dst_ops ip6_dst_blackhole_ops = {
194 .family = AF_INET6,
09640e63 195 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
196 .destroy = ip6_dst_destroy,
197 .check = ip6_dst_check,
ebb762f2 198 .mtu = ip6_blackhole_mtu,
214f45c9 199 .default_advmss = ip6_default_advmss,
14e50e57 200 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 201 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 202 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
203};
204
62fa8a84
DM
205static const u32 ip6_template_metrics[RTAX_MAX] = {
206 [RTAX_HOPLIMIT - 1] = 255,
207};
208
bdb3289f 209static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
210 .dst = {
211 .__refcnt = ATOMIC_INIT(1),
212 .__use = 1,
213 .obsolete = -1,
214 .error = -ENETUNREACH,
d8d1f30b
CG
215 .input = ip6_pkt_discard,
216 .output = ip6_pkt_discard_out,
1da177e4
LT
217 },
218 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 219 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
220 .rt6i_metric = ~(u32) 0,
221 .rt6i_ref = ATOMIC_INIT(1),
222};
223
101367c2
TG
224#ifdef CONFIG_IPV6_MULTIPLE_TABLES
225
6723ab54
DM
226static int ip6_pkt_prohibit(struct sk_buff *skb);
227static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 228
280a34c8 229static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
230 .dst = {
231 .__refcnt = ATOMIC_INIT(1),
232 .__use = 1,
233 .obsolete = -1,
234 .error = -EACCES,
d8d1f30b
CG
235 .input = ip6_pkt_prohibit,
236 .output = ip6_pkt_prohibit_out,
101367c2
TG
237 },
238 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 239 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
240 .rt6i_metric = ~(u32) 0,
241 .rt6i_ref = ATOMIC_INIT(1),
242};
243
bdb3289f 244static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
245 .dst = {
246 .__refcnt = ATOMIC_INIT(1),
247 .__use = 1,
248 .obsolete = -1,
249 .error = -EINVAL,
d8d1f30b
CG
250 .input = dst_discard,
251 .output = dst_discard,
101367c2
TG
252 },
253 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 254 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
255 .rt6i_metric = ~(u32) 0,
256 .rt6i_ref = ATOMIC_INIT(1),
257};
258
259#endif
260
1da177e4 261/* allocate dst with ip6_dst_ops */
5c1e6aa3 262static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
957c665f
DM
263 struct net_device *dev,
264 int flags)
1da177e4 265{
957c665f 266 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
cf911662 267
38308473 268 if (rt)
fbe58186 269 memset(&rt->rt6i_table, 0,
38308473 270 sizeof(*rt) - sizeof(struct dst_entry));
cf911662
DM
271
272 return rt;
1da177e4
LT
273}
274
275static void ip6_dst_destroy(struct dst_entry *dst)
276{
277 struct rt6_info *rt = (struct rt6_info *)dst;
278 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 279 struct inet_peer *peer = rt->rt6i_peer;
1da177e4 280
8e2ec639
YZ
281 if (!(rt->dst.flags & DST_HOST))
282 dst_destroy_metrics_generic(dst);
283
38308473 284 if (idev) {
1da177e4
LT
285 rt->rt6i_idev = NULL;
286 in6_dev_put(idev);
1ab1457c 287 }
1716a961
G
288
289 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
290 dst_release(dst->from);
291
b3419363 292 if (peer) {
b3419363
DM
293 rt->rt6i_peer = NULL;
294 inet_putpeer(peer);
295 }
296}
297
6431cbc2
DM
298static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
299
300static u32 rt6_peer_genid(void)
301{
302 return atomic_read(&__rt6_peer_genid);
303}
304
b3419363
DM
305void rt6_bind_peer(struct rt6_info *rt, int create)
306{
307 struct inet_peer *peer;
308
b3419363
DM
309 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
310 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
311 inet_putpeer(peer);
6431cbc2
DM
312 else
313 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
314}
315
316static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
317 int how)
318{
319 struct rt6_info *rt = (struct rt6_info *)dst;
320 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 321 struct net_device *loopback_dev =
c346dca1 322 dev_net(dev)->loopback_dev;
1da177e4 323
38308473 324 if (dev != loopback_dev && idev && idev->dev == dev) {
5a3e55d6
DL
325 struct inet6_dev *loopback_idev =
326 in6_dev_get(loopback_dev);
38308473 327 if (loopback_idev) {
1da177e4
LT
328 rt->rt6i_idev = loopback_idev;
329 in6_dev_put(idev);
330 }
331 }
332}
333
334static __inline__ int rt6_check_expired(const struct rt6_info *rt)
335{
1716a961
G
336 struct rt6_info *ort = NULL;
337
338 if (rt->rt6i_flags & RTF_EXPIRES) {
339 if (time_after(jiffies, rt->dst.expires))
340 return 1;
341 } else if (rt->dst.from) {
342 ort = (struct rt6_info *) rt->dst.from;
343 return (ort->rt6i_flags & RTF_EXPIRES) &&
344 time_after(jiffies, ort->dst.expires);
345 }
346 return 0;
1da177e4
LT
347}
348
b71d1d42 349static inline int rt6_need_strict(const struct in6_addr *daddr)
c71099ac 350{
a02cec21
ED
351 return ipv6_addr_type(daddr) &
352 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
353}
354
1da177e4 355/*
c71099ac 356 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
357 */
358
8ed67789
DL
359static inline struct rt6_info *rt6_device_match(struct net *net,
360 struct rt6_info *rt,
b71d1d42 361 const struct in6_addr *saddr,
1da177e4 362 int oif,
d420895e 363 int flags)
1da177e4
LT
364{
365 struct rt6_info *local = NULL;
366 struct rt6_info *sprt;
367
dd3abc4e
YH
368 if (!oif && ipv6_addr_any(saddr))
369 goto out;
370
d8d1f30b 371 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 372 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
373
374 if (oif) {
1da177e4
LT
375 if (dev->ifindex == oif)
376 return sprt;
377 if (dev->flags & IFF_LOOPBACK) {
38308473 378 if (!sprt->rt6i_idev ||
1da177e4 379 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 380 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 381 continue;
1ab1457c 382 if (local && (!oif ||
1da177e4
LT
383 local->rt6i_idev->dev->ifindex == oif))
384 continue;
385 }
386 local = sprt;
387 }
dd3abc4e
YH
388 } else {
389 if (ipv6_chk_addr(net, saddr, dev,
390 flags & RT6_LOOKUP_F_IFACE))
391 return sprt;
1da177e4 392 }
dd3abc4e 393 }
1da177e4 394
dd3abc4e 395 if (oif) {
1da177e4
LT
396 if (local)
397 return local;
398
d420895e 399 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 400 return net->ipv6.ip6_null_entry;
1da177e4 401 }
dd3abc4e 402out:
1da177e4
LT
403 return rt;
404}
405
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the neighbour attached to @rt is not in a
 * NUD_VALID state and more than rtr_probe_interval has passed since
 * neigh->updated, stamp the neighbour and send a Neighbour Solicitation to
 * its solicited-node multicast address.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute; here the limit is whatever cnf.rtr_probe_interval is set to.
 *
 * A NULL @rt or a route without neighbour is a no-op.
 *
 * NOTE(review): neigh->updated is written while only the read side of
 * neigh->lock is held -- confirm whether the write side is needed here.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	read_lock_bh(&neigh->lock);
	/* re-check the state under the lock before consulting the timer */
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (due) {
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	}
out:
	rcu_read_unlock();
}
#else
/* Probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
445
1da177e4 446/*
554cfb7e 447 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 448 */
b6f99a21 449static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 450{
d1918542 451 struct net_device *dev = rt->dst.dev;
161980f4 452 if (!oif || dev->ifindex == oif)
554cfb7e 453 return 2;
161980f4
DM
454 if ((dev->flags & IFF_LOOPBACK) &&
455 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
456 return 1;
457 return 0;
554cfb7e 458}
1da177e4 459
b6f99a21 460static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 461{
f2c31e32 462 struct neighbour *neigh;
398bcbeb 463 int m;
f2c31e32
ED
464
465 rcu_read_lock();
27217455 466 neigh = dst_get_neighbour_noref(&rt->dst);
4d0c5911
YH
467 if (rt->rt6i_flags & RTF_NONEXTHOP ||
468 !(rt->rt6i_flags & RTF_GATEWAY))
469 m = 1;
470 else if (neigh) {
554cfb7e
YH
471 read_lock_bh(&neigh->lock);
472 if (neigh->nud_state & NUD_VALID)
4d0c5911 473 m = 2;
398bcbeb
YH
474#ifdef CONFIG_IPV6_ROUTER_PREF
475 else if (neigh->nud_state & NUD_FAILED)
476 m = 0;
477#endif
478 else
ea73ee23 479 m = 1;
554cfb7e 480 read_unlock_bh(&neigh->lock);
398bcbeb
YH
481 } else
482 m = 0;
f2c31e32 483 rcu_read_unlock();
554cfb7e 484 return m;
1da177e4
LT
485}
486
554cfb7e
YH
487static int rt6_score_route(struct rt6_info *rt, int oif,
488 int strict)
1da177e4 489{
4d0c5911 490 int m, n;
1ab1457c 491
4d0c5911 492 m = rt6_check_dev(rt, oif);
77d16f45 493 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 494 return -1;
ebacaaa0
YH
495#ifdef CONFIG_IPV6_ROUTER_PREF
496 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
497#endif
4d0c5911 498 n = rt6_check_neigh(rt);
557e92ef 499 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
500 return -1;
501 return m;
502}
503
f11e6659
DM
504static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
505 int *mpri, struct rt6_info *match)
554cfb7e 506{
f11e6659
DM
507 int m;
508
509 if (rt6_check_expired(rt))
510 goto out;
511
512 m = rt6_score_route(rt, oif, strict);
513 if (m < 0)
514 goto out;
515
516 if (m > *mpri) {
517 if (strict & RT6_LOOKUP_F_REACHABLE)
518 rt6_probe(match);
519 *mpri = m;
520 match = rt;
521 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
522 rt6_probe(rt);
523 }
524
525out:
526 return match;
527}
528
529static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
530 struct rt6_info *rr_head,
531 u32 metric, int oif, int strict)
532{
533 struct rt6_info *rt, *match;
554cfb7e 534 int mpri = -1;
1da177e4 535
f11e6659
DM
536 match = NULL;
537 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 538 rt = rt->dst.rt6_next)
f11e6659
DM
539 match = find_match(rt, oif, strict, &mpri, match);
540 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 541 rt = rt->dst.rt6_next)
f11e6659 542 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 543
f11e6659
DM
544 return match;
545}
1da177e4 546
f11e6659
DM
547static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
548{
549 struct rt6_info *match, *rt0;
8ed67789 550 struct net *net;
1da177e4 551
f11e6659
DM
552 rt0 = fn->rr_ptr;
553 if (!rt0)
554 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 555
f11e6659 556 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 557
554cfb7e 558 if (!match &&
f11e6659 559 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 560 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 561
554cfb7e 562 /* no entries matched; do round-robin */
f11e6659
DM
563 if (!next || next->rt6i_metric != rt0->rt6i_metric)
564 next = fn->leaf;
565
566 if (next != rt0)
567 fn->rr_ptr = next;
1da177e4 568 }
1da177e4 569
d1918542 570 net = dev_net(rt0->dst.dev);
a02cec21 571 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
572}
573
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option @opt of @len bytes seen on @dev, with
 * @gwaddr as the gateway of the resulting route.
 *
 * After validating the option, the matching route-info route is looked up
 * and then deleted (lifetime 0), created (no route yet, lifetime non-zero)
 * or has its preference bits refreshed.  Its expiry is set from the
 * lifetime, or cleared when the lifetime is not finite.
 *
 * Returns 0, or -EINVAL if the option is too short, its length/prefix_len
 * fields are inconsistent, or the preference is invalid.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned int min_length;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;

	if (rinfo->prefix_len > 64)
		min_length = 2;
	else if (rinfo->prefix_len > 0)
		min_length = 1;
	else
		min_length = 0;
	if (rinfo->length < min_length)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	}

	if (rt) {
		if (addrconf_finite_timeout(lifetime))
			rt6_set_expires(rt, jiffies + HZ * lifetime);
		else
			rt6_clean_expires(rt);

		dst_release(&rt->dst);
	}
	return 0;
}
#endif
646
/*
 * Helper for the lookup functions below.  Expects local variables `rt` and
 * `fn` plus labels `out` and `restart` in the expanding function.
 *
 * If `rt` is the null entry, climb from `fn` towards the root (descending
 * into a parent's source subtree via fib6_lookup() when the parent has one
 * that is not `fn` itself).  Jumps to `restart` at the first node carrying
 * RTN_RTINFO, or to `out` when a RTN_TL_ROOT node is reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 664
8ed67789
DL
665static struct rt6_info *ip6_pol_route_lookup(struct net *net,
666 struct fib6_table *table,
4c9483b2 667 struct flowi6 *fl6, int flags)
1da177e4
LT
668{
669 struct fib6_node *fn;
670 struct rt6_info *rt;
671
c71099ac 672 read_lock_bh(&table->tb6_lock);
4c9483b2 673 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
674restart:
675 rt = fn->leaf;
4c9483b2
DM
676 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
677 BACKTRACK(net, &fl6->saddr);
c71099ac 678out:
d8d1f30b 679 dst_use(&rt->dst, jiffies);
c71099ac 680 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
681 return rt;
682
683}
684
ea6e574e
FW
685struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
686 int flags)
687{
688 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
689}
690EXPORT_SYMBOL_GPL(ip6_route_lookup);
691
9acd9f3a
YH
692struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
693 const struct in6_addr *saddr, int oif, int strict)
c71099ac 694{
4c9483b2
DM
695 struct flowi6 fl6 = {
696 .flowi6_oif = oif,
697 .daddr = *daddr,
c71099ac
TG
698 };
699 struct dst_entry *dst;
77d16f45 700 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 701
adaa70bb 702 if (saddr) {
4c9483b2 703 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
704 flags |= RT6_LOOKUP_F_HAS_SADDR;
705 }
706
4c9483b2 707 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
708 if (dst->error == 0)
709 return (struct rt6_info *) dst;
710
711 dst_release(dst);
712
1da177e4
LT
713 return NULL;
714}
715
7159039a
YH
716EXPORT_SYMBOL(rt6_lookup);
717
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason the
   route is freed. In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */
723
86872cb5 724static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
725{
726 int err;
c71099ac 727 struct fib6_table *table;
1da177e4 728
c71099ac
TG
729 table = rt->rt6i_table;
730 write_lock_bh(&table->tb6_lock);
86872cb5 731 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 732 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
733
734 return err;
735}
736
40e22e8f
TG
737int ip6_ins_rt(struct rt6_info *rt)
738{
4d1169c1 739 struct nl_info info = {
d1918542 740 .nl_net = dev_net(rt->dst.dev),
4d1169c1 741 };
528c4ceb 742 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
743}
744
1716a961 745static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 746 const struct in6_addr *daddr,
b71d1d42 747 const struct in6_addr *saddr)
1da177e4 748{
1da177e4
LT
749 struct rt6_info *rt;
750
751 /*
752 * Clone the route.
753 */
754
21efcfa0 755 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
756
757 if (rt) {
14deae41
DM
758 int attempts = !in_softirq();
759
38308473 760 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 761 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 762 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 763 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 764 rt->rt6i_gateway = *daddr;
58c4fb86 765 }
1da177e4 766
1da177e4 767 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
768
769#ifdef CONFIG_IPV6_SUBTREES
770 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 771 rt->rt6i_src.addr = *saddr;
1da177e4
LT
772 rt->rt6i_src.plen = 128;
773 }
774#endif
775
14deae41 776 retry:
8ade06c6 777 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 778 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
779 int saved_rt_min_interval =
780 net->ipv6.sysctl.ip6_rt_gc_min_interval;
781 int saved_rt_elasticity =
782 net->ipv6.sysctl.ip6_rt_gc_elasticity;
783
784 if (attempts-- > 0) {
785 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
786 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
787
86393e52 788 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
789
790 net->ipv6.sysctl.ip6_rt_gc_elasticity =
791 saved_rt_elasticity;
792 net->ipv6.sysctl.ip6_rt_gc_min_interval =
793 saved_rt_min_interval;
794 goto retry;
795 }
796
797 if (net_ratelimit())
798 printk(KERN_WARNING
7e1b33e5 799 "ipv6: Neighbour table overflow.\n");
d8d1f30b 800 dst_free(&rt->dst);
14deae41
DM
801 return NULL;
802 }
95a9a5ba 803 }
1da177e4 804
95a9a5ba
YH
805 return rt;
806}
1da177e4 807
21efcfa0
ED
808static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
809 const struct in6_addr *daddr)
299d9939 810{
21efcfa0
ED
811 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
812
299d9939 813 if (rt) {
299d9939 814 rt->rt6i_flags |= RTF_CACHE;
27217455 815 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
299d9939
YH
816 }
817 return rt;
818}
819
8ed67789 820static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 821 struct flowi6 *fl6, int flags)
1da177e4
LT
822{
823 struct fib6_node *fn;
519fbd87 824 struct rt6_info *rt, *nrt;
c71099ac 825 int strict = 0;
1da177e4 826 int attempts = 3;
519fbd87 827 int err;
53b7997f 828 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 829
77d16f45 830 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
831
832relookup:
c71099ac 833 read_lock_bh(&table->tb6_lock);
1da177e4 834
8238dd06 835restart_2:
4c9483b2 836 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
837
838restart:
4acad72d 839 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 840
4c9483b2 841 BACKTRACK(net, &fl6->saddr);
8ed67789 842 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 843 rt->rt6i_flags & RTF_CACHE)
1ddef044 844 goto out;
1da177e4 845
d8d1f30b 846 dst_hold(&rt->dst);
c71099ac 847 read_unlock_bh(&table->tb6_lock);
fb9de91e 848
27217455 849 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 850 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 851 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 852 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
853 else
854 goto out2;
e40cf353 855
d8d1f30b 856 dst_release(&rt->dst);
8ed67789 857 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 858
d8d1f30b 859 dst_hold(&rt->dst);
519fbd87 860 if (nrt) {
40e22e8f 861 err = ip6_ins_rt(nrt);
519fbd87 862 if (!err)
1da177e4 863 goto out2;
1da177e4 864 }
1da177e4 865
519fbd87
YH
866 if (--attempts <= 0)
867 goto out2;
868
869 /*
c71099ac 870 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
871 * released someone could insert this route. Relookup.
872 */
d8d1f30b 873 dst_release(&rt->dst);
519fbd87
YH
874 goto relookup;
875
876out:
8238dd06
YH
877 if (reachable) {
878 reachable = 0;
879 goto restart_2;
880 }
d8d1f30b 881 dst_hold(&rt->dst);
c71099ac 882 read_unlock_bh(&table->tb6_lock);
1da177e4 883out2:
d8d1f30b
CG
884 rt->dst.lastuse = jiffies;
885 rt->dst.__use++;
c71099ac
TG
886
887 return rt;
1da177e4
LT
888}
889
8ed67789 890static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 891 struct flowi6 *fl6, int flags)
4acad72d 892{
4c9483b2 893 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
894}
895
72331bc0
SL
896static struct dst_entry *ip6_route_input_lookup(struct net *net,
897 struct net_device *dev,
898 struct flowi6 *fl6, int flags)
899{
900 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
901 flags |= RT6_LOOKUP_F_IFACE;
902
903 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
904}
905
c71099ac
TG
906void ip6_route_input(struct sk_buff *skb)
907{
b71d1d42 908 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 909 struct net *net = dev_net(skb->dev);
adaa70bb 910 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
911 struct flowi6 fl6 = {
912 .flowi6_iif = skb->dev->ifindex,
913 .daddr = iph->daddr,
914 .saddr = iph->saddr,
38308473 915 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
916 .flowi6_mark = skb->mark,
917 .flowi6_proto = iph->nexthdr,
c71099ac 918 };
adaa70bb 919
72331bc0 920 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
921}
922
8ed67789 923static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 924 struct flowi6 *fl6, int flags)
1da177e4 925{
4c9483b2 926 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
927}
928
9c7a4f9c 929struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 930 struct flowi6 *fl6)
c71099ac
TG
931{
932 int flags = 0;
933
4c9483b2 934 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 935 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 936
4c9483b2 937 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 938 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
939 else if (sk)
940 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 941
4c9483b2 942 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
943}
944
7159039a 945EXPORT_SYMBOL(ip6_route_output);
1da177e4 946
2774c131 947struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 948{
5c1e6aa3 949 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
950 struct dst_entry *new = NULL;
951
5c1e6aa3 952 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 953 if (rt) {
cf911662
DM
954 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
955
d8d1f30b 956 new = &rt->dst;
14e50e57 957
14e50e57 958 new->__use = 1;
352e512c
HX
959 new->input = dst_discard;
960 new->output = dst_discard;
14e50e57 961
21efcfa0
ED
962 if (dst_metrics_read_only(&ort->dst))
963 new->_metrics = ort->dst._metrics;
964 else
965 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
966 rt->rt6i_idev = ort->rt6i_idev;
967 if (rt->rt6i_idev)
968 in6_dev_hold(rt->rt6i_idev);
14e50e57 969
4e3fd7a0 970 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
971 rt->rt6i_flags = ort->rt6i_flags;
972 rt6_clean_expires(rt);
14e50e57
DM
973 rt->rt6i_metric = 0;
974
975 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
976#ifdef CONFIG_IPV6_SUBTREES
977 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
978#endif
979
980 dst_free(new);
981 }
982
69ead7af
DM
983 dst_release(dst_orig);
984 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 985}
14e50e57 986
1da177e4
LT
987/*
988 * Destination cache support functions
989 */
990
991static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
992{
993 struct rt6_info *rt;
994
995 rt = (struct rt6_info *) dst;
996
6431cbc2
DM
997 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
998 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
999 if (!rt->rt6i_peer)
1000 rt6_bind_peer(rt, 0);
1001 rt->rt6i_peer_genid = rt6_peer_genid();
1002 }
1da177e4 1003 return dst;
6431cbc2 1004 }
1da177e4
LT
1005 return NULL;
1006}
1007
1008static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1009{
1010 struct rt6_info *rt = (struct rt6_info *) dst;
1011
1012 if (rt) {
54c1a859
YH
1013 if (rt->rt6i_flags & RTF_CACHE) {
1014 if (rt6_check_expired(rt)) {
1015 ip6_del_rt(rt);
1016 dst = NULL;
1017 }
1018 } else {
1da177e4 1019 dst_release(dst);
54c1a859
YH
1020 dst = NULL;
1021 }
1da177e4 1022 }
54c1a859 1023 return dst;
1da177e4
LT
1024}
1025
1026static void ip6_link_failure(struct sk_buff *skb)
1027{
1028 struct rt6_info *rt;
1029
3ffe533c 1030 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1031
adf30907 1032 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1033 if (rt) {
1716a961
G
1034 if (rt->rt6i_flags & RTF_CACHE)
1035 rt6_update_expires(rt, 0);
1036 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1da177e4
LT
1037 rt->rt6i_node->fn_sernum = -1;
1038 }
1039}
1040
1041static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1042{
1043 struct rt6_info *rt6 = (struct rt6_info*)dst;
1044
1045 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1046 rt6->rt6i_flags |= RTF_MODIFIED;
1047 if (mtu < IPV6_MIN_MTU) {
defb3519 1048 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1049 mtu = IPV6_MIN_MTU;
defb3519
DM
1050 features |= RTAX_FEATURE_ALLFRAG;
1051 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1052 }
defb3519 1053 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
1054 }
1055}
1056
0dbaee3b 1057static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1058{
0dbaee3b
DM
1059 struct net_device *dev = dst->dev;
1060 unsigned int mtu = dst_mtu(dst);
1061 struct net *net = dev_net(dev);
1062
1da177e4
LT
1063 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1064
5578689a
DL
1065 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1066 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1067
1068 /*
1ab1457c
YH
1069 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1070 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1071 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1072 * rely only on pmtu discovery"
1073 */
1074 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1075 mtu = IPV6_MAXPLEN;
1076 return mtu;
1077}
1078
ebb762f2 1079static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1080{
d33e4553 1081 struct inet6_dev *idev;
618f9bc7
SK
1082 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1083
1084 if (mtu)
1085 return mtu;
1086
1087 mtu = IPV6_MIN_MTU;
d33e4553
DM
1088
1089 rcu_read_lock();
1090 idev = __in6_dev_get(dst->dev);
1091 if (idev)
1092 mtu = idev->cnf.mtu6;
1093 rcu_read_unlock();
1094
1095 return mtu;
1096}
1097
3b00944c
YH
/*
 * Singly linked list (chained through dst->next) of the dst entries handed
 * out by icmp6_dst_alloc(), and the spinlock that guards it.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1100
3b00944c 1101struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1102 struct neighbour *neigh,
87a11578 1103 struct flowi6 *fl6)
1da177e4 1104{
87a11578 1105 struct dst_entry *dst;
1da177e4
LT
1106 struct rt6_info *rt;
1107 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1108 struct net *net = dev_net(dev);
1da177e4 1109
38308473 1110 if (unlikely(!idev))
122bdf67 1111 return ERR_PTR(-ENODEV);
1da177e4 1112
957c665f 1113 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
38308473 1114 if (unlikely(!rt)) {
1da177e4 1115 in6_dev_put(idev);
87a11578 1116 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1117 goto out;
1118 }
1119
1da177e4
LT
1120 if (neigh)
1121 neigh_hold(neigh);
14deae41 1122 else {
f83c7790 1123 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
b43faac6 1124 if (IS_ERR(neigh)) {
252c3d84 1125 in6_dev_put(idev);
b43faac6
DM
1126 dst_free(&rt->dst);
1127 return ERR_CAST(neigh);
1128 }
14deae41 1129 }
1da177e4 1130
8e2ec639
YZ
1131 rt->dst.flags |= DST_HOST;
1132 rt->dst.output = ip6_output;
69cce1d1 1133 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1134 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1135 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1136 rt->rt6i_dst.plen = 128;
1137 rt->rt6i_idev = idev;
7011687f 1138 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1139
3b00944c 1140 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1141 rt->dst.next = icmp6_dst_gc_list;
1142 icmp6_dst_gc_list = &rt->dst;
3b00944c 1143 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1144
5578689a 1145 fib6_force_start_gc(net);
1da177e4 1146
87a11578
DM
1147 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1148
1da177e4 1149out:
87a11578 1150 return dst;
1da177e4
LT
1151}
1152
3d0f24a7 1153int icmp6_dst_gc(void)
1da177e4 1154{
e9476e95 1155 struct dst_entry *dst, **pprev;
3d0f24a7 1156 int more = 0;
1da177e4 1157
3b00944c
YH
1158 spin_lock_bh(&icmp6_dst_lock);
1159 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1160
1da177e4
LT
1161 while ((dst = *pprev) != NULL) {
1162 if (!atomic_read(&dst->__refcnt)) {
1163 *pprev = dst->next;
1164 dst_free(dst);
1da177e4
LT
1165 } else {
1166 pprev = &dst->next;
3d0f24a7 1167 ++more;
1da177e4
LT
1168 }
1169 }
1170
3b00944c 1171 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1172
3d0f24a7 1173 return more;
1da177e4
LT
1174}
1175
1e493d19
DM
1176static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1177 void *arg)
1178{
1179 struct dst_entry *dst, **pprev;
1180
1181 spin_lock_bh(&icmp6_dst_lock);
1182 pprev = &icmp6_dst_gc_list;
1183 while ((dst = *pprev) != NULL) {
1184 struct rt6_info *rt = (struct rt6_info *) dst;
1185 if (func(rt, arg)) {
1186 *pprev = dst->next;
1187 dst_free(dst);
1188 } else {
1189 pprev = &dst->next;
1190 }
1191 }
1192 spin_unlock_bh(&icmp6_dst_lock);
1193}
1194
569d3645 1195static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1196{
1da177e4 1197 unsigned long now = jiffies;
86393e52 1198 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1199 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1200 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1201 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1202 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1203 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1204 int entries;
7019b78e 1205
fc66f95c 1206 entries = dst_entries_get_fast(ops);
7019b78e 1207 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1208 entries <= rt_max_size)
1da177e4
LT
1209 goto out;
1210
6891a346
BT
1211 net->ipv6.ip6_rt_gc_expire++;
1212 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1213 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1214 entries = dst_entries_get_slow(ops);
1215 if (entries < ops->gc_thresh)
7019b78e 1216 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1217out:
7019b78e 1218 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1219 return entries > rt_max_size;
1da177e4
LT
1220}
1221
1222/* Clean host part of a prefix. Not necessary in radix tree,
1223 but results in cleaner routing tables.
1224
1225 Remove it only when all the things will work!
1226 */
1227
6b75d090 1228int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1229{
5170ae82 1230 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1231 if (hoplimit == 0) {
6b75d090 1232 struct net_device *dev = dst->dev;
c68f24cc
ED
1233 struct inet6_dev *idev;
1234
1235 rcu_read_lock();
1236 idev = __in6_dev_get(dev);
1237 if (idev)
6b75d090 1238 hoplimit = idev->cnf.hop_limit;
c68f24cc 1239 else
53b7997f 1240 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1241 rcu_read_unlock();
1da177e4
LT
1242 }
1243 return hoplimit;
1244}
abbf46ae 1245EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1246
1247/*
1248 *
1249 */
1250
86872cb5 1251int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1252{
1253 int err;
5578689a 1254 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1255 struct rt6_info *rt = NULL;
1256 struct net_device *dev = NULL;
1257 struct inet6_dev *idev = NULL;
c71099ac 1258 struct fib6_table *table;
1da177e4
LT
1259 int addr_type;
1260
86872cb5 1261 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1262 return -EINVAL;
1263#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1264 if (cfg->fc_src_len)
1da177e4
LT
1265 return -EINVAL;
1266#endif
86872cb5 1267 if (cfg->fc_ifindex) {
1da177e4 1268 err = -ENODEV;
5578689a 1269 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1270 if (!dev)
1271 goto out;
1272 idev = in6_dev_get(dev);
1273 if (!idev)
1274 goto out;
1275 }
1276
86872cb5
TG
1277 if (cfg->fc_metric == 0)
1278 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1279
d71314b4 1280 err = -ENOBUFS;
38308473
DM
1281 if (cfg->fc_nlinfo.nlh &&
1282 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1283 table = fib6_get_table(net, cfg->fc_table);
38308473 1284 if (!table) {
d71314b4
MV
1285 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1286 table = fib6_new_table(net, cfg->fc_table);
1287 }
1288 } else {
1289 table = fib6_new_table(net, cfg->fc_table);
1290 }
38308473
DM
1291
1292 if (!table)
c71099ac 1293 goto out;
c71099ac 1294
957c665f 1295 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1da177e4 1296
38308473 1297 if (!rt) {
1da177e4
LT
1298 err = -ENOMEM;
1299 goto out;
1300 }
1301
d8d1f30b 1302 rt->dst.obsolete = -1;
1716a961
G
1303
1304 if (cfg->fc_flags & RTF_EXPIRES)
1305 rt6_set_expires(rt, jiffies +
1306 clock_t_to_jiffies(cfg->fc_expires));
1307 else
1308 rt6_clean_expires(rt);
1da177e4 1309
86872cb5
TG
1310 if (cfg->fc_protocol == RTPROT_UNSPEC)
1311 cfg->fc_protocol = RTPROT_BOOT;
1312 rt->rt6i_protocol = cfg->fc_protocol;
1313
1314 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1315
1316 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1317 rt->dst.input = ip6_mc_input;
ab79ad14
1318 else if (cfg->fc_flags & RTF_LOCAL)
1319 rt->dst.input = ip6_input;
1da177e4 1320 else
d8d1f30b 1321 rt->dst.input = ip6_forward;
1da177e4 1322
d8d1f30b 1323 rt->dst.output = ip6_output;
1da177e4 1324
86872cb5
TG
1325 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1326 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1327 if (rt->rt6i_dst.plen == 128)
11d53b49 1328 rt->dst.flags |= DST_HOST;
1da177e4 1329
8e2ec639
YZ
1330 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1331 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1332 if (!metrics) {
1333 err = -ENOMEM;
1334 goto out;
1335 }
1336 dst_init_metrics(&rt->dst, metrics, 0);
1337 }
1da177e4 1338#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1339 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1340 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1341#endif
1342
86872cb5 1343 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1344
1345 /* We cannot add true routes via loopback here,
1346 they would result in kernel looping; promote them to reject routes
1347 */
86872cb5 1348 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1349 (dev && (dev->flags & IFF_LOOPBACK) &&
1350 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1351 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1352 /* hold loopback dev/idev if we haven't done so. */
5578689a 1353 if (dev != net->loopback_dev) {
1da177e4
LT
1354 if (dev) {
1355 dev_put(dev);
1356 in6_dev_put(idev);
1357 }
5578689a 1358 dev = net->loopback_dev;
1da177e4
LT
1359 dev_hold(dev);
1360 idev = in6_dev_get(dev);
1361 if (!idev) {
1362 err = -ENODEV;
1363 goto out;
1364 }
1365 }
d8d1f30b
CG
1366 rt->dst.output = ip6_pkt_discard_out;
1367 rt->dst.input = ip6_pkt_discard;
1368 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1369 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1370 goto install_route;
1371 }
1372
86872cb5 1373 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1374 const struct in6_addr *gw_addr;
1da177e4
LT
1375 int gwa_type;
1376
86872cb5 1377 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1378 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1379 gwa_type = ipv6_addr_type(gw_addr);
1380
1381 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1382 struct rt6_info *grt;
1383
1384 /* IPv6 strictly inhibits using not link-local
1385 addresses as nexthop address.
1386 Otherwise, router will not able to send redirects.
1387 It is very good, but in some (rare!) circumstances
1388 (SIT, PtP, NBMA NOARP links) it is handy to allow
1389 some exceptions. --ANK
1390 */
1391 err = -EINVAL;
38308473 1392 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1393 goto out;
1394
5578689a 1395 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1396
1397 err = -EHOSTUNREACH;
38308473 1398 if (!grt)
1da177e4
LT
1399 goto out;
1400 if (dev) {
d1918542 1401 if (dev != grt->dst.dev) {
d8d1f30b 1402 dst_release(&grt->dst);
1da177e4
LT
1403 goto out;
1404 }
1405 } else {
d1918542 1406 dev = grt->dst.dev;
1da177e4
LT
1407 idev = grt->rt6i_idev;
1408 dev_hold(dev);
1409 in6_dev_hold(grt->rt6i_idev);
1410 }
38308473 1411 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1412 err = 0;
d8d1f30b 1413 dst_release(&grt->dst);
1da177e4
LT
1414
1415 if (err)
1416 goto out;
1417 }
1418 err = -EINVAL;
38308473 1419 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1420 goto out;
1421 }
1422
1423 err = -ENODEV;
38308473 1424 if (!dev)
1da177e4
LT
1425 goto out;
1426
c3968a85
DW
1427 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1428 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1429 err = -EINVAL;
1430 goto out;
1431 }
4e3fd7a0 1432 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1433 rt->rt6i_prefsrc.plen = 128;
1434 } else
1435 rt->rt6i_prefsrc.plen = 0;
1436
86872cb5 1437 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1438 err = rt6_bind_neighbour(rt, dev);
f83c7790 1439 if (err)
1da177e4 1440 goto out;
1da177e4
LT
1441 }
1442
86872cb5 1443 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1444
1445install_route:
86872cb5
TG
1446 if (cfg->fc_mx) {
1447 struct nlattr *nla;
1448 int remaining;
1449
1450 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1451 int type = nla_type(nla);
86872cb5
TG
1452
1453 if (type) {
1454 if (type > RTAX_MAX) {
1da177e4
LT
1455 err = -EINVAL;
1456 goto out;
1457 }
86872cb5 1458
defb3519 1459 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1460 }
1da177e4
LT
1461 }
1462 }
1463
d8d1f30b 1464 rt->dst.dev = dev;
1da177e4 1465 rt->rt6i_idev = idev;
c71099ac 1466 rt->rt6i_table = table;
63152fc0 1467
c346dca1 1468 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1469
86872cb5 1470 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1471
1472out:
1473 if (dev)
1474 dev_put(dev);
1475 if (idev)
1476 in6_dev_put(idev);
1477 if (rt)
d8d1f30b 1478 dst_free(&rt->dst);
1da177e4
LT
1479 return err;
1480}
1481
86872cb5 1482static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1483{
1484 int err;
c71099ac 1485 struct fib6_table *table;
d1918542 1486 struct net *net = dev_net(rt->dst.dev);
1da177e4 1487
8ed67789 1488 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1489 return -ENOENT;
1490
c71099ac
TG
1491 table = rt->rt6i_table;
1492 write_lock_bh(&table->tb6_lock);
1da177e4 1493
86872cb5 1494 err = fib6_del(rt, info);
d8d1f30b 1495 dst_release(&rt->dst);
1da177e4 1496
c71099ac 1497 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1498
1499 return err;
1500}
1501
e0a1ad73
TG
1502int ip6_del_rt(struct rt6_info *rt)
1503{
4d1169c1 1504 struct nl_info info = {
d1918542 1505 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1506 };
528c4ceb 1507 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1508}
1509
86872cb5 1510static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1511{
c71099ac 1512 struct fib6_table *table;
1da177e4
LT
1513 struct fib6_node *fn;
1514 struct rt6_info *rt;
1515 int err = -ESRCH;
1516
5578689a 1517 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1518 if (!table)
c71099ac
TG
1519 return err;
1520
1521 read_lock_bh(&table->tb6_lock);
1da177e4 1522
c71099ac 1523 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1524 &cfg->fc_dst, cfg->fc_dst_len,
1525 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1526
1da177e4 1527 if (fn) {
d8d1f30b 1528 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1529 if (cfg->fc_ifindex &&
d1918542
DM
1530 (!rt->dst.dev ||
1531 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1532 continue;
86872cb5
TG
1533 if (cfg->fc_flags & RTF_GATEWAY &&
1534 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1535 continue;
86872cb5 1536 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1537 continue;
d8d1f30b 1538 dst_hold(&rt->dst);
c71099ac 1539 read_unlock_bh(&table->tb6_lock);
1da177e4 1540
86872cb5 1541 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1542 }
1543 }
c71099ac 1544 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1545
1546 return err;
1547}
1548
1549/*
1550 * Handle redirects
1551 */
a6279458 1552struct ip6rd_flowi {
4c9483b2 1553 struct flowi6 fl6;
a6279458
YH
1554 struct in6_addr gateway;
1555};
1556
8ed67789
DL
1557static struct rt6_info *__ip6_route_redirect(struct net *net,
1558 struct fib6_table *table,
4c9483b2 1559 struct flowi6 *fl6,
a6279458 1560 int flags)
1da177e4 1561{
4c9483b2 1562 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1563 struct rt6_info *rt;
e843b9e1 1564 struct fib6_node *fn;
c71099ac 1565
1da177e4 1566 /*
e843b9e1
YH
1567 * Get the "current" route for this destination and
1568 * check if the redirect has come from approriate router.
1569 *
1570 * RFC 2461 specifies that redirects should only be
1571 * accepted if they come from the nexthop to the target.
1572 * Due to the way the routes are chosen, this notion
1573 * is a bit fuzzy and one might need to check all possible
1574 * routes.
1da177e4 1575 */
1da177e4 1576
c71099ac 1577 read_lock_bh(&table->tb6_lock);
4c9483b2 1578 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1579restart:
d8d1f30b 1580 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1581 /*
1582 * Current route is on-link; redirect is always invalid.
1583 *
1584 * Seems, previous statement is not true. It could
1585 * be node, which looks for us as on-link (f.e. proxy ndisc)
1586 * But then router serving it might decide, that we should
1587 * know truth 8)8) --ANK (980726).
1588 */
1589 if (rt6_check_expired(rt))
1590 continue;
1591 if (!(rt->rt6i_flags & RTF_GATEWAY))
1592 continue;
d1918542 1593 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
e843b9e1 1594 continue;
a6279458 1595 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1596 continue;
1597 break;
1598 }
a6279458 1599
cb15d9c2 1600 if (!rt)
8ed67789 1601 rt = net->ipv6.ip6_null_entry;
4c9483b2 1602 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1603out:
d8d1f30b 1604 dst_hold(&rt->dst);
a6279458 1605
c71099ac 1606 read_unlock_bh(&table->tb6_lock);
e843b9e1 1607
a6279458
YH
1608 return rt;
1609};
1610
b71d1d42
ED
1611static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1612 const struct in6_addr *src,
1613 const struct in6_addr *gateway,
a6279458
YH
1614 struct net_device *dev)
1615{
adaa70bb 1616 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1617 struct net *net = dev_net(dev);
a6279458 1618 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1619 .fl6 = {
1620 .flowi6_oif = dev->ifindex,
1621 .daddr = *dest,
1622 .saddr = *src,
a6279458 1623 },
a6279458 1624 };
adaa70bb 1625
4e3fd7a0 1626 rdfl.gateway = *gateway;
86c36ce4 1627
adaa70bb
TG
1628 if (rt6_need_strict(dest))
1629 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1630
4c9483b2 1631 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1632 flags, __ip6_route_redirect);
a6279458
YH
1633}
1634
b71d1d42
ED
1635void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1636 const struct in6_addr *saddr,
a6279458
YH
1637 struct neighbour *neigh, u8 *lladdr, int on_link)
1638{
1639 struct rt6_info *rt, *nrt = NULL;
1640 struct netevent_redirect netevent;
c346dca1 1641 struct net *net = dev_net(neigh->dev);
a6279458
YH
1642
1643 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1644
8ed67789 1645 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1646 if (net_ratelimit())
1647 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1648 "for redirect target\n");
a6279458 1649 goto out;
1da177e4
LT
1650 }
1651
1da177e4
LT
1652 /*
1653 * We have finally decided to accept it.
1654 */
1655
1ab1457c 1656 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1657 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1658 NEIGH_UPDATE_F_OVERRIDE|
1659 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1660 NEIGH_UPDATE_F_ISROUTER))
1661 );
1662
1663 /*
1664 * Redirect received -> path was valid.
1665 * Look, redirects are sent only in response to data packets,
1666 * so that this nexthop apparently is reachable. --ANK
1667 */
d8d1f30b 1668 dst_confirm(&rt->dst);
1da177e4
LT
1669
1670 /* Duplicate redirect: silently ignore. */
27217455 1671 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1da177e4
LT
1672 goto out;
1673
21efcfa0 1674 nrt = ip6_rt_copy(rt, dest);
38308473 1675 if (!nrt)
1da177e4
LT
1676 goto out;
1677
1678 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1679 if (on_link)
1680 nrt->rt6i_flags &= ~RTF_GATEWAY;
1681
4e3fd7a0 1682 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
69cce1d1 1683 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1684
40e22e8f 1685 if (ip6_ins_rt(nrt))
1da177e4
LT
1686 goto out;
1687
d8d1f30b
CG
1688 netevent.old = &rt->dst;
1689 netevent.new = &nrt->dst;
8d71740c
TT
1690 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1691
38308473 1692 if (rt->rt6i_flags & RTF_CACHE) {
e0a1ad73 1693 ip6_del_rt(rt);
1da177e4
LT
1694 return;
1695 }
1696
1697out:
d8d1f30b 1698 dst_release(&rt->dst);
1da177e4
LT
1699}
1700
1701/*
1702 * Handle ICMP "packet too big" messages
1703 * i.e. Path MTU discovery
1704 */
1705
b71d1d42 1706static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1707 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1708{
1709 struct rt6_info *rt, *nrt;
1710 int allfrag = 0;
d3052b55 1711again:
ae878ae2 1712 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
38308473 1713 if (!rt)
1da177e4
LT
1714 return;
1715
d3052b55
AV
1716 if (rt6_check_expired(rt)) {
1717 ip6_del_rt(rt);
1718 goto again;
1719 }
1720
d8d1f30b 1721 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1722 goto out;
1723
1724 if (pmtu < IPV6_MIN_MTU) {
1725 /*
1ab1457c 1726 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1727 * MTU (1280) and a fragment header should always be included
1728 * after a node receiving Too Big message reporting PMTU is
1729 * less than the IPv6 Minimum Link MTU.
1730 */
1731 pmtu = IPV6_MIN_MTU;
1732 allfrag = 1;
1733 }
1734
1735 /* New mtu received -> path was valid.
1736 They are sent only in response to data packets,
1737 so that this nexthop apparently is reachable. --ANK
1738 */
d8d1f30b 1739 dst_confirm(&rt->dst);
1da177e4
LT
1740
1741 /* Host route. If it is static, it would be better
1742 not to override it, but add new one, so that
1743 when cache entry will expire old pmtu
1744 would return automatically.
1745 */
1746 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1747 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1748 if (allfrag) {
1749 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1750 features |= RTAX_FEATURE_ALLFRAG;
1751 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1752 }
1716a961
G
1753 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1754 rt->rt6i_flags |= RTF_MODIFIED;
1da177e4
LT
1755 goto out;
1756 }
1757
1758 /* Network route.
1759 Two cases are possible:
1760 1. It is connected route. Action: COW
1761 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1762 */
27217455 1763 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1764 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1765 else
1766 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1767
d5315b50 1768 if (nrt) {
defb3519
DM
1769 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1770 if (allfrag) {
1771 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1772 features |= RTAX_FEATURE_ALLFRAG;
1773 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1774 }
a1e78363
YH
1775
1776 /* According to RFC 1981, detecting PMTU increase shouldn't be
1777 * happened within 5 mins, the recommended timer is 10 mins.
1778 * Here this route expiration time is set to ip6_rt_mtu_expires
1779 * which is 10 mins. After 10 mins the decreased pmtu is expired
1780 * and detecting PMTU increase will be automatically happened.
1781 */
1716a961
G
1782 rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1783 nrt->rt6i_flags |= RTF_DYNAMIC;
40e22e8f 1784 ip6_ins_rt(nrt);
1da177e4 1785 }
1da177e4 1786out:
d8d1f30b 1787 dst_release(&rt->dst);
1da177e4
LT
1788}
1789
b71d1d42 1790void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1791 struct net_device *dev, u32 pmtu)
1792{
1793 struct net *net = dev_net(dev);
1794
1795 /*
1796 * RFC 1981 states that a node "MUST reduce the size of the packets it
1797 * is sending along the path" that caused the Packet Too Big message.
1798 * Since it's not possible in the general case to determine which
1799 * interface was used to send the original packet, we update the MTU
1800 * on the interface that will be used to send future packets. We also
1801 * update the MTU on the interface that received the Packet Too Big in
1802 * case the original packet was forced out that interface with
1803 * SO_BINDTODEVICE or similar. This is the next best thing to the
1804 * correct behaviour, which would be to update the MTU on all
1805 * interfaces.
1806 */
1807 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1808 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1809}
1810
1da177e4
LT
1811/*
1812 * Misc support functions
1813 */
1814
1716a961 1815static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1816 const struct in6_addr *dest)
1da177e4 1817{
d1918542 1818 struct net *net = dev_net(ort->dst.dev);
5c1e6aa3 1819 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 1820 ort->dst.dev, 0);
1da177e4
LT
1821
1822 if (rt) {
d8d1f30b
CG
1823 rt->dst.input = ort->dst.input;
1824 rt->dst.output = ort->dst.output;
8e2ec639 1825 rt->dst.flags |= DST_HOST;
d8d1f30b 1826
4e3fd7a0 1827 rt->rt6i_dst.addr = *dest;
8e2ec639 1828 rt->rt6i_dst.plen = 128;
defb3519 1829 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1830 rt->dst.error = ort->dst.error;
1da177e4
LT
1831 rt->rt6i_idev = ort->rt6i_idev;
1832 if (rt->rt6i_idev)
1833 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1834 rt->dst.lastuse = jiffies;
1da177e4 1835
4e3fd7a0 1836 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1837 rt->rt6i_flags = ort->rt6i_flags;
1838 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1839 (RTF_DEFAULT | RTF_ADDRCONF))
1840 rt6_set_from(rt, ort);
1841 else
1842 rt6_clean_expires(rt);
1da177e4
LT
1843 rt->rt6i_metric = 0;
1844
1da177e4
LT
1845#ifdef CONFIG_IPV6_SUBTREES
1846 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1847#endif
0f6c6392 1848 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1849 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1850 }
1851 return rt;
1852}
1853
70ceb4f5 1854#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1855static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1856 const struct in6_addr *prefix, int prefixlen,
1857 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1858{
1859 struct fib6_node *fn;
1860 struct rt6_info *rt = NULL;
c71099ac
TG
1861 struct fib6_table *table;
1862
efa2cea0 1863 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1864 if (!table)
c71099ac 1865 return NULL;
70ceb4f5 1866
c71099ac
TG
1867 write_lock_bh(&table->tb6_lock);
1868 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1869 if (!fn)
1870 goto out;
1871
d8d1f30b 1872 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1873 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1874 continue;
1875 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1876 continue;
1877 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1878 continue;
d8d1f30b 1879 dst_hold(&rt->dst);
70ceb4f5
YH
1880 break;
1881 }
1882out:
c71099ac 1883 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1884 return rt;
1885}
1886
efa2cea0 1887static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1888 const struct in6_addr *prefix, int prefixlen,
1889 const struct in6_addr *gwaddr, int ifindex,
95c96174 1890 unsigned int pref)
70ceb4f5 1891{
86872cb5
TG
1892 struct fib6_config cfg = {
1893 .fc_table = RT6_TABLE_INFO,
238fc7ea 1894 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1895 .fc_ifindex = ifindex,
1896 .fc_dst_len = prefixlen,
1897 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1898 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1899 .fc_nlinfo.pid = 0,
1900 .fc_nlinfo.nlh = NULL,
1901 .fc_nlinfo.nl_net = net,
86872cb5
TG
1902 };
1903
4e3fd7a0
AD
1904 cfg.fc_dst = *prefix;
1905 cfg.fc_gateway = *gwaddr;
70ceb4f5 1906
e317da96
YH
1907 /* We should treat it as a default route if prefix length is 0. */
1908 if (!prefixlen)
86872cb5 1909 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1910
86872cb5 1911 ip6_route_add(&cfg);
70ceb4f5 1912
efa2cea0 1913 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1914}
1915#endif
1916
b71d1d42 1917struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1918{
1da177e4 1919 struct rt6_info *rt;
c71099ac 1920 struct fib6_table *table;
1da177e4 1921
c346dca1 1922 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1923 if (!table)
c71099ac 1924 return NULL;
1da177e4 1925
c71099ac 1926 write_lock_bh(&table->tb6_lock);
d8d1f30b 1927 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1928 if (dev == rt->dst.dev &&
045927ff 1929 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1930 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1931 break;
1932 }
1933 if (rt)
d8d1f30b 1934 dst_hold(&rt->dst);
c71099ac 1935 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1936 return rt;
1937}
1938
b71d1d42 1939struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1940 struct net_device *dev,
1941 unsigned int pref)
1da177e4 1942{
86872cb5
TG
1943 struct fib6_config cfg = {
1944 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1945 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1946 .fc_ifindex = dev->ifindex,
1947 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1948 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1949 .fc_nlinfo.pid = 0,
1950 .fc_nlinfo.nlh = NULL,
c346dca1 1951 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1952 };
1da177e4 1953
4e3fd7a0 1954 cfg.fc_gateway = *gwaddr;
1da177e4 1955
86872cb5 1956 ip6_route_add(&cfg);
1da177e4 1957
1da177e4
LT
1958 return rt6_get_dflt_router(gwaddr, dev);
1959}
1960
7b4da532 1961void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1962{
1963 struct rt6_info *rt;
c71099ac
TG
1964 struct fib6_table *table;
1965
1966 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1967 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1968 if (!table)
c71099ac 1969 return;
1da177e4
LT
1970
1971restart:
c71099ac 1972 read_lock_bh(&table->tb6_lock);
d8d1f30b 1973 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1974 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1975 dst_hold(&rt->dst);
c71099ac 1976 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1977 ip6_del_rt(rt);
1da177e4
LT
1978 goto restart;
1979 }
1980 }
c71099ac 1981 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1982}
1983
5578689a
DL
1984static void rtmsg_to_fib6_config(struct net *net,
1985 struct in6_rtmsg *rtmsg,
86872cb5
TG
1986 struct fib6_config *cfg)
1987{
1988 memset(cfg, 0, sizeof(*cfg));
1989
1990 cfg->fc_table = RT6_TABLE_MAIN;
1991 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1992 cfg->fc_metric = rtmsg->rtmsg_metric;
1993 cfg->fc_expires = rtmsg->rtmsg_info;
1994 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1995 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1996 cfg->fc_flags = rtmsg->rtmsg_flags;
1997
5578689a 1998 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1999
4e3fd7a0
AD
2000 cfg->fc_dst = rtmsg->rtmsg_dst;
2001 cfg->fc_src = rtmsg->rtmsg_src;
2002 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2003}
2004
5578689a 2005int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2006{
86872cb5 2007 struct fib6_config cfg;
1da177e4
LT
2008 struct in6_rtmsg rtmsg;
2009 int err;
2010
2011 switch(cmd) {
2012 case SIOCADDRT: /* Add a route */
2013 case SIOCDELRT: /* Delete a route */
2014 if (!capable(CAP_NET_ADMIN))
2015 return -EPERM;
2016 err = copy_from_user(&rtmsg, arg,
2017 sizeof(struct in6_rtmsg));
2018 if (err)
2019 return -EFAULT;
86872cb5 2020
5578689a 2021 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2022
1da177e4
LT
2023 rtnl_lock();
2024 switch (cmd) {
2025 case SIOCADDRT:
86872cb5 2026 err = ip6_route_add(&cfg);
1da177e4
LT
2027 break;
2028 case SIOCDELRT:
86872cb5 2029 err = ip6_route_del(&cfg);
1da177e4
LT
2030 break;
2031 default:
2032 err = -EINVAL;
2033 }
2034 rtnl_unlock();
2035
2036 return err;
3ff50b79 2037 }
1da177e4
LT
2038
2039 return -EINVAL;
2040}
2041
2042/*
2043 * Drop the packet on the floor
2044 */
2045
d5fdd6ba 2046static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2047{
612f09e8 2048 int type;
adf30907 2049 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2050 switch (ipstats_mib_noroutes) {
2051 case IPSTATS_MIB_INNOROUTES:
0660e03f 2052 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2053 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2054 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2055 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2056 break;
2057 }
2058 /* FALLTHROUGH */
2059 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2060 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2061 ipstats_mib_noroutes);
612f09e8
YH
2062 break;
2063 }
3ffe533c 2064 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2065 kfree_skb(skb);
2066 return 0;
2067}
2068
9ce8ade0
TG
2069static int ip6_pkt_discard(struct sk_buff *skb)
2070{
612f09e8 2071 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2072}
2073
20380731 2074static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2075{
adf30907 2076 skb->dev = skb_dst(skb)->dev;
612f09e8 2077 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2078}
2079
6723ab54
DM
2080#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2081
9ce8ade0
TG
2082static int ip6_pkt_prohibit(struct sk_buff *skb)
2083{
612f09e8 2084 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2085}
2086
2087static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2088{
adf30907 2089 skb->dev = skb_dst(skb)->dev;
612f09e8 2090 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2091}
2092
6723ab54
DM
2093#endif
2094
1da177e4
LT
2095/*
2096 * Allocate a dst for local (unicast / anycast) address.
2097 */
2098
2099struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2100 const struct in6_addr *addr,
8f031519 2101 bool anycast)
1da177e4 2102{
c346dca1 2103 struct net *net = dev_net(idev->dev);
5c1e6aa3 2104 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 2105 net->loopback_dev, 0);
f83c7790 2106 int err;
1da177e4 2107
38308473 2108 if (!rt) {
40385653
BG
2109 if (net_ratelimit())
2110 pr_warning("IPv6: Maximum number of routes reached,"
2111 " consider increasing route/max_size.\n");
1da177e4 2112 return ERR_PTR(-ENOMEM);
40385653 2113 }
1da177e4 2114
1da177e4
LT
2115 in6_dev_hold(idev);
2116
11d53b49 2117 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2118 rt->dst.input = ip6_input;
2119 rt->dst.output = ip6_output;
1da177e4 2120 rt->rt6i_idev = idev;
d8d1f30b 2121 rt->dst.obsolete = -1;
1da177e4
LT
2122
2123 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2124 if (anycast)
2125 rt->rt6i_flags |= RTF_ANYCAST;
2126 else
1da177e4 2127 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2128 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2129 if (err) {
d8d1f30b 2130 dst_free(&rt->dst);
f83c7790 2131 return ERR_PTR(err);
1da177e4
LT
2132 }
2133
4e3fd7a0 2134 rt->rt6i_dst.addr = *addr;
1da177e4 2135 rt->rt6i_dst.plen = 128;
5578689a 2136 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2137
d8d1f30b 2138 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2139
2140 return rt;
2141}
2142
c3968a85
DW
2143int ip6_route_get_saddr(struct net *net,
2144 struct rt6_info *rt,
b71d1d42 2145 const struct in6_addr *daddr,
c3968a85
DW
2146 unsigned int prefs,
2147 struct in6_addr *saddr)
2148{
2149 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2150 int err = 0;
2151 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2152 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2153 else
2154 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2155 daddr, prefs, saddr);
2156 return err;
2157}
2158
/*
 * Walker argument used when removing a deleted address from the
 * prefsrc of route entries.
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address compared with each prefsrc */
};
2165
2166static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2167{
2168 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2169 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2170 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2171
d1918542 2172 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2173 rt != net->ipv6.ip6_null_entry &&
2174 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2175 /* remove prefsrc entry */
2176 rt->rt6i_prefsrc.plen = 0;
2177 }
2178 return 0;
2179}
2180
2181void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2182{
2183 struct net *net = dev_net(ifp->idev->dev);
2184 struct arg_dev_net_ip adni = {
2185 .dev = ifp->idev->dev,
2186 .net = net,
2187 .addr = &ifp->addr,
2188 };
2189 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2190}
2191
8ed67789
DL
/* Walker argument for fib6_ifdown(): device filter plus namespace. */
struct arg_dev_net {
	struct net_device *dev;		/* NULL matches every device */
	struct net *net;		/* namespace whose null entry is kept */
};
2196
1da177e4
LT
2197static int fib6_ifdown(struct rt6_info *rt, void *arg)
2198{
bc3ef660 2199 const struct arg_dev_net *adn = arg;
2200 const struct net_device *dev = adn->dev;
8ed67789 2201
d1918542 2202 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2203 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2204 return -1;
c159d30c 2205
1da177e4
LT
2206 return 0;
2207}
2208
f3db4851 2209void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2210{
8ed67789
DL
2211 struct arg_dev_net adn = {
2212 .dev = dev,
2213 .net = net,
2214 };
2215
2216 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2217 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2218}
2219
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2224
2225static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2226{
2227 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2228 struct inet6_dev *idev;
2229
2230 /* In IPv6 pmtu discovery is not optional,
2231 so that RTAX_MTU lock cannot disable it.
2232 We still use this lock to block changes
2233 caused by addrconf/ndisc.
2234 */
2235
2236 idev = __in6_dev_get(arg->dev);
38308473 2237 if (!idev)
1da177e4
LT
2238 return 0;
2239
2240 /* For administrative MTU increase, there is no way to discover
2241 IPv6 PMTU increase, so PMTU increase should be updated here.
2242 Since RFC 1981 doesn't include administrative MTU increase
2243 update PMTU increase is a MUST. (i.e. jumbo frame)
2244 */
2245 /*
2246 If new MTU is less than route PMTU, this new MTU will be the
2247 lowest MTU in the path, update the route PMTU to reflect PMTU
2248 decreases; if new MTU is greater than route PMTU, and the
2249 old MTU is the lowest MTU in the path, update the route PMTU
2250 to reflect the increase. In this case if the other nodes' MTU
2251 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2252 PMTU discouvery.
2253 */
d1918542 2254 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2255 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2256 (dst_mtu(&rt->dst) >= arg->mtu ||
2257 (dst_mtu(&rt->dst) < arg->mtu &&
2258 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2259 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2260 }
1da177e4
LT
2261 return 0;
2262}
2263
95c96174 2264void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2265{
c71099ac
TG
2266 struct rt6_mtu_change_arg arg = {
2267 .dev = dev,
2268 .mtu = mtu,
2269 };
1da177e4 2270
c346dca1 2271 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2272}
2273
ef7c79ed 2274static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2275 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2276 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2277 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2278 [RTA_PRIORITY] = { .type = NLA_U32 },
2279 [RTA_METRICS] = { .type = NLA_NESTED },
2280};
2281
2282static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2283 struct fib6_config *cfg)
1da177e4 2284{
86872cb5
TG
2285 struct rtmsg *rtm;
2286 struct nlattr *tb[RTA_MAX+1];
2287 int err;
1da177e4 2288
86872cb5
TG
2289 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2290 if (err < 0)
2291 goto errout;
1da177e4 2292
86872cb5
TG
2293 err = -EINVAL;
2294 rtm = nlmsg_data(nlh);
2295 memset(cfg, 0, sizeof(*cfg));
2296
2297 cfg->fc_table = rtm->rtm_table;
2298 cfg->fc_dst_len = rtm->rtm_dst_len;
2299 cfg->fc_src_len = rtm->rtm_src_len;
2300 cfg->fc_flags = RTF_UP;
2301 cfg->fc_protocol = rtm->rtm_protocol;
2302
2303 if (rtm->rtm_type == RTN_UNREACHABLE)
2304 cfg->fc_flags |= RTF_REJECT;
2305
ab79ad14
2306 if (rtm->rtm_type == RTN_LOCAL)
2307 cfg->fc_flags |= RTF_LOCAL;
2308
86872cb5
TG
2309 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2310 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2311 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2312
2313 if (tb[RTA_GATEWAY]) {
2314 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2315 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2316 }
86872cb5
TG
2317
2318 if (tb[RTA_DST]) {
2319 int plen = (rtm->rtm_dst_len + 7) >> 3;
2320
2321 if (nla_len(tb[RTA_DST]) < plen)
2322 goto errout;
2323
2324 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2325 }
86872cb5
TG
2326
2327 if (tb[RTA_SRC]) {
2328 int plen = (rtm->rtm_src_len + 7) >> 3;
2329
2330 if (nla_len(tb[RTA_SRC]) < plen)
2331 goto errout;
2332
2333 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2334 }
86872cb5 2335
c3968a85
DW
2336 if (tb[RTA_PREFSRC])
2337 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2338
86872cb5
TG
2339 if (tb[RTA_OIF])
2340 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2341
2342 if (tb[RTA_PRIORITY])
2343 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2344
2345 if (tb[RTA_METRICS]) {
2346 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2347 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2348 }
86872cb5
TG
2349
2350 if (tb[RTA_TABLE])
2351 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2352
2353 err = 0;
2354errout:
2355 return err;
1da177e4
LT
2356}
2357
c127ea2c 2358static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2359{
86872cb5
TG
2360 struct fib6_config cfg;
2361 int err;
1da177e4 2362
86872cb5
TG
2363 err = rtm_to_fib6_config(skb, nlh, &cfg);
2364 if (err < 0)
2365 return err;
2366
2367 return ip6_route_del(&cfg);
1da177e4
LT
2368}
2369
c127ea2c 2370static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2371{
86872cb5
TG
2372 struct fib6_config cfg;
2373 int err;
1da177e4 2374
86872cb5
TG
2375 err = rtm_to_fib6_config(skb, nlh, &cfg);
2376 if (err < 0)
2377 return err;
2378
2379 return ip6_route_add(&cfg);
1da177e4
LT
2380}
2381
339bf98f
TG
2382static inline size_t rt6_nlmsg_size(void)
2383{
2384 return NLMSG_ALIGN(sizeof(struct rtmsg))
2385 + nla_total_size(16) /* RTA_SRC */
2386 + nla_total_size(16) /* RTA_DST */
2387 + nla_total_size(16) /* RTA_GATEWAY */
2388 + nla_total_size(16) /* RTA_PREFSRC */
2389 + nla_total_size(4) /* RTA_TABLE */
2390 + nla_total_size(4) /* RTA_IIF */
2391 + nla_total_size(4) /* RTA_OIF */
2392 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2393 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2394 + nla_total_size(sizeof(struct rta_cacheinfo));
2395}
2396
191cd582
BH
2397static int rt6_fill_node(struct net *net,
2398 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2399 struct in6_addr *dst, struct in6_addr *src,
2400 int iif, int type, u32 pid, u32 seq,
7bc570c8 2401 int prefix, int nowait, unsigned int flags)
1da177e4 2402{
346f870b 2403 const struct inet_peer *peer;
1da177e4 2404 struct rtmsg *rtm;
2d7202bf 2405 struct nlmsghdr *nlh;
e3703b3d 2406 long expires;
9e762a4a 2407 u32 table;
f2c31e32 2408 struct neighbour *n;
346f870b 2409 u32 ts, tsage;
1da177e4
LT
2410
2411 if (prefix) { /* user wants prefix routes only */
2412 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2413 /* success since this is not a prefix route */
2414 return 1;
2415 }
2416 }
2417
2d7202bf 2418 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2419 if (!nlh)
26932566 2420 return -EMSGSIZE;
2d7202bf
TG
2421
2422 rtm = nlmsg_data(nlh);
1da177e4
LT
2423 rtm->rtm_family = AF_INET6;
2424 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2425 rtm->rtm_src_len = rt->rt6i_src.plen;
2426 rtm->rtm_tos = 0;
c71099ac 2427 if (rt->rt6i_table)
9e762a4a 2428 table = rt->rt6i_table->tb6_id;
c71099ac 2429 else
9e762a4a
PM
2430 table = RT6_TABLE_UNSPEC;
2431 rtm->rtm_table = table;
c78679e8
DM
2432 if (nla_put_u32(skb, RTA_TABLE, table))
2433 goto nla_put_failure;
38308473 2434 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2435 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2436 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2437 rtm->rtm_type = RTN_LOCAL;
d1918542 2438 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2439 rtm->rtm_type = RTN_LOCAL;
2440 else
2441 rtm->rtm_type = RTN_UNICAST;
2442 rtm->rtm_flags = 0;
2443 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2444 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2445 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4
LT
2446 rtm->rtm_protocol = RTPROT_REDIRECT;
2447 else if (rt->rt6i_flags & RTF_ADDRCONF)
2448 rtm->rtm_protocol = RTPROT_KERNEL;
38308473 2449 else if (rt->rt6i_flags & RTF_DEFAULT)
1da177e4
LT
2450 rtm->rtm_protocol = RTPROT_RA;
2451
38308473 2452 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2453 rtm->rtm_flags |= RTM_F_CLONED;
2454
2455 if (dst) {
c78679e8
DM
2456 if (nla_put(skb, RTA_DST, 16, dst))
2457 goto nla_put_failure;
1ab1457c 2458 rtm->rtm_dst_len = 128;
1da177e4 2459 } else if (rtm->rtm_dst_len)
c78679e8
DM
2460 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2461 goto nla_put_failure;
1da177e4
LT
2462#ifdef CONFIG_IPV6_SUBTREES
2463 if (src) {
c78679e8
DM
2464 if (nla_put(skb, RTA_SRC, 16, src))
2465 goto nla_put_failure;
1ab1457c 2466 rtm->rtm_src_len = 128;
c78679e8
DM
2467 } else if (rtm->rtm_src_len &&
2468 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2469 goto nla_put_failure;
1da177e4 2470#endif
7bc570c8
YH
2471 if (iif) {
2472#ifdef CONFIG_IPV6_MROUTE
2473 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2474 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2475 if (err <= 0) {
2476 if (!nowait) {
2477 if (err == 0)
2478 return 0;
2479 goto nla_put_failure;
2480 } else {
2481 if (err == -EMSGSIZE)
2482 goto nla_put_failure;
2483 }
2484 }
2485 } else
2486#endif
c78679e8
DM
2487 if (nla_put_u32(skb, RTA_IIF, iif))
2488 goto nla_put_failure;
7bc570c8 2489 } else if (dst) {
1da177e4 2490 struct in6_addr saddr_buf;
c78679e8
DM
2491 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2492 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2493 goto nla_put_failure;
1da177e4 2494 }
2d7202bf 2495
c3968a85
DW
2496 if (rt->rt6i_prefsrc.plen) {
2497 struct in6_addr saddr_buf;
4e3fd7a0 2498 saddr_buf = rt->rt6i_prefsrc.addr;
c78679e8
DM
2499 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2500 goto nla_put_failure;
c3968a85
DW
2501 }
2502
defb3519 2503 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2504 goto nla_put_failure;
2505
f2c31e32 2506 rcu_read_lock();
27217455 2507 n = dst_get_neighbour_noref(&rt->dst);
94f826b8
ED
2508 if (n) {
2509 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2510 rcu_read_unlock();
2511 goto nla_put_failure;
2512 }
2513 }
f2c31e32 2514 rcu_read_unlock();
2d7202bf 2515
c78679e8
DM
2516 if (rt->dst.dev &&
2517 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2518 goto nla_put_failure;
2519 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2520 goto nla_put_failure;
36e3deae
YH
2521 if (!(rt->rt6i_flags & RTF_EXPIRES))
2522 expires = 0;
d1918542
DM
2523 else if (rt->dst.expires - jiffies < INT_MAX)
2524 expires = rt->dst.expires - jiffies;
36e3deae
YH
2525 else
2526 expires = INT_MAX;
69cdf8f9 2527
346f870b
DM
2528 peer = rt->rt6i_peer;
2529 ts = tsage = 0;
2530 if (peer && peer->tcp_ts_stamp) {
2531 ts = peer->tcp_ts;
2532 tsage = get_seconds() - peer->tcp_ts_stamp;
2533 }
2534
2535 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
d8d1f30b 2536 expires, rt->dst.error) < 0)
e3703b3d 2537 goto nla_put_failure;
2d7202bf
TG
2538
2539 return nlmsg_end(skb, nlh);
2540
2541nla_put_failure:
26932566
PM
2542 nlmsg_cancel(skb, nlh);
2543 return -EMSGSIZE;
1da177e4
LT
2544}
2545
1b43af54 2546int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2547{
2548 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2549 int prefix;
2550
2d7202bf
TG
2551 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2552 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2553 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2554 } else
2555 prefix = 0;
2556
191cd582
BH
2557 return rt6_fill_node(arg->net,
2558 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2559 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2560 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2561}
2562
c127ea2c 2563static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2564{
3b1e0a65 2565 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2566 struct nlattr *tb[RTA_MAX+1];
2567 struct rt6_info *rt;
1da177e4 2568 struct sk_buff *skb;
ab364a6f 2569 struct rtmsg *rtm;
4c9483b2 2570 struct flowi6 fl6;
72331bc0 2571 int err, iif = 0, oif = 0;
1da177e4 2572
ab364a6f
TG
2573 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2574 if (err < 0)
2575 goto errout;
1da177e4 2576
ab364a6f 2577 err = -EINVAL;
4c9483b2 2578 memset(&fl6, 0, sizeof(fl6));
1da177e4 2579
ab364a6f
TG
2580 if (tb[RTA_SRC]) {
2581 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2582 goto errout;
2583
4e3fd7a0 2584 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2585 }
2586
2587 if (tb[RTA_DST]) {
2588 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2589 goto errout;
2590
4e3fd7a0 2591 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2592 }
2593
2594 if (tb[RTA_IIF])
2595 iif = nla_get_u32(tb[RTA_IIF]);
2596
2597 if (tb[RTA_OIF])
72331bc0 2598 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2599
2600 if (iif) {
2601 struct net_device *dev;
72331bc0
SL
2602 int flags = 0;
2603
5578689a 2604 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2605 if (!dev) {
2606 err = -ENODEV;
ab364a6f 2607 goto errout;
1da177e4 2608 }
72331bc0
SL
2609
2610 fl6.flowi6_iif = iif;
2611
2612 if (!ipv6_addr_any(&fl6.saddr))
2613 flags |= RT6_LOOKUP_F_HAS_SADDR;
2614
2615 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2616 flags);
2617 } else {
2618 fl6.flowi6_oif = oif;
2619
2620 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2621 }
2622
ab364a6f 2623 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2624 if (!skb) {
2173bff5 2625 dst_release(&rt->dst);
ab364a6f
TG
2626 err = -ENOBUFS;
2627 goto errout;
2628 }
1da177e4 2629
ab364a6f
TG
2630 /* Reserve room for dummy headers, this skb can pass
2631 through good chunk of routing engine.
2632 */
459a98ed 2633 skb_reset_mac_header(skb);
ab364a6f 2634 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2635
d8d1f30b 2636 skb_dst_set(skb, &rt->dst);
1da177e4 2637
4c9483b2 2638 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2639 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2640 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2641 if (err < 0) {
ab364a6f
TG
2642 kfree_skb(skb);
2643 goto errout;
1da177e4
LT
2644 }
2645
5578689a 2646 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2647errout:
1da177e4 2648 return err;
1da177e4
LT
2649}
2650
86872cb5 2651void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2652{
2653 struct sk_buff *skb;
5578689a 2654 struct net *net = info->nl_net;
528c4ceb
DL
2655 u32 seq;
2656 int err;
2657
2658 err = -ENOBUFS;
38308473 2659 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2660
339bf98f 2661 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2662 if (!skb)
21713ebc
TG
2663 goto errout;
2664
191cd582 2665 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2666 event, info->pid, seq, 0, 0, 0);
26932566
PM
2667 if (err < 0) {
2668 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2669 WARN_ON(err == -EMSGSIZE);
2670 kfree_skb(skb);
2671 goto errout;
2672 }
1ce85fe4
PNA
2673 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2674 info->nlh, gfp_any());
2675 return;
21713ebc
TG
2676errout:
2677 if (err < 0)
5578689a 2678 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2679}
2680
8ed67789
DL
2681static int ip6_route_dev_notify(struct notifier_block *this,
2682 unsigned long event, void *data)
2683{
2684 struct net_device *dev = (struct net_device *)data;
c346dca1 2685 struct net *net = dev_net(dev);
8ed67789
DL
2686
2687 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2688 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2689 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2690#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2691 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2692 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2693 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2694 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2695#endif
2696 }
2697
2698 return NOTIFY_OK;
2699}
2700
1da177e4
LT
2701/*
2702 * /proc
2703 */
2704
2705#ifdef CONFIG_PROC_FS
2706
1da177e4
LT
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): nothing in the visible part of this file references
 * this structure - confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2715
2716static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2717{
33120b30 2718 struct seq_file *m = p_arg;
69cce1d1 2719 struct neighbour *n;
1da177e4 2720
4b7a4274 2721 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2722
2723#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2724 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2725#else
33120b30 2726 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2727#endif
f2c31e32 2728 rcu_read_lock();
27217455 2729 n = dst_get_neighbour_noref(&rt->dst);
69cce1d1
DM
2730 if (n) {
2731 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2732 } else {
33120b30 2733 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2734 }
f2c31e32 2735 rcu_read_unlock();
33120b30 2736 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2737 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2738 rt->dst.__use, rt->rt6i_flags,
d1918542 2739 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2740 return 0;
2741}
2742
33120b30 2743static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2744{
f3db4851 2745 struct net *net = (struct net *)m->private;
32b293a5 2746 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2747 return 0;
2748}
1da177e4 2749
33120b30
AD
2750static int ipv6_route_open(struct inode *inode, struct file *file)
2751{
de05c557 2752 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2753}
2754
33120b30
AD
2755static const struct file_operations ipv6_route_proc_fops = {
2756 .owner = THIS_MODULE,
2757 .open = ipv6_route_open,
2758 .read = seq_read,
2759 .llseek = seq_lseek,
b6fcbdb4 2760 .release = single_release_net,
33120b30
AD
2761};
2762
1da177e4
LT
2763static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2764{
69ddb805 2765 struct net *net = (struct net *)seq->private;
1da177e4 2766 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2767 net->ipv6.rt6_stats->fib_nodes,
2768 net->ipv6.rt6_stats->fib_route_nodes,
2769 net->ipv6.rt6_stats->fib_rt_alloc,
2770 net->ipv6.rt6_stats->fib_rt_entries,
2771 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2772 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2773 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2774
2775 return 0;
2776}
2777
2778static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2779{
de05c557 2780 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2781}
2782
9a32144e 2783static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2784 .owner = THIS_MODULE,
2785 .open = rt6_stats_seq_open,
2786 .read = seq_read,
2787 .llseek = seq_lseek,
b6fcbdb4 2788 .release = single_release_net,
1da177e4
LT
2789};
2790#endif /* CONFIG_PROC_FS */
2791
2792#ifdef CONFIG_SYSCTL
2793
1da177e4 2794static
8d65af78 2795int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2796 void __user *buffer, size_t *lenp, loff_t *ppos)
2797{
c486da34
LAG
2798 struct net *net;
2799 int delay;
2800 if (!write)
1da177e4 2801 return -EINVAL;
c486da34
LAG
2802
2803 net = (struct net *)ctl->extra1;
2804 delay = net->ipv6.sysctl.flush_delay;
2805 proc_dointvec(ctl, write, buffer, lenp, ppos);
2806 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2807 return 0;
1da177e4
LT
2808}
2809
760f2d01 2810ctl_table ipv6_route_table_template[] = {
1ab1457c 2811 {
1da177e4 2812 .procname = "flush",
4990509f 2813 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2814 .maxlen = sizeof(int),
89c8b3a1 2815 .mode = 0200,
6d9f239a 2816 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2817 },
2818 {
1da177e4 2819 .procname = "gc_thresh",
9a7ec3a9 2820 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2821 .maxlen = sizeof(int),
2822 .mode = 0644,
6d9f239a 2823 .proc_handler = proc_dointvec,
1da177e4
LT
2824 },
2825 {
1da177e4 2826 .procname = "max_size",
4990509f 2827 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2828 .maxlen = sizeof(int),
2829 .mode = 0644,
6d9f239a 2830 .proc_handler = proc_dointvec,
1da177e4
LT
2831 },
2832 {
1da177e4 2833 .procname = "gc_min_interval",
4990509f 2834 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2835 .maxlen = sizeof(int),
2836 .mode = 0644,
6d9f239a 2837 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2838 },
2839 {
1da177e4 2840 .procname = "gc_timeout",
4990509f 2841 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2842 .maxlen = sizeof(int),
2843 .mode = 0644,
6d9f239a 2844 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2845 },
2846 {
1da177e4 2847 .procname = "gc_interval",
4990509f 2848 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2849 .maxlen = sizeof(int),
2850 .mode = 0644,
6d9f239a 2851 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2852 },
2853 {
1da177e4 2854 .procname = "gc_elasticity",
4990509f 2855 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2856 .maxlen = sizeof(int),
2857 .mode = 0644,
f3d3f616 2858 .proc_handler = proc_dointvec,
1da177e4
LT
2859 },
2860 {
1da177e4 2861 .procname = "mtu_expires",
4990509f 2862 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2863 .maxlen = sizeof(int),
2864 .mode = 0644,
6d9f239a 2865 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2866 },
2867 {
1da177e4 2868 .procname = "min_adv_mss",
4990509f 2869 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2870 .maxlen = sizeof(int),
2871 .mode = 0644,
f3d3f616 2872 .proc_handler = proc_dointvec,
1da177e4
LT
2873 },
2874 {
1da177e4 2875 .procname = "gc_min_interval_ms",
4990509f 2876 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2877 .maxlen = sizeof(int),
2878 .mode = 0644,
6d9f239a 2879 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2880 },
f8572d8f 2881 { }
1da177e4
LT
2882};
2883
2c8c1e72 2884struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2885{
2886 struct ctl_table *table;
2887
2888 table = kmemdup(ipv6_route_table_template,
2889 sizeof(ipv6_route_table_template),
2890 GFP_KERNEL);
5ee09105
YH
2891
2892 if (table) {
2893 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2894 table[0].extra1 = net;
86393e52 2895 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2896 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2897 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2898 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2899 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2900 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2901 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2902 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2903 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2904 }
2905
760f2d01
DL
2906 return table;
2907}
1da177e4
LT
2908#endif
2909
2c8c1e72 2910static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2911{
633d424b 2912 int ret = -ENOMEM;
8ed67789 2913
86393e52
AD
2914 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2915 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2916
fc66f95c
ED
2917 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2918 goto out_ip6_dst_ops;
2919
8ed67789
DL
2920 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2921 sizeof(*net->ipv6.ip6_null_entry),
2922 GFP_KERNEL);
2923 if (!net->ipv6.ip6_null_entry)
fc66f95c 2924 goto out_ip6_dst_entries;
d8d1f30b 2925 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2926 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2927 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2928 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2929 ip6_template_metrics, true);
8ed67789
DL
2930
2931#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2932 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2933 sizeof(*net->ipv6.ip6_prohibit_entry),
2934 GFP_KERNEL);
68fffc67
PZ
2935 if (!net->ipv6.ip6_prohibit_entry)
2936 goto out_ip6_null_entry;
d8d1f30b 2937 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2938 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2939 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2940 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2941 ip6_template_metrics, true);
8ed67789
DL
2942
2943 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2944 sizeof(*net->ipv6.ip6_blk_hole_entry),
2945 GFP_KERNEL);
68fffc67
PZ
2946 if (!net->ipv6.ip6_blk_hole_entry)
2947 goto out_ip6_prohibit_entry;
d8d1f30b 2948 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2949 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2950 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2951 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2952 ip6_template_metrics, true);
8ed67789
DL
2953#endif
2954
b339a47c
PZ
2955 net->ipv6.sysctl.flush_delay = 0;
2956 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2957 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2958 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2959 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2960 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2961 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2962 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2963
cdb18761
DL
2964#ifdef CONFIG_PROC_FS
2965 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2966 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2967#endif
6891a346
BT
2968 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2969
8ed67789
DL
2970 ret = 0;
2971out:
2972 return ret;
f2fc6a54 2973
68fffc67
PZ
2974#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2975out_ip6_prohibit_entry:
2976 kfree(net->ipv6.ip6_prohibit_entry);
2977out_ip6_null_entry:
2978 kfree(net->ipv6.ip6_null_entry);
2979#endif
fc66f95c
ED
2980out_ip6_dst_entries:
2981 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2982out_ip6_dst_ops:
f2fc6a54 2983 goto out;
cdb18761
DL
2984}
2985
2c8c1e72 2986static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2987{
2988#ifdef CONFIG_PROC_FS
2989 proc_net_remove(net, "ipv6_route");
2990 proc_net_remove(net, "rt6_stats");
2991#endif
8ed67789
DL
2992 kfree(net->ipv6.ip6_null_entry);
2993#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2994 kfree(net->ipv6.ip6_prohibit_entry);
2995 kfree(net->ipv6.ip6_blk_hole_entry);
2996#endif
41bb78b4 2997 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2998}
2999
3000static struct pernet_operations ip6_route_net_ops = {
3001 .init = ip6_route_net_init,
3002 .exit = ip6_route_net_exit,
3003};
3004
8ed67789
DL
3005static struct notifier_block ip6_route_dev_notifier = {
3006 .notifier_call = ip6_route_dev_notify,
3007 .priority = 0,
3008};
3009
433d49c3 3010int __init ip6_route_init(void)
1da177e4 3011{
433d49c3
DL
3012 int ret;
3013
9a7ec3a9
DL
3014 ret = -ENOMEM;
3015 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3016 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3017 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3018 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3019 goto out;
14e50e57 3020
fc66f95c 3021 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3022 if (ret)
bdb3289f 3023 goto out_kmem_cache;
bdb3289f 3024
fc66f95c
ED
3025 ret = register_pernet_subsys(&ip6_route_net_ops);
3026 if (ret)
3027 goto out_dst_entries;
3028
5dc121e9
AE
3029 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3030
8ed67789
DL
3031 /* Registering of the loopback is done before this portion of code,
3032 * the loopback reference in rt6_info will not be taken, do it
3033 * manually for init_net */
d8d1f30b 3034 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3035 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3036 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3037 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3038 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3039 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3040 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3041 #endif
433d49c3
DL
3042 ret = fib6_init();
3043 if (ret)
8ed67789 3044 goto out_register_subsys;
433d49c3 3045
433d49c3
DL
3046 ret = xfrm6_init();
3047 if (ret)
cdb18761 3048 goto out_fib6_init;
c35b7e72 3049
433d49c3
DL
3050 ret = fib6_rules_init();
3051 if (ret)
3052 goto xfrm6_init;
7e5449c2 3053
433d49c3 3054 ret = -ENOBUFS;
c7ac8679
GR
3055 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3056 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3057 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 3058 goto fib6_rules_init;
c127ea2c 3059
8ed67789 3060 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
3061 if (ret)
3062 goto fib6_rules_init;
8ed67789 3063
433d49c3
DL
3064out:
3065 return ret;
3066
3067fib6_rules_init:
433d49c3
DL
3068 fib6_rules_cleanup();
3069xfrm6_init:
433d49c3 3070 xfrm6_fini();
433d49c3 3071out_fib6_init:
433d49c3 3072 fib6_gc_cleanup();
8ed67789
DL
3073out_register_subsys:
3074 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
3075out_dst_entries:
3076 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3077out_kmem_cache:
f2fc6a54 3078 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3079 goto out;
1da177e4
LT
3080}
3081
3082void ip6_route_cleanup(void)
3083{
8ed67789 3084 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 3085 fib6_rules_cleanup();
1da177e4 3086 xfrm6_fini();
1da177e4 3087 fib6_gc_cleanup();
8ed67789 3088 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3089 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3090 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3091}