netdev/phy: Make get_phy_id() static and quit EXPORTing it.
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4 28#include <linux/errno.h>
bc3b2d7f 29#include <linux/export.h>
1da177e4
LT
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
7bc570c8 38#include <linux/mroute6.h>
1da177e4 39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
5b7c931d 43#include <linux/nsproxy.h>
5a0e3ad6 44#include <linux/slab.h>
457c4cbc 45#include <net/net_namespace.h>
1da177e4
LT
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
8d71740c 56#include <net/netevent.h>
21713ebc 57#include <net/netlink.h>
1da177e4
LT
58
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
1716a961 65static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 66 const struct in6_addr *dest);
1da177e4 67static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 68static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 69static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
70static struct dst_entry *ip6_negative_advice(struct dst_entry *);
71static void ip6_dst_destroy(struct dst_entry *);
72static void ip6_dst_ifdown(struct dst_entry *,
73 struct net_device *dev, int how);
569d3645 74static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
75
76static int ip6_pkt_discard(struct sk_buff *skb);
77static int ip6_pkt_discard_out(struct sk_buff *skb);
78static void ip6_link_failure(struct sk_buff *skb);
79static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
80
70ceb4f5 81#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 82static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
83 const struct in6_addr *prefix, int prefixlen,
84 const struct in6_addr *gwaddr, int ifindex,
95c96174 85 unsigned int pref);
efa2cea0 86static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
87 const struct in6_addr *prefix, int prefixlen,
88 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
89#endif
90
06582540
DM
91static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
92{
93 struct rt6_info *rt = (struct rt6_info *) dst;
94 struct inet_peer *peer;
95 u32 *p = NULL;
96
8e2ec639
YZ
97 if (!(rt->dst.flags & DST_HOST))
98 return NULL;
99
06582540
DM
100 if (!rt->rt6i_peer)
101 rt6_bind_peer(rt, 1);
102
103 peer = rt->rt6i_peer;
104 if (peer) {
105 u32 *old_p = __DST_METRICS_PTR(old);
106 unsigned long prev, new;
107
108 p = peer->metrics;
109 if (inet_metrics_new(peer))
110 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111
112 new = (unsigned long) p;
113 prev = cmpxchg(&dst->_metrics, old, new);
114
115 if (prev != old) {
116 p = __DST_METRICS_PTR(prev);
117 if (prev & DST_METRICS_READ_ONLY)
118 p = NULL;
119 }
120 }
121 return p;
122}
123
39232973
DM
124static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
125{
126 struct in6_addr *p = &rt->rt6i_gateway;
127
a7563f34 128 if (!ipv6_addr_any(p))
39232973
DM
129 return (const void *) p;
130 return daddr;
131}
132
d3aaeb38
DM
133static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
134{
39232973
DM
135 struct rt6_info *rt = (struct rt6_info *) dst;
136 struct neighbour *n;
137
138 daddr = choose_neigh_daddr(rt, daddr);
139 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
140 if (n)
141 return n;
142 return neigh_create(&nd_tbl, daddr, dst->dev);
143}
144
8ade06c6 145static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 146{
8ade06c6
DM
147 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
148 if (!n) {
149 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
150 if (IS_ERR(n))
151 return PTR_ERR(n);
152 }
f83c7790
DM
153 dst_set_neighbour(&rt->dst, n);
154
155 return 0;
d3aaeb38
DM
156}
157
9a7ec3a9 158static struct dst_ops ip6_dst_ops_template = {
1da177e4 159 .family = AF_INET6,
09640e63 160 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
161 .gc = ip6_dst_gc,
162 .gc_thresh = 1024,
163 .check = ip6_dst_check,
0dbaee3b 164 .default_advmss = ip6_default_advmss,
ebb762f2 165 .mtu = ip6_mtu,
06582540 166 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
167 .destroy = ip6_dst_destroy,
168 .ifdown = ip6_dst_ifdown,
169 .negative_advice = ip6_negative_advice,
170 .link_failure = ip6_link_failure,
171 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 172 .local_out = __ip6_local_out,
d3aaeb38 173 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
174};
175
ebb762f2 176static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 177{
618f9bc7
SK
178 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
179
180 return mtu ? : dst->dev->mtu;
ec831ea7
RD
181}
182
14e50e57
DM
183static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
184{
185}
186
0972ddb2
HB
187static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
188 unsigned long old)
189{
190 return NULL;
191}
192
14e50e57
DM
193static struct dst_ops ip6_dst_blackhole_ops = {
194 .family = AF_INET6,
09640e63 195 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
196 .destroy = ip6_dst_destroy,
197 .check = ip6_dst_check,
ebb762f2 198 .mtu = ip6_blackhole_mtu,
214f45c9 199 .default_advmss = ip6_default_advmss,
14e50e57 200 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 201 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 202 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
203};
204
62fa8a84
DM
205static const u32 ip6_template_metrics[RTAX_MAX] = {
206 [RTAX_HOPLIMIT - 1] = 255,
207};
208
bdb3289f 209static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
210 .dst = {
211 .__refcnt = ATOMIC_INIT(1),
212 .__use = 1,
213 .obsolete = -1,
214 .error = -ENETUNREACH,
d8d1f30b
CG
215 .input = ip6_pkt_discard,
216 .output = ip6_pkt_discard_out,
1da177e4
LT
217 },
218 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 219 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
220 .rt6i_metric = ~(u32) 0,
221 .rt6i_ref = ATOMIC_INIT(1),
222};
223
101367c2
TG
224#ifdef CONFIG_IPV6_MULTIPLE_TABLES
225
6723ab54
DM
226static int ip6_pkt_prohibit(struct sk_buff *skb);
227static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 228
280a34c8 229static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
230 .dst = {
231 .__refcnt = ATOMIC_INIT(1),
232 .__use = 1,
233 .obsolete = -1,
234 .error = -EACCES,
d8d1f30b
CG
235 .input = ip6_pkt_prohibit,
236 .output = ip6_pkt_prohibit_out,
101367c2
TG
237 },
238 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 239 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
240 .rt6i_metric = ~(u32) 0,
241 .rt6i_ref = ATOMIC_INIT(1),
242};
243
bdb3289f 244static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
245 .dst = {
246 .__refcnt = ATOMIC_INIT(1),
247 .__use = 1,
248 .obsolete = -1,
249 .error = -EINVAL,
d8d1f30b
CG
250 .input = dst_discard,
251 .output = dst_discard,
101367c2
TG
252 },
253 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 254 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
255 .rt6i_metric = ~(u32) 0,
256 .rt6i_ref = ATOMIC_INIT(1),
257};
258
259#endif
260
1da177e4 261/* allocate dst with ip6_dst_ops */
5c1e6aa3 262static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
957c665f
DM
263 struct net_device *dev,
264 int flags)
1da177e4 265{
957c665f 266 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
cf911662 267
38308473 268 if (rt)
fbe58186 269 memset(&rt->rt6i_table, 0,
38308473 270 sizeof(*rt) - sizeof(struct dst_entry));
cf911662
DM
271
272 return rt;
1da177e4
LT
273}
274
275static void ip6_dst_destroy(struct dst_entry *dst)
276{
277 struct rt6_info *rt = (struct rt6_info *)dst;
278 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 279 struct inet_peer *peer = rt->rt6i_peer;
1da177e4 280
8e2ec639
YZ
281 if (!(rt->dst.flags & DST_HOST))
282 dst_destroy_metrics_generic(dst);
283
38308473 284 if (idev) {
1da177e4
LT
285 rt->rt6i_idev = NULL;
286 in6_dev_put(idev);
1ab1457c 287 }
1716a961
G
288
289 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
290 dst_release(dst->from);
291
b3419363 292 if (peer) {
b3419363
DM
293 rt->rt6i_peer = NULL;
294 inet_putpeer(peer);
295 }
296}
297
6431cbc2
DM
298static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
299
300static u32 rt6_peer_genid(void)
301{
302 return atomic_read(&__rt6_peer_genid);
303}
304
b3419363
DM
305void rt6_bind_peer(struct rt6_info *rt, int create)
306{
307 struct inet_peer *peer;
308
b3419363
DM
309 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
310 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
311 inet_putpeer(peer);
6431cbc2
DM
312 else
313 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
314}
315
316static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
317 int how)
318{
319 struct rt6_info *rt = (struct rt6_info *)dst;
320 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 321 struct net_device *loopback_dev =
c346dca1 322 dev_net(dev)->loopback_dev;
1da177e4 323
38308473 324 if (dev != loopback_dev && idev && idev->dev == dev) {
5a3e55d6
DL
325 struct inet6_dev *loopback_idev =
326 in6_dev_get(loopback_dev);
38308473 327 if (loopback_idev) {
1da177e4
LT
328 rt->rt6i_idev = loopback_idev;
329 in6_dev_put(idev);
330 }
331 }
332}
333
334static __inline__ int rt6_check_expired(const struct rt6_info *rt)
335{
1716a961
G
336 struct rt6_info *ort = NULL;
337
338 if (rt->rt6i_flags & RTF_EXPIRES) {
339 if (time_after(jiffies, rt->dst.expires))
340 return 1;
341 } else if (rt->dst.from) {
342 ort = (struct rt6_info *) rt->dst.from;
343 return (ort->rt6i_flags & RTF_EXPIRES) &&
344 time_after(jiffies, ort->dst.expires);
345 }
346 return 0;
1da177e4
LT
347}
348
b71d1d42 349static inline int rt6_need_strict(const struct in6_addr *daddr)
c71099ac 350{
a02cec21
ED
351 return ipv6_addr_type(daddr) &
352 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
353}
354
1da177e4 355/*
c71099ac 356 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
357 */
358
8ed67789
DL
359static inline struct rt6_info *rt6_device_match(struct net *net,
360 struct rt6_info *rt,
b71d1d42 361 const struct in6_addr *saddr,
1da177e4 362 int oif,
d420895e 363 int flags)
1da177e4
LT
364{
365 struct rt6_info *local = NULL;
366 struct rt6_info *sprt;
367
dd3abc4e
YH
368 if (!oif && ipv6_addr_any(saddr))
369 goto out;
370
d8d1f30b 371 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 372 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
373
374 if (oif) {
1da177e4
LT
375 if (dev->ifindex == oif)
376 return sprt;
377 if (dev->flags & IFF_LOOPBACK) {
38308473 378 if (!sprt->rt6i_idev ||
1da177e4 379 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 380 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 381 continue;
1ab1457c 382 if (local && (!oif ||
1da177e4
LT
383 local->rt6i_idev->dev->ifindex == oif))
384 continue;
385 }
386 local = sprt;
387 }
dd3abc4e
YH
388 } else {
389 if (ipv6_chk_addr(net, saddr, dev,
390 flags & RT6_LOOKUP_F_IFACE))
391 return sprt;
1da177e4 392 }
dd3abc4e 393 }
1da177e4 394
dd3abc4e 395 if (oif) {
1da177e4
LT
396 if (local)
397 return local;
398
d420895e 399 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 400 return net->ipv6.ip6_null_entry;
1da177e4 401 }
dd3abc4e 402out:
1da177e4
LT
403 return rt;
404}
405
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbor Solicitation towards the
 * neighbour of @rt when that neighbour is not in a NUD_VALID state and
 * was last updated more than rtr_probe_interval ago.
 *
 * @rt may be NULL, in which case nothing is done.
 *
 * neigh->updated is written here, so the neighbour lock is taken for
 * writing: with only the read side held, two contexts could both pass
 * the time check and each send a probe, defeating the rate limit.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;
	write_lock_bh(&neigh->lock);
	/* re-check the state now that the lock is held */
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* the solicitation itself is sent without the lock held */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no reachability probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
445
1da177e4 446/*
554cfb7e 447 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 448 */
b6f99a21 449static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 450{
d1918542 451 struct net_device *dev = rt->dst.dev;
161980f4 452 if (!oif || dev->ifindex == oif)
554cfb7e 453 return 2;
161980f4
DM
454 if ((dev->flags & IFF_LOOPBACK) &&
455 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
456 return 1;
457 return 0;
554cfb7e 458}
1da177e4 459
b6f99a21 460static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 461{
f2c31e32 462 struct neighbour *neigh;
398bcbeb 463 int m;
f2c31e32
ED
464
465 rcu_read_lock();
27217455 466 neigh = dst_get_neighbour_noref(&rt->dst);
4d0c5911
YH
467 if (rt->rt6i_flags & RTF_NONEXTHOP ||
468 !(rt->rt6i_flags & RTF_GATEWAY))
469 m = 1;
470 else if (neigh) {
554cfb7e
YH
471 read_lock_bh(&neigh->lock);
472 if (neigh->nud_state & NUD_VALID)
4d0c5911 473 m = 2;
398bcbeb
YH
474#ifdef CONFIG_IPV6_ROUTER_PREF
475 else if (neigh->nud_state & NUD_FAILED)
476 m = 0;
477#endif
478 else
ea73ee23 479 m = 1;
554cfb7e 480 read_unlock_bh(&neigh->lock);
398bcbeb
YH
481 } else
482 m = 0;
f2c31e32 483 rcu_read_unlock();
554cfb7e 484 return m;
1da177e4
LT
485}
486
554cfb7e
YH
487static int rt6_score_route(struct rt6_info *rt, int oif,
488 int strict)
1da177e4 489{
4d0c5911 490 int m, n;
1ab1457c 491
4d0c5911 492 m = rt6_check_dev(rt, oif);
77d16f45 493 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 494 return -1;
ebacaaa0
YH
495#ifdef CONFIG_IPV6_ROUTER_PREF
496 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
497#endif
4d0c5911 498 n = rt6_check_neigh(rt);
557e92ef 499 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
500 return -1;
501 return m;
502}
503
f11e6659
DM
504static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
505 int *mpri, struct rt6_info *match)
554cfb7e 506{
f11e6659
DM
507 int m;
508
509 if (rt6_check_expired(rt))
510 goto out;
511
512 m = rt6_score_route(rt, oif, strict);
513 if (m < 0)
514 goto out;
515
516 if (m > *mpri) {
517 if (strict & RT6_LOOKUP_F_REACHABLE)
518 rt6_probe(match);
519 *mpri = m;
520 match = rt;
521 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
522 rt6_probe(rt);
523 }
524
525out:
526 return match;
527}
528
529static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
530 struct rt6_info *rr_head,
531 u32 metric, int oif, int strict)
532{
533 struct rt6_info *rt, *match;
554cfb7e 534 int mpri = -1;
1da177e4 535
f11e6659
DM
536 match = NULL;
537 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 538 rt = rt->dst.rt6_next)
f11e6659
DM
539 match = find_match(rt, oif, strict, &mpri, match);
540 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 541 rt = rt->dst.rt6_next)
f11e6659 542 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 543
f11e6659
DM
544 return match;
545}
1da177e4 546
f11e6659
DM
547static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
548{
549 struct rt6_info *match, *rt0;
8ed67789 550 struct net *net;
1da177e4 551
f11e6659
DM
552 rt0 = fn->rr_ptr;
553 if (!rt0)
554 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 555
f11e6659 556 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 557
554cfb7e 558 if (!match &&
f11e6659 559 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 560 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 561
554cfb7e 562 /* no entries matched; do round-robin */
f11e6659
DM
563 if (!next || next->rt6i_metric != rt0->rt6i_metric)
564 next = fn->leaf;
565
566 if (next != rt0)
567 fn->rr_ptr = next;
1da177e4 568 }
1da177e4 569
d1918542 570 net = dev_net(rt0->dst.dev);
a02cec21 571 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
572}
573
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    length of the option in bytes
 * @gwaddr: address used as the gateway of the resulting route
 *
 * A zero lifetime deletes an existing matching route; a non-zero
 * lifetime adds the route when missing or refreshes the preference bits
 * of an existing one.  The expiry is then set from the lifetime, or
 * cleared when the lifetime is not finite.
 *
 * Returns 0 on success, -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* the option carries a full address: use it in place */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	dst_release(&rt->dst);
	return 0;
}
#endif
646
/*
 * BACKTRACK - climb the fib6 tree after a lookup produced the null entry.
 *
 * Expects the caller to provide local variables `rt` and `fn` and the
 * labels `out` and `restart`.  If `rt` is the namespace's null entry,
 * walk towards the root: at each step either descend into the parent's
 * source subtree (looked up with @saddr) or move to the parent itself.
 * Jumps to `restart` at the first node carrying route info, or to `out`
 * once the top-level root is reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 664
8ed67789
DL
665static struct rt6_info *ip6_pol_route_lookup(struct net *net,
666 struct fib6_table *table,
4c9483b2 667 struct flowi6 *fl6, int flags)
1da177e4
LT
668{
669 struct fib6_node *fn;
670 struct rt6_info *rt;
671
c71099ac 672 read_lock_bh(&table->tb6_lock);
4c9483b2 673 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
674restart:
675 rt = fn->leaf;
4c9483b2
DM
676 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
677 BACKTRACK(net, &fl6->saddr);
c71099ac 678out:
d8d1f30b 679 dst_use(&rt->dst, jiffies);
c71099ac 680 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
681 return rt;
682
683}
684
ea6e574e
FW
685struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
686 int flags)
687{
688 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
689}
690EXPORT_SYMBOL_GPL(ip6_route_lookup);
691
9acd9f3a
YH
692struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
693 const struct in6_addr *saddr, int oif, int strict)
c71099ac 694{
4c9483b2
DM
695 struct flowi6 fl6 = {
696 .flowi6_oif = oif,
697 .daddr = *daddr,
c71099ac
TG
698 };
699 struct dst_entry *dst;
77d16f45 700 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 701
adaa70bb 702 if (saddr) {
4c9483b2 703 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
704 flags |= RT6_LOOKUP_F_HAS_SADDR;
705 }
706
4c9483b2 707 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
708 if (dst->error == 0)
709 return (struct rt6_info *) dst;
710
711 dst_release(dst);
712
1da177e4
LT
713 return NULL;
714}
715
7159039a
YH
716EXPORT_SYMBOL(rt6_lookup);
717
c71099ac 718/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
719 It takes new route entry, the addition fails by any reason the
720 route is freed. In any case, if caller does not hold it, it may
721 be destroyed.
722 */
723
86872cb5 724static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
725{
726 int err;
c71099ac 727 struct fib6_table *table;
1da177e4 728
c71099ac
TG
729 table = rt->rt6i_table;
730 write_lock_bh(&table->tb6_lock);
86872cb5 731 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 732 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
733
734 return err;
735}
736
40e22e8f
TG
737int ip6_ins_rt(struct rt6_info *rt)
738{
4d1169c1 739 struct nl_info info = {
d1918542 740 .nl_net = dev_net(rt->dst.dev),
4d1169c1 741 };
528c4ceb 742 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
743}
744
1716a961 745static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 746 const struct in6_addr *daddr,
b71d1d42 747 const struct in6_addr *saddr)
1da177e4 748{
1da177e4
LT
749 struct rt6_info *rt;
750
751 /*
752 * Clone the route.
753 */
754
21efcfa0 755 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
756
757 if (rt) {
14deae41
DM
758 int attempts = !in_softirq();
759
38308473 760 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 761 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 762 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 763 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 764 rt->rt6i_gateway = *daddr;
58c4fb86 765 }
1da177e4 766
1da177e4 767 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
768
769#ifdef CONFIG_IPV6_SUBTREES
770 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 771 rt->rt6i_src.addr = *saddr;
1da177e4
LT
772 rt->rt6i_src.plen = 128;
773 }
774#endif
775
14deae41 776 retry:
8ade06c6 777 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 778 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
779 int saved_rt_min_interval =
780 net->ipv6.sysctl.ip6_rt_gc_min_interval;
781 int saved_rt_elasticity =
782 net->ipv6.sysctl.ip6_rt_gc_elasticity;
783
784 if (attempts-- > 0) {
785 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
786 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
787
86393e52 788 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
789
790 net->ipv6.sysctl.ip6_rt_gc_elasticity =
791 saved_rt_elasticity;
792 net->ipv6.sysctl.ip6_rt_gc_min_interval =
793 saved_rt_min_interval;
794 goto retry;
795 }
796
e87cc472 797 net_warn_ratelimited("ipv6: Neighbour table overflow\n");
d8d1f30b 798 dst_free(&rt->dst);
14deae41
DM
799 return NULL;
800 }
95a9a5ba 801 }
1da177e4 802
95a9a5ba
YH
803 return rt;
804}
1da177e4 805
21efcfa0
ED
806static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
807 const struct in6_addr *daddr)
299d9939 808{
21efcfa0
ED
809 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
810
299d9939 811 if (rt) {
299d9939 812 rt->rt6i_flags |= RTF_CACHE;
27217455 813 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
299d9939
YH
814 }
815 return rt;
816}
817
8ed67789 818static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 819 struct flowi6 *fl6, int flags)
1da177e4
LT
820{
821 struct fib6_node *fn;
519fbd87 822 struct rt6_info *rt, *nrt;
c71099ac 823 int strict = 0;
1da177e4 824 int attempts = 3;
519fbd87 825 int err;
53b7997f 826 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 827
77d16f45 828 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
829
830relookup:
c71099ac 831 read_lock_bh(&table->tb6_lock);
1da177e4 832
8238dd06 833restart_2:
4c9483b2 834 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
835
836restart:
4acad72d 837 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 838
4c9483b2 839 BACKTRACK(net, &fl6->saddr);
8ed67789 840 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 841 rt->rt6i_flags & RTF_CACHE)
1ddef044 842 goto out;
1da177e4 843
d8d1f30b 844 dst_hold(&rt->dst);
c71099ac 845 read_unlock_bh(&table->tb6_lock);
fb9de91e 846
27217455 847 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 848 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 849 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 850 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
851 else
852 goto out2;
e40cf353 853
d8d1f30b 854 dst_release(&rt->dst);
8ed67789 855 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 856
d8d1f30b 857 dst_hold(&rt->dst);
519fbd87 858 if (nrt) {
40e22e8f 859 err = ip6_ins_rt(nrt);
519fbd87 860 if (!err)
1da177e4 861 goto out2;
1da177e4 862 }
1da177e4 863
519fbd87
YH
864 if (--attempts <= 0)
865 goto out2;
866
867 /*
c71099ac 868 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
869 * released someone could insert this route. Relookup.
870 */
d8d1f30b 871 dst_release(&rt->dst);
519fbd87
YH
872 goto relookup;
873
874out:
8238dd06
YH
875 if (reachable) {
876 reachable = 0;
877 goto restart_2;
878 }
d8d1f30b 879 dst_hold(&rt->dst);
c71099ac 880 read_unlock_bh(&table->tb6_lock);
1da177e4 881out2:
d8d1f30b
CG
882 rt->dst.lastuse = jiffies;
883 rt->dst.__use++;
c71099ac
TG
884
885 return rt;
1da177e4
LT
886}
887
8ed67789 888static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 889 struct flowi6 *fl6, int flags)
4acad72d 890{
4c9483b2 891 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
892}
893
72331bc0
SL
894static struct dst_entry *ip6_route_input_lookup(struct net *net,
895 struct net_device *dev,
896 struct flowi6 *fl6, int flags)
897{
898 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
899 flags |= RT6_LOOKUP_F_IFACE;
900
901 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
902}
903
c71099ac
TG
904void ip6_route_input(struct sk_buff *skb)
905{
b71d1d42 906 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 907 struct net *net = dev_net(skb->dev);
adaa70bb 908 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
909 struct flowi6 fl6 = {
910 .flowi6_iif = skb->dev->ifindex,
911 .daddr = iph->daddr,
912 .saddr = iph->saddr,
38308473 913 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
914 .flowi6_mark = skb->mark,
915 .flowi6_proto = iph->nexthdr,
c71099ac 916 };
adaa70bb 917
72331bc0 918 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
919}
920
8ed67789 921static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 922 struct flowi6 *fl6, int flags)
1da177e4 923{
4c9483b2 924 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
925}
926
9c7a4f9c 927struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 928 struct flowi6 *fl6)
c71099ac
TG
929{
930 int flags = 0;
931
4c9483b2 932 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 933 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 934
4c9483b2 935 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 936 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
937 else if (sk)
938 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 939
4c9483b2 940 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
941}
942
7159039a 943EXPORT_SYMBOL(ip6_route_output);
1da177e4 944
2774c131 945struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 946{
5c1e6aa3 947 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
948 struct dst_entry *new = NULL;
949
5c1e6aa3 950 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 951 if (rt) {
cf911662
DM
952 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
953
d8d1f30b 954 new = &rt->dst;
14e50e57 955
14e50e57 956 new->__use = 1;
352e512c
HX
957 new->input = dst_discard;
958 new->output = dst_discard;
14e50e57 959
21efcfa0
ED
960 if (dst_metrics_read_only(&ort->dst))
961 new->_metrics = ort->dst._metrics;
962 else
963 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
964 rt->rt6i_idev = ort->rt6i_idev;
965 if (rt->rt6i_idev)
966 in6_dev_hold(rt->rt6i_idev);
14e50e57 967
4e3fd7a0 968 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
969 rt->rt6i_flags = ort->rt6i_flags;
970 rt6_clean_expires(rt);
14e50e57
DM
971 rt->rt6i_metric = 0;
972
973 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
974#ifdef CONFIG_IPV6_SUBTREES
975 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
976#endif
977
978 dst_free(new);
979 }
980
69ead7af
DM
981 dst_release(dst_orig);
982 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 983}
14e50e57 984
1da177e4
LT
985/*
986 * Destination cache support functions
987 */
988
989static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
990{
991 struct rt6_info *rt;
992
993 rt = (struct rt6_info *) dst;
994
6431cbc2
DM
995 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
996 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
997 if (!rt->rt6i_peer)
998 rt6_bind_peer(rt, 0);
999 rt->rt6i_peer_genid = rt6_peer_genid();
1000 }
1da177e4 1001 return dst;
6431cbc2 1002 }
1da177e4
LT
1003 return NULL;
1004}
1005
1006static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1007{
1008 struct rt6_info *rt = (struct rt6_info *) dst;
1009
1010 if (rt) {
54c1a859
YH
1011 if (rt->rt6i_flags & RTF_CACHE) {
1012 if (rt6_check_expired(rt)) {
1013 ip6_del_rt(rt);
1014 dst = NULL;
1015 }
1016 } else {
1da177e4 1017 dst_release(dst);
54c1a859
YH
1018 dst = NULL;
1019 }
1da177e4 1020 }
54c1a859 1021 return dst;
1da177e4
LT
1022}
1023
1024static void ip6_link_failure(struct sk_buff *skb)
1025{
1026 struct rt6_info *rt;
1027
3ffe533c 1028 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1029
adf30907 1030 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1031 if (rt) {
1716a961
G
1032 if (rt->rt6i_flags & RTF_CACHE)
1033 rt6_update_expires(rt, 0);
1034 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1da177e4
LT
1035 rt->rt6i_node->fn_sernum = -1;
1036 }
1037}
1038
1039static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1040{
1041 struct rt6_info *rt6 = (struct rt6_info*)dst;
1042
1043 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1044 rt6->rt6i_flags |= RTF_MODIFIED;
1045 if (mtu < IPV6_MIN_MTU) {
defb3519 1046 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1047 mtu = IPV6_MIN_MTU;
defb3519
DM
1048 features |= RTAX_FEATURE_ALLFRAG;
1049 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1050 }
defb3519 1051 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
1052 }
1053}
1054
0dbaee3b 1055static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1056{
0dbaee3b
DM
1057 struct net_device *dev = dst->dev;
1058 unsigned int mtu = dst_mtu(dst);
1059 struct net *net = dev_net(dev);
1060
1da177e4
LT
1061 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1062
5578689a
DL
1063 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1064 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1065
1066 /*
1ab1457c
YH
1067 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1068 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1069 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1070 * rely only on pmtu discovery"
1071 */
1072 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1073 mtu = IPV6_MAXPLEN;
1074 return mtu;
1075}
1076
ebb762f2 1077static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1078{
d33e4553 1079 struct inet6_dev *idev;
618f9bc7
SK
1080 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1081
1082 if (mtu)
1083 return mtu;
1084
1085 mtu = IPV6_MIN_MTU;
d33e4553
DM
1086
1087 rcu_read_lock();
1088 idev = __in6_dev_get(dst->dev);
1089 if (idev)
1090 mtu = idev->cnf.mtu6;
1091 rcu_read_unlock();
1092
1093 return mtu;
1094}
1095
3b00944c
YH
1096static struct dst_entry *icmp6_dst_gc_list;
1097static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1098
3b00944c 1099struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1100 struct neighbour *neigh,
87a11578 1101 struct flowi6 *fl6)
1da177e4 1102{
87a11578 1103 struct dst_entry *dst;
1da177e4
LT
1104 struct rt6_info *rt;
1105 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1106 struct net *net = dev_net(dev);
1da177e4 1107
38308473 1108 if (unlikely(!idev))
122bdf67 1109 return ERR_PTR(-ENODEV);
1da177e4 1110
957c665f 1111 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
38308473 1112 if (unlikely(!rt)) {
1da177e4 1113 in6_dev_put(idev);
87a11578 1114 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1115 goto out;
1116 }
1117
1da177e4
LT
1118 if (neigh)
1119 neigh_hold(neigh);
14deae41 1120 else {
f83c7790 1121 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
b43faac6 1122 if (IS_ERR(neigh)) {
252c3d84 1123 in6_dev_put(idev);
b43faac6
DM
1124 dst_free(&rt->dst);
1125 return ERR_CAST(neigh);
1126 }
14deae41 1127 }
1da177e4 1128
8e2ec639
YZ
1129 rt->dst.flags |= DST_HOST;
1130 rt->dst.output = ip6_output;
69cce1d1 1131 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1132 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1133 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1134 rt->rt6i_dst.plen = 128;
1135 rt->rt6i_idev = idev;
7011687f 1136 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1137
3b00944c 1138 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1139 rt->dst.next = icmp6_dst_gc_list;
1140 icmp6_dst_gc_list = &rt->dst;
3b00944c 1141 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1142
5578689a 1143 fib6_force_start_gc(net);
1da177e4 1144
87a11578
DM
1145 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1146
1da177e4 1147out:
87a11578 1148 return dst;
1da177e4
LT
1149}
1150
3d0f24a7 1151int icmp6_dst_gc(void)
1da177e4 1152{
e9476e95 1153 struct dst_entry *dst, **pprev;
3d0f24a7 1154 int more = 0;
1da177e4 1155
3b00944c
YH
1156 spin_lock_bh(&icmp6_dst_lock);
1157 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1158
1da177e4
LT
1159 while ((dst = *pprev) != NULL) {
1160 if (!atomic_read(&dst->__refcnt)) {
1161 *pprev = dst->next;
1162 dst_free(dst);
1da177e4
LT
1163 } else {
1164 pprev = &dst->next;
3d0f24a7 1165 ++more;
1da177e4
LT
1166 }
1167 }
1168
3b00944c 1169 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1170
3d0f24a7 1171 return more;
1da177e4
LT
1172}
1173
1e493d19
DM
1174static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1175 void *arg)
1176{
1177 struct dst_entry *dst, **pprev;
1178
1179 spin_lock_bh(&icmp6_dst_lock);
1180 pprev = &icmp6_dst_gc_list;
1181 while ((dst = *pprev) != NULL) {
1182 struct rt6_info *rt = (struct rt6_info *) dst;
1183 if (func(rt, arg)) {
1184 *pprev = dst->next;
1185 dst_free(dst);
1186 } else {
1187 pprev = &dst->next;
1188 }
1189 }
1190 spin_unlock_bh(&icmp6_dst_lock);
1191}
1192
569d3645 1193static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1194{
1da177e4 1195 unsigned long now = jiffies;
86393e52 1196 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1197 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1198 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1199 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1200 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1201 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1202 int entries;
7019b78e 1203
fc66f95c 1204 entries = dst_entries_get_fast(ops);
7019b78e 1205 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1206 entries <= rt_max_size)
1da177e4
LT
1207 goto out;
1208
6891a346
BT
1209 net->ipv6.ip6_rt_gc_expire++;
1210 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1211 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1212 entries = dst_entries_get_slow(ops);
1213 if (entries < ops->gc_thresh)
7019b78e 1214 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1215out:
7019b78e 1216 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1217 return entries > rt_max_size;
1da177e4
LT
1218}
1219
1220/* Clean host part of a prefix. Not necessary in radix tree,
1221 but results in cleaner routing tables.
1222
1223 Remove it only when all the things will work!
1224 */
1225
6b75d090 1226int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1227{
5170ae82 1228 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1229 if (hoplimit == 0) {
6b75d090 1230 struct net_device *dev = dst->dev;
c68f24cc
ED
1231 struct inet6_dev *idev;
1232
1233 rcu_read_lock();
1234 idev = __in6_dev_get(dev);
1235 if (idev)
6b75d090 1236 hoplimit = idev->cnf.hop_limit;
c68f24cc 1237 else
53b7997f 1238 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1239 rcu_read_unlock();
1da177e4
LT
1240 }
1241 return hoplimit;
1242}
abbf46ae 1243EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1244
1245/*
1246 *
1247 */
1248
86872cb5 1249int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1250{
1251 int err;
5578689a 1252 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1253 struct rt6_info *rt = NULL;
1254 struct net_device *dev = NULL;
1255 struct inet6_dev *idev = NULL;
c71099ac 1256 struct fib6_table *table;
1da177e4
LT
1257 int addr_type;
1258
86872cb5 1259 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1260 return -EINVAL;
1261#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1262 if (cfg->fc_src_len)
1da177e4
LT
1263 return -EINVAL;
1264#endif
86872cb5 1265 if (cfg->fc_ifindex) {
1da177e4 1266 err = -ENODEV;
5578689a 1267 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1268 if (!dev)
1269 goto out;
1270 idev = in6_dev_get(dev);
1271 if (!idev)
1272 goto out;
1273 }
1274
86872cb5
TG
1275 if (cfg->fc_metric == 0)
1276 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1277
d71314b4 1278 err = -ENOBUFS;
38308473
DM
1279 if (cfg->fc_nlinfo.nlh &&
1280 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1281 table = fib6_get_table(net, cfg->fc_table);
38308473 1282 if (!table) {
d71314b4
MV
1283 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1284 table = fib6_new_table(net, cfg->fc_table);
1285 }
1286 } else {
1287 table = fib6_new_table(net, cfg->fc_table);
1288 }
38308473
DM
1289
1290 if (!table)
c71099ac 1291 goto out;
c71099ac 1292
957c665f 1293 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1da177e4 1294
38308473 1295 if (!rt) {
1da177e4
LT
1296 err = -ENOMEM;
1297 goto out;
1298 }
1299
d8d1f30b 1300 rt->dst.obsolete = -1;
1716a961
G
1301
1302 if (cfg->fc_flags & RTF_EXPIRES)
1303 rt6_set_expires(rt, jiffies +
1304 clock_t_to_jiffies(cfg->fc_expires));
1305 else
1306 rt6_clean_expires(rt);
1da177e4 1307
86872cb5
TG
1308 if (cfg->fc_protocol == RTPROT_UNSPEC)
1309 cfg->fc_protocol = RTPROT_BOOT;
1310 rt->rt6i_protocol = cfg->fc_protocol;
1311
1312 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1313
1314 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1315 rt->dst.input = ip6_mc_input;
ab79ad14
1316 else if (cfg->fc_flags & RTF_LOCAL)
1317 rt->dst.input = ip6_input;
1da177e4 1318 else
d8d1f30b 1319 rt->dst.input = ip6_forward;
1da177e4 1320
d8d1f30b 1321 rt->dst.output = ip6_output;
1da177e4 1322
86872cb5
TG
1323 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1324 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1325 if (rt->rt6i_dst.plen == 128)
11d53b49 1326 rt->dst.flags |= DST_HOST;
1da177e4 1327
8e2ec639
YZ
1328 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1329 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1330 if (!metrics) {
1331 err = -ENOMEM;
1332 goto out;
1333 }
1334 dst_init_metrics(&rt->dst, metrics, 0);
1335 }
1da177e4 1336#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1337 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1338 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1339#endif
1340
86872cb5 1341 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1342
1343 /* We cannot add true routes via loopback here,
1344 they would result in kernel looping; promote them to reject routes
1345 */
86872cb5 1346 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1347 (dev && (dev->flags & IFF_LOOPBACK) &&
1348 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1349 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1350 /* hold loopback dev/idev if we haven't done so. */
5578689a 1351 if (dev != net->loopback_dev) {
1da177e4
LT
1352 if (dev) {
1353 dev_put(dev);
1354 in6_dev_put(idev);
1355 }
5578689a 1356 dev = net->loopback_dev;
1da177e4
LT
1357 dev_hold(dev);
1358 idev = in6_dev_get(dev);
1359 if (!idev) {
1360 err = -ENODEV;
1361 goto out;
1362 }
1363 }
d8d1f30b
CG
1364 rt->dst.output = ip6_pkt_discard_out;
1365 rt->dst.input = ip6_pkt_discard;
1366 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1367 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1368 goto install_route;
1369 }
1370
86872cb5 1371 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1372 const struct in6_addr *gw_addr;
1da177e4
LT
1373 int gwa_type;
1374
86872cb5 1375 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1376 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1377 gwa_type = ipv6_addr_type(gw_addr);
1378
1379 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1380 struct rt6_info *grt;
1381
1382 /* IPv6 strictly inhibits using not link-local
1383 addresses as nexthop address.
1384 Otherwise, router will not able to send redirects.
1385 It is very good, but in some (rare!) circumstances
1386 (SIT, PtP, NBMA NOARP links) it is handy to allow
1387 some exceptions. --ANK
1388 */
1389 err = -EINVAL;
38308473 1390 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1391 goto out;
1392
5578689a 1393 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1394
1395 err = -EHOSTUNREACH;
38308473 1396 if (!grt)
1da177e4
LT
1397 goto out;
1398 if (dev) {
d1918542 1399 if (dev != grt->dst.dev) {
d8d1f30b 1400 dst_release(&grt->dst);
1da177e4
LT
1401 goto out;
1402 }
1403 } else {
d1918542 1404 dev = grt->dst.dev;
1da177e4
LT
1405 idev = grt->rt6i_idev;
1406 dev_hold(dev);
1407 in6_dev_hold(grt->rt6i_idev);
1408 }
38308473 1409 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1410 err = 0;
d8d1f30b 1411 dst_release(&grt->dst);
1da177e4
LT
1412
1413 if (err)
1414 goto out;
1415 }
1416 err = -EINVAL;
38308473 1417 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1418 goto out;
1419 }
1420
1421 err = -ENODEV;
38308473 1422 if (!dev)
1da177e4
LT
1423 goto out;
1424
c3968a85
DW
1425 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1426 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1427 err = -EINVAL;
1428 goto out;
1429 }
4e3fd7a0 1430 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1431 rt->rt6i_prefsrc.plen = 128;
1432 } else
1433 rt->rt6i_prefsrc.plen = 0;
1434
86872cb5 1435 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1436 err = rt6_bind_neighbour(rt, dev);
f83c7790 1437 if (err)
1da177e4 1438 goto out;
1da177e4
LT
1439 }
1440
86872cb5 1441 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1442
1443install_route:
86872cb5
TG
1444 if (cfg->fc_mx) {
1445 struct nlattr *nla;
1446 int remaining;
1447
1448 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1449 int type = nla_type(nla);
86872cb5
TG
1450
1451 if (type) {
1452 if (type > RTAX_MAX) {
1da177e4
LT
1453 err = -EINVAL;
1454 goto out;
1455 }
86872cb5 1456
defb3519 1457 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1458 }
1da177e4
LT
1459 }
1460 }
1461
d8d1f30b 1462 rt->dst.dev = dev;
1da177e4 1463 rt->rt6i_idev = idev;
c71099ac 1464 rt->rt6i_table = table;
63152fc0 1465
c346dca1 1466 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1467
86872cb5 1468 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1469
1470out:
1471 if (dev)
1472 dev_put(dev);
1473 if (idev)
1474 in6_dev_put(idev);
1475 if (rt)
d8d1f30b 1476 dst_free(&rt->dst);
1da177e4
LT
1477 return err;
1478}
1479
86872cb5 1480static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1481{
1482 int err;
c71099ac 1483 struct fib6_table *table;
d1918542 1484 struct net *net = dev_net(rt->dst.dev);
1da177e4 1485
8ed67789 1486 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1487 return -ENOENT;
1488
c71099ac
TG
1489 table = rt->rt6i_table;
1490 write_lock_bh(&table->tb6_lock);
1da177e4 1491
86872cb5 1492 err = fib6_del(rt, info);
d8d1f30b 1493 dst_release(&rt->dst);
1da177e4 1494
c71099ac 1495 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1496
1497 return err;
1498}
1499
e0a1ad73
TG
1500int ip6_del_rt(struct rt6_info *rt)
1501{
4d1169c1 1502 struct nl_info info = {
d1918542 1503 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1504 };
528c4ceb 1505 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1506}
1507
86872cb5 1508static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1509{
c71099ac 1510 struct fib6_table *table;
1da177e4
LT
1511 struct fib6_node *fn;
1512 struct rt6_info *rt;
1513 int err = -ESRCH;
1514
5578689a 1515 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1516 if (!table)
c71099ac
TG
1517 return err;
1518
1519 read_lock_bh(&table->tb6_lock);
1da177e4 1520
c71099ac 1521 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1522 &cfg->fc_dst, cfg->fc_dst_len,
1523 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1524
1da177e4 1525 if (fn) {
d8d1f30b 1526 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1527 if (cfg->fc_ifindex &&
d1918542
DM
1528 (!rt->dst.dev ||
1529 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1530 continue;
86872cb5
TG
1531 if (cfg->fc_flags & RTF_GATEWAY &&
1532 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1533 continue;
86872cb5 1534 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1535 continue;
d8d1f30b 1536 dst_hold(&rt->dst);
c71099ac 1537 read_unlock_bh(&table->tb6_lock);
1da177e4 1538
86872cb5 1539 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1540 }
1541 }
c71099ac 1542 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1543
1544 return err;
1545}
1546
1547/*
1548 * Handle redirects
1549 */
a6279458 1550struct ip6rd_flowi {
4c9483b2 1551 struct flowi6 fl6;
a6279458
YH
1552 struct in6_addr gateway;
1553};
1554
8ed67789
DL
1555static struct rt6_info *__ip6_route_redirect(struct net *net,
1556 struct fib6_table *table,
4c9483b2 1557 struct flowi6 *fl6,
a6279458 1558 int flags)
1da177e4 1559{
4c9483b2 1560 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1561 struct rt6_info *rt;
e843b9e1 1562 struct fib6_node *fn;
c71099ac 1563
1da177e4 1564 /*
e843b9e1
YH
1565 * Get the "current" route for this destination and
1566 * check if the redirect has come from approriate router.
1567 *
1568 * RFC 2461 specifies that redirects should only be
1569 * accepted if they come from the nexthop to the target.
1570 * Due to the way the routes are chosen, this notion
1571 * is a bit fuzzy and one might need to check all possible
1572 * routes.
1da177e4 1573 */
1da177e4 1574
c71099ac 1575 read_lock_bh(&table->tb6_lock);
4c9483b2 1576 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1577restart:
d8d1f30b 1578 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1579 /*
1580 * Current route is on-link; redirect is always invalid.
1581 *
1582 * Seems, previous statement is not true. It could
1583 * be node, which looks for us as on-link (f.e. proxy ndisc)
1584 * But then router serving it might decide, that we should
1585 * know truth 8)8) --ANK (980726).
1586 */
1587 if (rt6_check_expired(rt))
1588 continue;
1589 if (!(rt->rt6i_flags & RTF_GATEWAY))
1590 continue;
d1918542 1591 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
e843b9e1 1592 continue;
a6279458 1593 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1594 continue;
1595 break;
1596 }
a6279458 1597
cb15d9c2 1598 if (!rt)
8ed67789 1599 rt = net->ipv6.ip6_null_entry;
4c9483b2 1600 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1601out:
d8d1f30b 1602 dst_hold(&rt->dst);
a6279458 1603
c71099ac 1604 read_unlock_bh(&table->tb6_lock);
e843b9e1 1605
a6279458
YH
1606 return rt;
1607};
1608
b71d1d42
ED
1609static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1610 const struct in6_addr *src,
1611 const struct in6_addr *gateway,
a6279458
YH
1612 struct net_device *dev)
1613{
adaa70bb 1614 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1615 struct net *net = dev_net(dev);
a6279458 1616 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1617 .fl6 = {
1618 .flowi6_oif = dev->ifindex,
1619 .daddr = *dest,
1620 .saddr = *src,
a6279458 1621 },
a6279458 1622 };
adaa70bb 1623
4e3fd7a0 1624 rdfl.gateway = *gateway;
86c36ce4 1625
adaa70bb
TG
1626 if (rt6_need_strict(dest))
1627 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1628
4c9483b2 1629 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1630 flags, __ip6_route_redirect);
a6279458
YH
1631}
1632
b71d1d42
ED
1633void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1634 const struct in6_addr *saddr,
a6279458
YH
1635 struct neighbour *neigh, u8 *lladdr, int on_link)
1636{
1637 struct rt6_info *rt, *nrt = NULL;
1638 struct netevent_redirect netevent;
c346dca1 1639 struct net *net = dev_net(neigh->dev);
a6279458
YH
1640
1641 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1642
8ed67789 1643 if (rt == net->ipv6.ip6_null_entry) {
e87cc472 1644 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
a6279458 1645 goto out;
1da177e4
LT
1646 }
1647
1da177e4
LT
1648 /*
1649 * We have finally decided to accept it.
1650 */
1651
1ab1457c 1652 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1653 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1654 NEIGH_UPDATE_F_OVERRIDE|
1655 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1656 NEIGH_UPDATE_F_ISROUTER))
1657 );
1658
1659 /*
1660 * Redirect received -> path was valid.
1661 * Look, redirects are sent only in response to data packets,
1662 * so that this nexthop apparently is reachable. --ANK
1663 */
d8d1f30b 1664 dst_confirm(&rt->dst);
1da177e4
LT
1665
1666 /* Duplicate redirect: silently ignore. */
27217455 1667 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1da177e4
LT
1668 goto out;
1669
21efcfa0 1670 nrt = ip6_rt_copy(rt, dest);
38308473 1671 if (!nrt)
1da177e4
LT
1672 goto out;
1673
1674 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1675 if (on_link)
1676 nrt->rt6i_flags &= ~RTF_GATEWAY;
1677
4e3fd7a0 1678 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
69cce1d1 1679 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1680
40e22e8f 1681 if (ip6_ins_rt(nrt))
1da177e4
LT
1682 goto out;
1683
d8d1f30b
CG
1684 netevent.old = &rt->dst;
1685 netevent.new = &nrt->dst;
8d71740c
TT
1686 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1687
38308473 1688 if (rt->rt6i_flags & RTF_CACHE) {
e0a1ad73 1689 ip6_del_rt(rt);
1da177e4
LT
1690 return;
1691 }
1692
1693out:
d8d1f30b 1694 dst_release(&rt->dst);
1da177e4
LT
1695}
1696
1697/*
1698 * Handle ICMP "packet too big" messages
1699 * i.e. Path MTU discovery
1700 */
1701
b71d1d42 1702static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1703 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1704{
1705 struct rt6_info *rt, *nrt;
1706 int allfrag = 0;
d3052b55 1707again:
ae878ae2 1708 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
38308473 1709 if (!rt)
1da177e4
LT
1710 return;
1711
d3052b55
AV
1712 if (rt6_check_expired(rt)) {
1713 ip6_del_rt(rt);
1714 goto again;
1715 }
1716
d8d1f30b 1717 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1718 goto out;
1719
1720 if (pmtu < IPV6_MIN_MTU) {
1721 /*
1ab1457c 1722 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1723 * MTU (1280) and a fragment header should always be included
1724 * after a node receiving Too Big message reporting PMTU is
1725 * less than the IPv6 Minimum Link MTU.
1726 */
1727 pmtu = IPV6_MIN_MTU;
1728 allfrag = 1;
1729 }
1730
1731 /* New mtu received -> path was valid.
1732 They are sent only in response to data packets,
1733 so that this nexthop apparently is reachable. --ANK
1734 */
d8d1f30b 1735 dst_confirm(&rt->dst);
1da177e4
LT
1736
1737 /* Host route. If it is static, it would be better
1738 not to override it, but add new one, so that
1739 when cache entry will expire old pmtu
1740 would return automatically.
1741 */
1742 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1743 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1744 if (allfrag) {
1745 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1746 features |= RTAX_FEATURE_ALLFRAG;
1747 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1748 }
1716a961
G
1749 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1750 rt->rt6i_flags |= RTF_MODIFIED;
1da177e4
LT
1751 goto out;
1752 }
1753
1754 /* Network route.
1755 Two cases are possible:
1756 1. It is connected route. Action: COW
1757 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1758 */
27217455 1759 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1760 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1761 else
1762 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1763
d5315b50 1764 if (nrt) {
defb3519
DM
1765 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1766 if (allfrag) {
1767 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1768 features |= RTAX_FEATURE_ALLFRAG;
1769 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1770 }
a1e78363
YH
1771
1772 /* According to RFC 1981, detecting PMTU increase shouldn't be
1773 * happened within 5 mins, the recommended timer is 10 mins.
1774 * Here this route expiration time is set to ip6_rt_mtu_expires
1775 * which is 10 mins. After 10 mins the decreased pmtu is expired
1776 * and detecting PMTU increase will be automatically happened.
1777 */
1716a961
G
1778 rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1779 nrt->rt6i_flags |= RTF_DYNAMIC;
40e22e8f 1780 ip6_ins_rt(nrt);
1da177e4 1781 }
1da177e4 1782out:
d8d1f30b 1783 dst_release(&rt->dst);
1da177e4
LT
1784}
1785
b71d1d42 1786void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1787 struct net_device *dev, u32 pmtu)
1788{
1789 struct net *net = dev_net(dev);
1790
1791 /*
1792 * RFC 1981 states that a node "MUST reduce the size of the packets it
1793 * is sending along the path" that caused the Packet Too Big message.
1794 * Since it's not possible in the general case to determine which
1795 * interface was used to send the original packet, we update the MTU
1796 * on the interface that will be used to send future packets. We also
1797 * update the MTU on the interface that received the Packet Too Big in
1798 * case the original packet was forced out that interface with
1799 * SO_BINDTODEVICE or similar. This is the next best thing to the
1800 * correct behaviour, which would be to update the MTU on all
1801 * interfaces.
1802 */
1803 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1804 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1805}
1806
1da177e4
LT
1807/*
1808 * Misc support functions
1809 */
1810
1716a961 1811static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1812 const struct in6_addr *dest)
1da177e4 1813{
d1918542 1814 struct net *net = dev_net(ort->dst.dev);
5c1e6aa3 1815 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 1816 ort->dst.dev, 0);
1da177e4
LT
1817
1818 if (rt) {
d8d1f30b
CG
1819 rt->dst.input = ort->dst.input;
1820 rt->dst.output = ort->dst.output;
8e2ec639 1821 rt->dst.flags |= DST_HOST;
d8d1f30b 1822
4e3fd7a0 1823 rt->rt6i_dst.addr = *dest;
8e2ec639 1824 rt->rt6i_dst.plen = 128;
defb3519 1825 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1826 rt->dst.error = ort->dst.error;
1da177e4
LT
1827 rt->rt6i_idev = ort->rt6i_idev;
1828 if (rt->rt6i_idev)
1829 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1830 rt->dst.lastuse = jiffies;
1da177e4 1831
4e3fd7a0 1832 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1833 rt->rt6i_flags = ort->rt6i_flags;
1834 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1835 (RTF_DEFAULT | RTF_ADDRCONF))
1836 rt6_set_from(rt, ort);
1837 else
1838 rt6_clean_expires(rt);
1da177e4
LT
1839 rt->rt6i_metric = 0;
1840
1da177e4
LT
1841#ifdef CONFIG_IPV6_SUBTREES
1842 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1843#endif
0f6c6392 1844 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1845 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1846 }
1847 return rt;
1848}
1849
70ceb4f5 1850#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1851static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1852 const struct in6_addr *prefix, int prefixlen,
1853 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1854{
1855 struct fib6_node *fn;
1856 struct rt6_info *rt = NULL;
c71099ac
TG
1857 struct fib6_table *table;
1858
efa2cea0 1859 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1860 if (!table)
c71099ac 1861 return NULL;
70ceb4f5 1862
c71099ac
TG
1863 write_lock_bh(&table->tb6_lock);
1864 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1865 if (!fn)
1866 goto out;
1867
d8d1f30b 1868 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1869 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1870 continue;
1871 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1872 continue;
1873 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1874 continue;
d8d1f30b 1875 dst_hold(&rt->dst);
70ceb4f5
YH
1876 break;
1877 }
1878out:
c71099ac 1879 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1880 return rt;
1881}
1882
efa2cea0 1883static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1884 const struct in6_addr *prefix, int prefixlen,
1885 const struct in6_addr *gwaddr, int ifindex,
95c96174 1886 unsigned int pref)
70ceb4f5 1887{
86872cb5
TG
1888 struct fib6_config cfg = {
1889 .fc_table = RT6_TABLE_INFO,
238fc7ea 1890 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1891 .fc_ifindex = ifindex,
1892 .fc_dst_len = prefixlen,
1893 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1894 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1895 .fc_nlinfo.pid = 0,
1896 .fc_nlinfo.nlh = NULL,
1897 .fc_nlinfo.nl_net = net,
86872cb5
TG
1898 };
1899
4e3fd7a0
AD
1900 cfg.fc_dst = *prefix;
1901 cfg.fc_gateway = *gwaddr;
70ceb4f5 1902
e317da96
YH
1903 /* We should treat it as a default route if prefix length is 0. */
1904 if (!prefixlen)
86872cb5 1905 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1906
86872cb5 1907 ip6_route_add(&cfg);
70ceb4f5 1908
efa2cea0 1909 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1910}
1911#endif
1912
b71d1d42 1913struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1914{
1da177e4 1915 struct rt6_info *rt;
c71099ac 1916 struct fib6_table *table;
1da177e4 1917
c346dca1 1918 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1919 if (!table)
c71099ac 1920 return NULL;
1da177e4 1921
c71099ac 1922 write_lock_bh(&table->tb6_lock);
d8d1f30b 1923 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1924 if (dev == rt->dst.dev &&
045927ff 1925 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1926 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1927 break;
1928 }
1929 if (rt)
d8d1f30b 1930 dst_hold(&rt->dst);
c71099ac 1931 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1932 return rt;
1933}
1934
b71d1d42 1935struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1936 struct net_device *dev,
1937 unsigned int pref)
1da177e4 1938{
86872cb5
TG
1939 struct fib6_config cfg = {
1940 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1941 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1942 .fc_ifindex = dev->ifindex,
1943 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1944 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1945 .fc_nlinfo.pid = 0,
1946 .fc_nlinfo.nlh = NULL,
c346dca1 1947 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1948 };
1da177e4 1949
4e3fd7a0 1950 cfg.fc_gateway = *gwaddr;
1da177e4 1951
86872cb5 1952 ip6_route_add(&cfg);
1da177e4 1953
1da177e4
LT
1954 return rt6_get_dflt_router(gwaddr, dev);
1955}
1956
7b4da532 1957void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1958{
1959 struct rt6_info *rt;
c71099ac
TG
1960 struct fib6_table *table;
1961
1962 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1963 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1964 if (!table)
c71099ac 1965 return;
1da177e4
LT
1966
1967restart:
c71099ac 1968 read_lock_bh(&table->tb6_lock);
d8d1f30b 1969 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1970 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1971 dst_hold(&rt->dst);
c71099ac 1972 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1973 ip6_del_rt(rt);
1da177e4
LT
1974 goto restart;
1975 }
1976 }
c71099ac 1977 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1978}
1979
5578689a
DL
1980static void rtmsg_to_fib6_config(struct net *net,
1981 struct in6_rtmsg *rtmsg,
86872cb5
TG
1982 struct fib6_config *cfg)
1983{
1984 memset(cfg, 0, sizeof(*cfg));
1985
1986 cfg->fc_table = RT6_TABLE_MAIN;
1987 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1988 cfg->fc_metric = rtmsg->rtmsg_metric;
1989 cfg->fc_expires = rtmsg->rtmsg_info;
1990 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1991 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1992 cfg->fc_flags = rtmsg->rtmsg_flags;
1993
5578689a 1994 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1995
4e3fd7a0
AD
1996 cfg->fc_dst = rtmsg->rtmsg_dst;
1997 cfg->fc_src = rtmsg->rtmsg_src;
1998 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1999}
2000
5578689a 2001int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2002{
86872cb5 2003 struct fib6_config cfg;
1da177e4
LT
2004 struct in6_rtmsg rtmsg;
2005 int err;
2006
2007 switch(cmd) {
2008 case SIOCADDRT: /* Add a route */
2009 case SIOCDELRT: /* Delete a route */
2010 if (!capable(CAP_NET_ADMIN))
2011 return -EPERM;
2012 err = copy_from_user(&rtmsg, arg,
2013 sizeof(struct in6_rtmsg));
2014 if (err)
2015 return -EFAULT;
86872cb5 2016
5578689a 2017 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2018
1da177e4
LT
2019 rtnl_lock();
2020 switch (cmd) {
2021 case SIOCADDRT:
86872cb5 2022 err = ip6_route_add(&cfg);
1da177e4
LT
2023 break;
2024 case SIOCDELRT:
86872cb5 2025 err = ip6_route_del(&cfg);
1da177e4
LT
2026 break;
2027 default:
2028 err = -EINVAL;
2029 }
2030 rtnl_unlock();
2031
2032 return err;
3ff50b79 2033 }
1da177e4
LT
2034
2035 return -EINVAL;
2036}
2037
2038/*
2039 * Drop the packet on the floor
2040 */
2041
d5fdd6ba 2042static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2043{
612f09e8 2044 int type;
adf30907 2045 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2046 switch (ipstats_mib_noroutes) {
2047 case IPSTATS_MIB_INNOROUTES:
0660e03f 2048 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2049 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2050 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2051 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2052 break;
2053 }
2054 /* FALLTHROUGH */
2055 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2056 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2057 ipstats_mib_noroutes);
612f09e8
YH
2058 break;
2059 }
3ffe533c 2060 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2061 kfree_skb(skb);
2062 return 0;
2063}
2064
9ce8ade0
TG
2065static int ip6_pkt_discard(struct sk_buff *skb)
2066{
612f09e8 2067 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2068}
2069
20380731 2070static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2071{
adf30907 2072 skb->dev = skb_dst(skb)->dev;
612f09e8 2073 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2074}
2075
6723ab54
DM
2076#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2077
9ce8ade0
TG
2078static int ip6_pkt_prohibit(struct sk_buff *skb)
2079{
612f09e8 2080 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2081}
2082
2083static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2084{
adf30907 2085 skb->dev = skb_dst(skb)->dev;
612f09e8 2086 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2087}
2088
6723ab54
DM
2089#endif
2090
1da177e4
LT
2091/*
2092 * Allocate a dst for local (unicast / anycast) address.
2093 */
2094
2095struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2096 const struct in6_addr *addr,
8f031519 2097 bool anycast)
1da177e4 2098{
c346dca1 2099 struct net *net = dev_net(idev->dev);
5c1e6aa3 2100 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 2101 net->loopback_dev, 0);
f83c7790 2102 int err;
1da177e4 2103
38308473 2104 if (!rt) {
e87cc472 2105 net_warn_ratelimited("IPv6: Maximum number of routes reached, consider increasing route/max_size\n");
1da177e4 2106 return ERR_PTR(-ENOMEM);
40385653 2107 }
1da177e4 2108
1da177e4
LT
2109 in6_dev_hold(idev);
2110
11d53b49 2111 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2112 rt->dst.input = ip6_input;
2113 rt->dst.output = ip6_output;
1da177e4 2114 rt->rt6i_idev = idev;
d8d1f30b 2115 rt->dst.obsolete = -1;
1da177e4
LT
2116
2117 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2118 if (anycast)
2119 rt->rt6i_flags |= RTF_ANYCAST;
2120 else
1da177e4 2121 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2122 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2123 if (err) {
d8d1f30b 2124 dst_free(&rt->dst);
f83c7790 2125 return ERR_PTR(err);
1da177e4
LT
2126 }
2127
4e3fd7a0 2128 rt->rt6i_dst.addr = *addr;
1da177e4 2129 rt->rt6i_dst.plen = 128;
5578689a 2130 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2131
d8d1f30b 2132 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2133
2134 return rt;
2135}
2136
c3968a85
DW
2137int ip6_route_get_saddr(struct net *net,
2138 struct rt6_info *rt,
b71d1d42 2139 const struct in6_addr *daddr,
c3968a85
DW
2140 unsigned int prefs,
2141 struct in6_addr *saddr)
2142{
2143 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2144 int err = 0;
2145 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2146 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2147 else
2148 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2149 daddr, prefs, saddr);
2150 return err;
2151}
2152
2153/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device to match; NULL matches any device */
	struct net *net;	/* namespace whose null entry must be skipped */
	struct in6_addr *addr;	/* address being removed */
};
2159
2160static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2161{
2162 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2163 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2164 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2165
d1918542 2166 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2167 rt != net->ipv6.ip6_null_entry &&
2168 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2169 /* remove prefsrc entry */
2170 rt->rt6i_prefsrc.plen = 0;
2171 }
2172 return 0;
2173}
2174
2175void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2176{
2177 struct net *net = dev_net(ifp->idev->dev);
2178 struct arg_dev_net_ip adni = {
2179 .dev = ifp->idev->dev,
2180 .net = net,
2181 .addr = &ifp->addr,
2182 };
2183 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2184}
2185
8ed67789
DL
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches any device */
	struct net *net;	/* namespace whose null entry must be kept */
};
2190
1da177e4
LT
2191static int fib6_ifdown(struct rt6_info *rt, void *arg)
2192{
bc3ef660 2193 const struct arg_dev_net *adn = arg;
2194 const struct net_device *dev = adn->dev;
8ed67789 2195
d1918542 2196 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2197 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2198 return -1;
c159d30c 2199
1da177e4
LT
2200 return 0;
2201}
2202
f3db4851 2203void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2204{
8ed67789
DL
2205 struct arg_dev_net adn = {
2206 .dev = dev,
2207 .net = net,
2208 };
2209
2210 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2211 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2212}
2213
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2218
2219static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2220{
2221 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2222 struct inet6_dev *idev;
2223
2224 /* In IPv6 pmtu discovery is not optional,
2225 so that RTAX_MTU lock cannot disable it.
2226 We still use this lock to block changes
2227 caused by addrconf/ndisc.
2228 */
2229
2230 idev = __in6_dev_get(arg->dev);
38308473 2231 if (!idev)
1da177e4
LT
2232 return 0;
2233
2234 /* For administrative MTU increase, there is no way to discover
2235 IPv6 PMTU increase, so PMTU increase should be updated here.
2236 Since RFC 1981 doesn't include administrative MTU increase
2237 update PMTU increase is a MUST. (i.e. jumbo frame)
2238 */
2239 /*
2240 If new MTU is less than route PMTU, this new MTU will be the
2241 lowest MTU in the path, update the route PMTU to reflect PMTU
2242 decreases; if new MTU is greater than route PMTU, and the
2243 old MTU is the lowest MTU in the path, update the route PMTU
2244 to reflect the increase. In this case if the other nodes' MTU
2245 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2246 PMTU discouvery.
2247 */
d1918542 2248 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2249 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2250 (dst_mtu(&rt->dst) >= arg->mtu ||
2251 (dst_mtu(&rt->dst) < arg->mtu &&
2252 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2253 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2254 }
1da177e4
LT
2255 return 0;
2256}
2257
95c96174 2258void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2259{
c71099ac
TG
2260 struct rt6_mtu_change_arg arg = {
2261 .dev = dev,
2262 .mtu = mtu,
2263 };
1da177e4 2264
c346dca1 2265 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2266}
2267
ef7c79ed 2268static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2269 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2270 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2271 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2272 [RTA_PRIORITY] = { .type = NLA_U32 },
2273 [RTA_METRICS] = { .type = NLA_NESTED },
2274};
2275
2276static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2277 struct fib6_config *cfg)
1da177e4 2278{
86872cb5
TG
2279 struct rtmsg *rtm;
2280 struct nlattr *tb[RTA_MAX+1];
2281 int err;
1da177e4 2282
86872cb5
TG
2283 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2284 if (err < 0)
2285 goto errout;
1da177e4 2286
86872cb5
TG
2287 err = -EINVAL;
2288 rtm = nlmsg_data(nlh);
2289 memset(cfg, 0, sizeof(*cfg));
2290
2291 cfg->fc_table = rtm->rtm_table;
2292 cfg->fc_dst_len = rtm->rtm_dst_len;
2293 cfg->fc_src_len = rtm->rtm_src_len;
2294 cfg->fc_flags = RTF_UP;
2295 cfg->fc_protocol = rtm->rtm_protocol;
2296
2297 if (rtm->rtm_type == RTN_UNREACHABLE)
2298 cfg->fc_flags |= RTF_REJECT;
2299
ab79ad14
2300 if (rtm->rtm_type == RTN_LOCAL)
2301 cfg->fc_flags |= RTF_LOCAL;
2302
86872cb5
TG
2303 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2304 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2305 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2306
2307 if (tb[RTA_GATEWAY]) {
2308 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2309 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2310 }
86872cb5
TG
2311
2312 if (tb[RTA_DST]) {
2313 int plen = (rtm->rtm_dst_len + 7) >> 3;
2314
2315 if (nla_len(tb[RTA_DST]) < plen)
2316 goto errout;
2317
2318 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2319 }
86872cb5
TG
2320
2321 if (tb[RTA_SRC]) {
2322 int plen = (rtm->rtm_src_len + 7) >> 3;
2323
2324 if (nla_len(tb[RTA_SRC]) < plen)
2325 goto errout;
2326
2327 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2328 }
86872cb5 2329
c3968a85
DW
2330 if (tb[RTA_PREFSRC])
2331 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2332
86872cb5
TG
2333 if (tb[RTA_OIF])
2334 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2335
2336 if (tb[RTA_PRIORITY])
2337 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2338
2339 if (tb[RTA_METRICS]) {
2340 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2341 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2342 }
86872cb5
TG
2343
2344 if (tb[RTA_TABLE])
2345 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2346
2347 err = 0;
2348errout:
2349 return err;
1da177e4
LT
2350}
2351
c127ea2c 2352static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2353{
86872cb5
TG
2354 struct fib6_config cfg;
2355 int err;
1da177e4 2356
86872cb5
TG
2357 err = rtm_to_fib6_config(skb, nlh, &cfg);
2358 if (err < 0)
2359 return err;
2360
2361 return ip6_route_del(&cfg);
1da177e4
LT
2362}
2363
c127ea2c 2364static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2365{
86872cb5
TG
2366 struct fib6_config cfg;
2367 int err;
1da177e4 2368
86872cb5
TG
2369 err = rtm_to_fib6_config(skb, nlh, &cfg);
2370 if (err < 0)
2371 return err;
2372
2373 return ip6_route_add(&cfg);
1da177e4
LT
2374}
2375
339bf98f
TG
2376static inline size_t rt6_nlmsg_size(void)
2377{
2378 return NLMSG_ALIGN(sizeof(struct rtmsg))
2379 + nla_total_size(16) /* RTA_SRC */
2380 + nla_total_size(16) /* RTA_DST */
2381 + nla_total_size(16) /* RTA_GATEWAY */
2382 + nla_total_size(16) /* RTA_PREFSRC */
2383 + nla_total_size(4) /* RTA_TABLE */
2384 + nla_total_size(4) /* RTA_IIF */
2385 + nla_total_size(4) /* RTA_OIF */
2386 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2387 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2388 + nla_total_size(sizeof(struct rta_cacheinfo));
2389}
2390
191cd582
BH
/*
 * Serialise route @rt into a netlink route message appended to @skb.
 *
 * @dst / @src: when non-NULL they are reported as RTA_DST / RTA_SRC with
 *	a prefix length of 128 instead of the route's own prefix.
 * @iif: when non-zero, reported as RTA_IIF (or handed to the multicast
 *	routing code for multicast destinations under CONFIG_IPV6_MROUTE).
 * @type, @pid, @seq, @flags: passed to nlmsg_put() for the header.
 * @prefix: when non-zero, only routes flagged RTF_PREFIX_RT are emitted.
 * @nowait: forwarded to ip6mr_get_route().
 *
 * Returns 1 when the route is skipped by the @prefix filter, -EMSGSIZE
 * when the message does not fit (the partial message is cancelled),
 * 0 when ip6mr_get_route() returned 0 with !@nowait, otherwise the
 * value of nlmsg_end().
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	const struct inet_peer *peer;
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;
	struct neighbour *n;
	u32 ts, tsage;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	/* Fixed rtmsg header. */
	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	/* the table id goes both in the header and in RTA_TABLE */
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* Route type: reject, local (by flag or loopback device), unicast. */
	if (rt->rt6i_flags & RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	/* Route flags override the stored protocol when set. */
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags & RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* Destination: caller-supplied host address, else the route prefix. */
	if (dst) {
		if (nla_put(skb, RTA_DST, 16, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	/* Source: same scheme as the destination. */
	if (src) {
		if (nla_put(skb, RTA_SRC, 16, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					/* with nowait only -EMSGSIZE is fatal */
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		/* Output lookup: report the source address that would be used. */
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	/* The route's configured preferred source, if any. */
	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	/* Gateway is taken from the bound neighbour, read under RCU. */
	rcu_read_lock();
	n = dst_get_neighbour_noref(&rt->dst);
	if (n) {
		if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
			rcu_read_unlock();
			goto nla_put_failure;
		}
	}
	rcu_read_unlock();

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;
	/* Remaining lifetime in jiffies, 0 if not expiring, capped at INT_MAX. */
	if (!(rt->rt6i_flags & RTF_EXPIRES))
		expires = 0;
	else if (rt->dst.expires - jiffies < INT_MAX)
		expires = rt->dst.expires - jiffies;
	else
		expires = INT_MAX;

	/* TCP timestamp info from the inet peer, when one is attached. */
	peer = rt->rt6i_peer;
	ts = tsage = 0;
	if (peer && peer->tcp_ts_stamp) {
		ts = peer->tcp_ts;
		tsage = get_seconds() - peer->tcp_ts_stamp;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
			       expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2539
1b43af54 2540int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2541{
2542 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2543 int prefix;
2544
2d7202bf
TG
2545 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2546 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2547 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2548 } else
2549 prefix = 0;
2550
191cd582
BH
2551 return rt6_fill_node(arg->net,
2552 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2553 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2554 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2555}
2556
/*
 * RTM_GETROUTE handler: look up the route for the flow described by the
 * request and unicast the result back to the requester.
 *
 * With RTA_IIF set an input lookup is done on that interface (-ENODEV
 * if it does not exist); otherwise an output lookup is done, optionally
 * bound to RTA_OIF.  Returns -EINVAL when RTA_SRC / RTA_DST are shorter
 * than an in6_addr, -ENOBUFS when the reply skb cannot be allocated,
 * a negative rt6_fill_node() error, or the result of rtnl_unicast().
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	int err, iif = 0, oif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	/* default error for the attribute length checks below */
	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	if (iif) {
		/* Input path: resolve as if the packet arrived on iif. */
		struct net_device *dev;
		int flags = 0;

		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
							       flags);
	} else {
		/* Output path: locally generated traffic, optional oif. */
		fl6.flowi6_oif = oif;

		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		/* no skb to hand the route to: drop it here */
		dst_release(&rt->dst);
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	/* the route is attached to the reply skb from here on */
	skb_dst_set(skb, &rt->dst);

	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
errout:
	return err;
}
2644
86872cb5 2645void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2646{
2647 struct sk_buff *skb;
5578689a 2648 struct net *net = info->nl_net;
528c4ceb
DL
2649 u32 seq;
2650 int err;
2651
2652 err = -ENOBUFS;
38308473 2653 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2654
339bf98f 2655 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2656 if (!skb)
21713ebc
TG
2657 goto errout;
2658
191cd582 2659 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2660 event, info->pid, seq, 0, 0, 0);
26932566
PM
2661 if (err < 0) {
2662 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2663 WARN_ON(err == -EMSGSIZE);
2664 kfree_skb(skb);
2665 goto errout;
2666 }
1ce85fe4
PNA
2667 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2668 info->nlh, gfp_any());
2669 return;
21713ebc
TG
2670errout:
2671 if (err < 0)
5578689a 2672 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2673}
2674
8ed67789
DL
2675static int ip6_route_dev_notify(struct notifier_block *this,
2676 unsigned long event, void *data)
2677{
2678 struct net_device *dev = (struct net_device *)data;
c346dca1 2679 struct net *net = dev_net(dev);
8ed67789
DL
2680
2681 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2682 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2683 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2684#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2685 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2686 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2687 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2688 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2689#endif
2690 }
2691
2692 return NOTIFY_OK;
2693}
2694
1da177e4
LT
2695/*
2696 * /proc
2697 */
2698
2699#ifdef CONFIG_PROC_FS
2700
1da177e4
LT
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): nothing in the visible part of this file references
 * this structure; it may be a leftover from before the seq_file
 * conversion - confirm before relying on it.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2709
2710static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2711{
33120b30 2712 struct seq_file *m = p_arg;
69cce1d1 2713 struct neighbour *n;
1da177e4 2714
4b7a4274 2715 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2716
2717#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2718 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2719#else
33120b30 2720 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2721#endif
f2c31e32 2722 rcu_read_lock();
27217455 2723 n = dst_get_neighbour_noref(&rt->dst);
69cce1d1
DM
2724 if (n) {
2725 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2726 } else {
33120b30 2727 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2728 }
f2c31e32 2729 rcu_read_unlock();
33120b30 2730 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2731 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2732 rt->dst.__use, rt->rt6i_flags,
d1918542 2733 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2734 return 0;
2735}
2736
33120b30 2737static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2738{
f3db4851 2739 struct net *net = (struct net *)m->private;
32b293a5 2740 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2741 return 0;
2742}
1da177e4 2743
33120b30
AD
2744static int ipv6_route_open(struct inode *inode, struct file *file)
2745{
de05c557 2746 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2747}
2748
33120b30
AD
/* File operations for the per-namespace "ipv6_route" proc entry. */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release_net,	/* pairs with single_open_net() */
};
2756
1da177e4
LT
2757static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2758{
69ddb805 2759 struct net *net = (struct net *)seq->private;
1da177e4 2760 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2761 net->ipv6.rt6_stats->fib_nodes,
2762 net->ipv6.rt6_stats->fib_route_nodes,
2763 net->ipv6.rt6_stats->fib_rt_alloc,
2764 net->ipv6.rt6_stats->fib_rt_entries,
2765 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2766 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2767 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2768
2769 return 0;
2770}
2771
2772static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2773{
de05c557 2774 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2775}
2776
/* File operations for the per-namespace "rt6_stats" proc entry. */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,	/* pairs with single_open_net() */
};
2784#endif /* CONFIG_PROC_FS */
2785
2786#ifdef CONFIG_SYSCTL
2787
1da177e4 2788static
8d65af78 2789int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2790 void __user *buffer, size_t *lenp, loff_t *ppos)
2791{
c486da34
LAG
2792 struct net *net;
2793 int delay;
2794 if (!write)
1da177e4 2795 return -EINVAL;
c486da34
LAG
2796
2797 net = (struct net *)ctl->extra1;
2798 delay = net->ipv6.sysctl.flush_delay;
2799 proc_dointvec(ctl, write, buffer, lenp, ppos);
2800 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2801 return 0;
1da177e4
LT
2802}
2803
/*
 * Template for the IPv6 route sysctl table.
 *
 * The .data pointers here refer to init_net (or the dst ops template);
 * ipv6_route_sysctl_init() duplicates the table per namespace and
 * re-points entries by array index, so the ORDER of entries below must
 * stay in sync with that function.
 */
ctl_table ipv6_route_table_template[] = {
	{
		/* [0] write-only trigger, see ipv6_sysctl_rtcache_flush() */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		/* [1] */
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* [2] */
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* [3] same variable as gc_min_interval_ms ([9]) */
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		/* [4] */
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		/* [5] */
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		/* [6] */
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* [7] */
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		/* [8] */
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* [9] same variable as gc_min_interval ([3]), different handler */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }	/* terminator */
};
2877
2c8c1e72 2878struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2879{
2880 struct ctl_table *table;
2881
2882 table = kmemdup(ipv6_route_table_template,
2883 sizeof(ipv6_route_table_template),
2884 GFP_KERNEL);
5ee09105
YH
2885
2886 if (table) {
2887 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2888 table[0].extra1 = net;
86393e52 2889 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2890 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2891 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2892 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2893 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2894 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2895 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2896 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2897 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2898 }
2899
760f2d01
DL
2900 return table;
2901}
1da177e4
LT
2902#endif
2903
2c8c1e72 2904static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2905{
633d424b 2906 int ret = -ENOMEM;
8ed67789 2907
86393e52
AD
2908 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2909 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2910
fc66f95c
ED
2911 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2912 goto out_ip6_dst_ops;
2913
8ed67789
DL
2914 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2915 sizeof(*net->ipv6.ip6_null_entry),
2916 GFP_KERNEL);
2917 if (!net->ipv6.ip6_null_entry)
fc66f95c 2918 goto out_ip6_dst_entries;
d8d1f30b 2919 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2920 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2921 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2922 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2923 ip6_template_metrics, true);
8ed67789
DL
2924
2925#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2926 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2927 sizeof(*net->ipv6.ip6_prohibit_entry),
2928 GFP_KERNEL);
68fffc67
PZ
2929 if (!net->ipv6.ip6_prohibit_entry)
2930 goto out_ip6_null_entry;
d8d1f30b 2931 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2932 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2933 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2934 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2935 ip6_template_metrics, true);
8ed67789
DL
2936
2937 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2938 sizeof(*net->ipv6.ip6_blk_hole_entry),
2939 GFP_KERNEL);
68fffc67
PZ
2940 if (!net->ipv6.ip6_blk_hole_entry)
2941 goto out_ip6_prohibit_entry;
d8d1f30b 2942 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2943 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2944 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2945 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2946 ip6_template_metrics, true);
8ed67789
DL
2947#endif
2948
b339a47c
PZ
2949 net->ipv6.sysctl.flush_delay = 0;
2950 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2951 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2952 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2953 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2954 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2955 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2956 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2957
cdb18761
DL
2958#ifdef CONFIG_PROC_FS
2959 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2960 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2961#endif
6891a346
BT
2962 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2963
8ed67789
DL
2964 ret = 0;
2965out:
2966 return ret;
f2fc6a54 2967
68fffc67
PZ
2968#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2969out_ip6_prohibit_entry:
2970 kfree(net->ipv6.ip6_prohibit_entry);
2971out_ip6_null_entry:
2972 kfree(net->ipv6.ip6_null_entry);
2973#endif
fc66f95c
ED
2974out_ip6_dst_entries:
2975 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2976out_ip6_dst_ops:
f2fc6a54 2977 goto out;
cdb18761
DL
2978}
2979
2c8c1e72 2980static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2981{
2982#ifdef CONFIG_PROC_FS
2983 proc_net_remove(net, "ipv6_route");
2984 proc_net_remove(net, "rt6_stats");
2985#endif
8ed67789
DL
2986 kfree(net->ipv6.ip6_null_entry);
2987#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2988 kfree(net->ipv6.ip6_prohibit_entry);
2989 kfree(net->ipv6.ip6_blk_hole_entry);
2990#endif
41bb78b4 2991 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2992}
2993
2994static struct pernet_operations ip6_route_net_ops = {
2995 .init = ip6_route_net_init,
2996 .exit = ip6_route_net_exit,
2997};
2998
8ed67789
DL
2999static struct notifier_block ip6_route_dev_notifier = {
3000 .notifier_call = ip6_route_dev_notify,
3001 .priority = 0,
3002};
3003
433d49c3 3004int __init ip6_route_init(void)
1da177e4 3005{
433d49c3
DL
3006 int ret;
3007
9a7ec3a9
DL
3008 ret = -ENOMEM;
3009 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3010 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3011 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3012 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3013 goto out;
14e50e57 3014
fc66f95c 3015 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3016 if (ret)
bdb3289f 3017 goto out_kmem_cache;
bdb3289f 3018
fc66f95c
ED
3019 ret = register_pernet_subsys(&ip6_route_net_ops);
3020 if (ret)
3021 goto out_dst_entries;
3022
5dc121e9
AE
3023 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3024
8ed67789
DL
3025 /* Registering of the loopback is done before this portion of code,
3026 * the loopback reference in rt6_info will not be taken, do it
3027 * manually for init_net */
d8d1f30b 3028 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3029 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3030 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3031 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3032 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3033 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3034 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3035 #endif
433d49c3
DL
3036 ret = fib6_init();
3037 if (ret)
8ed67789 3038 goto out_register_subsys;
433d49c3 3039
433d49c3
DL
3040 ret = xfrm6_init();
3041 if (ret)
cdb18761 3042 goto out_fib6_init;
c35b7e72 3043
433d49c3
DL
3044 ret = fib6_rules_init();
3045 if (ret)
3046 goto xfrm6_init;
7e5449c2 3047
433d49c3 3048 ret = -ENOBUFS;
c7ac8679
GR
3049 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3050 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3051 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 3052 goto fib6_rules_init;
c127ea2c 3053
8ed67789 3054 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
3055 if (ret)
3056 goto fib6_rules_init;
8ed67789 3057
433d49c3
DL
3058out:
3059 return ret;
3060
3061fib6_rules_init:
433d49c3
DL
3062 fib6_rules_cleanup();
3063xfrm6_init:
433d49c3 3064 xfrm6_fini();
433d49c3 3065out_fib6_init:
433d49c3 3066 fib6_gc_cleanup();
8ed67789
DL
3067out_register_subsys:
3068 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
3069out_dst_entries:
3070 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3071out_kmem_cache:
f2fc6a54 3072 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3073 goto out;
1da177e4
LT
3074}
3075
3076void ip6_route_cleanup(void)
3077{
8ed67789 3078 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 3079 fib6_rules_cleanup();
1da177e4 3080 xfrm6_fini();
1da177e4 3081 fib6_gc_cleanup();
8ed67789 3082 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3083 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3084 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3085}