net: lpc_eth: Fix rename of dev_hw_addr_random
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
/* Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable; otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4 28#include <linux/errno.h>
bc3b2d7f 29#include <linux/export.h>
1da177e4
LT
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
7bc570c8 38#include <linux/mroute6.h>
1da177e4 39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
5b7c931d 43#include <linux/nsproxy.h>
5a0e3ad6 44#include <linux/slab.h>
457c4cbc 45#include <net/net_namespace.h>
1da177e4
LT
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
8d71740c 56#include <net/netevent.h>
21713ebc 57#include <net/netlink.h>
1da177e4
LT
58
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
21efcfa0
ED
65static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
66 const struct in6_addr *dest);
1da177e4 67static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 68static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 69static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
70static struct dst_entry *ip6_negative_advice(struct dst_entry *);
71static void ip6_dst_destroy(struct dst_entry *);
72static void ip6_dst_ifdown(struct dst_entry *,
73 struct net_device *dev, int how);
569d3645 74static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
75
76static int ip6_pkt_discard(struct sk_buff *skb);
77static int ip6_pkt_discard_out(struct sk_buff *skb);
78static void ip6_link_failure(struct sk_buff *skb);
79static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
80
70ceb4f5 81#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 82static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
83 const struct in6_addr *prefix, int prefixlen,
84 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5 85 unsigned pref);
efa2cea0 86static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
87 const struct in6_addr *prefix, int prefixlen,
88 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
89#endif
90
06582540
DM
91static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
92{
93 struct rt6_info *rt = (struct rt6_info *) dst;
94 struct inet_peer *peer;
95 u32 *p = NULL;
96
8e2ec639
YZ
97 if (!(rt->dst.flags & DST_HOST))
98 return NULL;
99
06582540
DM
100 if (!rt->rt6i_peer)
101 rt6_bind_peer(rt, 1);
102
103 peer = rt->rt6i_peer;
104 if (peer) {
105 u32 *old_p = __DST_METRICS_PTR(old);
106 unsigned long prev, new;
107
108 p = peer->metrics;
109 if (inet_metrics_new(peer))
110 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111
112 new = (unsigned long) p;
113 prev = cmpxchg(&dst->_metrics, old, new);
114
115 if (prev != old) {
116 p = __DST_METRICS_PTR(prev);
117 if (prev & DST_METRICS_READ_ONLY)
118 p = NULL;
119 }
120 }
121 return p;
122}
123
39232973
DM
124static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
125{
126 struct in6_addr *p = &rt->rt6i_gateway;
127
a7563f34 128 if (!ipv6_addr_any(p))
39232973
DM
129 return (const void *) p;
130 return daddr;
131}
132
d3aaeb38
DM
133static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
134{
39232973
DM
135 struct rt6_info *rt = (struct rt6_info *) dst;
136 struct neighbour *n;
137
138 daddr = choose_neigh_daddr(rt, daddr);
139 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
140 if (n)
141 return n;
142 return neigh_create(&nd_tbl, daddr, dst->dev);
143}
144
8ade06c6 145static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 146{
8ade06c6
DM
147 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
148 if (!n) {
149 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
150 if (IS_ERR(n))
151 return PTR_ERR(n);
152 }
f83c7790
DM
153 dst_set_neighbour(&rt->dst, n);
154
155 return 0;
d3aaeb38
DM
156}
157
9a7ec3a9 158static struct dst_ops ip6_dst_ops_template = {
1da177e4 159 .family = AF_INET6,
09640e63 160 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
161 .gc = ip6_dst_gc,
162 .gc_thresh = 1024,
163 .check = ip6_dst_check,
0dbaee3b 164 .default_advmss = ip6_default_advmss,
ebb762f2 165 .mtu = ip6_mtu,
06582540 166 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
167 .destroy = ip6_dst_destroy,
168 .ifdown = ip6_dst_ifdown,
169 .negative_advice = ip6_negative_advice,
170 .link_failure = ip6_link_failure,
171 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 172 .local_out = __ip6_local_out,
d3aaeb38 173 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
174};
175
ebb762f2 176static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 177{
618f9bc7
SK
178 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
179
180 return mtu ? : dst->dev->mtu;
ec831ea7
RD
181}
182
14e50e57
DM
183static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
184{
185}
186
0972ddb2
HB
187static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
188 unsigned long old)
189{
190 return NULL;
191}
192
14e50e57
DM
193static struct dst_ops ip6_dst_blackhole_ops = {
194 .family = AF_INET6,
09640e63 195 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
196 .destroy = ip6_dst_destroy,
197 .check = ip6_dst_check,
ebb762f2 198 .mtu = ip6_blackhole_mtu,
214f45c9 199 .default_advmss = ip6_default_advmss,
14e50e57 200 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 201 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 202 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
203};
204
62fa8a84
DM
205static const u32 ip6_template_metrics[RTAX_MAX] = {
206 [RTAX_HOPLIMIT - 1] = 255,
207};
208
bdb3289f 209static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
210 .dst = {
211 .__refcnt = ATOMIC_INIT(1),
212 .__use = 1,
213 .obsolete = -1,
214 .error = -ENETUNREACH,
d8d1f30b
CG
215 .input = ip6_pkt_discard,
216 .output = ip6_pkt_discard_out,
1da177e4
LT
217 },
218 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 219 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
220 .rt6i_metric = ~(u32) 0,
221 .rt6i_ref = ATOMIC_INIT(1),
222};
223
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* Template for the "prohibit" reject entry (dst error -EACCES). */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/* Template for the "blackhole" reject entry (dst error -EINVAL, dst_discard). */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
260
1da177e4 261/* allocate dst with ip6_dst_ops */
5c1e6aa3 262static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
957c665f
DM
263 struct net_device *dev,
264 int flags)
1da177e4 265{
957c665f 266 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
cf911662 267
38308473 268 if (rt)
fbe58186 269 memset(&rt->rt6i_table, 0,
38308473 270 sizeof(*rt) - sizeof(struct dst_entry));
cf911662
DM
271
272 return rt;
1da177e4
LT
273}
274
275static void ip6_dst_destroy(struct dst_entry *dst)
276{
277 struct rt6_info *rt = (struct rt6_info *)dst;
278 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 279 struct inet_peer *peer = rt->rt6i_peer;
1da177e4 280
8e2ec639
YZ
281 if (!(rt->dst.flags & DST_HOST))
282 dst_destroy_metrics_generic(dst);
283
38308473 284 if (idev) {
1da177e4
LT
285 rt->rt6i_idev = NULL;
286 in6_dev_put(idev);
1ab1457c 287 }
b3419363 288 if (peer) {
b3419363
DM
289 rt->rt6i_peer = NULL;
290 inet_putpeer(peer);
291 }
292}
293
6431cbc2
DM
294static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
295
296static u32 rt6_peer_genid(void)
297{
298 return atomic_read(&__rt6_peer_genid);
299}
300
b3419363
DM
301void rt6_bind_peer(struct rt6_info *rt, int create)
302{
303 struct inet_peer *peer;
304
b3419363
DM
305 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
306 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
307 inet_putpeer(peer);
6431cbc2
DM
308 else
309 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
310}
311
312static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
313 int how)
314{
315 struct rt6_info *rt = (struct rt6_info *)dst;
316 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 317 struct net_device *loopback_dev =
c346dca1 318 dev_net(dev)->loopback_dev;
1da177e4 319
38308473 320 if (dev != loopback_dev && idev && idev->dev == dev) {
5a3e55d6
DL
321 struct inet6_dev *loopback_idev =
322 in6_dev_get(loopback_dev);
38308473 323 if (loopback_idev) {
1da177e4
LT
324 rt->rt6i_idev = loopback_idev;
325 in6_dev_put(idev);
326 }
327 }
328}
329
330static __inline__ int rt6_check_expired(const struct rt6_info *rt)
331{
a02cec21 332 return (rt->rt6i_flags & RTF_EXPIRES) &&
d1918542 333 time_after(jiffies, rt->dst.expires);
1da177e4
LT
334}
335
b71d1d42 336static inline int rt6_need_strict(const struct in6_addr *daddr)
c71099ac 337{
a02cec21
ED
338 return ipv6_addr_type(daddr) &
339 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
340}
341
1da177e4 342/*
c71099ac 343 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
344 */
345
8ed67789
DL
346static inline struct rt6_info *rt6_device_match(struct net *net,
347 struct rt6_info *rt,
b71d1d42 348 const struct in6_addr *saddr,
1da177e4 349 int oif,
d420895e 350 int flags)
1da177e4
LT
351{
352 struct rt6_info *local = NULL;
353 struct rt6_info *sprt;
354
dd3abc4e
YH
355 if (!oif && ipv6_addr_any(saddr))
356 goto out;
357
d8d1f30b 358 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 359 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
360
361 if (oif) {
1da177e4
LT
362 if (dev->ifindex == oif)
363 return sprt;
364 if (dev->flags & IFF_LOOPBACK) {
38308473 365 if (!sprt->rt6i_idev ||
1da177e4 366 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 367 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 368 continue;
1ab1457c 369 if (local && (!oif ||
1da177e4
LT
370 local->rt6i_idev->dev->ifindex == oif))
371 continue;
372 }
373 local = sprt;
374 }
dd3abc4e
YH
375 } else {
376 if (ipv6_chk_addr(net, saddr, dev,
377 flags & RT6_LOOKUP_F_IFACE))
378 return sprt;
1da177e4 379 }
dd3abc4e 380 }
1da177e4 381
dd3abc4e 382 if (oif) {
1da177e4
LT
383 if (local)
384 return local;
385
d420895e 386 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 387 return net->ipv6.ip6_null_entry;
1da177e4 388 }
dd3abc4e 389out:
1da177e4
LT
390 return rt;
391}
392
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the neighbour bound to @rt is not in a NUD_VALID state and was last
 * updated more than rtr_probe_interval ago, send a Neighbour Solicitation to
 * the solicited-node multicast address of the neighbour.  A NULL @rt or a
 * route without a neighbour is ignored.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute; neigh->updated is bumped before sending so the interval check
 * above throttles subsequent probes.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	read_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		read_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
432
1da177e4 433/*
554cfb7e 434 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 435 */
b6f99a21 436static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 437{
d1918542 438 struct net_device *dev = rt->dst.dev;
161980f4 439 if (!oif || dev->ifindex == oif)
554cfb7e 440 return 2;
161980f4
DM
441 if ((dev->flags & IFF_LOOPBACK) &&
442 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
443 return 1;
444 return 0;
554cfb7e 445}
1da177e4 446
b6f99a21 447static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 448{
f2c31e32 449 struct neighbour *neigh;
398bcbeb 450 int m;
f2c31e32
ED
451
452 rcu_read_lock();
27217455 453 neigh = dst_get_neighbour_noref(&rt->dst);
4d0c5911
YH
454 if (rt->rt6i_flags & RTF_NONEXTHOP ||
455 !(rt->rt6i_flags & RTF_GATEWAY))
456 m = 1;
457 else if (neigh) {
554cfb7e
YH
458 read_lock_bh(&neigh->lock);
459 if (neigh->nud_state & NUD_VALID)
4d0c5911 460 m = 2;
398bcbeb
YH
461#ifdef CONFIG_IPV6_ROUTER_PREF
462 else if (neigh->nud_state & NUD_FAILED)
463 m = 0;
464#endif
465 else
ea73ee23 466 m = 1;
554cfb7e 467 read_unlock_bh(&neigh->lock);
398bcbeb
YH
468 } else
469 m = 0;
f2c31e32 470 rcu_read_unlock();
554cfb7e 471 return m;
1da177e4
LT
472}
473
554cfb7e
YH
474static int rt6_score_route(struct rt6_info *rt, int oif,
475 int strict)
1da177e4 476{
4d0c5911 477 int m, n;
1ab1457c 478
4d0c5911 479 m = rt6_check_dev(rt, oif);
77d16f45 480 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 481 return -1;
ebacaaa0
YH
482#ifdef CONFIG_IPV6_ROUTER_PREF
483 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
484#endif
4d0c5911 485 n = rt6_check_neigh(rt);
557e92ef 486 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
487 return -1;
488 return m;
489}
490
f11e6659
DM
491static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
492 int *mpri, struct rt6_info *match)
554cfb7e 493{
f11e6659
DM
494 int m;
495
496 if (rt6_check_expired(rt))
497 goto out;
498
499 m = rt6_score_route(rt, oif, strict);
500 if (m < 0)
501 goto out;
502
503 if (m > *mpri) {
504 if (strict & RT6_LOOKUP_F_REACHABLE)
505 rt6_probe(match);
506 *mpri = m;
507 match = rt;
508 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
509 rt6_probe(rt);
510 }
511
512out:
513 return match;
514}
515
516static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
517 struct rt6_info *rr_head,
518 u32 metric, int oif, int strict)
519{
520 struct rt6_info *rt, *match;
554cfb7e 521 int mpri = -1;
1da177e4 522
f11e6659
DM
523 match = NULL;
524 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 525 rt = rt->dst.rt6_next)
f11e6659
DM
526 match = find_match(rt, oif, strict, &mpri, match);
527 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 528 rt = rt->dst.rt6_next)
f11e6659 529 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 530
f11e6659
DM
531 return match;
532}
1da177e4 533
f11e6659
DM
534static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
535{
536 struct rt6_info *match, *rt0;
8ed67789 537 struct net *net;
1da177e4 538
f11e6659
DM
539 rt0 = fn->rr_ptr;
540 if (!rt0)
541 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 542
f11e6659 543 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 544
554cfb7e 545 if (!match &&
f11e6659 546 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 547 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 548
554cfb7e 549 /* no entries matched; do round-robin */
f11e6659
DM
550 if (!next || next->rt6i_metric != rt0->rt6i_metric)
551 next = fn->leaf;
552
553 if (next != rt0)
554 fn->rr_ptr = next;
1da177e4 555 }
1da177e4 556
d1918542 557 net = dev_net(rt0->dst.dev);
a02cec21 558 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
559}
560
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev from router @gwaddr.
 *
 * @opt/@len describe the raw option.  The option is validated, then the
 * matching route-info route is deleted (zero lifetime), created (no existing
 * route and non-zero lifetime) or has its preference refreshed.  The route's
 * expiry is then set from the lifetime, or cleared for an infinite one.
 *
 * Returns 0 on success, -EINVAL for a malformed option or invalid preference.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2)
			return -EINVAL;
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1)
			return -EINVAL;
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->dst.expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->dst);
	}
	return 0;
}
#endif
634
/*
 * BACKTRACK(net, saddr): when the current lookup result "rt" is the null
 * entry, climb from "fn" towards the tree root looking for another node that
 * carries route info (RTN_RTINFO).  When the parent has a subtree that is not
 * the current node, the subtree is searched with @saddr instead.
 *
 * The macro relies on the caller providing local variables "rt" and "fn" and
 * the labels "out" (taken on reaching the top-level root) and "restart"
 * (taken once a candidate node is found).
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		while (1) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 652
8ed67789
DL
653static struct rt6_info *ip6_pol_route_lookup(struct net *net,
654 struct fib6_table *table,
4c9483b2 655 struct flowi6 *fl6, int flags)
1da177e4
LT
656{
657 struct fib6_node *fn;
658 struct rt6_info *rt;
659
c71099ac 660 read_lock_bh(&table->tb6_lock);
4c9483b2 661 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
662restart:
663 rt = fn->leaf;
4c9483b2
DM
664 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
665 BACKTRACK(net, &fl6->saddr);
c71099ac 666out:
d8d1f30b 667 dst_use(&rt->dst, jiffies);
c71099ac 668 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
669 return rt;
670
671}
672
ea6e574e
FW
673struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
674 int flags)
675{
676 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
677}
678EXPORT_SYMBOL_GPL(ip6_route_lookup);
679
9acd9f3a
YH
680struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
681 const struct in6_addr *saddr, int oif, int strict)
c71099ac 682{
4c9483b2
DM
683 struct flowi6 fl6 = {
684 .flowi6_oif = oif,
685 .daddr = *daddr,
c71099ac
TG
686 };
687 struct dst_entry *dst;
77d16f45 688 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 689
adaa70bb 690 if (saddr) {
4c9483b2 691 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
692 flags |= RT6_LOOKUP_F_HAS_SADDR;
693 }
694
4c9483b2 695 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
696 if (dst->error == 0)
697 return (struct rt6_info *) dst;
698
699 dst_release(dst);
700
1da177e4
LT
701 return NULL;
702}
703
7159039a
YH
704EXPORT_SYMBOL(rt6_lookup);
705
/*
 * ip6_ins_rt() must be called with table->tb6_lock NOT held.
 * It takes a new route entry; if the addition fails for any reason, the
 * route is freed.  In any case, if the caller does not hold a reference
 * to the route, it may be destroyed.
 */
711
86872cb5 712static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
713{
714 int err;
c71099ac 715 struct fib6_table *table;
1da177e4 716
c71099ac
TG
717 table = rt->rt6i_table;
718 write_lock_bh(&table->tb6_lock);
86872cb5 719 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 720 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
721
722 return err;
723}
724
40e22e8f
TG
725int ip6_ins_rt(struct rt6_info *rt)
726{
4d1169c1 727 struct nl_info info = {
d1918542 728 .nl_net = dev_net(rt->dst.dev),
4d1169c1 729 };
528c4ceb 730 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
731}
732
21efcfa0
ED
733static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
734 const struct in6_addr *daddr,
b71d1d42 735 const struct in6_addr *saddr)
1da177e4 736{
1da177e4
LT
737 struct rt6_info *rt;
738
739 /*
740 * Clone the route.
741 */
742
21efcfa0 743 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
744
745 if (rt) {
14deae41
DM
746 int attempts = !in_softirq();
747
38308473 748 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 749 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 750 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 751 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 752 rt->rt6i_gateway = *daddr;
58c4fb86 753 }
1da177e4 754
1da177e4 755 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
756
757#ifdef CONFIG_IPV6_SUBTREES
758 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 759 rt->rt6i_src.addr = *saddr;
1da177e4
LT
760 rt->rt6i_src.plen = 128;
761 }
762#endif
763
14deae41 764 retry:
8ade06c6 765 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 766 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
767 int saved_rt_min_interval =
768 net->ipv6.sysctl.ip6_rt_gc_min_interval;
769 int saved_rt_elasticity =
770 net->ipv6.sysctl.ip6_rt_gc_elasticity;
771
772 if (attempts-- > 0) {
773 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
774 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
775
86393e52 776 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
777
778 net->ipv6.sysctl.ip6_rt_gc_elasticity =
779 saved_rt_elasticity;
780 net->ipv6.sysctl.ip6_rt_gc_min_interval =
781 saved_rt_min_interval;
782 goto retry;
783 }
784
785 if (net_ratelimit())
786 printk(KERN_WARNING
7e1b33e5 787 "ipv6: Neighbour table overflow.\n");
d8d1f30b 788 dst_free(&rt->dst);
14deae41
DM
789 return NULL;
790 }
95a9a5ba 791 }
1da177e4 792
95a9a5ba
YH
793 return rt;
794}
1da177e4 795
21efcfa0
ED
796static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
797 const struct in6_addr *daddr)
299d9939 798{
21efcfa0
ED
799 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
800
299d9939 801 if (rt) {
299d9939 802 rt->rt6i_flags |= RTF_CACHE;
27217455 803 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
299d9939
YH
804 }
805 return rt;
806}
807
8ed67789 808static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 809 struct flowi6 *fl6, int flags)
1da177e4
LT
810{
811 struct fib6_node *fn;
519fbd87 812 struct rt6_info *rt, *nrt;
c71099ac 813 int strict = 0;
1da177e4 814 int attempts = 3;
519fbd87 815 int err;
53b7997f 816 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 817
77d16f45 818 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
819
820relookup:
c71099ac 821 read_lock_bh(&table->tb6_lock);
1da177e4 822
8238dd06 823restart_2:
4c9483b2 824 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
825
826restart:
4acad72d 827 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 828
4c9483b2 829 BACKTRACK(net, &fl6->saddr);
8ed67789 830 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 831 rt->rt6i_flags & RTF_CACHE)
1ddef044 832 goto out;
1da177e4 833
d8d1f30b 834 dst_hold(&rt->dst);
c71099ac 835 read_unlock_bh(&table->tb6_lock);
fb9de91e 836
27217455 837 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 838 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 839 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 840 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
841 else
842 goto out2;
e40cf353 843
d8d1f30b 844 dst_release(&rt->dst);
8ed67789 845 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 846
d8d1f30b 847 dst_hold(&rt->dst);
519fbd87 848 if (nrt) {
40e22e8f 849 err = ip6_ins_rt(nrt);
519fbd87 850 if (!err)
1da177e4 851 goto out2;
1da177e4 852 }
1da177e4 853
519fbd87
YH
854 if (--attempts <= 0)
855 goto out2;
856
857 /*
c71099ac 858 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
859 * released someone could insert this route. Relookup.
860 */
d8d1f30b 861 dst_release(&rt->dst);
519fbd87
YH
862 goto relookup;
863
864out:
8238dd06
YH
865 if (reachable) {
866 reachable = 0;
867 goto restart_2;
868 }
d8d1f30b 869 dst_hold(&rt->dst);
c71099ac 870 read_unlock_bh(&table->tb6_lock);
1da177e4 871out2:
d8d1f30b
CG
872 rt->dst.lastuse = jiffies;
873 rt->dst.__use++;
c71099ac
TG
874
875 return rt;
1da177e4
LT
876}
877
8ed67789 878static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 879 struct flowi6 *fl6, int flags)
4acad72d 880{
4c9483b2 881 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
882}
883
c71099ac
TG
884void ip6_route_input(struct sk_buff *skb)
885{
b71d1d42 886 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 887 struct net *net = dev_net(skb->dev);
adaa70bb 888 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
889 struct flowi6 fl6 = {
890 .flowi6_iif = skb->dev->ifindex,
891 .daddr = iph->daddr,
892 .saddr = iph->saddr,
38308473 893 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
894 .flowi6_mark = skb->mark,
895 .flowi6_proto = iph->nexthdr,
c71099ac 896 };
adaa70bb 897
1d6e55f1 898 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 899 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 900
4c9483b2 901 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
c71099ac
TG
902}
903
8ed67789 904static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 905 struct flowi6 *fl6, int flags)
1da177e4 906{
4c9483b2 907 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
908}
909
9c7a4f9c 910struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 911 struct flowi6 *fl6)
c71099ac
TG
912{
913 int flags = 0;
914
4c9483b2 915 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 916 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 917
4c9483b2 918 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 919 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
920 else if (sk)
921 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 922
4c9483b2 923 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
924}
925
7159039a 926EXPORT_SYMBOL(ip6_route_output);
1da177e4 927
2774c131 928struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 929{
5c1e6aa3 930 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
931 struct dst_entry *new = NULL;
932
5c1e6aa3 933 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 934 if (rt) {
cf911662
DM
935 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
936
d8d1f30b 937 new = &rt->dst;
14e50e57 938
14e50e57 939 new->__use = 1;
352e512c
HX
940 new->input = dst_discard;
941 new->output = dst_discard;
14e50e57 942
21efcfa0
ED
943 if (dst_metrics_read_only(&ort->dst))
944 new->_metrics = ort->dst._metrics;
945 else
946 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
947 rt->rt6i_idev = ort->rt6i_idev;
948 if (rt->rt6i_idev)
949 in6_dev_hold(rt->rt6i_idev);
d1918542 950 rt->dst.expires = 0;
14e50e57 951
4e3fd7a0 952 rt->rt6i_gateway = ort->rt6i_gateway;
14e50e57
DM
953 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
954 rt->rt6i_metric = 0;
955
956 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
957#ifdef CONFIG_IPV6_SUBTREES
958 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
959#endif
960
961 dst_free(new);
962 }
963
69ead7af
DM
964 dst_release(dst_orig);
965 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 966}
14e50e57 967
1da177e4
LT
968/*
969 * Destination cache support functions
970 */
971
972static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
973{
974 struct rt6_info *rt;
975
976 rt = (struct rt6_info *) dst;
977
6431cbc2
DM
978 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
979 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
980 if (!rt->rt6i_peer)
981 rt6_bind_peer(rt, 0);
982 rt->rt6i_peer_genid = rt6_peer_genid();
983 }
1da177e4 984 return dst;
6431cbc2 985 }
1da177e4
LT
986 return NULL;
987}
988
989static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
990{
991 struct rt6_info *rt = (struct rt6_info *) dst;
992
993 if (rt) {
54c1a859
YH
994 if (rt->rt6i_flags & RTF_CACHE) {
995 if (rt6_check_expired(rt)) {
996 ip6_del_rt(rt);
997 dst = NULL;
998 }
999 } else {
1da177e4 1000 dst_release(dst);
54c1a859
YH
1001 dst = NULL;
1002 }
1da177e4 1003 }
54c1a859 1004 return dst;
1da177e4
LT
1005}
1006
1007static void ip6_link_failure(struct sk_buff *skb)
1008{
1009 struct rt6_info *rt;
1010
3ffe533c 1011 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1012
adf30907 1013 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1014 if (rt) {
38308473 1015 if (rt->rt6i_flags & RTF_CACHE) {
d8d1f30b 1016 dst_set_expires(&rt->dst, 0);
1da177e4
LT
1017 rt->rt6i_flags |= RTF_EXPIRES;
1018 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1019 rt->rt6i_node->fn_sernum = -1;
1020 }
1021}
1022
1023static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1024{
1025 struct rt6_info *rt6 = (struct rt6_info*)dst;
1026
1027 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1028 rt6->rt6i_flags |= RTF_MODIFIED;
1029 if (mtu < IPV6_MIN_MTU) {
defb3519 1030 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1031 mtu = IPV6_MIN_MTU;
defb3519
DM
1032 features |= RTAX_FEATURE_ALLFRAG;
1033 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1034 }
defb3519 1035 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
1036 }
1037}
1038
0dbaee3b 1039static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1040{
0dbaee3b
DM
1041 struct net_device *dev = dst->dev;
1042 unsigned int mtu = dst_mtu(dst);
1043 struct net *net = dev_net(dev);
1044
1da177e4
LT
1045 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1046
5578689a
DL
1047 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1048 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1049
1050 /*
1ab1457c
YH
1051 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1052 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1053 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1054 * rely only on pmtu discovery"
1055 */
1056 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1057 mtu = IPV6_MAXPLEN;
1058 return mtu;
1059}
1060
ebb762f2 1061static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1062{
d33e4553 1063 struct inet6_dev *idev;
618f9bc7
SK
1064 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1065
1066 if (mtu)
1067 return mtu;
1068
1069 mtu = IPV6_MIN_MTU;
d33e4553
DM
1070
1071 rcu_read_lock();
1072 idev = __in6_dev_get(dst->dev);
1073 if (idev)
1074 mtu = idev->cnf.mtu6;
1075 rcu_read_unlock();
1076
1077 return mtu;
1078}
1079
3b00944c
YH
/*
 * Head of a singly linked list (chained through dst->next) of the dst
 * entries created by icmp6_dst_alloc().  icmp6_dst_gc() and
 * icmp6_clean_all() unlink and free entries from it.  The functions below
 * only touch the list while holding icmp6_dst_lock with bottom halves
 * disabled.
 */
1080static struct dst_entry *icmp6_dst_gc_list;
1081static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1082
3b00944c 1083struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1084 struct neighbour *neigh,
87a11578 1085 struct flowi6 *fl6)
1da177e4 1086{
87a11578 1087 struct dst_entry *dst;
1da177e4
LT
1088 struct rt6_info *rt;
1089 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1090 struct net *net = dev_net(dev);
1da177e4 1091
38308473 1092 if (unlikely(!idev))
122bdf67 1093 return ERR_PTR(-ENODEV);
1da177e4 1094
957c665f 1095 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
38308473 1096 if (unlikely(!rt)) {
1da177e4 1097 in6_dev_put(idev);
87a11578 1098 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1099 goto out;
1100 }
1101
1da177e4
LT
1102 if (neigh)
1103 neigh_hold(neigh);
14deae41 1104 else {
f83c7790 1105 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
b43faac6 1106 if (IS_ERR(neigh)) {
252c3d84 1107 in6_dev_put(idev);
b43faac6
DM
1108 dst_free(&rt->dst);
1109 return ERR_CAST(neigh);
1110 }
14deae41 1111 }
1da177e4 1112
8e2ec639
YZ
1113 rt->dst.flags |= DST_HOST;
1114 rt->dst.output = ip6_output;
69cce1d1 1115 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1116 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1117 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1118 rt->rt6i_dst.plen = 128;
1119 rt->rt6i_idev = idev;
7011687f 1120 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1121
3b00944c 1122 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1123 rt->dst.next = icmp6_dst_gc_list;
1124 icmp6_dst_gc_list = &rt->dst;
3b00944c 1125 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1126
5578689a 1127 fib6_force_start_gc(net);
1da177e4 1128
87a11578
DM
1129 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1130
1da177e4 1131out:
87a11578 1132 return dst;
1da177e4
LT
1133}
1134
3d0f24a7 1135int icmp6_dst_gc(void)
1da177e4 1136{
e9476e95 1137 struct dst_entry *dst, **pprev;
3d0f24a7 1138 int more = 0;
1da177e4 1139
3b00944c
YH
1140 spin_lock_bh(&icmp6_dst_lock);
1141 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1142
1da177e4
LT
1143 while ((dst = *pprev) != NULL) {
1144 if (!atomic_read(&dst->__refcnt)) {
1145 *pprev = dst->next;
1146 dst_free(dst);
1da177e4
LT
1147 } else {
1148 pprev = &dst->next;
3d0f24a7 1149 ++more;
1da177e4
LT
1150 }
1151 }
1152
3b00944c 1153 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1154
3d0f24a7 1155 return more;
1da177e4
LT
1156}
1157
1e493d19
DM
1158static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1159 void *arg)
1160{
1161 struct dst_entry *dst, **pprev;
1162
1163 spin_lock_bh(&icmp6_dst_lock);
1164 pprev = &icmp6_dst_gc_list;
1165 while ((dst = *pprev) != NULL) {
1166 struct rt6_info *rt = (struct rt6_info *) dst;
1167 if (func(rt, arg)) {
1168 *pprev = dst->next;
1169 dst_free(dst);
1170 } else {
1171 pprev = &dst->next;
1172 }
1173 }
1174 spin_unlock_bh(&icmp6_dst_lock);
1175}
1176
569d3645 1177static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1178{
1da177e4 1179 unsigned long now = jiffies;
86393e52 1180 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1181 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1182 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1183 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1184 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1185 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1186 int entries;
7019b78e 1187
fc66f95c 1188 entries = dst_entries_get_fast(ops);
7019b78e 1189 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1190 entries <= rt_max_size)
1da177e4
LT
1191 goto out;
1192
6891a346
BT
1193 net->ipv6.ip6_rt_gc_expire++;
1194 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1195 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1196 entries = dst_entries_get_slow(ops);
1197 if (entries < ops->gc_thresh)
7019b78e 1198 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1199out:
7019b78e 1200 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1201 return entries > rt_max_size;
1da177e4
LT
1202}
1203
1204/* Clean host part of a prefix. Not necessary in radix tree,
1205 but results in cleaner routing tables.
1206
1207 Remove it only when all the things will work!
1208 */
1209
6b75d090 1210int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1211{
5170ae82 1212 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1213 if (hoplimit == 0) {
6b75d090 1214 struct net_device *dev = dst->dev;
c68f24cc
ED
1215 struct inet6_dev *idev;
1216
1217 rcu_read_lock();
1218 idev = __in6_dev_get(dev);
1219 if (idev)
6b75d090 1220 hoplimit = idev->cnf.hop_limit;
c68f24cc 1221 else
53b7997f 1222 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1223 rcu_read_unlock();
1da177e4
LT
1224 }
1225 return hoplimit;
1226}
abbf46ae 1227EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1228
1229/*
1230 *
1231 */
1232
86872cb5 1233int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1234{
1235 int err;
5578689a 1236 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1237 struct rt6_info *rt = NULL;
1238 struct net_device *dev = NULL;
1239 struct inet6_dev *idev = NULL;
c71099ac 1240 struct fib6_table *table;
1da177e4
LT
1241 int addr_type;
1242
86872cb5 1243 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1244 return -EINVAL;
1245#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1246 if (cfg->fc_src_len)
1da177e4
LT
1247 return -EINVAL;
1248#endif
86872cb5 1249 if (cfg->fc_ifindex) {
1da177e4 1250 err = -ENODEV;
5578689a 1251 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1252 if (!dev)
1253 goto out;
1254 idev = in6_dev_get(dev);
1255 if (!idev)
1256 goto out;
1257 }
1258
86872cb5
TG
1259 if (cfg->fc_metric == 0)
1260 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1261
d71314b4 1262 err = -ENOBUFS;
38308473
DM
1263 if (cfg->fc_nlinfo.nlh &&
1264 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1265 table = fib6_get_table(net, cfg->fc_table);
38308473 1266 if (!table) {
d71314b4
MV
1267 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1268 table = fib6_new_table(net, cfg->fc_table);
1269 }
1270 } else {
1271 table = fib6_new_table(net, cfg->fc_table);
1272 }
38308473
DM
1273
1274 if (!table)
c71099ac 1275 goto out;
c71099ac 1276
957c665f 1277 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1da177e4 1278
38308473 1279 if (!rt) {
1da177e4
LT
1280 err = -ENOMEM;
1281 goto out;
1282 }
1283
d8d1f30b 1284 rt->dst.obsolete = -1;
d1918542 1285 rt->dst.expires = (cfg->fc_flags & RTF_EXPIRES) ?
6f704992
YH
1286 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1287 0;
1da177e4 1288
86872cb5
TG
1289 if (cfg->fc_protocol == RTPROT_UNSPEC)
1290 cfg->fc_protocol = RTPROT_BOOT;
1291 rt->rt6i_protocol = cfg->fc_protocol;
1292
1293 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1294
1295 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1296 rt->dst.input = ip6_mc_input;
ab79ad14
1297 else if (cfg->fc_flags & RTF_LOCAL)
1298 rt->dst.input = ip6_input;
1da177e4 1299 else
d8d1f30b 1300 rt->dst.input = ip6_forward;
1da177e4 1301
d8d1f30b 1302 rt->dst.output = ip6_output;
1da177e4 1303
86872cb5
TG
1304 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1305 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1306 if (rt->rt6i_dst.plen == 128)
11d53b49 1307 rt->dst.flags |= DST_HOST;
1da177e4 1308
8e2ec639
YZ
1309 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1310 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1311 if (!metrics) {
1312 err = -ENOMEM;
1313 goto out;
1314 }
1315 dst_init_metrics(&rt->dst, metrics, 0);
1316 }
1da177e4 1317#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1318 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1319 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1320#endif
1321
86872cb5 1322 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1323
1324 /* We cannot add true routes via loopback here,
1325 they would result in kernel looping; promote them to reject routes
1326 */
86872cb5 1327 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1328 (dev && (dev->flags & IFF_LOOPBACK) &&
1329 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1330 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1331 /* hold loopback dev/idev if we haven't done so. */
5578689a 1332 if (dev != net->loopback_dev) {
1da177e4
LT
1333 if (dev) {
1334 dev_put(dev);
1335 in6_dev_put(idev);
1336 }
5578689a 1337 dev = net->loopback_dev;
1da177e4
LT
1338 dev_hold(dev);
1339 idev = in6_dev_get(dev);
1340 if (!idev) {
1341 err = -ENODEV;
1342 goto out;
1343 }
1344 }
d8d1f30b
CG
1345 rt->dst.output = ip6_pkt_discard_out;
1346 rt->dst.input = ip6_pkt_discard;
1347 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1348 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1349 goto install_route;
1350 }
1351
86872cb5 1352 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1353 const struct in6_addr *gw_addr;
1da177e4
LT
1354 int gwa_type;
1355
86872cb5 1356 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1357 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1358 gwa_type = ipv6_addr_type(gw_addr);
1359
1360 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1361 struct rt6_info *grt;
1362
1363 /* IPv6 strictly inhibits using not link-local
1364 addresses as nexthop address.
1365 Otherwise, router will not able to send redirects.
1366 It is very good, but in some (rare!) circumstances
1367 (SIT, PtP, NBMA NOARP links) it is handy to allow
1368 some exceptions. --ANK
1369 */
1370 err = -EINVAL;
38308473 1371 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1372 goto out;
1373
5578689a 1374 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1375
1376 err = -EHOSTUNREACH;
38308473 1377 if (!grt)
1da177e4
LT
1378 goto out;
1379 if (dev) {
d1918542 1380 if (dev != grt->dst.dev) {
d8d1f30b 1381 dst_release(&grt->dst);
1da177e4
LT
1382 goto out;
1383 }
1384 } else {
d1918542 1385 dev = grt->dst.dev;
1da177e4
LT
1386 idev = grt->rt6i_idev;
1387 dev_hold(dev);
1388 in6_dev_hold(grt->rt6i_idev);
1389 }
38308473 1390 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1391 err = 0;
d8d1f30b 1392 dst_release(&grt->dst);
1da177e4
LT
1393
1394 if (err)
1395 goto out;
1396 }
1397 err = -EINVAL;
38308473 1398 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1399 goto out;
1400 }
1401
1402 err = -ENODEV;
38308473 1403 if (!dev)
1da177e4
LT
1404 goto out;
1405
c3968a85
DW
1406 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1407 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1408 err = -EINVAL;
1409 goto out;
1410 }
4e3fd7a0 1411 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1412 rt->rt6i_prefsrc.plen = 128;
1413 } else
1414 rt->rt6i_prefsrc.plen = 0;
1415
86872cb5 1416 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1417 err = rt6_bind_neighbour(rt, dev);
f83c7790 1418 if (err)
1da177e4 1419 goto out;
1da177e4
LT
1420 }
1421
86872cb5 1422 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1423
1424install_route:
86872cb5
TG
1425 if (cfg->fc_mx) {
1426 struct nlattr *nla;
1427 int remaining;
1428
1429 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1430 int type = nla_type(nla);
86872cb5
TG
1431
1432 if (type) {
1433 if (type > RTAX_MAX) {
1da177e4
LT
1434 err = -EINVAL;
1435 goto out;
1436 }
86872cb5 1437
defb3519 1438 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1439 }
1da177e4
LT
1440 }
1441 }
1442
d8d1f30b 1443 rt->dst.dev = dev;
1da177e4 1444 rt->rt6i_idev = idev;
c71099ac 1445 rt->rt6i_table = table;
63152fc0 1446
c346dca1 1447 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1448
86872cb5 1449 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1450
1451out:
1452 if (dev)
1453 dev_put(dev);
1454 if (idev)
1455 in6_dev_put(idev);
1456 if (rt)
d8d1f30b 1457 dst_free(&rt->dst);
1da177e4
LT
1458 return err;
1459}
1460
86872cb5 1461static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1462{
1463 int err;
c71099ac 1464 struct fib6_table *table;
d1918542 1465 struct net *net = dev_net(rt->dst.dev);
1da177e4 1466
8ed67789 1467 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1468 return -ENOENT;
1469
c71099ac
TG
1470 table = rt->rt6i_table;
1471 write_lock_bh(&table->tb6_lock);
1da177e4 1472
86872cb5 1473 err = fib6_del(rt, info);
d8d1f30b 1474 dst_release(&rt->dst);
1da177e4 1475
c71099ac 1476 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1477
1478 return err;
1479}
1480
e0a1ad73
TG
1481int ip6_del_rt(struct rt6_info *rt)
1482{
4d1169c1 1483 struct nl_info info = {
d1918542 1484 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1485 };
528c4ceb 1486 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1487}
1488
86872cb5 1489static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1490{
c71099ac 1491 struct fib6_table *table;
1da177e4
LT
1492 struct fib6_node *fn;
1493 struct rt6_info *rt;
1494 int err = -ESRCH;
1495
5578689a 1496 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1497 if (!table)
c71099ac
TG
1498 return err;
1499
1500 read_lock_bh(&table->tb6_lock);
1da177e4 1501
c71099ac 1502 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1503 &cfg->fc_dst, cfg->fc_dst_len,
1504 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1505
1da177e4 1506 if (fn) {
d8d1f30b 1507 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1508 if (cfg->fc_ifindex &&
d1918542
DM
1509 (!rt->dst.dev ||
1510 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1511 continue;
86872cb5
TG
1512 if (cfg->fc_flags & RTF_GATEWAY &&
1513 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1514 continue;
86872cb5 1515 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1516 continue;
d8d1f30b 1517 dst_hold(&rt->dst);
c71099ac 1518 read_unlock_bh(&table->tb6_lock);
1da177e4 1519
86872cb5 1520 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1521 }
1522 }
c71099ac 1523 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1524
1525 return err;
1526}
1527
1528/*
1529 * Handle redirects
1530 */
/*
 * Lookup key used while validating a redirect: a flow plus the address
 * that must equal the gateway of the route being redirected
 * (rt6_redirect() passes its saddr argument here).
 *
 * fl6 has to stay the first member: __ip6_route_redirect() receives a
 * struct flowi6 pointer and casts it back to struct ip6rd_flowi.
 */
a6279458 1531struct ip6rd_flowi {
4c9483b2 1532 struct flowi6 fl6;
a6279458
YH
1533 struct in6_addr gateway;
1534};
1535
8ed67789
DL
1536static struct rt6_info *__ip6_route_redirect(struct net *net,
1537 struct fib6_table *table,
4c9483b2 1538 struct flowi6 *fl6,
a6279458 1539 int flags)
1da177e4 1540{
4c9483b2 1541 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1542 struct rt6_info *rt;
e843b9e1 1543 struct fib6_node *fn;
c71099ac 1544
1da177e4 1545 /*
e843b9e1
YH
1546 * Get the "current" route for this destination and
1547 * check if the redirect has come from approriate router.
1548 *
1549 * RFC 2461 specifies that redirects should only be
1550 * accepted if they come from the nexthop to the target.
1551 * Due to the way the routes are chosen, this notion
1552 * is a bit fuzzy and one might need to check all possible
1553 * routes.
1da177e4 1554 */
1da177e4 1555
c71099ac 1556 read_lock_bh(&table->tb6_lock);
4c9483b2 1557 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1558restart:
d8d1f30b 1559 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1560 /*
1561 * Current route is on-link; redirect is always invalid.
1562 *
1563 * Seems, previous statement is not true. It could
1564 * be node, which looks for us as on-link (f.e. proxy ndisc)
1565 * But then router serving it might decide, that we should
1566 * know truth 8)8) --ANK (980726).
1567 */
1568 if (rt6_check_expired(rt))
1569 continue;
1570 if (!(rt->rt6i_flags & RTF_GATEWAY))
1571 continue;
d1918542 1572 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
e843b9e1 1573 continue;
a6279458 1574 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1575 continue;
1576 break;
1577 }
a6279458 1578
cb15d9c2 1579 if (!rt)
8ed67789 1580 rt = net->ipv6.ip6_null_entry;
4c9483b2 1581 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1582out:
d8d1f30b 1583 dst_hold(&rt->dst);
a6279458 1584
c71099ac 1585 read_unlock_bh(&table->tb6_lock);
e843b9e1 1586
a6279458
YH
1587 return rt;
1588};
1589
b71d1d42
ED
1590static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1591 const struct in6_addr *src,
1592 const struct in6_addr *gateway,
a6279458
YH
1593 struct net_device *dev)
1594{
adaa70bb 1595 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1596 struct net *net = dev_net(dev);
a6279458 1597 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1598 .fl6 = {
1599 .flowi6_oif = dev->ifindex,
1600 .daddr = *dest,
1601 .saddr = *src,
a6279458 1602 },
a6279458 1603 };
adaa70bb 1604
4e3fd7a0 1605 rdfl.gateway = *gateway;
86c36ce4 1606
adaa70bb
TG
1607 if (rt6_need_strict(dest))
1608 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1609
4c9483b2 1610 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1611 flags, __ip6_route_redirect);
a6279458
YH
1612}
1613
b71d1d42
ED
1614void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1615 const struct in6_addr *saddr,
a6279458
YH
1616 struct neighbour *neigh, u8 *lladdr, int on_link)
1617{
1618 struct rt6_info *rt, *nrt = NULL;
1619 struct netevent_redirect netevent;
c346dca1 1620 struct net *net = dev_net(neigh->dev);
a6279458
YH
1621
1622 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1623
8ed67789 1624 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1625 if (net_ratelimit())
1626 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1627 "for redirect target\n");
a6279458 1628 goto out;
1da177e4
LT
1629 }
1630
1da177e4
LT
1631 /*
1632 * We have finally decided to accept it.
1633 */
1634
1ab1457c 1635 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1636 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1637 NEIGH_UPDATE_F_OVERRIDE|
1638 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1639 NEIGH_UPDATE_F_ISROUTER))
1640 );
1641
1642 /*
1643 * Redirect received -> path was valid.
1644 * Look, redirects are sent only in response to data packets,
1645 * so that this nexthop apparently is reachable. --ANK
1646 */
d8d1f30b 1647 dst_confirm(&rt->dst);
1da177e4
LT
1648
1649 /* Duplicate redirect: silently ignore. */
27217455 1650 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1da177e4
LT
1651 goto out;
1652
21efcfa0 1653 nrt = ip6_rt_copy(rt, dest);
38308473 1654 if (!nrt)
1da177e4
LT
1655 goto out;
1656
1657 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1658 if (on_link)
1659 nrt->rt6i_flags &= ~RTF_GATEWAY;
1660
4e3fd7a0 1661 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
69cce1d1 1662 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1663
40e22e8f 1664 if (ip6_ins_rt(nrt))
1da177e4
LT
1665 goto out;
1666
d8d1f30b
CG
1667 netevent.old = &rt->dst;
1668 netevent.new = &nrt->dst;
8d71740c
TT
1669 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1670
38308473 1671 if (rt->rt6i_flags & RTF_CACHE) {
e0a1ad73 1672 ip6_del_rt(rt);
1da177e4
LT
1673 return;
1674 }
1675
1676out:
d8d1f30b 1677 dst_release(&rt->dst);
1da177e4
LT
1678}
1679
1680/*
1681 * Handle ICMP "packet too big" messages
1682 * i.e. Path MTU discovery
1683 */
1684
b71d1d42 1685static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1686 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1687{
1688 struct rt6_info *rt, *nrt;
1689 int allfrag = 0;
d3052b55 1690again:
ae878ae2 1691 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
38308473 1692 if (!rt)
1da177e4
LT
1693 return;
1694
d3052b55
AV
1695 if (rt6_check_expired(rt)) {
1696 ip6_del_rt(rt);
1697 goto again;
1698 }
1699
d8d1f30b 1700 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1701 goto out;
1702
1703 if (pmtu < IPV6_MIN_MTU) {
1704 /*
1ab1457c 1705 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1706 * MTU (1280) and a fragment header should always be included
1707 * after a node receiving Too Big message reporting PMTU is
1708 * less than the IPv6 Minimum Link MTU.
1709 */
1710 pmtu = IPV6_MIN_MTU;
1711 allfrag = 1;
1712 }
1713
1714 /* New mtu received -> path was valid.
1715 They are sent only in response to data packets,
1716 so that this nexthop apparently is reachable. --ANK
1717 */
d8d1f30b 1718 dst_confirm(&rt->dst);
1da177e4
LT
1719
1720 /* Host route. If it is static, it would be better
1721 not to override it, but add new one, so that
1722 when cache entry will expire old pmtu
1723 would return automatically.
1724 */
1725 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1726 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1727 if (allfrag) {
1728 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1729 features |= RTAX_FEATURE_ALLFRAG;
1730 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1731 }
d8d1f30b 1732 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1733 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1734 goto out;
1735 }
1736
1737 /* Network route.
1738 Two cases are possible:
1739 1. It is connected route. Action: COW
1740 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1741 */
27217455 1742 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1743 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1744 else
1745 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1746
d5315b50 1747 if (nrt) {
defb3519
DM
1748 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1749 if (allfrag) {
1750 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1751 features |= RTAX_FEATURE_ALLFRAG;
1752 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1753 }
a1e78363
YH
1754
1755 /* According to RFC 1981, detecting PMTU increase shouldn't be
1756 * happened within 5 mins, the recommended timer is 10 mins.
1757 * Here this route expiration time is set to ip6_rt_mtu_expires
1758 * which is 10 mins. After 10 mins the decreased pmtu is expired
1759 * and detecting PMTU increase will be automatically happened.
1760 */
d8d1f30b 1761 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1762 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1763
40e22e8f 1764 ip6_ins_rt(nrt);
1da177e4 1765 }
1da177e4 1766out:
d8d1f30b 1767 dst_release(&rt->dst);
1da177e4
LT
1768}
1769
b71d1d42 1770void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1771 struct net_device *dev, u32 pmtu)
1772{
1773 struct net *net = dev_net(dev);
1774
1775 /*
1776 * RFC 1981 states that a node "MUST reduce the size of the packets it
1777 * is sending along the path" that caused the Packet Too Big message.
1778 * Since it's not possible in the general case to determine which
1779 * interface was used to send the original packet, we update the MTU
1780 * on the interface that will be used to send future packets. We also
1781 * update the MTU on the interface that received the Packet Too Big in
1782 * case the original packet was forced out that interface with
1783 * SO_BINDTODEVICE or similar. This is the next best thing to the
1784 * correct behaviour, which would be to update the MTU on all
1785 * interfaces.
1786 */
1787 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1788 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1789}
1790
1da177e4
LT
1791/*
1792 * Misc support functions
1793 */
1794
21efcfa0
ED
1795static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1796 const struct in6_addr *dest)
1da177e4 1797{
d1918542 1798 struct net *net = dev_net(ort->dst.dev);
5c1e6aa3 1799 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 1800 ort->dst.dev, 0);
1da177e4
LT
1801
1802 if (rt) {
d8d1f30b
CG
1803 rt->dst.input = ort->dst.input;
1804 rt->dst.output = ort->dst.output;
8e2ec639 1805 rt->dst.flags |= DST_HOST;
d8d1f30b 1806
4e3fd7a0 1807 rt->rt6i_dst.addr = *dest;
8e2ec639 1808 rt->rt6i_dst.plen = 128;
defb3519 1809 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1810 rt->dst.error = ort->dst.error;
1da177e4
LT
1811 rt->rt6i_idev = ort->rt6i_idev;
1812 if (rt->rt6i_idev)
1813 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1814 rt->dst.lastuse = jiffies;
d1918542 1815 rt->dst.expires = 0;
1da177e4 1816
4e3fd7a0 1817 rt->rt6i_gateway = ort->rt6i_gateway;
1da177e4
LT
1818 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1819 rt->rt6i_metric = 0;
1820
1da177e4
LT
1821#ifdef CONFIG_IPV6_SUBTREES
1822 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1823#endif
0f6c6392 1824 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1825 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1826 }
1827 return rt;
1828}
1829
70ceb4f5 1830#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1831static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1832 const struct in6_addr *prefix, int prefixlen,
1833 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1834{
1835 struct fib6_node *fn;
1836 struct rt6_info *rt = NULL;
c71099ac
TG
1837 struct fib6_table *table;
1838
efa2cea0 1839 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1840 if (!table)
c71099ac 1841 return NULL;
70ceb4f5 1842
c71099ac
TG
1843 write_lock_bh(&table->tb6_lock);
1844 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1845 if (!fn)
1846 goto out;
1847
d8d1f30b 1848 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1849 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1850 continue;
1851 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1852 continue;
1853 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1854 continue;
d8d1f30b 1855 dst_hold(&rt->dst);
70ceb4f5
YH
1856 break;
1857 }
1858out:
c71099ac 1859 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1860 return rt;
1861}
1862
efa2cea0 1863static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1864 const struct in6_addr *prefix, int prefixlen,
1865 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5
YH
1866 unsigned pref)
1867{
86872cb5
TG
1868 struct fib6_config cfg = {
1869 .fc_table = RT6_TABLE_INFO,
238fc7ea 1870 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1871 .fc_ifindex = ifindex,
1872 .fc_dst_len = prefixlen,
1873 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1874 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1875 .fc_nlinfo.pid = 0,
1876 .fc_nlinfo.nlh = NULL,
1877 .fc_nlinfo.nl_net = net,
86872cb5
TG
1878 };
1879
4e3fd7a0
AD
1880 cfg.fc_dst = *prefix;
1881 cfg.fc_gateway = *gwaddr;
70ceb4f5 1882
e317da96
YH
1883 /* We should treat it as a default route if prefix length is 0. */
1884 if (!prefixlen)
86872cb5 1885 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1886
86872cb5 1887 ip6_route_add(&cfg);
70ceb4f5 1888
efa2cea0 1889 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1890}
1891#endif
1892
b71d1d42 1893struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1894{
1da177e4 1895 struct rt6_info *rt;
c71099ac 1896 struct fib6_table *table;
1da177e4 1897
c346dca1 1898 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1899 if (!table)
c71099ac 1900 return NULL;
1da177e4 1901
c71099ac 1902 write_lock_bh(&table->tb6_lock);
d8d1f30b 1903 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1904 if (dev == rt->dst.dev &&
045927ff 1905 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1906 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1907 break;
1908 }
1909 if (rt)
d8d1f30b 1910 dst_hold(&rt->dst);
c71099ac 1911 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1912 return rt;
1913}
1914
b71d1d42 1915struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1916 struct net_device *dev,
1917 unsigned int pref)
1da177e4 1918{
86872cb5
TG
1919 struct fib6_config cfg = {
1920 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1921 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1922 .fc_ifindex = dev->ifindex,
1923 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1924 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1925 .fc_nlinfo.pid = 0,
1926 .fc_nlinfo.nlh = NULL,
c346dca1 1927 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1928 };
1da177e4 1929
4e3fd7a0 1930 cfg.fc_gateway = *gwaddr;
1da177e4 1931
86872cb5 1932 ip6_route_add(&cfg);
1da177e4 1933
1da177e4
LT
1934 return rt6_get_dflt_router(gwaddr, dev);
1935}
1936
7b4da532 1937void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1938{
1939 struct rt6_info *rt;
c71099ac
TG
1940 struct fib6_table *table;
1941
1942 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1943 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1944 if (!table)
c71099ac 1945 return;
1da177e4
LT
1946
1947restart:
c71099ac 1948 read_lock_bh(&table->tb6_lock);
d8d1f30b 1949 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1950 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1951 dst_hold(&rt->dst);
c71099ac 1952 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1953 ip6_del_rt(rt);
1da177e4
LT
1954 goto restart;
1955 }
1956 }
c71099ac 1957 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1958}
1959
5578689a
DL
1960static void rtmsg_to_fib6_config(struct net *net,
1961 struct in6_rtmsg *rtmsg,
86872cb5
TG
1962 struct fib6_config *cfg)
1963{
1964 memset(cfg, 0, sizeof(*cfg));
1965
1966 cfg->fc_table = RT6_TABLE_MAIN;
1967 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1968 cfg->fc_metric = rtmsg->rtmsg_metric;
1969 cfg->fc_expires = rtmsg->rtmsg_info;
1970 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1971 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1972 cfg->fc_flags = rtmsg->rtmsg_flags;
1973
5578689a 1974 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1975
4e3fd7a0
AD
1976 cfg->fc_dst = rtmsg->rtmsg_dst;
1977 cfg->fc_src = rtmsg->rtmsg_src;
1978 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1979}
1980
5578689a 1981int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1982{
86872cb5 1983 struct fib6_config cfg;
1da177e4
LT
1984 struct in6_rtmsg rtmsg;
1985 int err;
1986
1987 switch(cmd) {
1988 case SIOCADDRT: /* Add a route */
1989 case SIOCDELRT: /* Delete a route */
1990 if (!capable(CAP_NET_ADMIN))
1991 return -EPERM;
1992 err = copy_from_user(&rtmsg, arg,
1993 sizeof(struct in6_rtmsg));
1994 if (err)
1995 return -EFAULT;
86872cb5 1996
5578689a 1997 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1998
1da177e4
LT
1999 rtnl_lock();
2000 switch (cmd) {
2001 case SIOCADDRT:
86872cb5 2002 err = ip6_route_add(&cfg);
1da177e4
LT
2003 break;
2004 case SIOCDELRT:
86872cb5 2005 err = ip6_route_del(&cfg);
1da177e4
LT
2006 break;
2007 default:
2008 err = -EINVAL;
2009 }
2010 rtnl_unlock();
2011
2012 return err;
3ff50b79 2013 }
1da177e4
LT
2014
2015 return -EINVAL;
2016}
2017
2018/*
2019 * Drop the packet on the floor
2020 */
2021
d5fdd6ba 2022static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2023{
612f09e8 2024 int type;
adf30907 2025 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2026 switch (ipstats_mib_noroutes) {
2027 case IPSTATS_MIB_INNOROUTES:
0660e03f 2028 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2029 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2030 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2031 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2032 break;
2033 }
2034 /* FALLTHROUGH */
2035 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2036 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2037 ipstats_mib_noroutes);
612f09e8
YH
2038 break;
2039 }
3ffe533c 2040 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2041 kfree_skb(skb);
2042 return 0;
2043}
2044
9ce8ade0
TG
2045static int ip6_pkt_discard(struct sk_buff *skb)
2046{
612f09e8 2047 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2048}
2049
20380731 2050static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2051{
adf30907 2052 skb->dev = skb_dst(skb)->dev;
612f09e8 2053 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2054}
2055
6723ab54
DM
2056#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2057
9ce8ade0
TG
2058static int ip6_pkt_prohibit(struct sk_buff *skb)
2059{
612f09e8 2060 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2061}
2062
2063static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2064{
adf30907 2065 skb->dev = skb_dst(skb)->dev;
612f09e8 2066 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2067}
2068
6723ab54
DM
2069#endif
2070
1da177e4
LT
2071/*
2072 * Allocate a dst for local (unicast / anycast) address.
2073 */
2074
2075struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2076 const struct in6_addr *addr,
8f031519 2077 bool anycast)
1da177e4 2078{
c346dca1 2079 struct net *net = dev_net(idev->dev);
5c1e6aa3 2080 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 2081 net->loopback_dev, 0);
f83c7790 2082 int err;
1da177e4 2083
38308473 2084 if (!rt) {
40385653
BG
2085 if (net_ratelimit())
2086 pr_warning("IPv6: Maximum number of routes reached,"
2087 " consider increasing route/max_size.\n");
1da177e4 2088 return ERR_PTR(-ENOMEM);
40385653 2089 }
1da177e4 2090
1da177e4
LT
2091 in6_dev_hold(idev);
2092
11d53b49 2093 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2094 rt->dst.input = ip6_input;
2095 rt->dst.output = ip6_output;
1da177e4 2096 rt->rt6i_idev = idev;
d8d1f30b 2097 rt->dst.obsolete = -1;
1da177e4
LT
2098
2099 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2100 if (anycast)
2101 rt->rt6i_flags |= RTF_ANYCAST;
2102 else
1da177e4 2103 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2104 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2105 if (err) {
d8d1f30b 2106 dst_free(&rt->dst);
f83c7790 2107 return ERR_PTR(err);
1da177e4
LT
2108 }
2109
4e3fd7a0 2110 rt->rt6i_dst.addr = *addr;
1da177e4 2111 rt->rt6i_dst.plen = 128;
5578689a 2112 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2113
d8d1f30b 2114 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2115
2116 return rt;
2117}
2118
c3968a85
DW
2119int ip6_route_get_saddr(struct net *net,
2120 struct rt6_info *rt,
b71d1d42 2121 const struct in6_addr *daddr,
c3968a85
DW
2122 unsigned int prefs,
2123 struct in6_addr *saddr)
2124{
2125 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2126 int err = 0;
2127 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2128 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2129 else
2130 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2131 daddr, prefs, saddr);
2132 return err;
2133}
2134
/*
 * Walker argument used to strip a deleted address from the preferred
 * source of matching routes (see fib6_remove_prefsrc()).
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address being removed */
};
2141
2142static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2143{
2144 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2145 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2146 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2147
d1918542 2148 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2149 rt != net->ipv6.ip6_null_entry &&
2150 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2151 /* remove prefsrc entry */
2152 rt->rt6i_prefsrc.plen = 0;
2153 }
2154 return 0;
2155}
2156
2157void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2158{
2159 struct net *net = dev_net(ifp->idev->dev);
2160 struct arg_dev_net_ip adni = {
2161 .dev = ifp->idev->dev,
2162 .net = net,
2163 .addr = &ifp->addr,
2164 };
2165 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2166}
2167
/* Walker argument for fib6_ifdown(): which device went down, and where. */
struct arg_dev_net {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace whose null entry is kept */
};
2172
1da177e4
LT
2173static int fib6_ifdown(struct rt6_info *rt, void *arg)
2174{
bc3ef660 2175 const struct arg_dev_net *adn = arg;
2176 const struct net_device *dev = adn->dev;
8ed67789 2177
d1918542 2178 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2179 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2180 return -1;
c159d30c 2181
1da177e4
LT
2182 return 0;
2183}
2184
f3db4851 2185void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2186{
8ed67789
DL
2187 struct arg_dev_net adn = {
2188 .dev = dev,
2189 .net = net,
2190 };
2191
2192 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2193 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2194}
2195
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned mtu;			/* the new MTU */
};
2201
2202static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2203{
2204 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2205 struct inet6_dev *idev;
2206
2207 /* In IPv6 pmtu discovery is not optional,
2208 so that RTAX_MTU lock cannot disable it.
2209 We still use this lock to block changes
2210 caused by addrconf/ndisc.
2211 */
2212
2213 idev = __in6_dev_get(arg->dev);
38308473 2214 if (!idev)
1da177e4
LT
2215 return 0;
2216
2217 /* For administrative MTU increase, there is no way to discover
2218 IPv6 PMTU increase, so PMTU increase should be updated here.
2219 Since RFC 1981 doesn't include administrative MTU increase
2220 update PMTU increase is a MUST. (i.e. jumbo frame)
2221 */
2222 /*
2223 If new MTU is less than route PMTU, this new MTU will be the
2224 lowest MTU in the path, update the route PMTU to reflect PMTU
2225 decreases; if new MTU is greater than route PMTU, and the
2226 old MTU is the lowest MTU in the path, update the route PMTU
2227 to reflect the increase. In this case if the other nodes' MTU
2228 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2229 PMTU discouvery.
2230 */
d1918542 2231 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2232 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2233 (dst_mtu(&rt->dst) >= arg->mtu ||
2234 (dst_mtu(&rt->dst) < arg->mtu &&
2235 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2236 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2237 }
1da177e4
LT
2238 return 0;
2239}
2240
2241void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2242{
c71099ac
TG
2243 struct rt6_mtu_change_arg arg = {
2244 .dev = dev,
2245 .mtu = mtu,
2246 };
1da177e4 2247
c346dca1 2248 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2249}
2250
ef7c79ed 2251static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2252 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2253 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2254 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2255 [RTA_PRIORITY] = { .type = NLA_U32 },
2256 [RTA_METRICS] = { .type = NLA_NESTED },
2257};
2258
2259static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2260 struct fib6_config *cfg)
1da177e4 2261{
86872cb5
TG
2262 struct rtmsg *rtm;
2263 struct nlattr *tb[RTA_MAX+1];
2264 int err;
1da177e4 2265
86872cb5
TG
2266 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2267 if (err < 0)
2268 goto errout;
1da177e4 2269
86872cb5
TG
2270 err = -EINVAL;
2271 rtm = nlmsg_data(nlh);
2272 memset(cfg, 0, sizeof(*cfg));
2273
2274 cfg->fc_table = rtm->rtm_table;
2275 cfg->fc_dst_len = rtm->rtm_dst_len;
2276 cfg->fc_src_len = rtm->rtm_src_len;
2277 cfg->fc_flags = RTF_UP;
2278 cfg->fc_protocol = rtm->rtm_protocol;
2279
2280 if (rtm->rtm_type == RTN_UNREACHABLE)
2281 cfg->fc_flags |= RTF_REJECT;
2282
ab79ad14
2283 if (rtm->rtm_type == RTN_LOCAL)
2284 cfg->fc_flags |= RTF_LOCAL;
2285
86872cb5
TG
2286 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2287 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2288 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2289
2290 if (tb[RTA_GATEWAY]) {
2291 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2292 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2293 }
86872cb5
TG
2294
2295 if (tb[RTA_DST]) {
2296 int plen = (rtm->rtm_dst_len + 7) >> 3;
2297
2298 if (nla_len(tb[RTA_DST]) < plen)
2299 goto errout;
2300
2301 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2302 }
86872cb5
TG
2303
2304 if (tb[RTA_SRC]) {
2305 int plen = (rtm->rtm_src_len + 7) >> 3;
2306
2307 if (nla_len(tb[RTA_SRC]) < plen)
2308 goto errout;
2309
2310 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2311 }
86872cb5 2312
c3968a85
DW
2313 if (tb[RTA_PREFSRC])
2314 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2315
86872cb5
TG
2316 if (tb[RTA_OIF])
2317 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2318
2319 if (tb[RTA_PRIORITY])
2320 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2321
2322 if (tb[RTA_METRICS]) {
2323 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2324 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2325 }
86872cb5
TG
2326
2327 if (tb[RTA_TABLE])
2328 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2329
2330 err = 0;
2331errout:
2332 return err;
1da177e4
LT
2333}
2334
c127ea2c 2335static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2336{
86872cb5
TG
2337 struct fib6_config cfg;
2338 int err;
1da177e4 2339
86872cb5
TG
2340 err = rtm_to_fib6_config(skb, nlh, &cfg);
2341 if (err < 0)
2342 return err;
2343
2344 return ip6_route_del(&cfg);
1da177e4
LT
2345}
2346
c127ea2c 2347static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2348{
86872cb5
TG
2349 struct fib6_config cfg;
2350 int err;
1da177e4 2351
86872cb5
TG
2352 err = rtm_to_fib6_config(skb, nlh, &cfg);
2353 if (err < 0)
2354 return err;
2355
2356 return ip6_route_add(&cfg);
1da177e4
LT
2357}
2358
339bf98f
TG
2359static inline size_t rt6_nlmsg_size(void)
2360{
2361 return NLMSG_ALIGN(sizeof(struct rtmsg))
2362 + nla_total_size(16) /* RTA_SRC */
2363 + nla_total_size(16) /* RTA_DST */
2364 + nla_total_size(16) /* RTA_GATEWAY */
2365 + nla_total_size(16) /* RTA_PREFSRC */
2366 + nla_total_size(4) /* RTA_TABLE */
2367 + nla_total_size(4) /* RTA_IIF */
2368 + nla_total_size(4) /* RTA_OIF */
2369 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2370 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2371 + nla_total_size(sizeof(struct rta_cacheinfo));
2372}
2373
191cd582
BH
2374static int rt6_fill_node(struct net *net,
2375 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2376 struct in6_addr *dst, struct in6_addr *src,
2377 int iif, int type, u32 pid, u32 seq,
7bc570c8 2378 int prefix, int nowait, unsigned int flags)
1da177e4 2379{
346f870b 2380 const struct inet_peer *peer;
1da177e4 2381 struct rtmsg *rtm;
2d7202bf 2382 struct nlmsghdr *nlh;
e3703b3d 2383 long expires;
9e762a4a 2384 u32 table;
f2c31e32 2385 struct neighbour *n;
346f870b 2386 u32 ts, tsage;
1da177e4
LT
2387
2388 if (prefix) { /* user wants prefix routes only */
2389 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2390 /* success since this is not a prefix route */
2391 return 1;
2392 }
2393 }
2394
2d7202bf 2395 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2396 if (!nlh)
26932566 2397 return -EMSGSIZE;
2d7202bf
TG
2398
2399 rtm = nlmsg_data(nlh);
1da177e4
LT
2400 rtm->rtm_family = AF_INET6;
2401 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2402 rtm->rtm_src_len = rt->rt6i_src.plen;
2403 rtm->rtm_tos = 0;
c71099ac 2404 if (rt->rt6i_table)
9e762a4a 2405 table = rt->rt6i_table->tb6_id;
c71099ac 2406 else
9e762a4a
PM
2407 table = RT6_TABLE_UNSPEC;
2408 rtm->rtm_table = table;
2d7202bf 2409 NLA_PUT_U32(skb, RTA_TABLE, table);
38308473 2410 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2411 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2412 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2413 rtm->rtm_type = RTN_LOCAL;
d1918542 2414 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2415 rtm->rtm_type = RTN_LOCAL;
2416 else
2417 rtm->rtm_type = RTN_UNICAST;
2418 rtm->rtm_flags = 0;
2419 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2420 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2421 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4
LT
2422 rtm->rtm_protocol = RTPROT_REDIRECT;
2423 else if (rt->rt6i_flags & RTF_ADDRCONF)
2424 rtm->rtm_protocol = RTPROT_KERNEL;
38308473 2425 else if (rt->rt6i_flags & RTF_DEFAULT)
1da177e4
LT
2426 rtm->rtm_protocol = RTPROT_RA;
2427
38308473 2428 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2429 rtm->rtm_flags |= RTM_F_CLONED;
2430
2431 if (dst) {
2d7202bf 2432 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2433 rtm->rtm_dst_len = 128;
1da177e4 2434 } else if (rtm->rtm_dst_len)
2d7202bf 2435 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2436#ifdef CONFIG_IPV6_SUBTREES
2437 if (src) {
2d7202bf 2438 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2439 rtm->rtm_src_len = 128;
1da177e4 2440 } else if (rtm->rtm_src_len)
2d7202bf 2441 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2442#endif
7bc570c8
YH
2443 if (iif) {
2444#ifdef CONFIG_IPV6_MROUTE
2445 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2446 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2447 if (err <= 0) {
2448 if (!nowait) {
2449 if (err == 0)
2450 return 0;
2451 goto nla_put_failure;
2452 } else {
2453 if (err == -EMSGSIZE)
2454 goto nla_put_failure;
2455 }
2456 }
2457 } else
2458#endif
2459 NLA_PUT_U32(skb, RTA_IIF, iif);
2460 } else if (dst) {
1da177e4 2461 struct in6_addr saddr_buf;
c3968a85 2462 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2d7202bf 2463 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2464 }
2d7202bf 2465
c3968a85
DW
2466 if (rt->rt6i_prefsrc.plen) {
2467 struct in6_addr saddr_buf;
4e3fd7a0 2468 saddr_buf = rt->rt6i_prefsrc.addr;
c3968a85
DW
2469 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2470 }
2471
defb3519 2472 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2473 goto nla_put_failure;
2474
f2c31e32 2475 rcu_read_lock();
27217455 2476 n = dst_get_neighbour_noref(&rt->dst);
94f826b8
ED
2477 if (n) {
2478 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2479 rcu_read_unlock();
2480 goto nla_put_failure;
2481 }
2482 }
f2c31e32 2483 rcu_read_unlock();
2d7202bf 2484
d8d1f30b 2485 if (rt->dst.dev)
d1918542 2486 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
2d7202bf
TG
2487
2488 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2489
36e3deae
YH
2490 if (!(rt->rt6i_flags & RTF_EXPIRES))
2491 expires = 0;
d1918542
DM
2492 else if (rt->dst.expires - jiffies < INT_MAX)
2493 expires = rt->dst.expires - jiffies;
36e3deae
YH
2494 else
2495 expires = INT_MAX;
69cdf8f9 2496
346f870b
DM
2497 peer = rt->rt6i_peer;
2498 ts = tsage = 0;
2499 if (peer && peer->tcp_ts_stamp) {
2500 ts = peer->tcp_ts;
2501 tsage = get_seconds() - peer->tcp_ts_stamp;
2502 }
2503
2504 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
d8d1f30b 2505 expires, rt->dst.error) < 0)
e3703b3d 2506 goto nla_put_failure;
2d7202bf
TG
2507
2508 return nlmsg_end(skb, nlh);
2509
2510nla_put_failure:
26932566
PM
2511 nlmsg_cancel(skb, nlh);
2512 return -EMSGSIZE;
1da177e4
LT
2513}
2514
1b43af54 2515int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2516{
2517 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2518 int prefix;
2519
2d7202bf
TG
2520 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2521 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2522 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2523 } else
2524 prefix = 0;
2525
191cd582
BH
2526 return rt6_fill_node(arg->net,
2527 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2528 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2529 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2530}
2531
c127ea2c 2532static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2533{
3b1e0a65 2534 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2535 struct nlattr *tb[RTA_MAX+1];
2536 struct rt6_info *rt;
1da177e4 2537 struct sk_buff *skb;
ab364a6f 2538 struct rtmsg *rtm;
4c9483b2 2539 struct flowi6 fl6;
ab364a6f 2540 int err, iif = 0;
1da177e4 2541
ab364a6f
TG
2542 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2543 if (err < 0)
2544 goto errout;
1da177e4 2545
ab364a6f 2546 err = -EINVAL;
4c9483b2 2547 memset(&fl6, 0, sizeof(fl6));
1da177e4 2548
ab364a6f
TG
2549 if (tb[RTA_SRC]) {
2550 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2551 goto errout;
2552
4e3fd7a0 2553 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2554 }
2555
2556 if (tb[RTA_DST]) {
2557 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2558 goto errout;
2559
4e3fd7a0 2560 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2561 }
2562
2563 if (tb[RTA_IIF])
2564 iif = nla_get_u32(tb[RTA_IIF]);
2565
2566 if (tb[RTA_OIF])
4c9483b2 2567 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2568
2569 if (iif) {
2570 struct net_device *dev;
5578689a 2571 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2572 if (!dev) {
2573 err = -ENODEV;
ab364a6f 2574 goto errout;
1da177e4
LT
2575 }
2576 }
2577
ab364a6f 2578 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2579 if (!skb) {
ab364a6f
TG
2580 err = -ENOBUFS;
2581 goto errout;
2582 }
1da177e4 2583
ab364a6f
TG
2584 /* Reserve room for dummy headers, this skb can pass
2585 through good chunk of routing engine.
2586 */
459a98ed 2587 skb_reset_mac_header(skb);
ab364a6f 2588 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2589
4c9483b2 2590 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
d8d1f30b 2591 skb_dst_set(skb, &rt->dst);
1da177e4 2592
4c9483b2 2593 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2594 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2595 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2596 if (err < 0) {
ab364a6f
TG
2597 kfree_skb(skb);
2598 goto errout;
1da177e4
LT
2599 }
2600
5578689a 2601 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2602errout:
1da177e4 2603 return err;
1da177e4
LT
2604}
2605
86872cb5 2606void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2607{
2608 struct sk_buff *skb;
5578689a 2609 struct net *net = info->nl_net;
528c4ceb
DL
2610 u32 seq;
2611 int err;
2612
2613 err = -ENOBUFS;
38308473 2614 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2615
339bf98f 2616 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2617 if (!skb)
21713ebc
TG
2618 goto errout;
2619
191cd582 2620 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2621 event, info->pid, seq, 0, 0, 0);
26932566
PM
2622 if (err < 0) {
2623 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2624 WARN_ON(err == -EMSGSIZE);
2625 kfree_skb(skb);
2626 goto errout;
2627 }
1ce85fe4
PNA
2628 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2629 info->nlh, gfp_any());
2630 return;
21713ebc
TG
2631errout:
2632 if (err < 0)
5578689a 2633 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2634}
2635
8ed67789
DL
2636static int ip6_route_dev_notify(struct notifier_block *this,
2637 unsigned long event, void *data)
2638{
2639 struct net_device *dev = (struct net_device *)data;
c346dca1 2640 struct net *net = dev_net(dev);
8ed67789
DL
2641
2642 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2643 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2644 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2645#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2646 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2647 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2648 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2649 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2650#endif
2651 }
2652
2653 return NOTIFY_OK;
2654}
2655
1da177e4
LT
2656/*
2657 * /proc
2658 */
2659
2660#ifdef CONFIG_PROC_FS
2661
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): nothing in the surrounding seq_file based code uses
 * this structure - confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2670
2671static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2672{
33120b30 2673 struct seq_file *m = p_arg;
69cce1d1 2674 struct neighbour *n;
1da177e4 2675
4b7a4274 2676 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2677
2678#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2679 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2680#else
33120b30 2681 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2682#endif
f2c31e32 2683 rcu_read_lock();
27217455 2684 n = dst_get_neighbour_noref(&rt->dst);
69cce1d1
DM
2685 if (n) {
2686 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2687 } else {
33120b30 2688 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2689 }
f2c31e32 2690 rcu_read_unlock();
33120b30 2691 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2692 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2693 rt->dst.__use, rt->rt6i_flags,
d1918542 2694 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2695 return 0;
2696}
2697
33120b30 2698static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2699{
f3db4851 2700 struct net *net = (struct net *)m->private;
32b293a5 2701 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2702 return 0;
2703}
1da177e4 2704
33120b30
AD
2705static int ipv6_route_open(struct inode *inode, struct file *file)
2706{
de05c557 2707 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2708}
2709
33120b30
AD
2710static const struct file_operations ipv6_route_proc_fops = {
2711 .owner = THIS_MODULE,
2712 .open = ipv6_route_open,
2713 .read = seq_read,
2714 .llseek = seq_lseek,
b6fcbdb4 2715 .release = single_release_net,
33120b30
AD
2716};
2717
1da177e4
LT
2718static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2719{
69ddb805 2720 struct net *net = (struct net *)seq->private;
1da177e4 2721 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2722 net->ipv6.rt6_stats->fib_nodes,
2723 net->ipv6.rt6_stats->fib_route_nodes,
2724 net->ipv6.rt6_stats->fib_rt_alloc,
2725 net->ipv6.rt6_stats->fib_rt_entries,
2726 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2727 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2728 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2729
2730 return 0;
2731}
2732
2733static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2734{
de05c557 2735 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2736}
2737
9a32144e 2738static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2739 .owner = THIS_MODULE,
2740 .open = rt6_stats_seq_open,
2741 .read = seq_read,
2742 .llseek = seq_lseek,
b6fcbdb4 2743 .release = single_release_net,
1da177e4
LT
2744};
2745#endif /* CONFIG_PROC_FS */
2746
2747#ifdef CONFIG_SYSCTL
2748
1da177e4 2749static
8d65af78 2750int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2751 void __user *buffer, size_t *lenp, loff_t *ppos)
2752{
c486da34
LAG
2753 struct net *net;
2754 int delay;
2755 if (!write)
1da177e4 2756 return -EINVAL;
c486da34
LAG
2757
2758 net = (struct net *)ctl->extra1;
2759 delay = net->ipv6.sysctl.flush_delay;
2760 proc_dointvec(ctl, write, buffer, lenp, ppos);
2761 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2762 return 0;
1da177e4
LT
2763}
2764
760f2d01 2765ctl_table ipv6_route_table_template[] = {
1ab1457c 2766 {
1da177e4 2767 .procname = "flush",
4990509f 2768 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2769 .maxlen = sizeof(int),
89c8b3a1 2770 .mode = 0200,
6d9f239a 2771 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2772 },
2773 {
1da177e4 2774 .procname = "gc_thresh",
9a7ec3a9 2775 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2776 .maxlen = sizeof(int),
2777 .mode = 0644,
6d9f239a 2778 .proc_handler = proc_dointvec,
1da177e4
LT
2779 },
2780 {
1da177e4 2781 .procname = "max_size",
4990509f 2782 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2783 .maxlen = sizeof(int),
2784 .mode = 0644,
6d9f239a 2785 .proc_handler = proc_dointvec,
1da177e4
LT
2786 },
2787 {
1da177e4 2788 .procname = "gc_min_interval",
4990509f 2789 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2790 .maxlen = sizeof(int),
2791 .mode = 0644,
6d9f239a 2792 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2793 },
2794 {
1da177e4 2795 .procname = "gc_timeout",
4990509f 2796 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2797 .maxlen = sizeof(int),
2798 .mode = 0644,
6d9f239a 2799 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2800 },
2801 {
1da177e4 2802 .procname = "gc_interval",
4990509f 2803 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2804 .maxlen = sizeof(int),
2805 .mode = 0644,
6d9f239a 2806 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2807 },
2808 {
1da177e4 2809 .procname = "gc_elasticity",
4990509f 2810 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2811 .maxlen = sizeof(int),
2812 .mode = 0644,
f3d3f616 2813 .proc_handler = proc_dointvec,
1da177e4
LT
2814 },
2815 {
1da177e4 2816 .procname = "mtu_expires",
4990509f 2817 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2818 .maxlen = sizeof(int),
2819 .mode = 0644,
6d9f239a 2820 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2821 },
2822 {
1da177e4 2823 .procname = "min_adv_mss",
4990509f 2824 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2825 .maxlen = sizeof(int),
2826 .mode = 0644,
f3d3f616 2827 .proc_handler = proc_dointvec,
1da177e4
LT
2828 },
2829 {
1da177e4 2830 .procname = "gc_min_interval_ms",
4990509f 2831 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2832 .maxlen = sizeof(int),
2833 .mode = 0644,
6d9f239a 2834 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2835 },
f8572d8f 2836 { }
1da177e4
LT
2837};
2838
2c8c1e72 2839struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2840{
2841 struct ctl_table *table;
2842
2843 table = kmemdup(ipv6_route_table_template,
2844 sizeof(ipv6_route_table_template),
2845 GFP_KERNEL);
5ee09105
YH
2846
2847 if (table) {
2848 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2849 table[0].extra1 = net;
86393e52 2850 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2851 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2852 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2853 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2854 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2855 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2856 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2857 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2858 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2859 }
2860
760f2d01
DL
2861 return table;
2862}
1da177e4
LT
2863#endif
2864
2c8c1e72 2865static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2866{
633d424b 2867 int ret = -ENOMEM;
8ed67789 2868
86393e52
AD
2869 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2870 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2871
fc66f95c
ED
2872 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2873 goto out_ip6_dst_ops;
2874
8ed67789
DL
2875 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2876 sizeof(*net->ipv6.ip6_null_entry),
2877 GFP_KERNEL);
2878 if (!net->ipv6.ip6_null_entry)
fc66f95c 2879 goto out_ip6_dst_entries;
d8d1f30b 2880 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2881 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2882 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2883 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2884 ip6_template_metrics, true);
8ed67789
DL
2885
2886#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2887 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2888 sizeof(*net->ipv6.ip6_prohibit_entry),
2889 GFP_KERNEL);
68fffc67
PZ
2890 if (!net->ipv6.ip6_prohibit_entry)
2891 goto out_ip6_null_entry;
d8d1f30b 2892 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2893 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2894 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2895 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2896 ip6_template_metrics, true);
8ed67789
DL
2897
2898 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2899 sizeof(*net->ipv6.ip6_blk_hole_entry),
2900 GFP_KERNEL);
68fffc67
PZ
2901 if (!net->ipv6.ip6_blk_hole_entry)
2902 goto out_ip6_prohibit_entry;
d8d1f30b 2903 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2904 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2905 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2906 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2907 ip6_template_metrics, true);
8ed67789
DL
2908#endif
2909
b339a47c
PZ
2910 net->ipv6.sysctl.flush_delay = 0;
2911 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2912 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2913 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2914 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2915 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2916 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2917 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2918
cdb18761
DL
2919#ifdef CONFIG_PROC_FS
2920 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2921 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2922#endif
6891a346
BT
2923 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2924
8ed67789
DL
2925 ret = 0;
2926out:
2927 return ret;
f2fc6a54 2928
68fffc67
PZ
2929#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2930out_ip6_prohibit_entry:
2931 kfree(net->ipv6.ip6_prohibit_entry);
2932out_ip6_null_entry:
2933 kfree(net->ipv6.ip6_null_entry);
2934#endif
fc66f95c
ED
2935out_ip6_dst_entries:
2936 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2937out_ip6_dst_ops:
f2fc6a54 2938 goto out;
cdb18761
DL
2939}
2940
2c8c1e72 2941static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2942{
2943#ifdef CONFIG_PROC_FS
2944 proc_net_remove(net, "ipv6_route");
2945 proc_net_remove(net, "rt6_stats");
2946#endif
8ed67789
DL
2947 kfree(net->ipv6.ip6_null_entry);
2948#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2949 kfree(net->ipv6.ip6_prohibit_entry);
2950 kfree(net->ipv6.ip6_blk_hole_entry);
2951#endif
41bb78b4 2952 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2953}
2954
2955static struct pernet_operations ip6_route_net_ops = {
2956 .init = ip6_route_net_init,
2957 .exit = ip6_route_net_exit,
2958};
2959
8ed67789
DL
2960static struct notifier_block ip6_route_dev_notifier = {
2961 .notifier_call = ip6_route_dev_notify,
2962 .priority = 0,
2963};
2964
433d49c3 2965int __init ip6_route_init(void)
1da177e4 2966{
433d49c3
DL
2967 int ret;
2968
9a7ec3a9
DL
2969 ret = -ENOMEM;
2970 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2971 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2972 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2973 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2974 goto out;
14e50e57 2975
fc66f95c 2976 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2977 if (ret)
bdb3289f 2978 goto out_kmem_cache;
bdb3289f 2979
fc66f95c
ED
2980 ret = register_pernet_subsys(&ip6_route_net_ops);
2981 if (ret)
2982 goto out_dst_entries;
2983
5dc121e9
AE
2984 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2985
8ed67789
DL
2986 /* Registering of the loopback is done before this portion of code,
2987 * the loopback reference in rt6_info will not be taken, do it
2988 * manually for init_net */
d8d1f30b 2989 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2990 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2991 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2992 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2993 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2994 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2995 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2996 #endif
433d49c3
DL
2997 ret = fib6_init();
2998 if (ret)
8ed67789 2999 goto out_register_subsys;
433d49c3 3000
433d49c3
DL
3001 ret = xfrm6_init();
3002 if (ret)
cdb18761 3003 goto out_fib6_init;
c35b7e72 3004
433d49c3
DL
3005 ret = fib6_rules_init();
3006 if (ret)
3007 goto xfrm6_init;
7e5449c2 3008
433d49c3 3009 ret = -ENOBUFS;
c7ac8679
GR
3010 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3011 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3012 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 3013 goto fib6_rules_init;
c127ea2c 3014
8ed67789 3015 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
3016 if (ret)
3017 goto fib6_rules_init;
8ed67789 3018
433d49c3
DL
3019out:
3020 return ret;
3021
3022fib6_rules_init:
433d49c3
DL
3023 fib6_rules_cleanup();
3024xfrm6_init:
433d49c3 3025 xfrm6_fini();
433d49c3 3026out_fib6_init:
433d49c3 3027 fib6_gc_cleanup();
8ed67789
DL
3028out_register_subsys:
3029 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
3030out_dst_entries:
3031 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3032out_kmem_cache:
f2fc6a54 3033 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3034 goto out;
1da177e4
LT
3035}
3036
3037void ip6_route_cleanup(void)
3038{
8ed67789 3039 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 3040 fib6_rules_cleanup();
1da177e4 3041 xfrm6_fini();
1da177e4 3042 fib6_gc_cleanup();
8ed67789 3043 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3044 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3045 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3046}