ipv4: fix checkpatch errors
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
/* Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable; otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4 28#include <linux/errno.h>
bc3b2d7f 29#include <linux/export.h>
1da177e4
LT
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
7bc570c8 38#include <linux/mroute6.h>
1da177e4 39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
5b7c931d 43#include <linux/nsproxy.h>
5a0e3ad6 44#include <linux/slab.h>
457c4cbc 45#include <net/net_namespace.h>
1da177e4
LT
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
8d71740c 56#include <net/netevent.h>
21713ebc 57#include <net/netlink.h>
1da177e4
LT
58
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
21efcfa0
ED
65static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
66 const struct in6_addr *dest);
1da177e4 67static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 68static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 69static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
70static struct dst_entry *ip6_negative_advice(struct dst_entry *);
71static void ip6_dst_destroy(struct dst_entry *);
72static void ip6_dst_ifdown(struct dst_entry *,
73 struct net_device *dev, int how);
569d3645 74static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
75
76static int ip6_pkt_discard(struct sk_buff *skb);
77static int ip6_pkt_discard_out(struct sk_buff *skb);
78static void ip6_link_failure(struct sk_buff *skb);
79static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
80
70ceb4f5 81#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 82static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
83 const struct in6_addr *prefix, int prefixlen,
84 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5 85 unsigned pref);
efa2cea0 86static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
87 const struct in6_addr *prefix, int prefixlen,
88 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
89#endif
90
06582540
DM
91static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
92{
93 struct rt6_info *rt = (struct rt6_info *) dst;
94 struct inet_peer *peer;
95 u32 *p = NULL;
96
8e2ec639
YZ
97 if (!(rt->dst.flags & DST_HOST))
98 return NULL;
99
06582540
DM
100 if (!rt->rt6i_peer)
101 rt6_bind_peer(rt, 1);
102
103 peer = rt->rt6i_peer;
104 if (peer) {
105 u32 *old_p = __DST_METRICS_PTR(old);
106 unsigned long prev, new;
107
108 p = peer->metrics;
109 if (inet_metrics_new(peer))
110 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111
112 new = (unsigned long) p;
113 prev = cmpxchg(&dst->_metrics, old, new);
114
115 if (prev != old) {
116 p = __DST_METRICS_PTR(prev);
117 if (prev & DST_METRICS_READ_ONLY)
118 p = NULL;
119 }
120 }
121 return p;
122}
123
39232973
DM
124static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
125{
126 struct in6_addr *p = &rt->rt6i_gateway;
127
a7563f34 128 if (!ipv6_addr_any(p))
39232973
DM
129 return (const void *) p;
130 return daddr;
131}
132
d3aaeb38
DM
133static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
134{
39232973
DM
135 struct rt6_info *rt = (struct rt6_info *) dst;
136 struct neighbour *n;
137
138 daddr = choose_neigh_daddr(rt, daddr);
139 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
140 if (n)
141 return n;
142 return neigh_create(&nd_tbl, daddr, dst->dev);
143}
144
8ade06c6 145static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 146{
8ade06c6
DM
147 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
148 if (!n) {
149 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
150 if (IS_ERR(n))
151 return PTR_ERR(n);
152 }
f83c7790
DM
153 dst_set_neighbour(&rt->dst, n);
154
155 return 0;
d3aaeb38
DM
156}
157
9a7ec3a9 158static struct dst_ops ip6_dst_ops_template = {
1da177e4 159 .family = AF_INET6,
09640e63 160 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
161 .gc = ip6_dst_gc,
162 .gc_thresh = 1024,
163 .check = ip6_dst_check,
0dbaee3b 164 .default_advmss = ip6_default_advmss,
ebb762f2 165 .mtu = ip6_mtu,
06582540 166 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
167 .destroy = ip6_dst_destroy,
168 .ifdown = ip6_dst_ifdown,
169 .negative_advice = ip6_negative_advice,
170 .link_failure = ip6_link_failure,
171 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 172 .local_out = __ip6_local_out,
d3aaeb38 173 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
174};
175
ebb762f2 176static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 177{
618f9bc7
SK
178 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
179
180 return mtu ? : dst->dev->mtu;
ec831ea7
RD
181}
182
14e50e57
DM
183static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
184{
185}
186
0972ddb2
HB
187static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
188 unsigned long old)
189{
190 return NULL;
191}
192
14e50e57
DM
193static struct dst_ops ip6_dst_blackhole_ops = {
194 .family = AF_INET6,
09640e63 195 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
196 .destroy = ip6_dst_destroy,
197 .check = ip6_dst_check,
ebb762f2 198 .mtu = ip6_blackhole_mtu,
214f45c9 199 .default_advmss = ip6_default_advmss,
14e50e57 200 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 201 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 202 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
203};
204
62fa8a84
DM
205static const u32 ip6_template_metrics[RTAX_MAX] = {
206 [RTAX_HOPLIMIT - 1] = 255,
207};
208
bdb3289f 209static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
210 .dst = {
211 .__refcnt = ATOMIC_INIT(1),
212 .__use = 1,
213 .obsolete = -1,
214 .error = -ENETUNREACH,
d8d1f30b
CG
215 .input = ip6_pkt_discard,
216 .output = ip6_pkt_discard_out,
1da177e4
LT
217 },
218 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 219 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
220 .rt6i_metric = ~(u32) 0,
221 .rt6i_ref = ATOMIC_INIT(1),
222};
223
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route entry: a reject route whose dst
 * reports -EACCES and hands packets to the ip6_pkt_prohibit pair.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32)0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route entry: a reject route whose dst
 * reports -EINVAL and hands packets to dst_discard in both directions.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32)0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
260
1da177e4 261/* allocate dst with ip6_dst_ops */
5c1e6aa3 262static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
957c665f
DM
263 struct net_device *dev,
264 int flags)
1da177e4 265{
957c665f 266 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
cf911662 267
38308473 268 if (rt)
fbe58186 269 memset(&rt->rt6i_table, 0,
38308473 270 sizeof(*rt) - sizeof(struct dst_entry));
cf911662
DM
271
272 return rt;
1da177e4
LT
273}
274
275static void ip6_dst_destroy(struct dst_entry *dst)
276{
277 struct rt6_info *rt = (struct rt6_info *)dst;
278 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 279 struct inet_peer *peer = rt->rt6i_peer;
1da177e4 280
8e2ec639
YZ
281 if (!(rt->dst.flags & DST_HOST))
282 dst_destroy_metrics_generic(dst);
283
38308473 284 if (idev) {
1da177e4
LT
285 rt->rt6i_idev = NULL;
286 in6_dev_put(idev);
1ab1457c 287 }
b3419363 288 if (peer) {
b3419363
DM
289 rt->rt6i_peer = NULL;
290 inet_putpeer(peer);
291 }
292}
293
6431cbc2
DM
294static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
295
296static u32 rt6_peer_genid(void)
297{
298 return atomic_read(&__rt6_peer_genid);
299}
300
b3419363
DM
301void rt6_bind_peer(struct rt6_info *rt, int create)
302{
303 struct inet_peer *peer;
304
b3419363
DM
305 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
306 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
307 inet_putpeer(peer);
6431cbc2
DM
308 else
309 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
310}
311
312static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
313 int how)
314{
315 struct rt6_info *rt = (struct rt6_info *)dst;
316 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 317 struct net_device *loopback_dev =
c346dca1 318 dev_net(dev)->loopback_dev;
1da177e4 319
38308473 320 if (dev != loopback_dev && idev && idev->dev == dev) {
5a3e55d6
DL
321 struct inet6_dev *loopback_idev =
322 in6_dev_get(loopback_dev);
38308473 323 if (loopback_idev) {
1da177e4
LT
324 rt->rt6i_idev = loopback_idev;
325 in6_dev_put(idev);
326 }
327 }
328}
329
330static __inline__ int rt6_check_expired(const struct rt6_info *rt)
331{
a02cec21 332 return (rt->rt6i_flags & RTF_EXPIRES) &&
d1918542 333 time_after(jiffies, rt->dst.expires);
1da177e4
LT
334}
335
b71d1d42 336static inline int rt6_need_strict(const struct in6_addr *daddr)
c71099ac 337{
a02cec21
ED
338 return ipv6_addr_type(daddr) &
339 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
340}
341
1da177e4 342/*
c71099ac 343 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
344 */
345
8ed67789
DL
346static inline struct rt6_info *rt6_device_match(struct net *net,
347 struct rt6_info *rt,
b71d1d42 348 const struct in6_addr *saddr,
1da177e4 349 int oif,
d420895e 350 int flags)
1da177e4
LT
351{
352 struct rt6_info *local = NULL;
353 struct rt6_info *sprt;
354
dd3abc4e
YH
355 if (!oif && ipv6_addr_any(saddr))
356 goto out;
357
d8d1f30b 358 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 359 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
360
361 if (oif) {
1da177e4
LT
362 if (dev->ifindex == oif)
363 return sprt;
364 if (dev->flags & IFF_LOOPBACK) {
38308473 365 if (!sprt->rt6i_idev ||
1da177e4 366 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 367 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 368 continue;
1ab1457c 369 if (local && (!oif ||
1da177e4
LT
370 local->rt6i_idev->dev->ifindex == oif))
371 continue;
372 }
373 local = sprt;
374 }
dd3abc4e
YH
375 } else {
376 if (ipv6_chk_addr(net, saddr, dev,
377 flags & RT6_LOOKUP_F_IFACE))
378 return sprt;
1da177e4 379 }
dd3abc4e 380 }
1da177e4 381
dd3abc4e 382 if (oif) {
1da177e4
LT
383 if (local)
384 return local;
385
d420895e 386 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 387 return net->ipv6.ip6_null_entry;
1da177e4 388 }
dd3abc4e 389out:
1da177e4
LT
390 return rt;
391}
392
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation towards the
 * neighbour of @rt when that neighbour is not in a NUD_VALID state.
 *
 * Router Reachability Probes MUST be rate-limited; here a probe is only
 * sent when more than rtr_probe_interval has elapsed since
 * neigh->updated, and neigh->updated is refreshed before sending.
 *
 * neigh->updated is modified, so the writer side of neigh->lock is taken
 * (the reader side does not exclude other readers running this same
 * path concurrently).  The lock is released before ndisc_send_ns().
 *
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	write_lock_bh(&neigh->lock);
	/* Re-check under the lock: the state may have changed meanwhile. */
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
432
1da177e4 433/*
554cfb7e 434 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 435 */
b6f99a21 436static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 437{
d1918542 438 struct net_device *dev = rt->dst.dev;
161980f4 439 if (!oif || dev->ifindex == oif)
554cfb7e 440 return 2;
161980f4
DM
441 if ((dev->flags & IFF_LOOPBACK) &&
442 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
443 return 1;
444 return 0;
554cfb7e 445}
1da177e4 446
b6f99a21 447static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 448{
f2c31e32 449 struct neighbour *neigh;
398bcbeb 450 int m;
f2c31e32
ED
451
452 rcu_read_lock();
27217455 453 neigh = dst_get_neighbour_noref(&rt->dst);
4d0c5911
YH
454 if (rt->rt6i_flags & RTF_NONEXTHOP ||
455 !(rt->rt6i_flags & RTF_GATEWAY))
456 m = 1;
457 else if (neigh) {
554cfb7e
YH
458 read_lock_bh(&neigh->lock);
459 if (neigh->nud_state & NUD_VALID)
4d0c5911 460 m = 2;
398bcbeb
YH
461#ifdef CONFIG_IPV6_ROUTER_PREF
462 else if (neigh->nud_state & NUD_FAILED)
463 m = 0;
464#endif
465 else
ea73ee23 466 m = 1;
554cfb7e 467 read_unlock_bh(&neigh->lock);
398bcbeb
YH
468 } else
469 m = 0;
f2c31e32 470 rcu_read_unlock();
554cfb7e 471 return m;
1da177e4
LT
472}
473
554cfb7e
YH
474static int rt6_score_route(struct rt6_info *rt, int oif,
475 int strict)
1da177e4 476{
4d0c5911 477 int m, n;
1ab1457c 478
4d0c5911 479 m = rt6_check_dev(rt, oif);
77d16f45 480 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 481 return -1;
ebacaaa0
YH
482#ifdef CONFIG_IPV6_ROUTER_PREF
483 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
484#endif
4d0c5911 485 n = rt6_check_neigh(rt);
557e92ef 486 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
487 return -1;
488 return m;
489}
490
f11e6659
DM
491static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
492 int *mpri, struct rt6_info *match)
554cfb7e 493{
f11e6659
DM
494 int m;
495
496 if (rt6_check_expired(rt))
497 goto out;
498
499 m = rt6_score_route(rt, oif, strict);
500 if (m < 0)
501 goto out;
502
503 if (m > *mpri) {
504 if (strict & RT6_LOOKUP_F_REACHABLE)
505 rt6_probe(match);
506 *mpri = m;
507 match = rt;
508 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
509 rt6_probe(rt);
510 }
511
512out:
513 return match;
514}
515
516static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
517 struct rt6_info *rr_head,
518 u32 metric, int oif, int strict)
519{
520 struct rt6_info *rt, *match;
554cfb7e 521 int mpri = -1;
1da177e4 522
f11e6659
DM
523 match = NULL;
524 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 525 rt = rt->dst.rt6_next)
f11e6659
DM
526 match = find_match(rt, oif, strict, &mpri, match);
527 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 528 rt = rt->dst.rt6_next)
f11e6659 529 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 530
f11e6659
DM
531 return match;
532}
1da177e4 533
f11e6659
DM
534static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
535{
536 struct rt6_info *match, *rt0;
8ed67789 537 struct net *net;
1da177e4 538
f11e6659
DM
539 rt0 = fn->rr_ptr;
540 if (!rt0)
541 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 542
f11e6659 543 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 544
554cfb7e 545 if (!match &&
f11e6659 546 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 547 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 548
554cfb7e 549 /* no entries matched; do round-robin */
f11e6659
DM
550 if (!next || next->rt6i_metric != rt0->rt6i_metric)
551 next = fn->leaf;
552
553 if (next != rt0)
554 fn->rr_ptr = next;
1da177e4 555 }
1da177e4 556
d1918542 557 net = dev_net(rt0->dst.dev);
a02cec21 558 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
559}
560
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev from @gwaddr.
 *
 * @opt: start of the option, interpreted as struct route_info
 * @len: number of bytes available at @opt
 *
 * After validating the option, the matching route-info route is looked
 * up; a zero lifetime deletes it, a non-zero lifetime creates it when
 * missing or refreshes its preference, and the expiry is set from the
 * lifetime.  Returns 0, or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *)opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* The option carries the full 128-bit prefix. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->dst.expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
634
/*
 * BACKTRACK() - climb the fib6 tree when the lookup ended on the null
 * entry.
 *
 * Relies on the caller's scope: variables "rt" and "fn", and labels
 * "out" and "restart".  While "rt" is the null entry it moves "fn" to
 * its parent (or into the parent's subtree, looked up by @saddr, when
 * the parent has one that is not "fn" itself).  It jumps to "out" on
 * reaching a RTN_TL_ROOT node and to "restart" on reaching a node that
 * carries RTN_RTINFO.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *up;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			up = fn->parent;				\
			if (FIB6_SUBTREE(up) && FIB6_SUBTREE(up) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(up),	\
						 NULL, saddr);		\
			else						\
				fn = up;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 652
8ed67789
DL
653static struct rt6_info *ip6_pol_route_lookup(struct net *net,
654 struct fib6_table *table,
4c9483b2 655 struct flowi6 *fl6, int flags)
1da177e4
LT
656{
657 struct fib6_node *fn;
658 struct rt6_info *rt;
659
c71099ac 660 read_lock_bh(&table->tb6_lock);
4c9483b2 661 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
662restart:
663 rt = fn->leaf;
4c9483b2
DM
664 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
665 BACKTRACK(net, &fl6->saddr);
c71099ac 666out:
d8d1f30b 667 dst_use(&rt->dst, jiffies);
c71099ac 668 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
669 return rt;
670
671}
672
ea6e574e
FW
673struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
674 int flags)
675{
676 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
677}
678EXPORT_SYMBOL_GPL(ip6_route_lookup);
679
9acd9f3a
YH
680struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
681 const struct in6_addr *saddr, int oif, int strict)
c71099ac 682{
4c9483b2
DM
683 struct flowi6 fl6 = {
684 .flowi6_oif = oif,
685 .daddr = *daddr,
c71099ac
TG
686 };
687 struct dst_entry *dst;
77d16f45 688 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 689
adaa70bb 690 if (saddr) {
4c9483b2 691 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
692 flags |= RT6_LOOKUP_F_HAS_SADDR;
693 }
694
4c9483b2 695 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
696 if (dst->error == 0)
697 return (struct rt6_info *) dst;
698
699 dst_release(dst);
700
1da177e4
LT
701 return NULL;
702}
703
7159039a
YH
704EXPORT_SYMBOL(rt6_lookup);
705
c71099ac 706/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
707 It takes new route entry, the addition fails by any reason the
708 route is freed. In any case, if caller does not hold it, it may
709 be destroyed.
710 */
711
86872cb5 712static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
713{
714 int err;
c71099ac 715 struct fib6_table *table;
1da177e4 716
c71099ac
TG
717 table = rt->rt6i_table;
718 write_lock_bh(&table->tb6_lock);
86872cb5 719 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 720 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
721
722 return err;
723}
724
40e22e8f
TG
725int ip6_ins_rt(struct rt6_info *rt)
726{
4d1169c1 727 struct nl_info info = {
d1918542 728 .nl_net = dev_net(rt->dst.dev),
4d1169c1 729 };
528c4ceb 730 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
731}
732
21efcfa0
ED
733static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
734 const struct in6_addr *daddr,
b71d1d42 735 const struct in6_addr *saddr)
1da177e4 736{
1da177e4
LT
737 struct rt6_info *rt;
738
739 /*
740 * Clone the route.
741 */
742
21efcfa0 743 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
744
745 if (rt) {
14deae41
DM
746 int attempts = !in_softirq();
747
38308473 748 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 749 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 750 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 751 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 752 rt->rt6i_gateway = *daddr;
58c4fb86 753 }
1da177e4 754
1da177e4 755 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
756
757#ifdef CONFIG_IPV6_SUBTREES
758 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 759 rt->rt6i_src.addr = *saddr;
1da177e4
LT
760 rt->rt6i_src.plen = 128;
761 }
762#endif
763
14deae41 764 retry:
8ade06c6 765 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 766 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
767 int saved_rt_min_interval =
768 net->ipv6.sysctl.ip6_rt_gc_min_interval;
769 int saved_rt_elasticity =
770 net->ipv6.sysctl.ip6_rt_gc_elasticity;
771
772 if (attempts-- > 0) {
773 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
774 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
775
86393e52 776 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
777
778 net->ipv6.sysctl.ip6_rt_gc_elasticity =
779 saved_rt_elasticity;
780 net->ipv6.sysctl.ip6_rt_gc_min_interval =
781 saved_rt_min_interval;
782 goto retry;
783 }
784
785 if (net_ratelimit())
786 printk(KERN_WARNING
7e1b33e5 787 "ipv6: Neighbour table overflow.\n");
d8d1f30b 788 dst_free(&rt->dst);
14deae41
DM
789 return NULL;
790 }
95a9a5ba 791 }
1da177e4 792
95a9a5ba
YH
793 return rt;
794}
1da177e4 795
21efcfa0
ED
796static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
797 const struct in6_addr *daddr)
299d9939 798{
21efcfa0
ED
799 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
800
299d9939 801 if (rt) {
299d9939 802 rt->rt6i_flags |= RTF_CACHE;
27217455 803 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
299d9939
YH
804 }
805 return rt;
806}
807
8ed67789 808static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 809 struct flowi6 *fl6, int flags)
1da177e4
LT
810{
811 struct fib6_node *fn;
519fbd87 812 struct rt6_info *rt, *nrt;
c71099ac 813 int strict = 0;
1da177e4 814 int attempts = 3;
519fbd87 815 int err;
53b7997f 816 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 817
77d16f45 818 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
819
820relookup:
c71099ac 821 read_lock_bh(&table->tb6_lock);
1da177e4 822
8238dd06 823restart_2:
4c9483b2 824 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
825
826restart:
4acad72d 827 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 828
4c9483b2 829 BACKTRACK(net, &fl6->saddr);
8ed67789 830 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 831 rt->rt6i_flags & RTF_CACHE)
1ddef044 832 goto out;
1da177e4 833
d8d1f30b 834 dst_hold(&rt->dst);
c71099ac 835 read_unlock_bh(&table->tb6_lock);
fb9de91e 836
27217455 837 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 838 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 839 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 840 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
841 else
842 goto out2;
e40cf353 843
d8d1f30b 844 dst_release(&rt->dst);
8ed67789 845 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 846
d8d1f30b 847 dst_hold(&rt->dst);
519fbd87 848 if (nrt) {
40e22e8f 849 err = ip6_ins_rt(nrt);
519fbd87 850 if (!err)
1da177e4 851 goto out2;
1da177e4 852 }
1da177e4 853
519fbd87
YH
854 if (--attempts <= 0)
855 goto out2;
856
857 /*
c71099ac 858 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
859 * released someone could insert this route. Relookup.
860 */
d8d1f30b 861 dst_release(&rt->dst);
519fbd87
YH
862 goto relookup;
863
864out:
8238dd06
YH
865 if (reachable) {
866 reachable = 0;
867 goto restart_2;
868 }
d8d1f30b 869 dst_hold(&rt->dst);
c71099ac 870 read_unlock_bh(&table->tb6_lock);
1da177e4 871out2:
d8d1f30b
CG
872 rt->dst.lastuse = jiffies;
873 rt->dst.__use++;
c71099ac
TG
874
875 return rt;
1da177e4
LT
876}
877
8ed67789 878static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 879 struct flowi6 *fl6, int flags)
4acad72d 880{
4c9483b2 881 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
882}
883
72331bc0
SL
884static struct dst_entry *ip6_route_input_lookup(struct net *net,
885 struct net_device *dev,
886 struct flowi6 *fl6, int flags)
887{
888 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
889 flags |= RT6_LOOKUP_F_IFACE;
890
891 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
892}
893
c71099ac
TG
894void ip6_route_input(struct sk_buff *skb)
895{
b71d1d42 896 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 897 struct net *net = dev_net(skb->dev);
adaa70bb 898 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
899 struct flowi6 fl6 = {
900 .flowi6_iif = skb->dev->ifindex,
901 .daddr = iph->daddr,
902 .saddr = iph->saddr,
38308473 903 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
904 .flowi6_mark = skb->mark,
905 .flowi6_proto = iph->nexthdr,
c71099ac 906 };
adaa70bb 907
72331bc0 908 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
909}
910
8ed67789 911static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 912 struct flowi6 *fl6, int flags)
1da177e4 913{
4c9483b2 914 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
915}
916
9c7a4f9c 917struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 918 struct flowi6 *fl6)
c71099ac
TG
919{
920 int flags = 0;
921
4c9483b2 922 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 923 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 924
4c9483b2 925 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 926 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
927 else if (sk)
928 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 929
4c9483b2 930 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
931}
932
7159039a 933EXPORT_SYMBOL(ip6_route_output);
1da177e4 934
2774c131 935struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 936{
5c1e6aa3 937 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
938 struct dst_entry *new = NULL;
939
5c1e6aa3 940 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 941 if (rt) {
cf911662
DM
942 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
943
d8d1f30b 944 new = &rt->dst;
14e50e57 945
14e50e57 946 new->__use = 1;
352e512c
HX
947 new->input = dst_discard;
948 new->output = dst_discard;
14e50e57 949
21efcfa0
ED
950 if (dst_metrics_read_only(&ort->dst))
951 new->_metrics = ort->dst._metrics;
952 else
953 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
954 rt->rt6i_idev = ort->rt6i_idev;
955 if (rt->rt6i_idev)
956 in6_dev_hold(rt->rt6i_idev);
d1918542 957 rt->dst.expires = 0;
14e50e57 958
4e3fd7a0 959 rt->rt6i_gateway = ort->rt6i_gateway;
14e50e57
DM
960 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
961 rt->rt6i_metric = 0;
962
963 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
964#ifdef CONFIG_IPV6_SUBTREES
965 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
966#endif
967
968 dst_free(new);
969 }
970
69ead7af
DM
971 dst_release(dst_orig);
972 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 973}
14e50e57 974
1da177e4
LT
975/*
976 * Destination cache support functions
977 */
978
979static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
980{
981 struct rt6_info *rt;
982
983 rt = (struct rt6_info *) dst;
984
6431cbc2
DM
985 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
986 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
987 if (!rt->rt6i_peer)
988 rt6_bind_peer(rt, 0);
989 rt->rt6i_peer_genid = rt6_peer_genid();
990 }
1da177e4 991 return dst;
6431cbc2 992 }
1da177e4
LT
993 return NULL;
994}
995
996static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
997{
998 struct rt6_info *rt = (struct rt6_info *) dst;
999
1000 if (rt) {
54c1a859
YH
1001 if (rt->rt6i_flags & RTF_CACHE) {
1002 if (rt6_check_expired(rt)) {
1003 ip6_del_rt(rt);
1004 dst = NULL;
1005 }
1006 } else {
1da177e4 1007 dst_release(dst);
54c1a859
YH
1008 dst = NULL;
1009 }
1da177e4 1010 }
54c1a859 1011 return dst;
1da177e4
LT
1012}
1013
1014static void ip6_link_failure(struct sk_buff *skb)
1015{
1016 struct rt6_info *rt;
1017
3ffe533c 1018 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1019
adf30907 1020 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1021 if (rt) {
38308473 1022 if (rt->rt6i_flags & RTF_CACHE) {
d8d1f30b 1023 dst_set_expires(&rt->dst, 0);
1da177e4
LT
1024 rt->rt6i_flags |= RTF_EXPIRES;
1025 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1026 rt->rt6i_node->fn_sernum = -1;
1027 }
1028}
1029
1030static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1031{
1032 struct rt6_info *rt6 = (struct rt6_info*)dst;
1033
1034 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1035 rt6->rt6i_flags |= RTF_MODIFIED;
1036 if (mtu < IPV6_MIN_MTU) {
defb3519 1037 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1038 mtu = IPV6_MIN_MTU;
defb3519
DM
1039 features |= RTAX_FEATURE_ALLFRAG;
1040 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1041 }
defb3519 1042 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
1043 }
1044}
1045
0dbaee3b 1046static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1047{
0dbaee3b
DM
1048 struct net_device *dev = dst->dev;
1049 unsigned int mtu = dst_mtu(dst);
1050 struct net *net = dev_net(dev);
1051
1da177e4
LT
1052 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1053
5578689a
DL
1054 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1055 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1056
1057 /*
1ab1457c
YH
1058 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1059 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1060 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1061 * rely only on pmtu discovery"
1062 */
1063 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1064 mtu = IPV6_MAXPLEN;
1065 return mtu;
1066}
1067
ebb762f2 1068static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1069{
d33e4553 1070 struct inet6_dev *idev;
618f9bc7
SK
1071 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1072
1073 if (mtu)
1074 return mtu;
1075
1076 mtu = IPV6_MIN_MTU;
d33e4553
DM
1077
1078 rcu_read_lock();
1079 idev = __in6_dev_get(dst->dev);
1080 if (idev)
1081 mtu = idev->cnf.mtu6;
1082 rcu_read_unlock();
1083
1084 return mtu;
1085}
1086
3b00944c
YH
1087static struct dst_entry *icmp6_dst_gc_list;
1088static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1089
3b00944c 1090struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1091 struct neighbour *neigh,
87a11578 1092 struct flowi6 *fl6)
1da177e4 1093{
87a11578 1094 struct dst_entry *dst;
1da177e4
LT
1095 struct rt6_info *rt;
1096 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1097 struct net *net = dev_net(dev);
1da177e4 1098
38308473 1099 if (unlikely(!idev))
122bdf67 1100 return ERR_PTR(-ENODEV);
1da177e4 1101
957c665f 1102 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
38308473 1103 if (unlikely(!rt)) {
1da177e4 1104 in6_dev_put(idev);
87a11578 1105 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1106 goto out;
1107 }
1108
1da177e4
LT
1109 if (neigh)
1110 neigh_hold(neigh);
14deae41 1111 else {
f83c7790 1112 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
b43faac6 1113 if (IS_ERR(neigh)) {
252c3d84 1114 in6_dev_put(idev);
b43faac6
DM
1115 dst_free(&rt->dst);
1116 return ERR_CAST(neigh);
1117 }
14deae41 1118 }
1da177e4 1119
8e2ec639
YZ
1120 rt->dst.flags |= DST_HOST;
1121 rt->dst.output = ip6_output;
69cce1d1 1122 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1123 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1124 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1125 rt->rt6i_dst.plen = 128;
1126 rt->rt6i_idev = idev;
7011687f 1127 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1128
3b00944c 1129 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1130 rt->dst.next = icmp6_dst_gc_list;
1131 icmp6_dst_gc_list = &rt->dst;
3b00944c 1132 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1133
5578689a 1134 fib6_force_start_gc(net);
1da177e4 1135
87a11578
DM
1136 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1137
1da177e4 1138out:
87a11578 1139 return dst;
1da177e4
LT
1140}
1141
3d0f24a7 1142int icmp6_dst_gc(void)
1da177e4 1143{
e9476e95 1144 struct dst_entry *dst, **pprev;
3d0f24a7 1145 int more = 0;
1da177e4 1146
3b00944c
YH
1147 spin_lock_bh(&icmp6_dst_lock);
1148 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1149
1da177e4
LT
1150 while ((dst = *pprev) != NULL) {
1151 if (!atomic_read(&dst->__refcnt)) {
1152 *pprev = dst->next;
1153 dst_free(dst);
1da177e4
LT
1154 } else {
1155 pprev = &dst->next;
3d0f24a7 1156 ++more;
1da177e4
LT
1157 }
1158 }
1159
3b00944c 1160 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1161
3d0f24a7 1162 return more;
1da177e4
LT
1163}
1164
1e493d19
DM
1165static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1166 void *arg)
1167{
1168 struct dst_entry *dst, **pprev;
1169
1170 spin_lock_bh(&icmp6_dst_lock);
1171 pprev = &icmp6_dst_gc_list;
1172 while ((dst = *pprev) != NULL) {
1173 struct rt6_info *rt = (struct rt6_info *) dst;
1174 if (func(rt, arg)) {
1175 *pprev = dst->next;
1176 dst_free(dst);
1177 } else {
1178 pprev = &dst->next;
1179 }
1180 }
1181 spin_unlock_bh(&icmp6_dst_lock);
1182}
1183
569d3645 1184static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1185{
1da177e4 1186 unsigned long now = jiffies;
86393e52 1187 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1188 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1189 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1190 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1191 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1192 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1193 int entries;
7019b78e 1194
fc66f95c 1195 entries = dst_entries_get_fast(ops);
7019b78e 1196 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1197 entries <= rt_max_size)
1da177e4
LT
1198 goto out;
1199
6891a346
BT
1200 net->ipv6.ip6_rt_gc_expire++;
1201 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1202 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1203 entries = dst_entries_get_slow(ops);
1204 if (entries < ops->gc_thresh)
7019b78e 1205 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1206out:
7019b78e 1207 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1208 return entries > rt_max_size;
1da177e4
LT
1209}
1210
1211/* Clean host part of a prefix. Not necessary in radix tree,
1212 but results in cleaner routing tables.
1213
1214 Remove it only when all the things will work!
1215 */
1216
6b75d090 1217int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1218{
5170ae82 1219 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1220 if (hoplimit == 0) {
6b75d090 1221 struct net_device *dev = dst->dev;
c68f24cc
ED
1222 struct inet6_dev *idev;
1223
1224 rcu_read_lock();
1225 idev = __in6_dev_get(dev);
1226 if (idev)
6b75d090 1227 hoplimit = idev->cnf.hop_limit;
c68f24cc 1228 else
53b7997f 1229 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1230 rcu_read_unlock();
1da177e4
LT
1231 }
1232 return hoplimit;
1233}
abbf46ae 1234EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1235
1236/*
1237 *
1238 */
1239
86872cb5 1240int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1241{
1242 int err;
5578689a 1243 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1244 struct rt6_info *rt = NULL;
1245 struct net_device *dev = NULL;
1246 struct inet6_dev *idev = NULL;
c71099ac 1247 struct fib6_table *table;
1da177e4
LT
1248 int addr_type;
1249
86872cb5 1250 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1251 return -EINVAL;
1252#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1253 if (cfg->fc_src_len)
1da177e4
LT
1254 return -EINVAL;
1255#endif
86872cb5 1256 if (cfg->fc_ifindex) {
1da177e4 1257 err = -ENODEV;
5578689a 1258 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1259 if (!dev)
1260 goto out;
1261 idev = in6_dev_get(dev);
1262 if (!idev)
1263 goto out;
1264 }
1265
86872cb5
TG
1266 if (cfg->fc_metric == 0)
1267 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1268
d71314b4 1269 err = -ENOBUFS;
38308473
DM
1270 if (cfg->fc_nlinfo.nlh &&
1271 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1272 table = fib6_get_table(net, cfg->fc_table);
38308473 1273 if (!table) {
d71314b4
MV
1274 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1275 table = fib6_new_table(net, cfg->fc_table);
1276 }
1277 } else {
1278 table = fib6_new_table(net, cfg->fc_table);
1279 }
38308473
DM
1280
1281 if (!table)
c71099ac 1282 goto out;
c71099ac 1283
957c665f 1284 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1da177e4 1285
38308473 1286 if (!rt) {
1da177e4
LT
1287 err = -ENOMEM;
1288 goto out;
1289 }
1290
d8d1f30b 1291 rt->dst.obsolete = -1;
d1918542 1292 rt->dst.expires = (cfg->fc_flags & RTF_EXPIRES) ?
6f704992
YH
1293 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1294 0;
1da177e4 1295
86872cb5
TG
1296 if (cfg->fc_protocol == RTPROT_UNSPEC)
1297 cfg->fc_protocol = RTPROT_BOOT;
1298 rt->rt6i_protocol = cfg->fc_protocol;
1299
1300 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1301
1302 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1303 rt->dst.input = ip6_mc_input;
ab79ad14
1304 else if (cfg->fc_flags & RTF_LOCAL)
1305 rt->dst.input = ip6_input;
1da177e4 1306 else
d8d1f30b 1307 rt->dst.input = ip6_forward;
1da177e4 1308
d8d1f30b 1309 rt->dst.output = ip6_output;
1da177e4 1310
86872cb5
TG
1311 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1312 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1313 if (rt->rt6i_dst.plen == 128)
11d53b49 1314 rt->dst.flags |= DST_HOST;
1da177e4 1315
8e2ec639
YZ
1316 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1317 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1318 if (!metrics) {
1319 err = -ENOMEM;
1320 goto out;
1321 }
1322 dst_init_metrics(&rt->dst, metrics, 0);
1323 }
1da177e4 1324#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1325 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1326 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1327#endif
1328
86872cb5 1329 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1330
1331 /* We cannot add true routes via loopback here,
1332 they would result in kernel looping; promote them to reject routes
1333 */
86872cb5 1334 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1335 (dev && (dev->flags & IFF_LOOPBACK) &&
1336 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1337 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1338 /* hold loopback dev/idev if we haven't done so. */
5578689a 1339 if (dev != net->loopback_dev) {
1da177e4
LT
1340 if (dev) {
1341 dev_put(dev);
1342 in6_dev_put(idev);
1343 }
5578689a 1344 dev = net->loopback_dev;
1da177e4
LT
1345 dev_hold(dev);
1346 idev = in6_dev_get(dev);
1347 if (!idev) {
1348 err = -ENODEV;
1349 goto out;
1350 }
1351 }
d8d1f30b
CG
1352 rt->dst.output = ip6_pkt_discard_out;
1353 rt->dst.input = ip6_pkt_discard;
1354 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1355 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1356 goto install_route;
1357 }
1358
86872cb5 1359 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1360 const struct in6_addr *gw_addr;
1da177e4
LT
1361 int gwa_type;
1362
86872cb5 1363 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1364 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1365 gwa_type = ipv6_addr_type(gw_addr);
1366
1367 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1368 struct rt6_info *grt;
1369
1370 /* IPv6 strictly inhibits using not link-local
1371 addresses as nexthop address.
1372 Otherwise, router will not able to send redirects.
1373 It is very good, but in some (rare!) circumstances
1374 (SIT, PtP, NBMA NOARP links) it is handy to allow
1375 some exceptions. --ANK
1376 */
1377 err = -EINVAL;
38308473 1378 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1379 goto out;
1380
5578689a 1381 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1382
1383 err = -EHOSTUNREACH;
38308473 1384 if (!grt)
1da177e4
LT
1385 goto out;
1386 if (dev) {
d1918542 1387 if (dev != grt->dst.dev) {
d8d1f30b 1388 dst_release(&grt->dst);
1da177e4
LT
1389 goto out;
1390 }
1391 } else {
d1918542 1392 dev = grt->dst.dev;
1da177e4
LT
1393 idev = grt->rt6i_idev;
1394 dev_hold(dev);
1395 in6_dev_hold(grt->rt6i_idev);
1396 }
38308473 1397 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1398 err = 0;
d8d1f30b 1399 dst_release(&grt->dst);
1da177e4
LT
1400
1401 if (err)
1402 goto out;
1403 }
1404 err = -EINVAL;
38308473 1405 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1406 goto out;
1407 }
1408
1409 err = -ENODEV;
38308473 1410 if (!dev)
1da177e4
LT
1411 goto out;
1412
c3968a85
DW
1413 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1414 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1415 err = -EINVAL;
1416 goto out;
1417 }
4e3fd7a0 1418 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1419 rt->rt6i_prefsrc.plen = 128;
1420 } else
1421 rt->rt6i_prefsrc.plen = 0;
1422
86872cb5 1423 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1424 err = rt6_bind_neighbour(rt, dev);
f83c7790 1425 if (err)
1da177e4 1426 goto out;
1da177e4
LT
1427 }
1428
86872cb5 1429 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1430
1431install_route:
86872cb5
TG
1432 if (cfg->fc_mx) {
1433 struct nlattr *nla;
1434 int remaining;
1435
1436 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1437 int type = nla_type(nla);
86872cb5
TG
1438
1439 if (type) {
1440 if (type > RTAX_MAX) {
1da177e4
LT
1441 err = -EINVAL;
1442 goto out;
1443 }
86872cb5 1444
defb3519 1445 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1446 }
1da177e4
LT
1447 }
1448 }
1449
d8d1f30b 1450 rt->dst.dev = dev;
1da177e4 1451 rt->rt6i_idev = idev;
c71099ac 1452 rt->rt6i_table = table;
63152fc0 1453
c346dca1 1454 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1455
86872cb5 1456 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1457
1458out:
1459 if (dev)
1460 dev_put(dev);
1461 if (idev)
1462 in6_dev_put(idev);
1463 if (rt)
d8d1f30b 1464 dst_free(&rt->dst);
1da177e4
LT
1465 return err;
1466}
1467
86872cb5 1468static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1469{
1470 int err;
c71099ac 1471 struct fib6_table *table;
d1918542 1472 struct net *net = dev_net(rt->dst.dev);
1da177e4 1473
8ed67789 1474 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1475 return -ENOENT;
1476
c71099ac
TG
1477 table = rt->rt6i_table;
1478 write_lock_bh(&table->tb6_lock);
1da177e4 1479
86872cb5 1480 err = fib6_del(rt, info);
d8d1f30b 1481 dst_release(&rt->dst);
1da177e4 1482
c71099ac 1483 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1484
1485 return err;
1486}
1487
e0a1ad73
TG
1488int ip6_del_rt(struct rt6_info *rt)
1489{
4d1169c1 1490 struct nl_info info = {
d1918542 1491 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1492 };
528c4ceb 1493 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1494}
1495
86872cb5 1496static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1497{
c71099ac 1498 struct fib6_table *table;
1da177e4
LT
1499 struct fib6_node *fn;
1500 struct rt6_info *rt;
1501 int err = -ESRCH;
1502
5578689a 1503 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1504 if (!table)
c71099ac
TG
1505 return err;
1506
1507 read_lock_bh(&table->tb6_lock);
1da177e4 1508
c71099ac 1509 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1510 &cfg->fc_dst, cfg->fc_dst_len,
1511 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1512
1da177e4 1513 if (fn) {
d8d1f30b 1514 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1515 if (cfg->fc_ifindex &&
d1918542
DM
1516 (!rt->dst.dev ||
1517 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1518 continue;
86872cb5
TG
1519 if (cfg->fc_flags & RTF_GATEWAY &&
1520 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1521 continue;
86872cb5 1522 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1523 continue;
d8d1f30b 1524 dst_hold(&rt->dst);
c71099ac 1525 read_unlock_bh(&table->tb6_lock);
1da177e4 1526
86872cb5 1527 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1528 }
1529 }
c71099ac 1530 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1531
1532 return err;
1533}
1534
1535/*
1536 * Handle redirects
1537 */
a6279458 1538struct ip6rd_flowi {
4c9483b2 1539 struct flowi6 fl6;
a6279458
YH
1540 struct in6_addr gateway;
1541};
1542
8ed67789
DL
1543static struct rt6_info *__ip6_route_redirect(struct net *net,
1544 struct fib6_table *table,
4c9483b2 1545 struct flowi6 *fl6,
a6279458 1546 int flags)
1da177e4 1547{
4c9483b2 1548 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1549 struct rt6_info *rt;
e843b9e1 1550 struct fib6_node *fn;
c71099ac 1551
1da177e4 1552 /*
e843b9e1
YH
1553 * Get the "current" route for this destination and
1554 * check if the redirect has come from approriate router.
1555 *
1556 * RFC 2461 specifies that redirects should only be
1557 * accepted if they come from the nexthop to the target.
1558 * Due to the way the routes are chosen, this notion
1559 * is a bit fuzzy and one might need to check all possible
1560 * routes.
1da177e4 1561 */
1da177e4 1562
c71099ac 1563 read_lock_bh(&table->tb6_lock);
4c9483b2 1564 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1565restart:
d8d1f30b 1566 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1567 /*
1568 * Current route is on-link; redirect is always invalid.
1569 *
1570 * Seems, previous statement is not true. It could
1571 * be node, which looks for us as on-link (f.e. proxy ndisc)
1572 * But then router serving it might decide, that we should
1573 * know truth 8)8) --ANK (980726).
1574 */
1575 if (rt6_check_expired(rt))
1576 continue;
1577 if (!(rt->rt6i_flags & RTF_GATEWAY))
1578 continue;
d1918542 1579 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
e843b9e1 1580 continue;
a6279458 1581 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1582 continue;
1583 break;
1584 }
a6279458 1585
cb15d9c2 1586 if (!rt)
8ed67789 1587 rt = net->ipv6.ip6_null_entry;
4c9483b2 1588 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1589out:
d8d1f30b 1590 dst_hold(&rt->dst);
a6279458 1591
c71099ac 1592 read_unlock_bh(&table->tb6_lock);
e843b9e1 1593
a6279458
YH
1594 return rt;
1595};
1596
b71d1d42
ED
1597static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1598 const struct in6_addr *src,
1599 const struct in6_addr *gateway,
a6279458
YH
1600 struct net_device *dev)
1601{
adaa70bb 1602 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1603 struct net *net = dev_net(dev);
a6279458 1604 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1605 .fl6 = {
1606 .flowi6_oif = dev->ifindex,
1607 .daddr = *dest,
1608 .saddr = *src,
a6279458 1609 },
a6279458 1610 };
adaa70bb 1611
4e3fd7a0 1612 rdfl.gateway = *gateway;
86c36ce4 1613
adaa70bb
TG
1614 if (rt6_need_strict(dest))
1615 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1616
4c9483b2 1617 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1618 flags, __ip6_route_redirect);
a6279458
YH
1619}
1620
b71d1d42
ED
1621void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1622 const struct in6_addr *saddr,
a6279458
YH
1623 struct neighbour *neigh, u8 *lladdr, int on_link)
1624{
1625 struct rt6_info *rt, *nrt = NULL;
1626 struct netevent_redirect netevent;
c346dca1 1627 struct net *net = dev_net(neigh->dev);
a6279458
YH
1628
1629 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1630
8ed67789 1631 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1632 if (net_ratelimit())
1633 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1634 "for redirect target\n");
a6279458 1635 goto out;
1da177e4
LT
1636 }
1637
1da177e4
LT
1638 /*
1639 * We have finally decided to accept it.
1640 */
1641
1ab1457c 1642 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1643 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1644 NEIGH_UPDATE_F_OVERRIDE|
1645 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1646 NEIGH_UPDATE_F_ISROUTER))
1647 );
1648
1649 /*
1650 * Redirect received -> path was valid.
1651 * Look, redirects are sent only in response to data packets,
1652 * so that this nexthop apparently is reachable. --ANK
1653 */
d8d1f30b 1654 dst_confirm(&rt->dst);
1da177e4
LT
1655
1656 /* Duplicate redirect: silently ignore. */
27217455 1657 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1da177e4
LT
1658 goto out;
1659
21efcfa0 1660 nrt = ip6_rt_copy(rt, dest);
38308473 1661 if (!nrt)
1da177e4
LT
1662 goto out;
1663
1664 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1665 if (on_link)
1666 nrt->rt6i_flags &= ~RTF_GATEWAY;
1667
4e3fd7a0 1668 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
69cce1d1 1669 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1670
40e22e8f 1671 if (ip6_ins_rt(nrt))
1da177e4
LT
1672 goto out;
1673
d8d1f30b
CG
1674 netevent.old = &rt->dst;
1675 netevent.new = &nrt->dst;
8d71740c
TT
1676 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1677
38308473 1678 if (rt->rt6i_flags & RTF_CACHE) {
e0a1ad73 1679 ip6_del_rt(rt);
1da177e4
LT
1680 return;
1681 }
1682
1683out:
d8d1f30b 1684 dst_release(&rt->dst);
1da177e4
LT
1685}
1686
1687/*
1688 * Handle ICMP "packet too big" messages
1689 * i.e. Path MTU discovery
1690 */
1691
b71d1d42 1692static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1693 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1694{
1695 struct rt6_info *rt, *nrt;
1696 int allfrag = 0;
d3052b55 1697again:
ae878ae2 1698 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
38308473 1699 if (!rt)
1da177e4
LT
1700 return;
1701
d3052b55
AV
1702 if (rt6_check_expired(rt)) {
1703 ip6_del_rt(rt);
1704 goto again;
1705 }
1706
d8d1f30b 1707 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1708 goto out;
1709
1710 if (pmtu < IPV6_MIN_MTU) {
1711 /*
1ab1457c 1712 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1713 * MTU (1280) and a fragment header should always be included
1714 * after a node receiving Too Big message reporting PMTU is
1715 * less than the IPv6 Minimum Link MTU.
1716 */
1717 pmtu = IPV6_MIN_MTU;
1718 allfrag = 1;
1719 }
1720
1721 /* New mtu received -> path was valid.
1722 They are sent only in response to data packets,
1723 so that this nexthop apparently is reachable. --ANK
1724 */
d8d1f30b 1725 dst_confirm(&rt->dst);
1da177e4
LT
1726
1727 /* Host route. If it is static, it would be better
1728 not to override it, but add new one, so that
1729 when cache entry will expire old pmtu
1730 would return automatically.
1731 */
1732 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1733 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1734 if (allfrag) {
1735 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1736 features |= RTAX_FEATURE_ALLFRAG;
1737 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1738 }
d8d1f30b 1739 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1740 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1741 goto out;
1742 }
1743
1744 /* Network route.
1745 Two cases are possible:
1746 1. It is connected route. Action: COW
1747 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1748 */
27217455 1749 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1750 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1751 else
1752 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1753
d5315b50 1754 if (nrt) {
defb3519
DM
1755 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1756 if (allfrag) {
1757 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1758 features |= RTAX_FEATURE_ALLFRAG;
1759 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1760 }
a1e78363
YH
1761
1762 /* According to RFC 1981, detecting PMTU increase shouldn't be
1763 * happened within 5 mins, the recommended timer is 10 mins.
1764 * Here this route expiration time is set to ip6_rt_mtu_expires
1765 * which is 10 mins. After 10 mins the decreased pmtu is expired
1766 * and detecting PMTU increase will be automatically happened.
1767 */
d8d1f30b 1768 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1769 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1770
40e22e8f 1771 ip6_ins_rt(nrt);
1da177e4 1772 }
1da177e4 1773out:
d8d1f30b 1774 dst_release(&rt->dst);
1da177e4
LT
1775}
1776
b71d1d42 1777void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1778 struct net_device *dev, u32 pmtu)
1779{
1780 struct net *net = dev_net(dev);
1781
1782 /*
1783 * RFC 1981 states that a node "MUST reduce the size of the packets it
1784 * is sending along the path" that caused the Packet Too Big message.
1785 * Since it's not possible in the general case to determine which
1786 * interface was used to send the original packet, we update the MTU
1787 * on the interface that will be used to send future packets. We also
1788 * update the MTU on the interface that received the Packet Too Big in
1789 * case the original packet was forced out that interface with
1790 * SO_BINDTODEVICE or similar. This is the next best thing to the
1791 * correct behaviour, which would be to update the MTU on all
1792 * interfaces.
1793 */
1794 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1795 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1796}
1797
1da177e4
LT
1798/*
1799 * Misc support functions
1800 */
1801
21efcfa0
ED
1802static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1803 const struct in6_addr *dest)
1da177e4 1804{
d1918542 1805 struct net *net = dev_net(ort->dst.dev);
5c1e6aa3 1806 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 1807 ort->dst.dev, 0);
1da177e4
LT
1808
1809 if (rt) {
d8d1f30b
CG
1810 rt->dst.input = ort->dst.input;
1811 rt->dst.output = ort->dst.output;
8e2ec639 1812 rt->dst.flags |= DST_HOST;
d8d1f30b 1813
4e3fd7a0 1814 rt->rt6i_dst.addr = *dest;
8e2ec639 1815 rt->rt6i_dst.plen = 128;
defb3519 1816 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1817 rt->dst.error = ort->dst.error;
1da177e4
LT
1818 rt->rt6i_idev = ort->rt6i_idev;
1819 if (rt->rt6i_idev)
1820 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1821 rt->dst.lastuse = jiffies;
d1918542 1822 rt->dst.expires = 0;
1da177e4 1823
4e3fd7a0 1824 rt->rt6i_gateway = ort->rt6i_gateway;
1da177e4
LT
1825 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1826 rt->rt6i_metric = 0;
1827
1da177e4
LT
1828#ifdef CONFIG_IPV6_SUBTREES
1829 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1830#endif
0f6c6392 1831 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1832 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1833 }
1834 return rt;
1835}
1836
70ceb4f5 1837#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1838static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1839 const struct in6_addr *prefix, int prefixlen,
1840 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1841{
1842 struct fib6_node *fn;
1843 struct rt6_info *rt = NULL;
c71099ac
TG
1844 struct fib6_table *table;
1845
efa2cea0 1846 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1847 if (!table)
c71099ac 1848 return NULL;
70ceb4f5 1849
c71099ac
TG
1850 write_lock_bh(&table->tb6_lock);
1851 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1852 if (!fn)
1853 goto out;
1854
d8d1f30b 1855 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1856 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1857 continue;
1858 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1859 continue;
1860 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1861 continue;
d8d1f30b 1862 dst_hold(&rt->dst);
70ceb4f5
YH
1863 break;
1864 }
1865out:
c71099ac 1866 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1867 return rt;
1868}
1869
efa2cea0 1870static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1871 const struct in6_addr *prefix, int prefixlen,
1872 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5
YH
1873 unsigned pref)
1874{
86872cb5
TG
1875 struct fib6_config cfg = {
1876 .fc_table = RT6_TABLE_INFO,
238fc7ea 1877 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1878 .fc_ifindex = ifindex,
1879 .fc_dst_len = prefixlen,
1880 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1881 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1882 .fc_nlinfo.pid = 0,
1883 .fc_nlinfo.nlh = NULL,
1884 .fc_nlinfo.nl_net = net,
86872cb5
TG
1885 };
1886
4e3fd7a0
AD
1887 cfg.fc_dst = *prefix;
1888 cfg.fc_gateway = *gwaddr;
70ceb4f5 1889
e317da96
YH
1890 /* We should treat it as a default route if prefix length is 0. */
1891 if (!prefixlen)
86872cb5 1892 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1893
86872cb5 1894 ip6_route_add(&cfg);
70ceb4f5 1895
efa2cea0 1896 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1897}
1898#endif
1899
b71d1d42 1900struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1901{
1da177e4 1902 struct rt6_info *rt;
c71099ac 1903 struct fib6_table *table;
1da177e4 1904
c346dca1 1905 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1906 if (!table)
c71099ac 1907 return NULL;
1da177e4 1908
c71099ac 1909 write_lock_bh(&table->tb6_lock);
d8d1f30b 1910 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1911 if (dev == rt->dst.dev &&
045927ff 1912 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1913 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1914 break;
1915 }
1916 if (rt)
d8d1f30b 1917 dst_hold(&rt->dst);
c71099ac 1918 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1919 return rt;
1920}
1921
b71d1d42 1922struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1923 struct net_device *dev,
1924 unsigned int pref)
1da177e4 1925{
86872cb5
TG
1926 struct fib6_config cfg = {
1927 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1928 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1929 .fc_ifindex = dev->ifindex,
1930 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1931 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1932 .fc_nlinfo.pid = 0,
1933 .fc_nlinfo.nlh = NULL,
c346dca1 1934 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1935 };
1da177e4 1936
4e3fd7a0 1937 cfg.fc_gateway = *gwaddr;
1da177e4 1938
86872cb5 1939 ip6_route_add(&cfg);
1da177e4 1940
1da177e4
LT
1941 return rt6_get_dflt_router(gwaddr, dev);
1942}
1943
7b4da532 1944void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1945{
1946 struct rt6_info *rt;
c71099ac
TG
1947 struct fib6_table *table;
1948
1949 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1950 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1951 if (!table)
c71099ac 1952 return;
1da177e4
LT
1953
1954restart:
c71099ac 1955 read_lock_bh(&table->tb6_lock);
d8d1f30b 1956 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1957 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1958 dst_hold(&rt->dst);
c71099ac 1959 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1960 ip6_del_rt(rt);
1da177e4
LT
1961 goto restart;
1962 }
1963 }
c71099ac 1964 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1965}
1966
5578689a
DL
1967static void rtmsg_to_fib6_config(struct net *net,
1968 struct in6_rtmsg *rtmsg,
86872cb5
TG
1969 struct fib6_config *cfg)
1970{
1971 memset(cfg, 0, sizeof(*cfg));
1972
1973 cfg->fc_table = RT6_TABLE_MAIN;
1974 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1975 cfg->fc_metric = rtmsg->rtmsg_metric;
1976 cfg->fc_expires = rtmsg->rtmsg_info;
1977 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1978 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1979 cfg->fc_flags = rtmsg->rtmsg_flags;
1980
5578689a 1981 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1982
4e3fd7a0
AD
1983 cfg->fc_dst = rtmsg->rtmsg_dst;
1984 cfg->fc_src = rtmsg->rtmsg_src;
1985 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1986}
1987
5578689a 1988int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1989{
86872cb5 1990 struct fib6_config cfg;
1da177e4
LT
1991 struct in6_rtmsg rtmsg;
1992 int err;
1993
1994 switch(cmd) {
1995 case SIOCADDRT: /* Add a route */
1996 case SIOCDELRT: /* Delete a route */
1997 if (!capable(CAP_NET_ADMIN))
1998 return -EPERM;
1999 err = copy_from_user(&rtmsg, arg,
2000 sizeof(struct in6_rtmsg));
2001 if (err)
2002 return -EFAULT;
86872cb5 2003
5578689a 2004 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2005
1da177e4
LT
2006 rtnl_lock();
2007 switch (cmd) {
2008 case SIOCADDRT:
86872cb5 2009 err = ip6_route_add(&cfg);
1da177e4
LT
2010 break;
2011 case SIOCDELRT:
86872cb5 2012 err = ip6_route_del(&cfg);
1da177e4
LT
2013 break;
2014 default:
2015 err = -EINVAL;
2016 }
2017 rtnl_unlock();
2018
2019 return err;
3ff50b79 2020 }
1da177e4
LT
2021
2022 return -EINVAL;
2023}
2024
2025/*
2026 * Drop the packet on the floor
2027 */
2028
d5fdd6ba 2029static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2030{
612f09e8 2031 int type;
adf30907 2032 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2033 switch (ipstats_mib_noroutes) {
2034 case IPSTATS_MIB_INNOROUTES:
0660e03f 2035 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2036 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2037 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2038 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2039 break;
2040 }
2041 /* FALLTHROUGH */
2042 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2043 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2044 ipstats_mib_noroutes);
612f09e8
YH
2045 break;
2046 }
3ffe533c 2047 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2048 kfree_skb(skb);
2049 return 0;
2050}
2051
9ce8ade0
TG
2052static int ip6_pkt_discard(struct sk_buff *skb)
2053{
612f09e8 2054 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2055}
2056
20380731 2057static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2058{
adf30907 2059 skb->dev = skb_dst(skb)->dev;
612f09e8 2060 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2061}
2062
6723ab54
DM
2063#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2064
9ce8ade0
TG
2065static int ip6_pkt_prohibit(struct sk_buff *skb)
2066{
612f09e8 2067 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2068}
2069
2070static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2071{
adf30907 2072 skb->dev = skb_dst(skb)->dev;
612f09e8 2073 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2074}
2075
6723ab54
DM
2076#endif
2077
1da177e4
LT
2078/*
2079 * Allocate a dst for local (unicast / anycast) address.
2080 */
2081
2082struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2083 const struct in6_addr *addr,
8f031519 2084 bool anycast)
1da177e4 2085{
c346dca1 2086 struct net *net = dev_net(idev->dev);
5c1e6aa3 2087 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 2088 net->loopback_dev, 0);
f83c7790 2089 int err;
1da177e4 2090
38308473 2091 if (!rt) {
40385653
BG
2092 if (net_ratelimit())
2093 pr_warning("IPv6: Maximum number of routes reached,"
2094 " consider increasing route/max_size.\n");
1da177e4 2095 return ERR_PTR(-ENOMEM);
40385653 2096 }
1da177e4 2097
1da177e4
LT
2098 in6_dev_hold(idev);
2099
11d53b49 2100 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2101 rt->dst.input = ip6_input;
2102 rt->dst.output = ip6_output;
1da177e4 2103 rt->rt6i_idev = idev;
d8d1f30b 2104 rt->dst.obsolete = -1;
1da177e4
LT
2105
2106 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2107 if (anycast)
2108 rt->rt6i_flags |= RTF_ANYCAST;
2109 else
1da177e4 2110 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2111 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2112 if (err) {
d8d1f30b 2113 dst_free(&rt->dst);
f83c7790 2114 return ERR_PTR(err);
1da177e4
LT
2115 }
2116
4e3fd7a0 2117 rt->rt6i_dst.addr = *addr;
1da177e4 2118 rt->rt6i_dst.plen = 128;
5578689a 2119 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2120
d8d1f30b 2121 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2122
2123 return rt;
2124}
2125
c3968a85
DW
2126int ip6_route_get_saddr(struct net *net,
2127 struct rt6_info *rt,
b71d1d42 2128 const struct in6_addr *daddr,
c3968a85
DW
2129 unsigned int prefs,
2130 struct in6_addr *saddr)
2131{
2132 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2133 int err = 0;
2134 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2135 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2136 else
2137 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2138 daddr, prefs, saddr);
2139 return err;
2140}
2141
2142/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* restrict to this device; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address compared against each prefsrc */
};
2148
2149static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2150{
2151 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2152 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2153 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2154
d1918542 2155 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2156 rt != net->ipv6.ip6_null_entry &&
2157 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2158 /* remove prefsrc entry */
2159 rt->rt6i_prefsrc.plen = 0;
2160 }
2161 return 0;
2162}
2163
2164void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2165{
2166 struct net *net = dev_net(ifp->idev->dev);
2167 struct arg_dev_net_ip adni = {
2168 .dev = ifp->idev->dev,
2169 .net = net,
2170 .addr = &ifp->addr,
2171 };
2172 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2173}
2174
8ed67789
DL
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace whose null entry is kept */
};
2179
1da177e4
LT
2180static int fib6_ifdown(struct rt6_info *rt, void *arg)
2181{
bc3ef660 2182 const struct arg_dev_net *adn = arg;
2183 const struct net_device *dev = adn->dev;
8ed67789 2184
d1918542 2185 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2186 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2187 return -1;
c159d30c 2188
1da177e4
LT
2189 return 0;
2190}
2191
f3db4851 2192void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2193{
8ed67789
DL
2194 struct arg_dev_net adn = {
2195 .dev = dev,
2196 .net = net,
2197 };
2198
2199 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2200 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2201}
2202
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2208
2209static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2210{
2211 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2212 struct inet6_dev *idev;
2213
2214 /* In IPv6 pmtu discovery is not optional,
2215 so that RTAX_MTU lock cannot disable it.
2216 We still use this lock to block changes
2217 caused by addrconf/ndisc.
2218 */
2219
2220 idev = __in6_dev_get(arg->dev);
38308473 2221 if (!idev)
1da177e4
LT
2222 return 0;
2223
2224 /* For administrative MTU increase, there is no way to discover
2225 IPv6 PMTU increase, so PMTU increase should be updated here.
2226 Since RFC 1981 doesn't include administrative MTU increase
2227 update PMTU increase is a MUST. (i.e. jumbo frame)
2228 */
2229 /*
2230 If new MTU is less than route PMTU, this new MTU will be the
2231 lowest MTU in the path, update the route PMTU to reflect PMTU
2232 decreases; if new MTU is greater than route PMTU, and the
2233 old MTU is the lowest MTU in the path, update the route PMTU
2234 to reflect the increase. In this case if the other nodes' MTU
2235 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2236 PMTU discouvery.
2237 */
d1918542 2238 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2239 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2240 (dst_mtu(&rt->dst) >= arg->mtu ||
2241 (dst_mtu(&rt->dst) < arg->mtu &&
2242 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2243 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2244 }
1da177e4
LT
2245 return 0;
2246}
2247
2248void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2249{
c71099ac
TG
2250 struct rt6_mtu_change_arg arg = {
2251 .dev = dev,
2252 .mtu = mtu,
2253 };
1da177e4 2254
c346dca1 2255 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2256}
2257
ef7c79ed 2258static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2259 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2260 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2261 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2262 [RTA_PRIORITY] = { .type = NLA_U32 },
2263 [RTA_METRICS] = { .type = NLA_NESTED },
2264};
2265
2266static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2267 struct fib6_config *cfg)
1da177e4 2268{
86872cb5
TG
2269 struct rtmsg *rtm;
2270 struct nlattr *tb[RTA_MAX+1];
2271 int err;
1da177e4 2272
86872cb5
TG
2273 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2274 if (err < 0)
2275 goto errout;
1da177e4 2276
86872cb5
TG
2277 err = -EINVAL;
2278 rtm = nlmsg_data(nlh);
2279 memset(cfg, 0, sizeof(*cfg));
2280
2281 cfg->fc_table = rtm->rtm_table;
2282 cfg->fc_dst_len = rtm->rtm_dst_len;
2283 cfg->fc_src_len = rtm->rtm_src_len;
2284 cfg->fc_flags = RTF_UP;
2285 cfg->fc_protocol = rtm->rtm_protocol;
2286
2287 if (rtm->rtm_type == RTN_UNREACHABLE)
2288 cfg->fc_flags |= RTF_REJECT;
2289
ab79ad14
2290 if (rtm->rtm_type == RTN_LOCAL)
2291 cfg->fc_flags |= RTF_LOCAL;
2292
86872cb5
TG
2293 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2294 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2295 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2296
2297 if (tb[RTA_GATEWAY]) {
2298 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2299 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2300 }
86872cb5
TG
2301
2302 if (tb[RTA_DST]) {
2303 int plen = (rtm->rtm_dst_len + 7) >> 3;
2304
2305 if (nla_len(tb[RTA_DST]) < plen)
2306 goto errout;
2307
2308 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2309 }
86872cb5
TG
2310
2311 if (tb[RTA_SRC]) {
2312 int plen = (rtm->rtm_src_len + 7) >> 3;
2313
2314 if (nla_len(tb[RTA_SRC]) < plen)
2315 goto errout;
2316
2317 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2318 }
86872cb5 2319
c3968a85
DW
2320 if (tb[RTA_PREFSRC])
2321 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2322
86872cb5
TG
2323 if (tb[RTA_OIF])
2324 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2325
2326 if (tb[RTA_PRIORITY])
2327 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2328
2329 if (tb[RTA_METRICS]) {
2330 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2331 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2332 }
86872cb5
TG
2333
2334 if (tb[RTA_TABLE])
2335 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2336
2337 err = 0;
2338errout:
2339 return err;
1da177e4
LT
2340}
2341
c127ea2c 2342static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2343{
86872cb5
TG
2344 struct fib6_config cfg;
2345 int err;
1da177e4 2346
86872cb5
TG
2347 err = rtm_to_fib6_config(skb, nlh, &cfg);
2348 if (err < 0)
2349 return err;
2350
2351 return ip6_route_del(&cfg);
1da177e4
LT
2352}
2353
c127ea2c 2354static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2355{
86872cb5
TG
2356 struct fib6_config cfg;
2357 int err;
1da177e4 2358
86872cb5
TG
2359 err = rtm_to_fib6_config(skb, nlh, &cfg);
2360 if (err < 0)
2361 return err;
2362
2363 return ip6_route_add(&cfg);
1da177e4
LT
2364}
2365
339bf98f
TG
2366static inline size_t rt6_nlmsg_size(void)
2367{
2368 return NLMSG_ALIGN(sizeof(struct rtmsg))
2369 + nla_total_size(16) /* RTA_SRC */
2370 + nla_total_size(16) /* RTA_DST */
2371 + nla_total_size(16) /* RTA_GATEWAY */
2372 + nla_total_size(16) /* RTA_PREFSRC */
2373 + nla_total_size(4) /* RTA_TABLE */
2374 + nla_total_size(4) /* RTA_IIF */
2375 + nla_total_size(4) /* RTA_OIF */
2376 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2377 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2378 + nla_total_size(sizeof(struct rta_cacheinfo));
2379}
2380
191cd582
BH
2381static int rt6_fill_node(struct net *net,
2382 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2383 struct in6_addr *dst, struct in6_addr *src,
2384 int iif, int type, u32 pid, u32 seq,
7bc570c8 2385 int prefix, int nowait, unsigned int flags)
1da177e4 2386{
346f870b 2387 const struct inet_peer *peer;
1da177e4 2388 struct rtmsg *rtm;
2d7202bf 2389 struct nlmsghdr *nlh;
e3703b3d 2390 long expires;
9e762a4a 2391 u32 table;
f2c31e32 2392 struct neighbour *n;
346f870b 2393 u32 ts, tsage;
1da177e4
LT
2394
2395 if (prefix) { /* user wants prefix routes only */
2396 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2397 /* success since this is not a prefix route */
2398 return 1;
2399 }
2400 }
2401
2d7202bf 2402 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2403 if (!nlh)
26932566 2404 return -EMSGSIZE;
2d7202bf
TG
2405
2406 rtm = nlmsg_data(nlh);
1da177e4
LT
2407 rtm->rtm_family = AF_INET6;
2408 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2409 rtm->rtm_src_len = rt->rt6i_src.plen;
2410 rtm->rtm_tos = 0;
c71099ac 2411 if (rt->rt6i_table)
9e762a4a 2412 table = rt->rt6i_table->tb6_id;
c71099ac 2413 else
9e762a4a
PM
2414 table = RT6_TABLE_UNSPEC;
2415 rtm->rtm_table = table;
c78679e8
DM
2416 if (nla_put_u32(skb, RTA_TABLE, table))
2417 goto nla_put_failure;
38308473 2418 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2419 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2420 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2421 rtm->rtm_type = RTN_LOCAL;
d1918542 2422 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2423 rtm->rtm_type = RTN_LOCAL;
2424 else
2425 rtm->rtm_type = RTN_UNICAST;
2426 rtm->rtm_flags = 0;
2427 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2428 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2429 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4
LT
2430 rtm->rtm_protocol = RTPROT_REDIRECT;
2431 else if (rt->rt6i_flags & RTF_ADDRCONF)
2432 rtm->rtm_protocol = RTPROT_KERNEL;
38308473 2433 else if (rt->rt6i_flags & RTF_DEFAULT)
1da177e4
LT
2434 rtm->rtm_protocol = RTPROT_RA;
2435
38308473 2436 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2437 rtm->rtm_flags |= RTM_F_CLONED;
2438
2439 if (dst) {
c78679e8
DM
2440 if (nla_put(skb, RTA_DST, 16, dst))
2441 goto nla_put_failure;
1ab1457c 2442 rtm->rtm_dst_len = 128;
1da177e4 2443 } else if (rtm->rtm_dst_len)
c78679e8
DM
2444 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2445 goto nla_put_failure;
1da177e4
LT
2446#ifdef CONFIG_IPV6_SUBTREES
2447 if (src) {
c78679e8
DM
2448 if (nla_put(skb, RTA_SRC, 16, src))
2449 goto nla_put_failure;
1ab1457c 2450 rtm->rtm_src_len = 128;
c78679e8
DM
2451 } else if (rtm->rtm_src_len &&
2452 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2453 goto nla_put_failure;
1da177e4 2454#endif
7bc570c8
YH
2455 if (iif) {
2456#ifdef CONFIG_IPV6_MROUTE
2457 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2458 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2459 if (err <= 0) {
2460 if (!nowait) {
2461 if (err == 0)
2462 return 0;
2463 goto nla_put_failure;
2464 } else {
2465 if (err == -EMSGSIZE)
2466 goto nla_put_failure;
2467 }
2468 }
2469 } else
2470#endif
c78679e8
DM
2471 if (nla_put_u32(skb, RTA_IIF, iif))
2472 goto nla_put_failure;
7bc570c8 2473 } else if (dst) {
1da177e4 2474 struct in6_addr saddr_buf;
c78679e8
DM
2475 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2476 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2477 goto nla_put_failure;
1da177e4 2478 }
2d7202bf 2479
c3968a85
DW
2480 if (rt->rt6i_prefsrc.plen) {
2481 struct in6_addr saddr_buf;
4e3fd7a0 2482 saddr_buf = rt->rt6i_prefsrc.addr;
c78679e8
DM
2483 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2484 goto nla_put_failure;
c3968a85
DW
2485 }
2486
defb3519 2487 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2488 goto nla_put_failure;
2489
f2c31e32 2490 rcu_read_lock();
27217455 2491 n = dst_get_neighbour_noref(&rt->dst);
94f826b8
ED
2492 if (n) {
2493 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2494 rcu_read_unlock();
2495 goto nla_put_failure;
2496 }
2497 }
f2c31e32 2498 rcu_read_unlock();
2d7202bf 2499
c78679e8
DM
2500 if (rt->dst.dev &&
2501 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2502 goto nla_put_failure;
2503 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2504 goto nla_put_failure;
36e3deae
YH
2505 if (!(rt->rt6i_flags & RTF_EXPIRES))
2506 expires = 0;
d1918542
DM
2507 else if (rt->dst.expires - jiffies < INT_MAX)
2508 expires = rt->dst.expires - jiffies;
36e3deae
YH
2509 else
2510 expires = INT_MAX;
69cdf8f9 2511
346f870b
DM
2512 peer = rt->rt6i_peer;
2513 ts = tsage = 0;
2514 if (peer && peer->tcp_ts_stamp) {
2515 ts = peer->tcp_ts;
2516 tsage = get_seconds() - peer->tcp_ts_stamp;
2517 }
2518
2519 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
d8d1f30b 2520 expires, rt->dst.error) < 0)
e3703b3d 2521 goto nla_put_failure;
2d7202bf
TG
2522
2523 return nlmsg_end(skb, nlh);
2524
2525nla_put_failure:
26932566
PM
2526 nlmsg_cancel(skb, nlh);
2527 return -EMSGSIZE;
1da177e4
LT
2528}
2529
1b43af54 2530int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2531{
2532 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2533 int prefix;
2534
2d7202bf
TG
2535 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2536 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2537 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2538 } else
2539 prefix = 0;
2540
191cd582
BH
2541 return rt6_fill_node(arg->net,
2542 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2543 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2544 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2545}
2546
c127ea2c 2547static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2548{
3b1e0a65 2549 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2550 struct nlattr *tb[RTA_MAX+1];
2551 struct rt6_info *rt;
1da177e4 2552 struct sk_buff *skb;
ab364a6f 2553 struct rtmsg *rtm;
4c9483b2 2554 struct flowi6 fl6;
72331bc0 2555 int err, iif = 0, oif = 0;
1da177e4 2556
ab364a6f
TG
2557 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2558 if (err < 0)
2559 goto errout;
1da177e4 2560
ab364a6f 2561 err = -EINVAL;
4c9483b2 2562 memset(&fl6, 0, sizeof(fl6));
1da177e4 2563
ab364a6f
TG
2564 if (tb[RTA_SRC]) {
2565 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2566 goto errout;
2567
4e3fd7a0 2568 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2569 }
2570
2571 if (tb[RTA_DST]) {
2572 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2573 goto errout;
2574
4e3fd7a0 2575 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2576 }
2577
2578 if (tb[RTA_IIF])
2579 iif = nla_get_u32(tb[RTA_IIF]);
2580
2581 if (tb[RTA_OIF])
72331bc0 2582 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2583
2584 if (iif) {
2585 struct net_device *dev;
72331bc0
SL
2586 int flags = 0;
2587
5578689a 2588 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2589 if (!dev) {
2590 err = -ENODEV;
ab364a6f 2591 goto errout;
1da177e4 2592 }
72331bc0
SL
2593
2594 fl6.flowi6_iif = iif;
2595
2596 if (!ipv6_addr_any(&fl6.saddr))
2597 flags |= RT6_LOOKUP_F_HAS_SADDR;
2598
2599 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2600 flags);
2601 } else {
2602 fl6.flowi6_oif = oif;
2603
2604 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2605 }
2606
ab364a6f 2607 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2608 if (!skb) {
2173bff5 2609 dst_release(&rt->dst);
ab364a6f
TG
2610 err = -ENOBUFS;
2611 goto errout;
2612 }
1da177e4 2613
ab364a6f
TG
2614 /* Reserve room for dummy headers, this skb can pass
2615 through good chunk of routing engine.
2616 */
459a98ed 2617 skb_reset_mac_header(skb);
ab364a6f 2618 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2619
d8d1f30b 2620 skb_dst_set(skb, &rt->dst);
1da177e4 2621
4c9483b2 2622 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2623 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2624 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2625 if (err < 0) {
ab364a6f
TG
2626 kfree_skb(skb);
2627 goto errout;
1da177e4
LT
2628 }
2629
5578689a 2630 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2631errout:
1da177e4 2632 return err;
1da177e4
LT
2633}
2634
86872cb5 2635void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2636{
2637 struct sk_buff *skb;
5578689a 2638 struct net *net = info->nl_net;
528c4ceb
DL
2639 u32 seq;
2640 int err;
2641
2642 err = -ENOBUFS;
38308473 2643 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2644
339bf98f 2645 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2646 if (!skb)
21713ebc
TG
2647 goto errout;
2648
191cd582 2649 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2650 event, info->pid, seq, 0, 0, 0);
26932566
PM
2651 if (err < 0) {
2652 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2653 WARN_ON(err == -EMSGSIZE);
2654 kfree_skb(skb);
2655 goto errout;
2656 }
1ce85fe4
PNA
2657 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2658 info->nlh, gfp_any());
2659 return;
21713ebc
TG
2660errout:
2661 if (err < 0)
5578689a 2662 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2663}
2664
8ed67789
DL
2665static int ip6_route_dev_notify(struct notifier_block *this,
2666 unsigned long event, void *data)
2667{
2668 struct net_device *dev = (struct net_device *)data;
c346dca1 2669 struct net *net = dev_net(dev);
8ed67789
DL
2670
2671 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2672 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2673 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2674#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2675 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2676 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2677 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2678 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2679#endif
2680 }
2681
2682 return NOTIFY_OK;
2683}
2684
1da177e4
LT
2685/*
2686 * /proc
2687 */
2688
2689#ifdef CONFIG_PROC_FS
2690
1da177e4
LT
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): not referenced by the seq_file based code in this part of
 * the file; field meanings below are inferred from their names only.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2699
2700static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2701{
33120b30 2702 struct seq_file *m = p_arg;
69cce1d1 2703 struct neighbour *n;
1da177e4 2704
4b7a4274 2705 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2706
2707#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2708 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2709#else
33120b30 2710 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2711#endif
f2c31e32 2712 rcu_read_lock();
27217455 2713 n = dst_get_neighbour_noref(&rt->dst);
69cce1d1
DM
2714 if (n) {
2715 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2716 } else {
33120b30 2717 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2718 }
f2c31e32 2719 rcu_read_unlock();
33120b30 2720 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2721 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2722 rt->dst.__use, rt->rt6i_flags,
d1918542 2723 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2724 return 0;
2725}
2726
33120b30 2727static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2728{
f3db4851 2729 struct net *net = (struct net *)m->private;
32b293a5 2730 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2731 return 0;
2732}
1da177e4 2733
33120b30
AD
2734static int ipv6_route_open(struct inode *inode, struct file *file)
2735{
de05c557 2736 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2737}
2738
33120b30
AD
2739static const struct file_operations ipv6_route_proc_fops = {
2740 .owner = THIS_MODULE,
2741 .open = ipv6_route_open,
2742 .read = seq_read,
2743 .llseek = seq_lseek,
b6fcbdb4 2744 .release = single_release_net,
33120b30
AD
2745};
2746
1da177e4
LT
2747static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2748{
69ddb805 2749 struct net *net = (struct net *)seq->private;
1da177e4 2750 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2751 net->ipv6.rt6_stats->fib_nodes,
2752 net->ipv6.rt6_stats->fib_route_nodes,
2753 net->ipv6.rt6_stats->fib_rt_alloc,
2754 net->ipv6.rt6_stats->fib_rt_entries,
2755 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2756 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2757 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2758
2759 return 0;
2760}
2761
2762static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2763{
de05c557 2764 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2765}
2766
9a32144e 2767static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2768 .owner = THIS_MODULE,
2769 .open = rt6_stats_seq_open,
2770 .read = seq_read,
2771 .llseek = seq_lseek,
b6fcbdb4 2772 .release = single_release_net,
1da177e4
LT
2773};
2774#endif /* CONFIG_PROC_FS */
2775
2776#ifdef CONFIG_SYSCTL
2777
1da177e4 2778static
8d65af78 2779int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2780 void __user *buffer, size_t *lenp, loff_t *ppos)
2781{
c486da34
LAG
2782 struct net *net;
2783 int delay;
2784 if (!write)
1da177e4 2785 return -EINVAL;
c486da34
LAG
2786
2787 net = (struct net *)ctl->extra1;
2788 delay = net->ipv6.sysctl.flush_delay;
2789 proc_dointvec(ctl, write, buffer, lenp, ppos);
2790 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2791 return 0;
1da177e4
LT
2792}
2793
760f2d01 2794ctl_table ipv6_route_table_template[] = {
1ab1457c 2795 {
1da177e4 2796 .procname = "flush",
4990509f 2797 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2798 .maxlen = sizeof(int),
89c8b3a1 2799 .mode = 0200,
6d9f239a 2800 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2801 },
2802 {
1da177e4 2803 .procname = "gc_thresh",
9a7ec3a9 2804 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2805 .maxlen = sizeof(int),
2806 .mode = 0644,
6d9f239a 2807 .proc_handler = proc_dointvec,
1da177e4
LT
2808 },
2809 {
1da177e4 2810 .procname = "max_size",
4990509f 2811 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2812 .maxlen = sizeof(int),
2813 .mode = 0644,
6d9f239a 2814 .proc_handler = proc_dointvec,
1da177e4
LT
2815 },
2816 {
1da177e4 2817 .procname = "gc_min_interval",
4990509f 2818 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2819 .maxlen = sizeof(int),
2820 .mode = 0644,
6d9f239a 2821 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2822 },
2823 {
1da177e4 2824 .procname = "gc_timeout",
4990509f 2825 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2826 .maxlen = sizeof(int),
2827 .mode = 0644,
6d9f239a 2828 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2829 },
2830 {
1da177e4 2831 .procname = "gc_interval",
4990509f 2832 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2833 .maxlen = sizeof(int),
2834 .mode = 0644,
6d9f239a 2835 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2836 },
2837 {
1da177e4 2838 .procname = "gc_elasticity",
4990509f 2839 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2840 .maxlen = sizeof(int),
2841 .mode = 0644,
f3d3f616 2842 .proc_handler = proc_dointvec,
1da177e4
LT
2843 },
2844 {
1da177e4 2845 .procname = "mtu_expires",
4990509f 2846 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2847 .maxlen = sizeof(int),
2848 .mode = 0644,
6d9f239a 2849 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2850 },
2851 {
1da177e4 2852 .procname = "min_adv_mss",
4990509f 2853 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2854 .maxlen = sizeof(int),
2855 .mode = 0644,
f3d3f616 2856 .proc_handler = proc_dointvec,
1da177e4
LT
2857 },
2858 {
1da177e4 2859 .procname = "gc_min_interval_ms",
4990509f 2860 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2861 .maxlen = sizeof(int),
2862 .mode = 0644,
6d9f239a 2863 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2864 },
f8572d8f 2865 { }
1da177e4
LT
2866};
2867
2c8c1e72 2868struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2869{
2870 struct ctl_table *table;
2871
2872 table = kmemdup(ipv6_route_table_template,
2873 sizeof(ipv6_route_table_template),
2874 GFP_KERNEL);
5ee09105
YH
2875
2876 if (table) {
2877 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2878 table[0].extra1 = net;
86393e52 2879 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2880 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2881 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2882 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2883 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2884 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2885 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2886 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2887 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2888 }
2889
760f2d01
DL
2890 return table;
2891}
1da177e4
LT
2892#endif
2893
2c8c1e72 2894static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2895{
633d424b 2896 int ret = -ENOMEM;
8ed67789 2897
86393e52
AD
2898 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2899 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2900
fc66f95c
ED
2901 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2902 goto out_ip6_dst_ops;
2903
8ed67789
DL
2904 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2905 sizeof(*net->ipv6.ip6_null_entry),
2906 GFP_KERNEL);
2907 if (!net->ipv6.ip6_null_entry)
fc66f95c 2908 goto out_ip6_dst_entries;
d8d1f30b 2909 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2910 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2911 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2912 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2913 ip6_template_metrics, true);
8ed67789
DL
2914
2915#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2916 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2917 sizeof(*net->ipv6.ip6_prohibit_entry),
2918 GFP_KERNEL);
68fffc67
PZ
2919 if (!net->ipv6.ip6_prohibit_entry)
2920 goto out_ip6_null_entry;
d8d1f30b 2921 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2922 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2923 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2924 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2925 ip6_template_metrics, true);
8ed67789
DL
2926
2927 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2928 sizeof(*net->ipv6.ip6_blk_hole_entry),
2929 GFP_KERNEL);
68fffc67
PZ
2930 if (!net->ipv6.ip6_blk_hole_entry)
2931 goto out_ip6_prohibit_entry;
d8d1f30b 2932 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2933 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2934 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2935 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2936 ip6_template_metrics, true);
8ed67789
DL
2937#endif
2938
b339a47c
PZ
2939 net->ipv6.sysctl.flush_delay = 0;
2940 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2941 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2942 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2943 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2944 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2945 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2946 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2947
cdb18761
DL
2948#ifdef CONFIG_PROC_FS
2949 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2950 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2951#endif
6891a346
BT
2952 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2953
8ed67789
DL
2954 ret = 0;
2955out:
2956 return ret;
f2fc6a54 2957
68fffc67
PZ
2958#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2959out_ip6_prohibit_entry:
2960 kfree(net->ipv6.ip6_prohibit_entry);
2961out_ip6_null_entry:
2962 kfree(net->ipv6.ip6_null_entry);
2963#endif
fc66f95c
ED
2964out_ip6_dst_entries:
2965 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2966out_ip6_dst_ops:
f2fc6a54 2967 goto out;
cdb18761
DL
2968}
2969
2c8c1e72 2970static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2971{
2972#ifdef CONFIG_PROC_FS
2973 proc_net_remove(net, "ipv6_route");
2974 proc_net_remove(net, "rt6_stats");
2975#endif
8ed67789
DL
2976 kfree(net->ipv6.ip6_null_entry);
2977#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2978 kfree(net->ipv6.ip6_prohibit_entry);
2979 kfree(net->ipv6.ip6_blk_hole_entry);
2980#endif
41bb78b4 2981 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2982}
2983
2984static struct pernet_operations ip6_route_net_ops = {
2985 .init = ip6_route_net_init,
2986 .exit = ip6_route_net_exit,
2987};
2988
8ed67789
DL
2989static struct notifier_block ip6_route_dev_notifier = {
2990 .notifier_call = ip6_route_dev_notify,
2991 .priority = 0,
2992};
2993
433d49c3 2994int __init ip6_route_init(void)
1da177e4 2995{
433d49c3
DL
2996 int ret;
2997
9a7ec3a9
DL
2998 ret = -ENOMEM;
2999 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3000 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3001 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3002 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3003 goto out;
14e50e57 3004
fc66f95c 3005 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3006 if (ret)
bdb3289f 3007 goto out_kmem_cache;
bdb3289f 3008
fc66f95c
ED
3009 ret = register_pernet_subsys(&ip6_route_net_ops);
3010 if (ret)
3011 goto out_dst_entries;
3012
5dc121e9
AE
3013 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3014
8ed67789
DL
3015 /* Registering of the loopback is done before this portion of code,
3016 * the loopback reference in rt6_info will not be taken, do it
3017 * manually for init_net */
d8d1f30b 3018 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3019 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3020 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3021 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3022 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3023 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3024 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3025 #endif
433d49c3
DL
3026 ret = fib6_init();
3027 if (ret)
8ed67789 3028 goto out_register_subsys;
433d49c3 3029
433d49c3
DL
3030 ret = xfrm6_init();
3031 if (ret)
cdb18761 3032 goto out_fib6_init;
c35b7e72 3033
433d49c3
DL
3034 ret = fib6_rules_init();
3035 if (ret)
3036 goto xfrm6_init;
7e5449c2 3037
433d49c3 3038 ret = -ENOBUFS;
c7ac8679
GR
3039 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3040 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3041 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 3042 goto fib6_rules_init;
c127ea2c 3043
8ed67789 3044 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
3045 if (ret)
3046 goto fib6_rules_init;
8ed67789 3047
433d49c3
DL
3048out:
3049 return ret;
3050
3051fib6_rules_init:
433d49c3
DL
3052 fib6_rules_cleanup();
3053xfrm6_init:
433d49c3 3054 xfrm6_fini();
433d49c3 3055out_fib6_init:
433d49c3 3056 fib6_gc_cleanup();
8ed67789
DL
3057out_register_subsys:
3058 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
3059out_dst_entries:
3060 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3061out_kmem_cache:
f2fc6a54 3062 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3063 goto out;
1da177e4
LT
3064}
3065
3066void ip6_route_cleanup(void)
3067{
8ed67789 3068 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 3069 fib6_rules_cleanup();
1da177e4 3070 xfrm6_fini();
1da177e4 3071 fib6_gc_cleanup();
8ed67789 3072 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3073 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3074 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3075}