Merge branch 'for-davem' of git://git.kernel.org/pub/scm/linux/kernel/git/linville...
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
1da177e4
LT
60
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
1716a961 67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 68 const struct in6_addr *dest);
1da177e4 69static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 70static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 71static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73static void ip6_dst_destroy(struct dst_entry *);
74static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
569d3645 76static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
77
78static int ip6_pkt_discard(struct sk_buff *skb);
79static int ip6_pkt_discard_out(struct sk_buff *skb);
80static void ip6_link_failure(struct sk_buff *skb);
81static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
6e157b6a 82static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb);
1da177e4 83
70ceb4f5 84#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 85static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
86 const struct in6_addr *prefix, int prefixlen,
87 const struct in6_addr *gwaddr, int ifindex,
95c96174 88 unsigned int pref);
efa2cea0 89static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
90 const struct in6_addr *prefix, int prefixlen,
91 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
92#endif
93
06582540
DM
94static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
95{
96 struct rt6_info *rt = (struct rt6_info *) dst;
97 struct inet_peer *peer;
98 u32 *p = NULL;
99
8e2ec639
YZ
100 if (!(rt->dst.flags & DST_HOST))
101 return NULL;
102
fbfe95a4 103 peer = rt6_get_peer_create(rt);
06582540
DM
104 if (peer) {
105 u32 *old_p = __DST_METRICS_PTR(old);
106 unsigned long prev, new;
107
108 p = peer->metrics;
109 if (inet_metrics_new(peer))
110 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111
112 new = (unsigned long) p;
113 prev = cmpxchg(&dst->_metrics, old, new);
114
115 if (prev != old) {
116 p = __DST_METRICS_PTR(prev);
117 if (prev & DST_METRICS_READ_ONLY)
118 p = NULL;
119 }
120 }
121 return p;
122}
123
f894cbf8
DM
124static inline const void *choose_neigh_daddr(struct rt6_info *rt,
125 struct sk_buff *skb,
126 const void *daddr)
39232973
DM
127{
128 struct in6_addr *p = &rt->rt6i_gateway;
129
a7563f34 130 if (!ipv6_addr_any(p))
39232973 131 return (const void *) p;
f894cbf8
DM
132 else if (skb)
133 return &ipv6_hdr(skb)->daddr;
39232973
DM
134 return daddr;
135}
136
f894cbf8
DM
137static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
138 struct sk_buff *skb,
139 const void *daddr)
d3aaeb38 140{
39232973
DM
141 struct rt6_info *rt = (struct rt6_info *) dst;
142 struct neighbour *n;
143
f894cbf8 144 daddr = choose_neigh_daddr(rt, skb, daddr);
39232973 145 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
146 if (n)
147 return n;
148 return neigh_create(&nd_tbl, daddr, dst->dev);
149}
150
8ade06c6 151static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 152{
8ade06c6
DM
153 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
154 if (!n) {
155 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
156 if (IS_ERR(n))
157 return PTR_ERR(n);
158 }
97cac082 159 rt->n = n;
f83c7790
DM
160
161 return 0;
d3aaeb38
DM
162}
163
9a7ec3a9 164static struct dst_ops ip6_dst_ops_template = {
1da177e4 165 .family = AF_INET6,
09640e63 166 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
167 .gc = ip6_dst_gc,
168 .gc_thresh = 1024,
169 .check = ip6_dst_check,
0dbaee3b 170 .default_advmss = ip6_default_advmss,
ebb762f2 171 .mtu = ip6_mtu,
06582540 172 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
173 .destroy = ip6_dst_destroy,
174 .ifdown = ip6_dst_ifdown,
175 .negative_advice = ip6_negative_advice,
176 .link_failure = ip6_link_failure,
177 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 178 .redirect = rt6_do_redirect,
1ac06e03 179 .local_out = __ip6_local_out,
d3aaeb38 180 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
181};
182
ebb762f2 183static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 184{
618f9bc7
SK
185 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
186
187 return mtu ? : dst->dev->mtu;
ec831ea7
RD
188}
189
14e50e57
DM
190static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
191{
192}
193
/* dst_ops->redirect hook for blackhole routes: deliberately a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb)
{
	/* nothing to do */
}
197
0972ddb2
HB
198static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
199 unsigned long old)
200{
201 return NULL;
202}
203
14e50e57
DM
204static struct dst_ops ip6_dst_blackhole_ops = {
205 .family = AF_INET6,
09640e63 206 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
207 .destroy = ip6_dst_destroy,
208 .check = ip6_dst_check,
ebb762f2 209 .mtu = ip6_blackhole_mtu,
214f45c9 210 .default_advmss = ip6_default_advmss,
14e50e57 211 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 212 .redirect = ip6_rt_blackhole_redirect,
0972ddb2 213 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 214 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
215};
216
62fa8a84
DM
217static const u32 ip6_template_metrics[RTAX_MAX] = {
218 [RTAX_HOPLIMIT - 1] = 255,
219};
220
bdb3289f 221static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
222 .dst = {
223 .__refcnt = ATOMIC_INIT(1),
224 .__use = 1,
225 .obsolete = -1,
226 .error = -ENETUNREACH,
d8d1f30b
CG
227 .input = ip6_pkt_discard,
228 .output = ip6_pkt_discard_out,
1da177e4
LT
229 },
230 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 231 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
232 .rt6i_metric = ~(u32) 0,
233 .rt6i_ref = ATOMIC_INIT(1),
234};
235
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route entry: like the null entry, but the
 * handlers are the ip6_pkt_prohibit pair and the dst error is -EACCES.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route entry: both directions go to
 * dst_discard and the dst error is -EINVAL.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
272
1da177e4 273/* allocate dst with ip6_dst_ops */
97bab73f 274static inline struct rt6_info *ip6_dst_alloc(struct net *net,
957c665f 275 struct net_device *dev,
8b96d22d
DM
276 int flags,
277 struct fib6_table *table)
1da177e4 278{
97bab73f
DM
279 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
280 0, 0, flags);
cf911662 281
97bab73f 282 if (rt) {
a2de86f6 283 memset(&rt->n, 0,
38308473 284 sizeof(*rt) - sizeof(struct dst_entry));
8b96d22d 285 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
97bab73f 286 }
cf911662 287 return rt;
1da177e4
LT
288}
289
290static void ip6_dst_destroy(struct dst_entry *dst)
291{
292 struct rt6_info *rt = (struct rt6_info *)dst;
293 struct inet6_dev *idev = rt->rt6i_idev;
294
97cac082
DM
295 if (rt->n)
296 neigh_release(rt->n);
297
8e2ec639
YZ
298 if (!(rt->dst.flags & DST_HOST))
299 dst_destroy_metrics_generic(dst);
300
38308473 301 if (idev) {
1da177e4
LT
302 rt->rt6i_idev = NULL;
303 in6_dev_put(idev);
1ab1457c 304 }
1716a961
G
305
306 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
307 dst_release(dst->from);
308
97bab73f
DM
309 if (rt6_has_peer(rt)) {
310 struct inet_peer *peer = rt6_peer_ptr(rt);
b3419363
DM
311 inet_putpeer(peer);
312 }
313}
314
6431cbc2
DM
315static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
316
317static u32 rt6_peer_genid(void)
318{
319 return atomic_read(&__rt6_peer_genid);
320}
321
b3419363
DM
322void rt6_bind_peer(struct rt6_info *rt, int create)
323{
97bab73f 324 struct inet_peer_base *base;
b3419363
DM
325 struct inet_peer *peer;
326
97bab73f
DM
327 base = inetpeer_base_ptr(rt->_rt6i_peer);
328 if (!base)
329 return;
330
331 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
7b34ca2a
DM
332 if (peer) {
333 if (!rt6_set_peer(rt, peer))
334 inet_putpeer(peer);
335 else
336 rt->rt6i_peer_genid = rt6_peer_genid();
337 }
1da177e4
LT
338}
339
340static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
341 int how)
342{
343 struct rt6_info *rt = (struct rt6_info *)dst;
344 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 345 struct net_device *loopback_dev =
c346dca1 346 dev_net(dev)->loopback_dev;
1da177e4 347
97cac082
DM
348 if (dev != loopback_dev) {
349 if (idev && idev->dev == dev) {
350 struct inet6_dev *loopback_idev =
351 in6_dev_get(loopback_dev);
352 if (loopback_idev) {
353 rt->rt6i_idev = loopback_idev;
354 in6_dev_put(idev);
355 }
356 }
357 if (rt->n && rt->n->dev == dev) {
358 rt->n->dev = loopback_dev;
359 dev_hold(loopback_dev);
360 dev_put(dev);
1da177e4
LT
361 }
362 }
363}
364
a50feda5 365static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 366{
1716a961
G
367 struct rt6_info *ort = NULL;
368
369 if (rt->rt6i_flags & RTF_EXPIRES) {
370 if (time_after(jiffies, rt->dst.expires))
a50feda5 371 return true;
1716a961
G
372 } else if (rt->dst.from) {
373 ort = (struct rt6_info *) rt->dst.from;
374 return (ort->rt6i_flags & RTF_EXPIRES) &&
375 time_after(jiffies, ort->dst.expires);
376 }
a50feda5 377 return false;
1da177e4
LT
378}
379
a50feda5 380static bool rt6_need_strict(const struct in6_addr *daddr)
c71099ac 381{
a02cec21
ED
382 return ipv6_addr_type(daddr) &
383 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
384}
385
1da177e4 386/*
c71099ac 387 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
388 */
389
8ed67789
DL
390static inline struct rt6_info *rt6_device_match(struct net *net,
391 struct rt6_info *rt,
b71d1d42 392 const struct in6_addr *saddr,
1da177e4 393 int oif,
d420895e 394 int flags)
1da177e4
LT
395{
396 struct rt6_info *local = NULL;
397 struct rt6_info *sprt;
398
dd3abc4e
YH
399 if (!oif && ipv6_addr_any(saddr))
400 goto out;
401
d8d1f30b 402 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 403 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
404
405 if (oif) {
1da177e4
LT
406 if (dev->ifindex == oif)
407 return sprt;
408 if (dev->flags & IFF_LOOPBACK) {
38308473 409 if (!sprt->rt6i_idev ||
1da177e4 410 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 411 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 412 continue;
1ab1457c 413 if (local && (!oif ||
1da177e4
LT
414 local->rt6i_idev->dev->ifindex == oif))
415 continue;
416 }
417 local = sprt;
418 }
dd3abc4e
YH
419 } else {
420 if (ipv6_chk_addr(net, saddr, dev,
421 flags & RT6_LOOKUP_F_IFACE))
422 return sprt;
1da177e4 423 }
dd3abc4e 424 }
1da177e4 425
dd3abc4e 426 if (oif) {
1da177e4
LT
427 if (local)
428 return local;
429
d420895e 430 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 431 return net->ipv6.ip6_null_entry;
1da177e4 432 }
dd3abc4e 433out:
1da177e4
LT
434 return rt;
435}
436
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Send a neighbour solicitation towards the router behind @rt when its
 * neighbour entry is not NUD_VALID and the last update is older than the
 * interface's rtr_probe_interval.  A NULL @rt, or a route without a
 * neighbour, is silently ignored.
 *
 * NOTE(review): neigh->updated is written while only the read side of
 * neigh->lock is held - confirm whether that is intended.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	bool probe_due;

	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	rcu_read_lock();
	neigh = rt ? rt->n : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	read_lock_bh(&neigh->lock);
	probe_due = !(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies, neigh->updated +
					rt->rt6i_idev->cnf.rtr_probe_interval);
	if (probe_due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (probe_due) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/* solicit the neighbour via its solicited-node multicast address */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	}
out:
	rcu_read_unlock();
}
#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
476
1da177e4 477/*
554cfb7e 478 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 479 */
b6f99a21 480static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 481{
d1918542 482 struct net_device *dev = rt->dst.dev;
161980f4 483 if (!oif || dev->ifindex == oif)
554cfb7e 484 return 2;
161980f4
DM
485 if ((dev->flags & IFF_LOOPBACK) &&
486 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
487 return 1;
488 return 0;
554cfb7e 489}
1da177e4 490
b6f99a21 491static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 492{
f2c31e32 493 struct neighbour *neigh;
398bcbeb 494 int m;
f2c31e32
ED
495
496 rcu_read_lock();
97cac082 497 neigh = rt->n;
4d0c5911
YH
498 if (rt->rt6i_flags & RTF_NONEXTHOP ||
499 !(rt->rt6i_flags & RTF_GATEWAY))
500 m = 1;
501 else if (neigh) {
554cfb7e
YH
502 read_lock_bh(&neigh->lock);
503 if (neigh->nud_state & NUD_VALID)
4d0c5911 504 m = 2;
398bcbeb
YH
505#ifdef CONFIG_IPV6_ROUTER_PREF
506 else if (neigh->nud_state & NUD_FAILED)
507 m = 0;
508#endif
509 else
ea73ee23 510 m = 1;
554cfb7e 511 read_unlock_bh(&neigh->lock);
398bcbeb
YH
512 } else
513 m = 0;
f2c31e32 514 rcu_read_unlock();
554cfb7e 515 return m;
1da177e4
LT
516}
517
554cfb7e
YH
518static int rt6_score_route(struct rt6_info *rt, int oif,
519 int strict)
1da177e4 520{
4d0c5911 521 int m, n;
1ab1457c 522
4d0c5911 523 m = rt6_check_dev(rt, oif);
77d16f45 524 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 525 return -1;
ebacaaa0
YH
526#ifdef CONFIG_IPV6_ROUTER_PREF
527 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
528#endif
4d0c5911 529 n = rt6_check_neigh(rt);
557e92ef 530 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
531 return -1;
532 return m;
533}
534
f11e6659
DM
535static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
536 int *mpri, struct rt6_info *match)
554cfb7e 537{
f11e6659
DM
538 int m;
539
540 if (rt6_check_expired(rt))
541 goto out;
542
543 m = rt6_score_route(rt, oif, strict);
544 if (m < 0)
545 goto out;
546
547 if (m > *mpri) {
548 if (strict & RT6_LOOKUP_F_REACHABLE)
549 rt6_probe(match);
550 *mpri = m;
551 match = rt;
552 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
553 rt6_probe(rt);
554 }
555
556out:
557 return match;
558}
559
560static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
561 struct rt6_info *rr_head,
562 u32 metric, int oif, int strict)
563{
564 struct rt6_info *rt, *match;
554cfb7e 565 int mpri = -1;
1da177e4 566
f11e6659
DM
567 match = NULL;
568 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 569 rt = rt->dst.rt6_next)
f11e6659
DM
570 match = find_match(rt, oif, strict, &mpri, match);
571 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 572 rt = rt->dst.rt6_next)
f11e6659 573 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 574
f11e6659
DM
575 return match;
576}
1da177e4 577
f11e6659
DM
578static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
579{
580 struct rt6_info *match, *rt0;
8ed67789 581 struct net *net;
1da177e4 582
f11e6659
DM
583 rt0 = fn->rr_ptr;
584 if (!rt0)
585 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 586
f11e6659 587 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 588
554cfb7e 589 if (!match &&
f11e6659 590 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 591 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 592
554cfb7e 593 /* no entries matched; do round-robin */
f11e6659
DM
594 if (!next || next->rt6i_metric != rt0->rt6i_metric)
595 next = fn->leaf;
596
597 if (next != rt0)
598 fn->rr_ptr = next;
1da177e4 599 }
1da177e4 600
d1918542 601 net = dev_net(rt0->dst.dev);
a02cec21 602 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
603}
604
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option @opt of @len bytes received on
 * @dev from router @gwaddr.
 *
 * The option is validated (size, length field versus prefix length,
 * preference), then the matching route is looked up and
 *   - deleted when the advertised lifetime is zero,
 *   - created when it does not exist and the lifetime is non-zero,
 *   - otherwise updated with the new preference.
 * Finally the expiry of the resulting route is set from the lifetime.
 *
 * Returns 0 on success and -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2)
			return -EINVAL;
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1)
			return -EINVAL;
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* a zero lifetime withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (!rt)
		return 0;

	if (!addrconf_finite_timeout(lifetime))
		rt6_clean_expires(rt);
	else
		rt6_set_expires(rt, jiffies + HZ * lifetime);

	dst_release(&rt->dst);
	return 0;
}
#endif
677
/*
 * BACKTRACK - climb the fib6 tree after a failed lookup.
 *
 * Expects the enclosing function to provide the locals "rt" and "fn" and
 * the labels "out" and "restart".  When rt is the namespace's
 * ip6_null_entry, walk towards the root: jump to "out" once the current
 * node is a top-level root (RTN_TL_ROOT); otherwise step to the parent,
 * or look @saddr up in the parent's subtree when it has one that is not
 * the node we came from, and jump to "restart" as soon as a node carrying
 * route information (RTN_RTINFO) is reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 695
8ed67789
DL
696static struct rt6_info *ip6_pol_route_lookup(struct net *net,
697 struct fib6_table *table,
4c9483b2 698 struct flowi6 *fl6, int flags)
1da177e4
LT
699{
700 struct fib6_node *fn;
701 struct rt6_info *rt;
702
c71099ac 703 read_lock_bh(&table->tb6_lock);
4c9483b2 704 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
705restart:
706 rt = fn->leaf;
4c9483b2
DM
707 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
708 BACKTRACK(net, &fl6->saddr);
c71099ac 709out:
d8d1f30b 710 dst_use(&rt->dst, jiffies);
c71099ac 711 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
712 return rt;
713
714}
715
ea6e574e
FW
716struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
717 int flags)
718{
719 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
720}
721EXPORT_SYMBOL_GPL(ip6_route_lookup);
722
9acd9f3a
YH
723struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
724 const struct in6_addr *saddr, int oif, int strict)
c71099ac 725{
4c9483b2
DM
726 struct flowi6 fl6 = {
727 .flowi6_oif = oif,
728 .daddr = *daddr,
c71099ac
TG
729 };
730 struct dst_entry *dst;
77d16f45 731 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 732
adaa70bb 733 if (saddr) {
4c9483b2 734 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
735 flags |= RT6_LOOKUP_F_HAS_SADDR;
736 }
737
4c9483b2 738 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
739 if (dst->error == 0)
740 return (struct rt6_info *) dst;
741
742 dst_release(dst);
743
1da177e4
LT
744 return NULL;
745}
746
7159039a
YH
747EXPORT_SYMBOL(rt6_lookup);
748
c71099ac 749/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
750 It takes new route entry, the addition fails by any reason the
751 route is freed. In any case, if caller does not hold it, it may
752 be destroyed.
753 */
754
86872cb5 755static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
756{
757 int err;
c71099ac 758 struct fib6_table *table;
1da177e4 759
c71099ac
TG
760 table = rt->rt6i_table;
761 write_lock_bh(&table->tb6_lock);
86872cb5 762 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 763 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
764
765 return err;
766}
767
40e22e8f
TG
768int ip6_ins_rt(struct rt6_info *rt)
769{
4d1169c1 770 struct nl_info info = {
d1918542 771 .nl_net = dev_net(rt->dst.dev),
4d1169c1 772 };
528c4ceb 773 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
774}
775
1716a961 776static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 777 const struct in6_addr *daddr,
b71d1d42 778 const struct in6_addr *saddr)
1da177e4 779{
1da177e4
LT
780 struct rt6_info *rt;
781
782 /*
783 * Clone the route.
784 */
785
21efcfa0 786 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
787
788 if (rt) {
14deae41
DM
789 int attempts = !in_softirq();
790
38308473 791 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 792 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 793 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 794 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 795 rt->rt6i_gateway = *daddr;
58c4fb86 796 }
1da177e4 797
1da177e4 798 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
799
800#ifdef CONFIG_IPV6_SUBTREES
801 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 802 rt->rt6i_src.addr = *saddr;
1da177e4
LT
803 rt->rt6i_src.plen = 128;
804 }
805#endif
806
14deae41 807 retry:
8ade06c6 808 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 809 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
810 int saved_rt_min_interval =
811 net->ipv6.sysctl.ip6_rt_gc_min_interval;
812 int saved_rt_elasticity =
813 net->ipv6.sysctl.ip6_rt_gc_elasticity;
814
815 if (attempts-- > 0) {
816 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
817 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
818
86393e52 819 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
820
821 net->ipv6.sysctl.ip6_rt_gc_elasticity =
822 saved_rt_elasticity;
823 net->ipv6.sysctl.ip6_rt_gc_min_interval =
824 saved_rt_min_interval;
825 goto retry;
826 }
827
f3213831 828 net_warn_ratelimited("Neighbour table overflow\n");
d8d1f30b 829 dst_free(&rt->dst);
14deae41
DM
830 return NULL;
831 }
95a9a5ba 832 }
1da177e4 833
95a9a5ba
YH
834 return rt;
835}
1da177e4 836
21efcfa0
ED
837static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
838 const struct in6_addr *daddr)
299d9939 839{
21efcfa0
ED
840 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
841
299d9939 842 if (rt) {
299d9939 843 rt->rt6i_flags |= RTF_CACHE;
97cac082 844 rt->n = neigh_clone(ort->n);
299d9939
YH
845 }
846 return rt;
847}
848
8ed67789 849static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 850 struct flowi6 *fl6, int flags)
1da177e4
LT
851{
852 struct fib6_node *fn;
519fbd87 853 struct rt6_info *rt, *nrt;
c71099ac 854 int strict = 0;
1da177e4 855 int attempts = 3;
519fbd87 856 int err;
53b7997f 857 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 858
77d16f45 859 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
860
861relookup:
c71099ac 862 read_lock_bh(&table->tb6_lock);
1da177e4 863
8238dd06 864restart_2:
4c9483b2 865 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
866
867restart:
4acad72d 868 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 869
4c9483b2 870 BACKTRACK(net, &fl6->saddr);
8ed67789 871 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 872 rt->rt6i_flags & RTF_CACHE)
1ddef044 873 goto out;
1da177e4 874
d8d1f30b 875 dst_hold(&rt->dst);
c71099ac 876 read_unlock_bh(&table->tb6_lock);
fb9de91e 877
97cac082 878 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 879 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 880 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 881 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
882 else
883 goto out2;
e40cf353 884
d8d1f30b 885 dst_release(&rt->dst);
8ed67789 886 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 887
d8d1f30b 888 dst_hold(&rt->dst);
519fbd87 889 if (nrt) {
40e22e8f 890 err = ip6_ins_rt(nrt);
519fbd87 891 if (!err)
1da177e4 892 goto out2;
1da177e4 893 }
1da177e4 894
519fbd87
YH
895 if (--attempts <= 0)
896 goto out2;
897
898 /*
c71099ac 899 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
900 * released someone could insert this route. Relookup.
901 */
d8d1f30b 902 dst_release(&rt->dst);
519fbd87
YH
903 goto relookup;
904
905out:
8238dd06
YH
906 if (reachable) {
907 reachable = 0;
908 goto restart_2;
909 }
d8d1f30b 910 dst_hold(&rt->dst);
c71099ac 911 read_unlock_bh(&table->tb6_lock);
1da177e4 912out2:
d8d1f30b
CG
913 rt->dst.lastuse = jiffies;
914 rt->dst.__use++;
c71099ac
TG
915
916 return rt;
1da177e4
LT
917}
918
8ed67789 919static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 920 struct flowi6 *fl6, int flags)
4acad72d 921{
4c9483b2 922 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
923}
924
72331bc0
SL
925static struct dst_entry *ip6_route_input_lookup(struct net *net,
926 struct net_device *dev,
927 struct flowi6 *fl6, int flags)
928{
929 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
930 flags |= RT6_LOOKUP_F_IFACE;
931
932 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
933}
934
c71099ac
TG
935void ip6_route_input(struct sk_buff *skb)
936{
b71d1d42 937 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 938 struct net *net = dev_net(skb->dev);
adaa70bb 939 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
940 struct flowi6 fl6 = {
941 .flowi6_iif = skb->dev->ifindex,
942 .daddr = iph->daddr,
943 .saddr = iph->saddr,
38308473 944 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
945 .flowi6_mark = skb->mark,
946 .flowi6_proto = iph->nexthdr,
c71099ac 947 };
adaa70bb 948
72331bc0 949 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
950}
951
8ed67789 952static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 953 struct flowi6 *fl6, int flags)
1da177e4 954{
4c9483b2 955 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
956}
957
9c7a4f9c 958struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 959 struct flowi6 *fl6)
c71099ac
TG
960{
961 int flags = 0;
962
4dc27d1c
DM
963 fl6->flowi6_iif = net->loopback_dev->ifindex;
964
4c9483b2 965 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 966 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 967
4c9483b2 968 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 969 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
970 else if (sk)
971 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 972
4c9483b2 973 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
974}
975
7159039a 976EXPORT_SYMBOL(ip6_route_output);
1da177e4 977
2774c131 978struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 979{
5c1e6aa3 980 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
981 struct dst_entry *new = NULL;
982
5c1e6aa3 983 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 984 if (rt) {
cf911662 985 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
97bab73f 986 rt6_init_peer(rt, net->ipv6.peers);
cf911662 987
d8d1f30b 988 new = &rt->dst;
14e50e57 989
14e50e57 990 new->__use = 1;
352e512c
HX
991 new->input = dst_discard;
992 new->output = dst_discard;
14e50e57 993
21efcfa0
ED
994 if (dst_metrics_read_only(&ort->dst))
995 new->_metrics = ort->dst._metrics;
996 else
997 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
998 rt->rt6i_idev = ort->rt6i_idev;
999 if (rt->rt6i_idev)
1000 in6_dev_hold(rt->rt6i_idev);
14e50e57 1001
4e3fd7a0 1002 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1003 rt->rt6i_flags = ort->rt6i_flags;
1004 rt6_clean_expires(rt);
14e50e57
DM
1005 rt->rt6i_metric = 0;
1006
1007 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1008#ifdef CONFIG_IPV6_SUBTREES
1009 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1010#endif
1011
1012 dst_free(new);
1013 }
1014
69ead7af
DM
1015 dst_release(dst_orig);
1016 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1017}
14e50e57 1018
1da177e4
LT
1019/*
1020 * Destination cache support functions
1021 */
1022
1023static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1024{
1025 struct rt6_info *rt;
1026
1027 rt = (struct rt6_info *) dst;
1028
6431cbc2
DM
1029 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1030 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
97bab73f 1031 if (!rt6_has_peer(rt))
6431cbc2
DM
1032 rt6_bind_peer(rt, 0);
1033 rt->rt6i_peer_genid = rt6_peer_genid();
1034 }
1da177e4 1035 return dst;
6431cbc2 1036 }
1da177e4
LT
1037 return NULL;
1038}
1039
1040static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1041{
1042 struct rt6_info *rt = (struct rt6_info *) dst;
1043
1044 if (rt) {
54c1a859
YH
1045 if (rt->rt6i_flags & RTF_CACHE) {
1046 if (rt6_check_expired(rt)) {
1047 ip6_del_rt(rt);
1048 dst = NULL;
1049 }
1050 } else {
1da177e4 1051 dst_release(dst);
54c1a859
YH
1052 dst = NULL;
1053 }
1da177e4 1054 }
54c1a859 1055 return dst;
1da177e4
LT
1056}
1057
1058static void ip6_link_failure(struct sk_buff *skb)
1059{
1060 struct rt6_info *rt;
1061
3ffe533c 1062 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1063
adf30907 1064 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1065 if (rt) {
1716a961
G
1066 if (rt->rt6i_flags & RTF_CACHE)
1067 rt6_update_expires(rt, 0);
1068 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1da177e4
LT
1069 rt->rt6i_node->fn_sernum = -1;
1070 }
1071}
1072
1073static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1074{
1075 struct rt6_info *rt6 = (struct rt6_info*)dst;
1076
81aded24 1077 dst_confirm(dst);
1da177e4 1078 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
81aded24
DM
1079 struct net *net = dev_net(dst->dev);
1080
1da177e4
LT
1081 rt6->rt6i_flags |= RTF_MODIFIED;
1082 if (mtu < IPV6_MIN_MTU) {
defb3519 1083 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1084 mtu = IPV6_MIN_MTU;
defb3519
DM
1085 features |= RTAX_FEATURE_ALLFRAG;
1086 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1087 }
defb3519 1088 dst_metric_set(dst, RTAX_MTU, mtu);
81aded24 1089 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1090 }
1091}
1092
42ae66c8
DM
1093void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1094 int oif, u32 mark)
81aded24
DM
1095{
1096 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1097 struct dst_entry *dst;
1098 struct flowi6 fl6;
1099
1100 memset(&fl6, 0, sizeof(fl6));
1101 fl6.flowi6_oif = oif;
1102 fl6.flowi6_mark = mark;
3e12939a 1103 fl6.flowi6_flags = 0;
81aded24
DM
1104 fl6.daddr = iph->daddr;
1105 fl6.saddr = iph->saddr;
1106 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1107
1108 dst = ip6_route_output(net, NULL, &fl6);
1109 if (!dst->error)
1110 ip6_rt_update_pmtu(dst, ntohl(mtu));
1111 dst_release(dst);
1112}
1113EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1114
1115void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1116{
1117 ip6_update_pmtu(skb, sock_net(sk), mtu,
1118 sk->sk_bound_dev_if, sk->sk_mark);
1119}
1120EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1121
3a5ad2ee
DM
1122void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1123{
1124 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1125 struct dst_entry *dst;
1126 struct flowi6 fl6;
1127
1128 memset(&fl6, 0, sizeof(fl6));
1129 fl6.flowi6_oif = oif;
1130 fl6.flowi6_mark = mark;
1131 fl6.flowi6_flags = 0;
1132 fl6.daddr = iph->daddr;
1133 fl6.saddr = iph->saddr;
1134 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1135
1136 dst = ip6_route_output(net, NULL, &fl6);
1137 if (!dst->error)
1138 rt6_do_redirect(dst, skb);
1139 dst_release(dst);
1140}
1141EXPORT_SYMBOL_GPL(ip6_redirect);
1142
1143void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1144{
1145 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1146}
1147EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1148
0dbaee3b 1149static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1150{
0dbaee3b
DM
1151 struct net_device *dev = dst->dev;
1152 unsigned int mtu = dst_mtu(dst);
1153 struct net *net = dev_net(dev);
1154
1da177e4
LT
1155 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1156
5578689a
DL
1157 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1158 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1159
1160 /*
1ab1457c
YH
1161 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1162 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1163 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1164 * rely only on pmtu discovery"
1165 */
1166 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1167 mtu = IPV6_MAXPLEN;
1168 return mtu;
1169}
1170
ebb762f2 1171static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1172{
d33e4553 1173 struct inet6_dev *idev;
618f9bc7
SK
1174 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1175
1176 if (mtu)
1177 return mtu;
1178
1179 mtu = IPV6_MIN_MTU;
d33e4553
DM
1180
1181 rcu_read_lock();
1182 idev = __in6_dev_get(dst->dev);
1183 if (idev)
1184 mtu = idev->cnf.mtu6;
1185 rcu_read_unlock();
1186
1187 return mtu;
1188}
1189
3b00944c
YH
1190static struct dst_entry *icmp6_dst_gc_list;
1191static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1192
3b00944c 1193struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1194 struct neighbour *neigh,
87a11578 1195 struct flowi6 *fl6)
1da177e4 1196{
87a11578 1197 struct dst_entry *dst;
1da177e4
LT
1198 struct rt6_info *rt;
1199 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1200 struct net *net = dev_net(dev);
1da177e4 1201
38308473 1202 if (unlikely(!idev))
122bdf67 1203 return ERR_PTR(-ENODEV);
1da177e4 1204
8b96d22d 1205 rt = ip6_dst_alloc(net, dev, 0, NULL);
38308473 1206 if (unlikely(!rt)) {
1da177e4 1207 in6_dev_put(idev);
87a11578 1208 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1209 goto out;
1210 }
1211
1da177e4
LT
1212 if (neigh)
1213 neigh_hold(neigh);
14deae41 1214 else {
f894cbf8 1215 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
b43faac6 1216 if (IS_ERR(neigh)) {
252c3d84 1217 in6_dev_put(idev);
b43faac6
DM
1218 dst_free(&rt->dst);
1219 return ERR_CAST(neigh);
1220 }
14deae41 1221 }
1da177e4 1222
8e2ec639
YZ
1223 rt->dst.flags |= DST_HOST;
1224 rt->dst.output = ip6_output;
97cac082 1225 rt->n = neigh;
d8d1f30b 1226 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1227 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1228 rt->rt6i_dst.plen = 128;
1229 rt->rt6i_idev = idev;
7011687f 1230 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1231
3b00944c 1232 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1233 rt->dst.next = icmp6_dst_gc_list;
1234 icmp6_dst_gc_list = &rt->dst;
3b00944c 1235 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1236
5578689a 1237 fib6_force_start_gc(net);
1da177e4 1238
87a11578
DM
1239 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1240
1da177e4 1241out:
87a11578 1242 return dst;
1da177e4
LT
1243}
1244
3d0f24a7 1245int icmp6_dst_gc(void)
1da177e4 1246{
e9476e95 1247 struct dst_entry *dst, **pprev;
3d0f24a7 1248 int more = 0;
1da177e4 1249
3b00944c
YH
1250 spin_lock_bh(&icmp6_dst_lock);
1251 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1252
1da177e4
LT
1253 while ((dst = *pprev) != NULL) {
1254 if (!atomic_read(&dst->__refcnt)) {
1255 *pprev = dst->next;
1256 dst_free(dst);
1da177e4
LT
1257 } else {
1258 pprev = &dst->next;
3d0f24a7 1259 ++more;
1da177e4
LT
1260 }
1261 }
1262
3b00944c 1263 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1264
3d0f24a7 1265 return more;
1da177e4
LT
1266}
1267
1e493d19
DM
1268static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1269 void *arg)
1270{
1271 struct dst_entry *dst, **pprev;
1272
1273 spin_lock_bh(&icmp6_dst_lock);
1274 pprev = &icmp6_dst_gc_list;
1275 while ((dst = *pprev) != NULL) {
1276 struct rt6_info *rt = (struct rt6_info *) dst;
1277 if (func(rt, arg)) {
1278 *pprev = dst->next;
1279 dst_free(dst);
1280 } else {
1281 pprev = &dst->next;
1282 }
1283 }
1284 spin_unlock_bh(&icmp6_dst_lock);
1285}
1286
569d3645 1287static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1288{
1da177e4 1289 unsigned long now = jiffies;
86393e52 1290 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1291 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1292 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1293 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1294 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1295 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1296 int entries;
7019b78e 1297
fc66f95c 1298 entries = dst_entries_get_fast(ops);
7019b78e 1299 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1300 entries <= rt_max_size)
1da177e4
LT
1301 goto out;
1302
6891a346
BT
1303 net->ipv6.ip6_rt_gc_expire++;
1304 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1305 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1306 entries = dst_entries_get_slow(ops);
1307 if (entries < ops->gc_thresh)
7019b78e 1308 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1309out:
7019b78e 1310 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1311 return entries > rt_max_size;
1da177e4
LT
1312}
1313
1314/* Clean host part of a prefix. Not necessary in radix tree,
1315 but results in cleaner routing tables.
1316
1317 Remove it only when all the things will work!
1318 */
1319
6b75d090 1320int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1321{
5170ae82 1322 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1323 if (hoplimit == 0) {
6b75d090 1324 struct net_device *dev = dst->dev;
c68f24cc
ED
1325 struct inet6_dev *idev;
1326
1327 rcu_read_lock();
1328 idev = __in6_dev_get(dev);
1329 if (idev)
6b75d090 1330 hoplimit = idev->cnf.hop_limit;
c68f24cc 1331 else
53b7997f 1332 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1333 rcu_read_unlock();
1da177e4
LT
1334 }
1335 return hoplimit;
1336}
abbf46ae 1337EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1338
1339/*
1340 *
1341 */
1342
/*
 * ip6_route_add - build a route from @cfg and insert it with __ip6_ins_rt().
 *
 * @cfg: route description.  Note that this function writes to it: a zero
 *       fc_metric becomes IP6_RT_PRIO_USER, an RTPROT_UNSPEC fc_protocol
 *       becomes RTPROT_BOOT, and fc_nlinfo.nl_net is overwritten with the
 *       namespace of the device finally chosen.
 *
 * Returns the result of __ip6_ins_rt() once the route has been fully set
 * up, or a negative errno from one of the checks below.  Every failure
 * after the initial length checks leaves through the "out" label, which
 * drops the device and inet6_dev references taken so far and frees the
 * partially built route.
 */
86872cb5 1343int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1344{
1345 int err;
5578689a 1346 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1347 struct rt6_info *rt = NULL;
1348 struct net_device *dev = NULL;
1349 struct inet6_dev *idev = NULL;
c71099ac 1350 struct fib6_table *table;
1da177e4
LT
1351 int addr_type;
1352
/* Prefix lengths are bit counts of a 128-bit address. */
86872cb5 1353 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1354 return -EINVAL;
1355#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1356 if (cfg->fc_src_len)
1da177e4
LT
1357 return -EINVAL;
1358#endif
/* An explicit interface index pins the device; hold both dev and idev. */
86872cb5 1359 if (cfg->fc_ifindex) {
1da177e4 1360 err = -ENODEV;
5578689a 1361 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1362 if (!dev)
1363 goto out;
1364 idev = in6_dev_get(dev);
1365 if (!idev)
1366 goto out;
1367 }
1368
86872cb5
TG
1369 if (cfg->fc_metric == 0)
1370 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1371
/*
 * Table selection: a netlink request without NLM_F_CREATE first tries the
 * existing table and only warns before creating it anyway; every other
 * request goes straight to fib6_new_table().
 */
d71314b4 1372 err = -ENOBUFS;
38308473
DM
1373 if (cfg->fc_nlinfo.nlh &&
1374 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1375 table = fib6_get_table(net, cfg->fc_table);
38308473 1376 if (!table) {
f3213831 1377 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1378 table = fib6_new_table(net, cfg->fc_table);
1379 }
1380 } else {
1381 table = fib6_new_table(net, cfg->fc_table);
1382 }
38308473
DM
1383
1384 if (!table)
c71099ac 1385 goto out;
c71099ac 1386
8b96d22d 1387 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1da177e4 1388
38308473 1389 if (!rt) {
1da177e4
LT
1390 err = -ENOMEM;
1391 goto out;
1392 }
1393
d8d1f30b 1394 rt->dst.obsolete = -1;
1716a961
G
1395
1396 if (cfg->fc_flags & RTF_EXPIRES)
1397 rt6_set_expires(rt, jiffies +
1398 clock_t_to_jiffies(cfg->fc_expires));
1399 else
1400 rt6_clean_expires(rt);
1da177e4 1401
86872cb5
TG
1402 if (cfg->fc_protocol == RTPROT_UNSPEC)
1403 cfg->fc_protocol = RTPROT_BOOT;
1404 rt->rt6i_protocol = cfg->fc_protocol;
1405
1406 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1407
/* Input handler depends on the destination type: multicast, local, or forwarded. */
1408 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1409 rt->dst.input = ip6_mc_input;
ab79ad14
1410 else if (cfg->fc_flags & RTF_LOCAL)
1411 rt->dst.input = ip6_input;
1da177e4 1412 else
d8d1f30b 1413 rt->dst.input = ip6_forward;
1da177e4 1414
d8d1f30b 1415 rt->dst.output = ip6_output;
1da177e4 1416
86872cb5
TG
1417 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1418 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1419 if (rt->rt6i_dst.plen == 128)
11d53b49 1420 rt->dst.flags |= DST_HOST;
1da177e4 1421
/* Non-host routes that carry metrics get their own zeroed metrics array. */
8e2ec639
YZ
1422 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1423 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1424 if (!metrics) {
1425 err = -ENOMEM;
1426 goto out;
1427 }
1428 dst_init_metrics(&rt->dst, metrics, 0);
1429 }
1da177e4 1430#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1431 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1432 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1433#endif
1434
86872cb5 1435 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1436
1437 /* We cannot add true routes via loopback here,
1438 they would result in kernel looping; promote them to reject routes
1439 */
86872cb5 1440 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1441 (dev && (dev->flags & IFF_LOOPBACK) &&
1442 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1443 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1444 /* hold loopback dev/idev if we haven't done so. */
5578689a 1445 if (dev != net->loopback_dev) {
1da177e4
LT
1446 if (dev) {
1447 dev_put(dev);
1448 in6_dev_put(idev);
1449 }
5578689a 1450 dev = net->loopback_dev;
1da177e4
LT
1451 dev_hold(dev);
1452 idev = in6_dev_get(dev);
1453 if (!idev) {
1454 err = -ENODEV;
1455 goto out;
1456 }
1457 }
d8d1f30b
CG
1458 rt->dst.output = ip6_pkt_discard_out;
1459 rt->dst.input = ip6_pkt_discard;
1460 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1461 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1462 goto install_route;
1463 }
1464
86872cb5 1465 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1466 const struct in6_addr *gw_addr;
1da177e4
LT
1467 int gwa_type;
1468
86872cb5 1469 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1470 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1471 gwa_type = ipv6_addr_type(gw_addr);
1472
1473 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1474 struct rt6_info *grt;
1475
1476 /* IPv6 strictly inhibits using not link-local
1477 addresses as nexthop address.
1478 Otherwise, router will not able to send redirects.
1479 It is very good, but in some (rare!) circumstances
1480 (SIT, PtP, NBMA NOARP links) it is handy to allow
1481 some exceptions. --ANK
1482 */
1483 err = -EINVAL;
38308473 1484 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1485 goto out;
1486
5578689a 1487 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1488
1489 err = -EHOSTUNREACH;
38308473 1490 if (!grt)
1da177e4
LT
1491 goto out;
/*
 * The route to the gateway must use the requested device; when no device
 * was requested, adopt (and hold) the gateway route's dev and idev.
 */
1492 if (dev) {
d1918542 1493 if (dev != grt->dst.dev) {
d8d1f30b 1494 dst_release(&grt->dst);
1da177e4
LT
1495 goto out;
1496 }
1497 } else {
d1918542 1498 dev = grt->dst.dev;
1da177e4
LT
1499 idev = grt->rt6i_idev;
1500 dev_hold(dev);
1501 in6_dev_hold(grt->rt6i_idev);
1502 }
/* err stays -EHOSTUNREACH if the gateway is itself only reachable via a gateway. */
38308473 1503 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1504 err = 0;
d8d1f30b 1505 dst_release(&grt->dst);
1da177e4
LT
1506
1507 if (err)
1508 goto out;
1509 }
1510 err = -EINVAL;
38308473 1511 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1512 goto out;
1513 }
1514
1515 err = -ENODEV;
38308473 1516 if (!dev)
1da177e4
LT
1517 goto out;
1518
/* A preferred source address is accepted only if ipv6_chk_addr() finds it on dev. */
c3968a85
DW
1519 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1520 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1521 err = -EINVAL;
1522 goto out;
1523 }
4e3fd7a0 1524 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1525 rt->rt6i_prefsrc.plen = 128;
1526 } else
1527 rt->rt6i_prefsrc.plen = 0;
1528
86872cb5 1529 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1530 err = rt6_bind_neighbour(rt, dev);
f83c7790 1531 if (err)
1da177e4 1532 goto out;
1da177e4
LT
1533 }
1534
86872cb5 1535 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1536
1537install_route:
/* Copy user-supplied metrics; attribute type 0 is skipped, types above RTAX_MAX are rejected. */
86872cb5
TG
1538 if (cfg->fc_mx) {
1539 struct nlattr *nla;
1540 int remaining;
1541
1542 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1543 int type = nla_type(nla);
86872cb5
TG
1544
1545 if (type) {
1546 if (type > RTAX_MAX) {
1da177e4
LT
1547 err = -EINVAL;
1548 goto out;
1549 }
86872cb5 1550
defb3519 1551 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1552 }
1da177e4
LT
1553 }
1554 }
1555
/* The dev and idev references held above are stored in the route here. */
d8d1f30b 1556 rt->dst.dev = dev;
1da177e4 1557 rt->rt6i_idev = idev;
c71099ac 1558 rt->rt6i_table = table;
63152fc0 1559
c346dca1 1560 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1561
86872cb5 1562 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1563
/* Error exit: undo the references taken so far and free the unfinished route. */
1564out:
1565 if (dev)
1566 dev_put(dev);
1567 if (idev)
1568 in6_dev_put(idev);
1569 if (rt)
d8d1f30b 1570 dst_free(&rt->dst);
1da177e4
LT
1571 return err;
1572}
1573
86872cb5 1574static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1575{
1576 int err;
c71099ac 1577 struct fib6_table *table;
d1918542 1578 struct net *net = dev_net(rt->dst.dev);
1da177e4 1579
8ed67789 1580 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1581 return -ENOENT;
1582
c71099ac
TG
1583 table = rt->rt6i_table;
1584 write_lock_bh(&table->tb6_lock);
1da177e4 1585
86872cb5 1586 err = fib6_del(rt, info);
d8d1f30b 1587 dst_release(&rt->dst);
1da177e4 1588
c71099ac 1589 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1590
1591 return err;
1592}
1593
e0a1ad73
TG
1594int ip6_del_rt(struct rt6_info *rt)
1595{
4d1169c1 1596 struct nl_info info = {
d1918542 1597 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1598 };
528c4ceb 1599 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1600}
1601
86872cb5 1602static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1603{
c71099ac 1604 struct fib6_table *table;
1da177e4
LT
1605 struct fib6_node *fn;
1606 struct rt6_info *rt;
1607 int err = -ESRCH;
1608
5578689a 1609 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1610 if (!table)
c71099ac
TG
1611 return err;
1612
1613 read_lock_bh(&table->tb6_lock);
1da177e4 1614
c71099ac 1615 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1616 &cfg->fc_dst, cfg->fc_dst_len,
1617 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1618
1da177e4 1619 if (fn) {
d8d1f30b 1620 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1621 if (cfg->fc_ifindex &&
d1918542
DM
1622 (!rt->dst.dev ||
1623 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1624 continue;
86872cb5
TG
1625 if (cfg->fc_flags & RTF_GATEWAY &&
1626 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1627 continue;
86872cb5 1628 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1629 continue;
d8d1f30b 1630 dst_hold(&rt->dst);
c71099ac 1631 read_unlock_bh(&table->tb6_lock);
1da177e4 1632
86872cb5 1633 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1634 }
1635 }
c71099ac 1636 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1637
1638 return err;
1639}
1640
/*
 * rt6_do_redirect - act on the ICMPv6 Redirect carried in @skb.
 *
 * @dst: route the caller looked up for the redirected flow.
 * @skb: the received Redirect message.
 *
 * The message is validated (length, non-multicast destination, link-local
 * unicast target unless target == destination, receiving interface not
 * forwarding and accepting redirects, well-formed ND options).  On success
 * the neighbour entry for the target is updated and a host cache route
 * (RTF_CACHE) to the destination via that neighbour is inserted; listeners
 * are told through a NETEVENT_REDIRECT notification.  Invalid messages are
 * dropped, most of them with a ratelimited debug message.
 */
6e157b6a 1641static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb)
a6279458 1642{
e8599ff4 1643 struct net *net = dev_net(skb->dev);
a6279458 1644 struct netevent_redirect netevent;
e8599ff4
DM
1645 struct rt6_info *rt, *nrt = NULL;
1646 const struct in6_addr *target;
e8599ff4 1647 struct ndisc_options ndopts;
6e157b6a
DM
1648 const struct in6_addr *dest;
1649 struct neighbour *old_neigh;
e8599ff4
DM
1650 struct inet6_dev *in6_dev;
1651 struct neighbour *neigh;
1652 struct icmp6hdr *icmph;
6e157b6a
DM
1653 int optlen, on_link;
1654 u8 *lladdr;
e8599ff4
DM
1655
/*
 * Whatever follows the ICMPv6 header and the two addresses (target and
 * destination) is the ND options area; a negative length means the packet
 * is truncated.
 */
1656 optlen = skb->tail - skb->transport_header;
1657 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1658
1659 if (optlen < 0) {
6e157b6a 1660 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
1661 return;
1662 }
1663
/* The target address directly follows the ICMPv6 header, the destination follows the target. */
1664 icmph = icmp6_hdr(skb);
1665 target = (const struct in6_addr *) (icmph + 1);
1666 dest = target + 1;
1667
1668 if (ipv6_addr_is_multicast(dest)) {
6e157b6a 1669 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
1670 return;
1671 }
1672
/* target == destination means the destination is on-link; otherwise the target must be link-local unicast. */
6e157b6a 1673 on_link = 0;
e8599ff4
DM
1674 if (ipv6_addr_equal(dest, target)) {
1675 on_link = 1;
1676 } else if (ipv6_addr_type(target) !=
1677 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 1678 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
1679 return;
1680 }
1681
/* Silently ignore redirects on interfaces that forward or have accept_redirects off. */
1682 in6_dev = __in6_dev_get(skb->dev);
1683 if (!in6_dev)
1684 return;
1685 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1686 return;
1687
1688 /* RFC2461 8.1:
1689 * The IP source address of the Redirect MUST be the same as the current
1690 * first-hop router for the specified ICMP Destination Address.
1691 */
1692
1693 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1694 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1695 return;
1696 }
6e157b6a
DM
1697
/* The target link-layer address option is optional; lladdr stays NULL without it. */
1698 lladdr = NULL;
e8599ff4
DM
1699 if (ndopts.nd_opts_tgt_lladdr) {
1700 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1701 skb->dev);
1702 if (!lladdr) {
1703 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1704 return;
1705 }
1706 }
1707
6e157b6a
DM
1708 rt = (struct rt6_info *) dst;
1709 if (rt == net->ipv6.ip6_null_entry) {
1710 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 1711 return;
6e157b6a 1712 }
e8599ff4 1713
6e157b6a
DM
1714 /* Redirect received -> path was valid.
1715 * Look, redirects are sent only in response to data packets,
1716 * so that this nexthop apparently is reachable. --ANK
1717 */
1718 dst_confirm(&rt->dst);
a6279458 1719
/* NOTE(review): the final argument 1 presumably asks for the entry to be created if missing - confirm. */
6e157b6a
DM
1720 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1721 if (!neigh)
1722 return;
a6279458 1723
6e157b6a
DM
1724 /* Duplicate redirect: silently ignore. */
1725 old_neigh = rt->n;
1726 if (neigh == old_neigh)
a6279458 1727 goto out;
1da177e4 1728
1da177e4
LT
1729 /*
1730 * We have finally decided to accept it.
1731 */
1732
/* The router flags are only passed when the target is a router, i.e. not on-link. */
1ab1457c 1733 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1734 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1735 NEIGH_UPDATE_F_OVERRIDE|
1736 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1737 NEIGH_UPDATE_F_ISROUTER))
1738 );
1739
/* Clone the current route into a host route for dest, then point it at the new neighbour. */
21efcfa0 1740 nrt = ip6_rt_copy(rt, dest);
38308473 1741 if (!nrt)
1da177e4
LT
1742 goto out;
1743
1744 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1745 if (on_link)
1746 nrt->rt6i_flags &= ~RTF_GATEWAY;
1747
4e3fd7a0 1748 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
97cac082 1749 nrt->n = neigh_clone(neigh);
1da177e4 1750
40e22e8f 1751 if (ip6_ins_rt(nrt))
1da177e4
LT
1752 goto out;
1753
d8d1f30b 1754 netevent.old = &rt->dst;
1d248b1c 1755 netevent.old_neigh = old_neigh;
d8d1f30b 1756 netevent.new = &nrt->dst;
1d248b1c
DM
1757 netevent.new_neigh = neigh;
1758 netevent.daddr = dest;
8d71740c
TT
1759 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1760
/* If the old route was itself a cache entry, take a reference and delete it. */
38308473 1761 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 1762 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 1763 ip6_del_rt(rt);
1da177e4
LT
1764 }
1765
/* Release the neighbour looked up above. */
1766out:
e8599ff4 1767 neigh_release(neigh);
6e157b6a
DM
1768}
1769
1da177e4
LT
1770/*
1771 * Misc support functions
1772 */
1773
1716a961 1774static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1775 const struct in6_addr *dest)
1da177e4 1776{
d1918542 1777 struct net *net = dev_net(ort->dst.dev);
8b96d22d
DM
1778 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1779 ort->rt6i_table);
1da177e4
LT
1780
1781 if (rt) {
d8d1f30b
CG
1782 rt->dst.input = ort->dst.input;
1783 rt->dst.output = ort->dst.output;
8e2ec639 1784 rt->dst.flags |= DST_HOST;
d8d1f30b 1785
4e3fd7a0 1786 rt->rt6i_dst.addr = *dest;
8e2ec639 1787 rt->rt6i_dst.plen = 128;
defb3519 1788 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1789 rt->dst.error = ort->dst.error;
1da177e4
LT
1790 rt->rt6i_idev = ort->rt6i_idev;
1791 if (rt->rt6i_idev)
1792 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1793 rt->dst.lastuse = jiffies;
1da177e4 1794
4e3fd7a0 1795 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1796 rt->rt6i_flags = ort->rt6i_flags;
1797 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1798 (RTF_DEFAULT | RTF_ADDRCONF))
1799 rt6_set_from(rt, ort);
1800 else
1801 rt6_clean_expires(rt);
1da177e4
LT
1802 rt->rt6i_metric = 0;
1803
1da177e4
LT
1804#ifdef CONFIG_IPV6_SUBTREES
1805 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1806#endif
0f6c6392 1807 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1808 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1809 }
1810 return rt;
1811}
1812
70ceb4f5 1813#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1814static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1815 const struct in6_addr *prefix, int prefixlen,
1816 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1817{
1818 struct fib6_node *fn;
1819 struct rt6_info *rt = NULL;
c71099ac
TG
1820 struct fib6_table *table;
1821
efa2cea0 1822 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1823 if (!table)
c71099ac 1824 return NULL;
70ceb4f5 1825
c71099ac
TG
1826 write_lock_bh(&table->tb6_lock);
1827 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1828 if (!fn)
1829 goto out;
1830
d8d1f30b 1831 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1832 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1833 continue;
1834 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1835 continue;
1836 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1837 continue;
d8d1f30b 1838 dst_hold(&rt->dst);
70ceb4f5
YH
1839 break;
1840 }
1841out:
c71099ac 1842 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1843 return rt;
1844}
1845
efa2cea0 1846static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1847 const struct in6_addr *prefix, int prefixlen,
1848 const struct in6_addr *gwaddr, int ifindex,
95c96174 1849 unsigned int pref)
70ceb4f5 1850{
86872cb5
TG
1851 struct fib6_config cfg = {
1852 .fc_table = RT6_TABLE_INFO,
238fc7ea 1853 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1854 .fc_ifindex = ifindex,
1855 .fc_dst_len = prefixlen,
1856 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1857 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1858 .fc_nlinfo.pid = 0,
1859 .fc_nlinfo.nlh = NULL,
1860 .fc_nlinfo.nl_net = net,
86872cb5
TG
1861 };
1862
4e3fd7a0
AD
1863 cfg.fc_dst = *prefix;
1864 cfg.fc_gateway = *gwaddr;
70ceb4f5 1865
e317da96
YH
1866 /* We should treat it as a default route if prefix length is 0. */
1867 if (!prefixlen)
86872cb5 1868 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1869
86872cb5 1870 ip6_route_add(&cfg);
70ceb4f5 1871
efa2cea0 1872 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1873}
1874#endif
1875
b71d1d42 1876struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1877{
1da177e4 1878 struct rt6_info *rt;
c71099ac 1879 struct fib6_table *table;
1da177e4 1880
c346dca1 1881 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1882 if (!table)
c71099ac 1883 return NULL;
1da177e4 1884
c71099ac 1885 write_lock_bh(&table->tb6_lock);
d8d1f30b 1886 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1887 if (dev == rt->dst.dev &&
045927ff 1888 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1889 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1890 break;
1891 }
1892 if (rt)
d8d1f30b 1893 dst_hold(&rt->dst);
c71099ac 1894 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1895 return rt;
1896}
1897
b71d1d42 1898struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1899 struct net_device *dev,
1900 unsigned int pref)
1da177e4 1901{
86872cb5
TG
1902 struct fib6_config cfg = {
1903 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1904 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1905 .fc_ifindex = dev->ifindex,
1906 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1907 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1908 .fc_nlinfo.pid = 0,
1909 .fc_nlinfo.nlh = NULL,
c346dca1 1910 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1911 };
1da177e4 1912
4e3fd7a0 1913 cfg.fc_gateway = *gwaddr;
1da177e4 1914
86872cb5 1915 ip6_route_add(&cfg);
1da177e4 1916
1da177e4
LT
1917 return rt6_get_dflt_router(gwaddr, dev);
1918}
1919
7b4da532 1920void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1921{
1922 struct rt6_info *rt;
c71099ac
TG
1923 struct fib6_table *table;
1924
1925 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1926 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1927 if (!table)
c71099ac 1928 return;
1da177e4
LT
1929
1930restart:
c71099ac 1931 read_lock_bh(&table->tb6_lock);
d8d1f30b 1932 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1933 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1934 dst_hold(&rt->dst);
c71099ac 1935 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1936 ip6_del_rt(rt);
1da177e4
LT
1937 goto restart;
1938 }
1939 }
c71099ac 1940 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1941}
1942
5578689a
DL
1943static void rtmsg_to_fib6_config(struct net *net,
1944 struct in6_rtmsg *rtmsg,
86872cb5
TG
1945 struct fib6_config *cfg)
1946{
1947 memset(cfg, 0, sizeof(*cfg));
1948
1949 cfg->fc_table = RT6_TABLE_MAIN;
1950 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1951 cfg->fc_metric = rtmsg->rtmsg_metric;
1952 cfg->fc_expires = rtmsg->rtmsg_info;
1953 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1954 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1955 cfg->fc_flags = rtmsg->rtmsg_flags;
1956
5578689a 1957 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1958
4e3fd7a0
AD
1959 cfg->fc_dst = rtmsg->rtmsg_dst;
1960 cfg->fc_src = rtmsg->rtmsg_src;
1961 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1962}
1963
5578689a 1964int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1965{
86872cb5 1966 struct fib6_config cfg;
1da177e4
LT
1967 struct in6_rtmsg rtmsg;
1968 int err;
1969
1970 switch(cmd) {
1971 case SIOCADDRT: /* Add a route */
1972 case SIOCDELRT: /* Delete a route */
1973 if (!capable(CAP_NET_ADMIN))
1974 return -EPERM;
1975 err = copy_from_user(&rtmsg, arg,
1976 sizeof(struct in6_rtmsg));
1977 if (err)
1978 return -EFAULT;
86872cb5 1979
5578689a 1980 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1981
1da177e4
LT
1982 rtnl_lock();
1983 switch (cmd) {
1984 case SIOCADDRT:
86872cb5 1985 err = ip6_route_add(&cfg);
1da177e4
LT
1986 break;
1987 case SIOCDELRT:
86872cb5 1988 err = ip6_route_del(&cfg);
1da177e4
LT
1989 break;
1990 default:
1991 err = -EINVAL;
1992 }
1993 rtnl_unlock();
1994
1995 return err;
3ff50b79 1996 }
1da177e4
LT
1997
1998 return -EINVAL;
1999}
2000
2001/*
2002 * Drop the packet on the floor
2003 */
2004
d5fdd6ba 2005static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2006{
612f09e8 2007 int type;
adf30907 2008 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2009 switch (ipstats_mib_noroutes) {
2010 case IPSTATS_MIB_INNOROUTES:
0660e03f 2011 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2012 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2013 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2014 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2015 break;
2016 }
2017 /* FALLTHROUGH */
2018 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2019 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2020 ipstats_mib_noroutes);
612f09e8
YH
2021 break;
2022 }
3ffe533c 2023 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2024 kfree_skb(skb);
2025 return 0;
2026}
2027
9ce8ade0
TG
2028static int ip6_pkt_discard(struct sk_buff *skb)
2029{
612f09e8 2030 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2031}
2032
20380731 2033static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2034{
adf30907 2035 skb->dev = skb_dst(skb)->dev;
612f09e8 2036 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2037}
2038
6723ab54
DM
2039#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2040
9ce8ade0
TG
2041static int ip6_pkt_prohibit(struct sk_buff *skb)
2042{
612f09e8 2043 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2044}
2045
2046static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2047{
adf30907 2048 skb->dev = skb_dst(skb)->dev;
612f09e8 2049 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2050}
2051
6723ab54
DM
2052#endif
2053
1da177e4
LT
2054/*
2055 * Allocate a dst for local (unicast / anycast) address.
2056 */
2057
2058struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2059 const struct in6_addr *addr,
8f031519 2060 bool anycast)
1da177e4 2061{
c346dca1 2062 struct net *net = dev_net(idev->dev);
8b96d22d 2063 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
f83c7790 2064 int err;
1da177e4 2065
38308473 2066 if (!rt) {
f3213831 2067 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
1da177e4 2068 return ERR_PTR(-ENOMEM);
40385653 2069 }
1da177e4 2070
1da177e4
LT
2071 in6_dev_hold(idev);
2072
11d53b49 2073 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2074 rt->dst.input = ip6_input;
2075 rt->dst.output = ip6_output;
1da177e4 2076 rt->rt6i_idev = idev;
d8d1f30b 2077 rt->dst.obsolete = -1;
1da177e4
LT
2078
2079 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2080 if (anycast)
2081 rt->rt6i_flags |= RTF_ANYCAST;
2082 else
1da177e4 2083 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2084 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2085 if (err) {
d8d1f30b 2086 dst_free(&rt->dst);
f83c7790 2087 return ERR_PTR(err);
1da177e4
LT
2088 }
2089
4e3fd7a0 2090 rt->rt6i_dst.addr = *addr;
1da177e4 2091 rt->rt6i_dst.plen = 128;
5578689a 2092 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2093
d8d1f30b 2094 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2095
2096 return rt;
2097}
2098
c3968a85
DW
2099int ip6_route_get_saddr(struct net *net,
2100 struct rt6_info *rt,
b71d1d42 2101 const struct in6_addr *daddr,
c3968a85
DW
2102 unsigned int prefs,
2103 struct in6_addr *saddr)
2104{
2105 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2106 int err = 0;
2107 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2108 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2109 else
2110 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2111 daddr, prefs, saddr);
2112 return err;
2113}
2114
2115/* remove deleted ip from prefsrc entries */
/* Walker argument used when clearing a deleted address from prefsrc. */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace owning the null entry */
	struct in6_addr *addr;		/* address being removed */
};
2121
2122static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2123{
2124 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2125 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2126 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2127
d1918542 2128 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2129 rt != net->ipv6.ip6_null_entry &&
2130 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2131 /* remove prefsrc entry */
2132 rt->rt6i_prefsrc.plen = 0;
2133 }
2134 return 0;
2135}
2136
2137void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2138{
2139 struct net *net = dev_net(ifp->idev->dev);
2140 struct arg_dev_net_ip adni = {
2141 .dev = ifp->idev->dev,
2142 .net = net,
2143 .addr = &ifp->addr,
2144 };
2145 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2146}
2147
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace owning the null entry */
};
2152
1da177e4
LT
2153static int fib6_ifdown(struct rt6_info *rt, void *arg)
2154{
bc3ef660 2155 const struct arg_dev_net *adn = arg;
2156 const struct net_device *dev = adn->dev;
8ed67789 2157
d1918542 2158 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2159 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2160 return -1;
c159d30c 2161
1da177e4
LT
2162 return 0;
2163}
2164
f3db4851 2165void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2166{
8ed67789
DL
2167 struct arg_dev_net adn = {
2168 .dev = dev,
2169 .net = net,
2170 };
2171
2172 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2173 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2174}
2175
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2180
2181static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2182{
2183 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2184 struct inet6_dev *idev;
2185
2186 /* In IPv6 pmtu discovery is not optional,
2187 so that RTAX_MTU lock cannot disable it.
2188 We still use this lock to block changes
2189 caused by addrconf/ndisc.
2190 */
2191
2192 idev = __in6_dev_get(arg->dev);
38308473 2193 if (!idev)
1da177e4
LT
2194 return 0;
2195
2196 /* For administrative MTU increase, there is no way to discover
2197 IPv6 PMTU increase, so PMTU increase should be updated here.
2198 Since RFC 1981 doesn't include administrative MTU increase
2199 update PMTU increase is a MUST. (i.e. jumbo frame)
2200 */
2201 /*
2202 If new MTU is less than route PMTU, this new MTU will be the
2203 lowest MTU in the path, update the route PMTU to reflect PMTU
2204 decreases; if new MTU is greater than route PMTU, and the
2205 old MTU is the lowest MTU in the path, update the route PMTU
2206 to reflect the increase. In this case if the other nodes' MTU
2207 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2208 PMTU discouvery.
2209 */
d1918542 2210 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2211 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2212 (dst_mtu(&rt->dst) >= arg->mtu ||
2213 (dst_mtu(&rt->dst) < arg->mtu &&
2214 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2215 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2216 }
1da177e4
LT
2217 return 0;
2218}
2219
95c96174 2220void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2221{
c71099ac
TG
2222 struct rt6_mtu_change_arg arg = {
2223 .dev = dev,
2224 .mtu = mtu,
2225 };
1da177e4 2226
c346dca1 2227 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2228}
2229
ef7c79ed 2230static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2231 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2232 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2233 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2234 [RTA_PRIORITY] = { .type = NLA_U32 },
2235 [RTA_METRICS] = { .type = NLA_NESTED },
2236};
2237
2238static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2239 struct fib6_config *cfg)
1da177e4 2240{
86872cb5
TG
2241 struct rtmsg *rtm;
2242 struct nlattr *tb[RTA_MAX+1];
2243 int err;
1da177e4 2244
86872cb5
TG
2245 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2246 if (err < 0)
2247 goto errout;
1da177e4 2248
86872cb5
TG
2249 err = -EINVAL;
2250 rtm = nlmsg_data(nlh);
2251 memset(cfg, 0, sizeof(*cfg));
2252
2253 cfg->fc_table = rtm->rtm_table;
2254 cfg->fc_dst_len = rtm->rtm_dst_len;
2255 cfg->fc_src_len = rtm->rtm_src_len;
2256 cfg->fc_flags = RTF_UP;
2257 cfg->fc_protocol = rtm->rtm_protocol;
2258
2259 if (rtm->rtm_type == RTN_UNREACHABLE)
2260 cfg->fc_flags |= RTF_REJECT;
2261
ab79ad14
2262 if (rtm->rtm_type == RTN_LOCAL)
2263 cfg->fc_flags |= RTF_LOCAL;
2264
86872cb5
TG
2265 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2266 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2267 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2268
2269 if (tb[RTA_GATEWAY]) {
2270 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2271 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2272 }
86872cb5
TG
2273
2274 if (tb[RTA_DST]) {
2275 int plen = (rtm->rtm_dst_len + 7) >> 3;
2276
2277 if (nla_len(tb[RTA_DST]) < plen)
2278 goto errout;
2279
2280 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2281 }
86872cb5
TG
2282
2283 if (tb[RTA_SRC]) {
2284 int plen = (rtm->rtm_src_len + 7) >> 3;
2285
2286 if (nla_len(tb[RTA_SRC]) < plen)
2287 goto errout;
2288
2289 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2290 }
86872cb5 2291
c3968a85
DW
2292 if (tb[RTA_PREFSRC])
2293 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2294
86872cb5
TG
2295 if (tb[RTA_OIF])
2296 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2297
2298 if (tb[RTA_PRIORITY])
2299 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2300
2301 if (tb[RTA_METRICS]) {
2302 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2303 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2304 }
86872cb5
TG
2305
2306 if (tb[RTA_TABLE])
2307 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2308
2309 err = 0;
2310errout:
2311 return err;
1da177e4
LT
2312}
2313
c127ea2c 2314static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2315{
86872cb5
TG
2316 struct fib6_config cfg;
2317 int err;
1da177e4 2318
86872cb5
TG
2319 err = rtm_to_fib6_config(skb, nlh, &cfg);
2320 if (err < 0)
2321 return err;
2322
2323 return ip6_route_del(&cfg);
1da177e4
LT
2324}
2325
c127ea2c 2326static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2327{
86872cb5
TG
2328 struct fib6_config cfg;
2329 int err;
1da177e4 2330
86872cb5
TG
2331 err = rtm_to_fib6_config(skb, nlh, &cfg);
2332 if (err < 0)
2333 return err;
2334
2335 return ip6_route_add(&cfg);
1da177e4
LT
2336}
2337
339bf98f
TG
2338static inline size_t rt6_nlmsg_size(void)
2339{
2340 return NLMSG_ALIGN(sizeof(struct rtmsg))
2341 + nla_total_size(16) /* RTA_SRC */
2342 + nla_total_size(16) /* RTA_DST */
2343 + nla_total_size(16) /* RTA_GATEWAY */
2344 + nla_total_size(16) /* RTA_PREFSRC */
2345 + nla_total_size(4) /* RTA_TABLE */
2346 + nla_total_size(4) /* RTA_IIF */
2347 + nla_total_size(4) /* RTA_OIF */
2348 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2349 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2350 + nla_total_size(sizeof(struct rta_cacheinfo));
2351}
2352
191cd582
BH
2353static int rt6_fill_node(struct net *net,
2354 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2355 struct in6_addr *dst, struct in6_addr *src,
2356 int iif, int type, u32 pid, u32 seq,
7bc570c8 2357 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2358{
2359 struct rtmsg *rtm;
2d7202bf 2360 struct nlmsghdr *nlh;
e3703b3d 2361 long expires;
9e762a4a 2362 u32 table;
f2c31e32 2363 struct neighbour *n;
1da177e4
LT
2364
2365 if (prefix) { /* user wants prefix routes only */
2366 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2367 /* success since this is not a prefix route */
2368 return 1;
2369 }
2370 }
2371
2d7202bf 2372 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2373 if (!nlh)
26932566 2374 return -EMSGSIZE;
2d7202bf
TG
2375
2376 rtm = nlmsg_data(nlh);
1da177e4
LT
2377 rtm->rtm_family = AF_INET6;
2378 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2379 rtm->rtm_src_len = rt->rt6i_src.plen;
2380 rtm->rtm_tos = 0;
c71099ac 2381 if (rt->rt6i_table)
9e762a4a 2382 table = rt->rt6i_table->tb6_id;
c71099ac 2383 else
9e762a4a
PM
2384 table = RT6_TABLE_UNSPEC;
2385 rtm->rtm_table = table;
c78679e8
DM
2386 if (nla_put_u32(skb, RTA_TABLE, table))
2387 goto nla_put_failure;
38308473 2388 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2389 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2390 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2391 rtm->rtm_type = RTN_LOCAL;
d1918542 2392 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2393 rtm->rtm_type = RTN_LOCAL;
2394 else
2395 rtm->rtm_type = RTN_UNICAST;
2396 rtm->rtm_flags = 0;
2397 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2398 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2399 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4
LT
2400 rtm->rtm_protocol = RTPROT_REDIRECT;
2401 else if (rt->rt6i_flags & RTF_ADDRCONF)
2402 rtm->rtm_protocol = RTPROT_KERNEL;
38308473 2403 else if (rt->rt6i_flags & RTF_DEFAULT)
1da177e4
LT
2404 rtm->rtm_protocol = RTPROT_RA;
2405
38308473 2406 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2407 rtm->rtm_flags |= RTM_F_CLONED;
2408
2409 if (dst) {
c78679e8
DM
2410 if (nla_put(skb, RTA_DST, 16, dst))
2411 goto nla_put_failure;
1ab1457c 2412 rtm->rtm_dst_len = 128;
1da177e4 2413 } else if (rtm->rtm_dst_len)
c78679e8
DM
2414 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2415 goto nla_put_failure;
1da177e4
LT
2416#ifdef CONFIG_IPV6_SUBTREES
2417 if (src) {
c78679e8
DM
2418 if (nla_put(skb, RTA_SRC, 16, src))
2419 goto nla_put_failure;
1ab1457c 2420 rtm->rtm_src_len = 128;
c78679e8
DM
2421 } else if (rtm->rtm_src_len &&
2422 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2423 goto nla_put_failure;
1da177e4 2424#endif
7bc570c8
YH
2425 if (iif) {
2426#ifdef CONFIG_IPV6_MROUTE
2427 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2428 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2429 if (err <= 0) {
2430 if (!nowait) {
2431 if (err == 0)
2432 return 0;
2433 goto nla_put_failure;
2434 } else {
2435 if (err == -EMSGSIZE)
2436 goto nla_put_failure;
2437 }
2438 }
2439 } else
2440#endif
c78679e8
DM
2441 if (nla_put_u32(skb, RTA_IIF, iif))
2442 goto nla_put_failure;
7bc570c8 2443 } else if (dst) {
1da177e4 2444 struct in6_addr saddr_buf;
c78679e8
DM
2445 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2446 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2447 goto nla_put_failure;
1da177e4 2448 }
2d7202bf 2449
c3968a85
DW
2450 if (rt->rt6i_prefsrc.plen) {
2451 struct in6_addr saddr_buf;
4e3fd7a0 2452 saddr_buf = rt->rt6i_prefsrc.addr;
c78679e8
DM
2453 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2454 goto nla_put_failure;
c3968a85
DW
2455 }
2456
defb3519 2457 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2458 goto nla_put_failure;
2459
f2c31e32 2460 rcu_read_lock();
97cac082 2461 n = rt->n;
94f826b8
ED
2462 if (n) {
2463 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2464 rcu_read_unlock();
2465 goto nla_put_failure;
2466 }
2467 }
f2c31e32 2468 rcu_read_unlock();
2d7202bf 2469
c78679e8
DM
2470 if (rt->dst.dev &&
2471 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2472 goto nla_put_failure;
2473 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2474 goto nla_put_failure;
36e3deae
YH
2475 if (!(rt->rt6i_flags & RTF_EXPIRES))
2476 expires = 0;
d1918542
DM
2477 else if (rt->dst.expires - jiffies < INT_MAX)
2478 expires = rt->dst.expires - jiffies;
36e3deae
YH
2479 else
2480 expires = INT_MAX;
69cdf8f9 2481
87a50699 2482 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 2483 goto nla_put_failure;
2d7202bf
TG
2484
2485 return nlmsg_end(skb, nlh);
2486
2487nla_put_failure:
26932566
PM
2488 nlmsg_cancel(skb, nlh);
2489 return -EMSGSIZE;
1da177e4
LT
2490}
2491
1b43af54 2492int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2493{
2494 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2495 int prefix;
2496
2d7202bf
TG
2497 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2498 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2499 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2500 } else
2501 prefix = 0;
2502
191cd582
BH
2503 return rt6_fill_node(arg->net,
2504 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2505 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2506 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2507}
2508
c127ea2c 2509static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2510{
3b1e0a65 2511 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2512 struct nlattr *tb[RTA_MAX+1];
2513 struct rt6_info *rt;
1da177e4 2514 struct sk_buff *skb;
ab364a6f 2515 struct rtmsg *rtm;
4c9483b2 2516 struct flowi6 fl6;
72331bc0 2517 int err, iif = 0, oif = 0;
1da177e4 2518
ab364a6f
TG
2519 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2520 if (err < 0)
2521 goto errout;
1da177e4 2522
ab364a6f 2523 err = -EINVAL;
4c9483b2 2524 memset(&fl6, 0, sizeof(fl6));
1da177e4 2525
ab364a6f
TG
2526 if (tb[RTA_SRC]) {
2527 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2528 goto errout;
2529
4e3fd7a0 2530 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2531 }
2532
2533 if (tb[RTA_DST]) {
2534 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2535 goto errout;
2536
4e3fd7a0 2537 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2538 }
2539
2540 if (tb[RTA_IIF])
2541 iif = nla_get_u32(tb[RTA_IIF]);
2542
2543 if (tb[RTA_OIF])
72331bc0 2544 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2545
2546 if (iif) {
2547 struct net_device *dev;
72331bc0
SL
2548 int flags = 0;
2549
5578689a 2550 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2551 if (!dev) {
2552 err = -ENODEV;
ab364a6f 2553 goto errout;
1da177e4 2554 }
72331bc0
SL
2555
2556 fl6.flowi6_iif = iif;
2557
2558 if (!ipv6_addr_any(&fl6.saddr))
2559 flags |= RT6_LOOKUP_F_HAS_SADDR;
2560
2561 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2562 flags);
2563 } else {
2564 fl6.flowi6_oif = oif;
2565
2566 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2567 }
2568
ab364a6f 2569 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2570 if (!skb) {
2173bff5 2571 dst_release(&rt->dst);
ab364a6f
TG
2572 err = -ENOBUFS;
2573 goto errout;
2574 }
1da177e4 2575
ab364a6f
TG
2576 /* Reserve room for dummy headers, this skb can pass
2577 through good chunk of routing engine.
2578 */
459a98ed 2579 skb_reset_mac_header(skb);
ab364a6f 2580 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2581
d8d1f30b 2582 skb_dst_set(skb, &rt->dst);
1da177e4 2583
4c9483b2 2584 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2585 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2586 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2587 if (err < 0) {
ab364a6f
TG
2588 kfree_skb(skb);
2589 goto errout;
1da177e4
LT
2590 }
2591
5578689a 2592 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2593errout:
1da177e4 2594 return err;
1da177e4
LT
2595}
2596
86872cb5 2597void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2598{
2599 struct sk_buff *skb;
5578689a 2600 struct net *net = info->nl_net;
528c4ceb
DL
2601 u32 seq;
2602 int err;
2603
2604 err = -ENOBUFS;
38308473 2605 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2606
339bf98f 2607 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2608 if (!skb)
21713ebc
TG
2609 goto errout;
2610
191cd582 2611 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2612 event, info->pid, seq, 0, 0, 0);
26932566
PM
2613 if (err < 0) {
2614 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2615 WARN_ON(err == -EMSGSIZE);
2616 kfree_skb(skb);
2617 goto errout;
2618 }
1ce85fe4
PNA
2619 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2620 info->nlh, gfp_any());
2621 return;
21713ebc
TG
2622errout:
2623 if (err < 0)
5578689a 2624 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2625}
2626
8ed67789
DL
2627static int ip6_route_dev_notify(struct notifier_block *this,
2628 unsigned long event, void *data)
2629{
2630 struct net_device *dev = (struct net_device *)data;
c346dca1 2631 struct net *net = dev_net(dev);
8ed67789
DL
2632
2633 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2634 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2635 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2636#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2637 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2638 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2639 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2640 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2641#endif
2642 }
2643
2644 return NOTIFY_OK;
2645}
2646
1da177e4
LT
2647/*
2648 * /proc
2649 */
2650
2651#ifdef CONFIG_PROC_FS
2652
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): no user of this struct is visible in this part of the
 * file; confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2661
2662static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2663{
33120b30 2664 struct seq_file *m = p_arg;
69cce1d1 2665 struct neighbour *n;
1da177e4 2666
4b7a4274 2667 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2668
2669#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2670 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2671#else
33120b30 2672 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2673#endif
f2c31e32 2674 rcu_read_lock();
97cac082 2675 n = rt->n;
69cce1d1
DM
2676 if (n) {
2677 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2678 } else {
33120b30 2679 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2680 }
f2c31e32 2681 rcu_read_unlock();
33120b30 2682 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2683 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2684 rt->dst.__use, rt->rt6i_flags,
d1918542 2685 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2686 return 0;
2687}
2688
33120b30 2689static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2690{
f3db4851 2691 struct net *net = (struct net *)m->private;
32b293a5 2692 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2693 return 0;
2694}
1da177e4 2695
33120b30
AD
2696static int ipv6_route_open(struct inode *inode, struct file *file)
2697{
de05c557 2698 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2699}
2700
33120b30
AD
2701static const struct file_operations ipv6_route_proc_fops = {
2702 .owner = THIS_MODULE,
2703 .open = ipv6_route_open,
2704 .read = seq_read,
2705 .llseek = seq_lseek,
b6fcbdb4 2706 .release = single_release_net,
33120b30
AD
2707};
2708
1da177e4
LT
2709static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2710{
69ddb805 2711 struct net *net = (struct net *)seq->private;
1da177e4 2712 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2713 net->ipv6.rt6_stats->fib_nodes,
2714 net->ipv6.rt6_stats->fib_route_nodes,
2715 net->ipv6.rt6_stats->fib_rt_alloc,
2716 net->ipv6.rt6_stats->fib_rt_entries,
2717 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2718 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2719 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2720
2721 return 0;
2722}
2723
2724static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2725{
de05c557 2726 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2727}
2728
9a32144e 2729static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2730 .owner = THIS_MODULE,
2731 .open = rt6_stats_seq_open,
2732 .read = seq_read,
2733 .llseek = seq_lseek,
b6fcbdb4 2734 .release = single_release_net,
1da177e4
LT
2735};
2736#endif /* CONFIG_PROC_FS */
2737
2738#ifdef CONFIG_SYSCTL
2739
1da177e4 2740static
8d65af78 2741int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2742 void __user *buffer, size_t *lenp, loff_t *ppos)
2743{
c486da34
LAG
2744 struct net *net;
2745 int delay;
2746 if (!write)
1da177e4 2747 return -EINVAL;
c486da34
LAG
2748
2749 net = (struct net *)ctl->extra1;
2750 delay = net->ipv6.sysctl.flush_delay;
2751 proc_dointvec(ctl, write, buffer, lenp, ppos);
2752 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2753 return 0;
1da177e4
LT
2754}
2755
760f2d01 2756ctl_table ipv6_route_table_template[] = {
1ab1457c 2757 {
1da177e4 2758 .procname = "flush",
4990509f 2759 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2760 .maxlen = sizeof(int),
89c8b3a1 2761 .mode = 0200,
6d9f239a 2762 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2763 },
2764 {
1da177e4 2765 .procname = "gc_thresh",
9a7ec3a9 2766 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2767 .maxlen = sizeof(int),
2768 .mode = 0644,
6d9f239a 2769 .proc_handler = proc_dointvec,
1da177e4
LT
2770 },
2771 {
1da177e4 2772 .procname = "max_size",
4990509f 2773 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2774 .maxlen = sizeof(int),
2775 .mode = 0644,
6d9f239a 2776 .proc_handler = proc_dointvec,
1da177e4
LT
2777 },
2778 {
1da177e4 2779 .procname = "gc_min_interval",
4990509f 2780 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2781 .maxlen = sizeof(int),
2782 .mode = 0644,
6d9f239a 2783 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2784 },
2785 {
1da177e4 2786 .procname = "gc_timeout",
4990509f 2787 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2788 .maxlen = sizeof(int),
2789 .mode = 0644,
6d9f239a 2790 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2791 },
2792 {
1da177e4 2793 .procname = "gc_interval",
4990509f 2794 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2795 .maxlen = sizeof(int),
2796 .mode = 0644,
6d9f239a 2797 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2798 },
2799 {
1da177e4 2800 .procname = "gc_elasticity",
4990509f 2801 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2802 .maxlen = sizeof(int),
2803 .mode = 0644,
f3d3f616 2804 .proc_handler = proc_dointvec,
1da177e4
LT
2805 },
2806 {
1da177e4 2807 .procname = "mtu_expires",
4990509f 2808 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2809 .maxlen = sizeof(int),
2810 .mode = 0644,
6d9f239a 2811 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2812 },
2813 {
1da177e4 2814 .procname = "min_adv_mss",
4990509f 2815 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2816 .maxlen = sizeof(int),
2817 .mode = 0644,
f3d3f616 2818 .proc_handler = proc_dointvec,
1da177e4
LT
2819 },
2820 {
1da177e4 2821 .procname = "gc_min_interval_ms",
4990509f 2822 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2823 .maxlen = sizeof(int),
2824 .mode = 0644,
6d9f239a 2825 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2826 },
f8572d8f 2827 { }
1da177e4
LT
2828};
2829
2c8c1e72 2830struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2831{
2832 struct ctl_table *table;
2833
2834 table = kmemdup(ipv6_route_table_template,
2835 sizeof(ipv6_route_table_template),
2836 GFP_KERNEL);
5ee09105
YH
2837
2838 if (table) {
2839 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2840 table[0].extra1 = net;
86393e52 2841 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2842 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2843 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2844 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2845 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2846 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2847 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2848 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2849 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2850 }
2851
760f2d01
DL
2852 return table;
2853}
1da177e4
LT
2854#endif
2855
2c8c1e72 2856static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2857{
633d424b 2858 int ret = -ENOMEM;
8ed67789 2859
86393e52
AD
2860 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2861 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2862
fc66f95c
ED
2863 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2864 goto out_ip6_dst_ops;
2865
8ed67789
DL
2866 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2867 sizeof(*net->ipv6.ip6_null_entry),
2868 GFP_KERNEL);
2869 if (!net->ipv6.ip6_null_entry)
fc66f95c 2870 goto out_ip6_dst_entries;
d8d1f30b 2871 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2872 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2873 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2874 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2875 ip6_template_metrics, true);
8ed67789
DL
2876
2877#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2878 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2879 sizeof(*net->ipv6.ip6_prohibit_entry),
2880 GFP_KERNEL);
68fffc67
PZ
2881 if (!net->ipv6.ip6_prohibit_entry)
2882 goto out_ip6_null_entry;
d8d1f30b 2883 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2884 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2885 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2886 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2887 ip6_template_metrics, true);
8ed67789
DL
2888
2889 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2890 sizeof(*net->ipv6.ip6_blk_hole_entry),
2891 GFP_KERNEL);
68fffc67
PZ
2892 if (!net->ipv6.ip6_blk_hole_entry)
2893 goto out_ip6_prohibit_entry;
d8d1f30b 2894 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2895 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2896 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2897 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2898 ip6_template_metrics, true);
8ed67789
DL
2899#endif
2900
b339a47c
PZ
2901 net->ipv6.sysctl.flush_delay = 0;
2902 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2903 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2904 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2905 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2906 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2907 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2908 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2909
6891a346
BT
2910 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2911
8ed67789
DL
2912 ret = 0;
2913out:
2914 return ret;
f2fc6a54 2915
68fffc67
PZ
2916#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2917out_ip6_prohibit_entry:
2918 kfree(net->ipv6.ip6_prohibit_entry);
2919out_ip6_null_entry:
2920 kfree(net->ipv6.ip6_null_entry);
2921#endif
fc66f95c
ED
2922out_ip6_dst_entries:
2923 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2924out_ip6_dst_ops:
f2fc6a54 2925 goto out;
cdb18761
DL
2926}
2927
2c8c1e72 2928static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 2929{
8ed67789
DL
2930 kfree(net->ipv6.ip6_null_entry);
2931#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2932 kfree(net->ipv6.ip6_prohibit_entry);
2933 kfree(net->ipv6.ip6_blk_hole_entry);
2934#endif
41bb78b4 2935 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2936}
2937
d189634e
TG
2938static int __net_init ip6_route_net_init_late(struct net *net)
2939{
2940#ifdef CONFIG_PROC_FS
2941 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2942 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2943#endif
2944 return 0;
2945}
2946
2947static void __net_exit ip6_route_net_exit_late(struct net *net)
2948{
2949#ifdef CONFIG_PROC_FS
2950 proc_net_remove(net, "ipv6_route");
2951 proc_net_remove(net, "rt6_stats");
2952#endif
2953}
2954
cdb18761
DL
2955static struct pernet_operations ip6_route_net_ops = {
2956 .init = ip6_route_net_init,
2957 .exit = ip6_route_net_exit,
2958};
2959
c3426b47
DM
2960static int __net_init ipv6_inetpeer_init(struct net *net)
2961{
2962 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2963
2964 if (!bp)
2965 return -ENOMEM;
2966 inet_peer_base_init(bp);
2967 net->ipv6.peers = bp;
2968 return 0;
2969}
2970
2971static void __net_exit ipv6_inetpeer_exit(struct net *net)
2972{
2973 struct inet_peer_base *bp = net->ipv6.peers;
2974
2975 net->ipv6.peers = NULL;
56a6b248 2976 inetpeer_invalidate_tree(bp);
c3426b47
DM
2977 kfree(bp);
2978}
2979
2b823f72 2980static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
2981 .init = ipv6_inetpeer_init,
2982 .exit = ipv6_inetpeer_exit,
2983};
2984
d189634e
TG
2985static struct pernet_operations ip6_route_net_late_ops = {
2986 .init = ip6_route_net_init_late,
2987 .exit = ip6_route_net_exit_late,
2988};
2989
8ed67789
DL
2990static struct notifier_block ip6_route_dev_notifier = {
2991 .notifier_call = ip6_route_dev_notify,
2992 .priority = 0,
2993};
2994
433d49c3 2995int __init ip6_route_init(void)
1da177e4 2996{
433d49c3
DL
2997 int ret;
2998
9a7ec3a9
DL
2999 ret = -ENOMEM;
3000 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3001 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3002 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3003 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3004 goto out;
14e50e57 3005
fc66f95c 3006 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3007 if (ret)
bdb3289f 3008 goto out_kmem_cache;
bdb3289f 3009
c3426b47
DM
3010 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3011 if (ret)
e8803b6c 3012 goto out_dst_entries;
2a0c451a 3013
7e52b33b
DM
3014 ret = register_pernet_subsys(&ip6_route_net_ops);
3015 if (ret)
3016 goto out_register_inetpeer;
c3426b47 3017
5dc121e9
AE
3018 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3019
8ed67789
DL
3020 /* Registering of the loopback is done before this portion of code,
3021 * the loopback reference in rt6_info will not be taken, do it
3022 * manually for init_net */
d8d1f30b 3023 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3024 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3025 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3026 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3027 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3028 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3029 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3030 #endif
e8803b6c 3031 ret = fib6_init();
433d49c3 3032 if (ret)
8ed67789 3033 goto out_register_subsys;
433d49c3 3034
433d49c3
DL
3035 ret = xfrm6_init();
3036 if (ret)
e8803b6c 3037 goto out_fib6_init;
c35b7e72 3038
433d49c3
DL
3039 ret = fib6_rules_init();
3040 if (ret)
3041 goto xfrm6_init;
7e5449c2 3042
d189634e
TG
3043 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3044 if (ret)
3045 goto fib6_rules_init;
3046
433d49c3 3047 ret = -ENOBUFS;
c7ac8679
GR
3048 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3049 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3050 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3051 goto out_register_late_subsys;
c127ea2c 3052
8ed67789 3053 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3054 if (ret)
d189634e 3055 goto out_register_late_subsys;
8ed67789 3056
433d49c3
DL
3057out:
3058 return ret;
3059
d189634e
TG
3060out_register_late_subsys:
3061 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3062fib6_rules_init:
433d49c3
DL
3063 fib6_rules_cleanup();
3064xfrm6_init:
433d49c3 3065 xfrm6_fini();
2a0c451a
TG
3066out_fib6_init:
3067 fib6_gc_cleanup();
8ed67789
DL
3068out_register_subsys:
3069 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3070out_register_inetpeer:
3071 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3072out_dst_entries:
3073 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3074out_kmem_cache:
f2fc6a54 3075 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3076 goto out;
1da177e4
LT
3077}
3078
3079void ip6_route_cleanup(void)
3080{
8ed67789 3081 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3082 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3083 fib6_rules_cleanup();
1da177e4 3084 xfrm6_fini();
1da177e4 3085 fib6_gc_cleanup();
c3426b47 3086 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3087 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3088 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3089 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3090}