ipv6: Compare addresses only bits up to the prefix length (RFC6724).
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
1da177e4
LT
60
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
1716a961 67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 68 const struct in6_addr *dest);
1da177e4 69static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 70static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 71static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73static void ip6_dst_destroy(struct dst_entry *);
74static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
569d3645 76static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
77
78static int ip6_pkt_discard(struct sk_buff *skb);
79static int ip6_pkt_discard_out(struct sk_buff *skb);
80static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
81static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
82 struct sk_buff *skb, u32 mtu);
83static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
84 struct sk_buff *skb);
1da177e4 85
70ceb4f5 86#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 87static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
88 const struct in6_addr *prefix, int prefixlen,
89 const struct in6_addr *gwaddr, int ifindex,
95c96174 90 unsigned int pref);
efa2cea0 91static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
92 const struct in6_addr *prefix, int prefixlen,
93 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
94#endif
95
06582540
DM
96static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
97{
98 struct rt6_info *rt = (struct rt6_info *) dst;
99 struct inet_peer *peer;
100 u32 *p = NULL;
101
8e2ec639
YZ
102 if (!(rt->dst.flags & DST_HOST))
103 return NULL;
104
fbfe95a4 105 peer = rt6_get_peer_create(rt);
06582540
DM
106 if (peer) {
107 u32 *old_p = __DST_METRICS_PTR(old);
108 unsigned long prev, new;
109
110 p = peer->metrics;
111 if (inet_metrics_new(peer))
112 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113
114 new = (unsigned long) p;
115 prev = cmpxchg(&dst->_metrics, old, new);
116
117 if (prev != old) {
118 p = __DST_METRICS_PTR(prev);
119 if (prev & DST_METRICS_READ_ONLY)
120 p = NULL;
121 }
122 }
123 return p;
124}
125
f894cbf8
DM
126static inline const void *choose_neigh_daddr(struct rt6_info *rt,
127 struct sk_buff *skb,
128 const void *daddr)
39232973
DM
129{
130 struct in6_addr *p = &rt->rt6i_gateway;
131
a7563f34 132 if (!ipv6_addr_any(p))
39232973 133 return (const void *) p;
f894cbf8
DM
134 else if (skb)
135 return &ipv6_hdr(skb)->daddr;
39232973
DM
136 return daddr;
137}
138
f894cbf8
DM
139static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
140 struct sk_buff *skb,
141 const void *daddr)
d3aaeb38 142{
39232973
DM
143 struct rt6_info *rt = (struct rt6_info *) dst;
144 struct neighbour *n;
145
f894cbf8 146 daddr = choose_neigh_daddr(rt, skb, daddr);
39232973 147 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
148 if (n)
149 return n;
150 return neigh_create(&nd_tbl, daddr, dst->dev);
151}
152
8ade06c6 153static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 154{
8ade06c6
DM
155 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
156 if (!n) {
157 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
158 if (IS_ERR(n))
159 return PTR_ERR(n);
160 }
97cac082 161 rt->n = n;
f83c7790
DM
162
163 return 0;
d3aaeb38
DM
164}
165
9a7ec3a9 166static struct dst_ops ip6_dst_ops_template = {
1da177e4 167 .family = AF_INET6,
09640e63 168 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
169 .gc = ip6_dst_gc,
170 .gc_thresh = 1024,
171 .check = ip6_dst_check,
0dbaee3b 172 .default_advmss = ip6_default_advmss,
ebb762f2 173 .mtu = ip6_mtu,
06582540 174 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
175 .destroy = ip6_dst_destroy,
176 .ifdown = ip6_dst_ifdown,
177 .negative_advice = ip6_negative_advice,
178 .link_failure = ip6_link_failure,
179 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 180 .redirect = rt6_do_redirect,
1ac06e03 181 .local_out = __ip6_local_out,
d3aaeb38 182 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
183};
184
ebb762f2 185static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 186{
618f9bc7
SK
187 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
188
189 return mtu ? : dst->dev->mtu;
ec831ea7
RD
190}
191
6700c270
DM
192static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
193 struct sk_buff *skb, u32 mtu)
14e50e57
DM
194{
195}
196
6700c270
DM
/* dst_ops->redirect hook for blackhole routes: deliberately a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
201
0972ddb2
HB
202static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
203 unsigned long old)
204{
205 return NULL;
206}
207
14e50e57
DM
208static struct dst_ops ip6_dst_blackhole_ops = {
209 .family = AF_INET6,
09640e63 210 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
211 .destroy = ip6_dst_destroy,
212 .check = ip6_dst_check,
ebb762f2 213 .mtu = ip6_blackhole_mtu,
214f45c9 214 .default_advmss = ip6_default_advmss,
14e50e57 215 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 216 .redirect = ip6_rt_blackhole_redirect,
0972ddb2 217 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 218 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
219};
220
62fa8a84
DM
221static const u32 ip6_template_metrics[RTAX_MAX] = {
222 [RTAX_HOPLIMIT - 1] = 255,
223};
224
bdb3289f 225static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
226 .dst = {
227 .__refcnt = ATOMIC_INIT(1),
228 .__use = 1,
229 .obsolete = -1,
230 .error = -ENETUNREACH,
d8d1f30b
CG
231 .input = ip6_pkt_discard,
232 .output = ip6_pkt_discard_out,
1da177e4
LT
233 },
234 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 235 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
236 .rt6i_metric = ~(u32) 0,
237 .rt6i_ref = ATOMIC_INIT(1),
238};
239
101367c2
TG
240#ifdef CONFIG_IPV6_MULTIPLE_TABLES
241
6723ab54
DM
242static int ip6_pkt_prohibit(struct sk_buff *skb);
243static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 244
280a34c8 245static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
246 .dst = {
247 .__refcnt = ATOMIC_INIT(1),
248 .__use = 1,
249 .obsolete = -1,
250 .error = -EACCES,
d8d1f30b
CG
251 .input = ip6_pkt_prohibit,
252 .output = ip6_pkt_prohibit_out,
101367c2
TG
253 },
254 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 255 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
256 .rt6i_metric = ~(u32) 0,
257 .rt6i_ref = ATOMIC_INIT(1),
258};
259
bdb3289f 260static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
261 .dst = {
262 .__refcnt = ATOMIC_INIT(1),
263 .__use = 1,
264 .obsolete = -1,
265 .error = -EINVAL,
d8d1f30b
CG
266 .input = dst_discard,
267 .output = dst_discard,
101367c2
TG
268 },
269 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 270 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
271 .rt6i_metric = ~(u32) 0,
272 .rt6i_ref = ATOMIC_INIT(1),
273};
274
275#endif
276
1da177e4 277/* allocate dst with ip6_dst_ops */
97bab73f 278static inline struct rt6_info *ip6_dst_alloc(struct net *net,
957c665f 279 struct net_device *dev,
8b96d22d
DM
280 int flags,
281 struct fib6_table *table)
1da177e4 282{
97bab73f 283 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
f5b0a874 284 0, DST_OBSOLETE_NONE, flags);
cf911662 285
97bab73f 286 if (rt) {
8104891b
SK
287 struct dst_entry *dst = &rt->dst;
288
289 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
8b96d22d 290 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
97bab73f 291 }
cf911662 292 return rt;
1da177e4
LT
293}
294
295static void ip6_dst_destroy(struct dst_entry *dst)
296{
297 struct rt6_info *rt = (struct rt6_info *)dst;
298 struct inet6_dev *idev = rt->rt6i_idev;
299
97cac082
DM
300 if (rt->n)
301 neigh_release(rt->n);
302
8e2ec639
YZ
303 if (!(rt->dst.flags & DST_HOST))
304 dst_destroy_metrics_generic(dst);
305
38308473 306 if (idev) {
1da177e4
LT
307 rt->rt6i_idev = NULL;
308 in6_dev_put(idev);
1ab1457c 309 }
1716a961
G
310
311 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
312 dst_release(dst->from);
313
97bab73f
DM
314 if (rt6_has_peer(rt)) {
315 struct inet_peer *peer = rt6_peer_ptr(rt);
b3419363
DM
316 inet_putpeer(peer);
317 }
318}
319
6431cbc2
DM
320static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
321
322static u32 rt6_peer_genid(void)
323{
324 return atomic_read(&__rt6_peer_genid);
325}
326
b3419363
DM
327void rt6_bind_peer(struct rt6_info *rt, int create)
328{
97bab73f 329 struct inet_peer_base *base;
b3419363
DM
330 struct inet_peer *peer;
331
97bab73f
DM
332 base = inetpeer_base_ptr(rt->_rt6i_peer);
333 if (!base)
334 return;
335
336 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
7b34ca2a
DM
337 if (peer) {
338 if (!rt6_set_peer(rt, peer))
339 inet_putpeer(peer);
340 else
341 rt->rt6i_peer_genid = rt6_peer_genid();
342 }
1da177e4
LT
343}
344
345static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
346 int how)
347{
348 struct rt6_info *rt = (struct rt6_info *)dst;
349 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 350 struct net_device *loopback_dev =
c346dca1 351 dev_net(dev)->loopback_dev;
1da177e4 352
97cac082
DM
353 if (dev != loopback_dev) {
354 if (idev && idev->dev == dev) {
355 struct inet6_dev *loopback_idev =
356 in6_dev_get(loopback_dev);
357 if (loopback_idev) {
358 rt->rt6i_idev = loopback_idev;
359 in6_dev_put(idev);
360 }
361 }
362 if (rt->n && rt->n->dev == dev) {
363 rt->n->dev = loopback_dev;
364 dev_hold(loopback_dev);
365 dev_put(dev);
1da177e4
LT
366 }
367 }
368}
369
a50feda5 370static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 371{
1716a961
G
372 struct rt6_info *ort = NULL;
373
374 if (rt->rt6i_flags & RTF_EXPIRES) {
375 if (time_after(jiffies, rt->dst.expires))
a50feda5 376 return true;
1716a961
G
377 } else if (rt->dst.from) {
378 ort = (struct rt6_info *) rt->dst.from;
379 return (ort->rt6i_flags & RTF_EXPIRES) &&
380 time_after(jiffies, ort->dst.expires);
381 }
a50feda5 382 return false;
1da177e4
LT
383}
384
a50feda5 385static bool rt6_need_strict(const struct in6_addr *daddr)
c71099ac 386{
a02cec21
ED
387 return ipv6_addr_type(daddr) &
388 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
389}
390
1da177e4 391/*
c71099ac 392 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
393 */
394
8ed67789
DL
395static inline struct rt6_info *rt6_device_match(struct net *net,
396 struct rt6_info *rt,
b71d1d42 397 const struct in6_addr *saddr,
1da177e4 398 int oif,
d420895e 399 int flags)
1da177e4
LT
400{
401 struct rt6_info *local = NULL;
402 struct rt6_info *sprt;
403
dd3abc4e
YH
404 if (!oif && ipv6_addr_any(saddr))
405 goto out;
406
d8d1f30b 407 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 408 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
409
410 if (oif) {
1da177e4
LT
411 if (dev->ifindex == oif)
412 return sprt;
413 if (dev->flags & IFF_LOOPBACK) {
38308473 414 if (!sprt->rt6i_idev ||
1da177e4 415 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 416 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 417 continue;
1ab1457c 418 if (local && (!oif ||
1da177e4
LT
419 local->rt6i_idev->dev->ifindex == oif))
420 continue;
421 }
422 local = sprt;
423 }
dd3abc4e
YH
424 } else {
425 if (ipv6_chk_addr(net, saddr, dev,
426 flags & RT6_LOOKUP_F_IFACE))
427 return sprt;
1da177e4 428 }
dd3abc4e 429 }
1da177e4 430
dd3abc4e 431 if (oif) {
1da177e4
LT
432 if (local)
433 return local;
434
d420895e 435 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 436 return net->ipv6.ip6_null_entry;
1da177e4 437 }
dd3abc4e 438out:
1da177e4
LT
439 return rt;
440}
441
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation towards the
 * neighbour bound to @rt when its state is not NUD_VALID.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute; here a probe is only sent once more than
 * rt->rt6i_idev->cnf.rtr_probe_interval jiffies have passed since
 * neigh->updated.
 *
 * neigh->updated is modified here, so the neighbour lock is taken for
 * writing.  Holding only the read side would let concurrent callers race
 * on the field and all pass the rate-limit check.
 *
 * @rt may be NULL, in which case nothing is done.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	neigh = rt ? rt->n : NULL;
	/* Cheap unlocked pre-check; re-validated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* Solicit the neighbour on its solicited-node multicast address. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
478
1da177e4 479/*
554cfb7e 480 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 481 */
b6f99a21 482static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 483{
d1918542 484 struct net_device *dev = rt->dst.dev;
161980f4 485 if (!oif || dev->ifindex == oif)
554cfb7e 486 return 2;
161980f4
DM
487 if ((dev->flags & IFF_LOOPBACK) &&
488 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
489 return 1;
490 return 0;
554cfb7e 491}
1da177e4 492
b6f99a21 493static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 494{
f2c31e32 495 struct neighbour *neigh;
398bcbeb 496 int m;
f2c31e32 497
97cac082 498 neigh = rt->n;
4d0c5911
YH
499 if (rt->rt6i_flags & RTF_NONEXTHOP ||
500 !(rt->rt6i_flags & RTF_GATEWAY))
501 m = 1;
502 else if (neigh) {
554cfb7e
YH
503 read_lock_bh(&neigh->lock);
504 if (neigh->nud_state & NUD_VALID)
4d0c5911 505 m = 2;
398bcbeb
YH
506#ifdef CONFIG_IPV6_ROUTER_PREF
507 else if (neigh->nud_state & NUD_FAILED)
508 m = 0;
509#endif
510 else
ea73ee23 511 m = 1;
554cfb7e 512 read_unlock_bh(&neigh->lock);
398bcbeb
YH
513 } else
514 m = 0;
554cfb7e 515 return m;
1da177e4
LT
516}
517
554cfb7e
YH
518static int rt6_score_route(struct rt6_info *rt, int oif,
519 int strict)
1da177e4 520{
4d0c5911 521 int m, n;
1ab1457c 522
4d0c5911 523 m = rt6_check_dev(rt, oif);
77d16f45 524 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 525 return -1;
ebacaaa0
YH
526#ifdef CONFIG_IPV6_ROUTER_PREF
527 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
528#endif
4d0c5911 529 n = rt6_check_neigh(rt);
557e92ef 530 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
531 return -1;
532 return m;
533}
534
f11e6659
DM
535static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
536 int *mpri, struct rt6_info *match)
554cfb7e 537{
f11e6659
DM
538 int m;
539
540 if (rt6_check_expired(rt))
541 goto out;
542
543 m = rt6_score_route(rt, oif, strict);
544 if (m < 0)
545 goto out;
546
547 if (m > *mpri) {
548 if (strict & RT6_LOOKUP_F_REACHABLE)
549 rt6_probe(match);
550 *mpri = m;
551 match = rt;
552 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
553 rt6_probe(rt);
554 }
555
556out:
557 return match;
558}
559
560static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
561 struct rt6_info *rr_head,
562 u32 metric, int oif, int strict)
563{
564 struct rt6_info *rt, *match;
554cfb7e 565 int mpri = -1;
1da177e4 566
f11e6659
DM
567 match = NULL;
568 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 569 rt = rt->dst.rt6_next)
f11e6659
DM
570 match = find_match(rt, oif, strict, &mpri, match);
571 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 572 rt = rt->dst.rt6_next)
f11e6659 573 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 574
f11e6659
DM
575 return match;
576}
1da177e4 577
f11e6659
DM
578static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
579{
580 struct rt6_info *match, *rt0;
8ed67789 581 struct net *net;
1da177e4 582
f11e6659
DM
583 rt0 = fn->rr_ptr;
584 if (!rt0)
585 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 586
f11e6659 587 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 588
554cfb7e 589 if (!match &&
f11e6659 590 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 591 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 592
554cfb7e 593 /* no entries matched; do round-robin */
f11e6659
DM
594 if (!next || next->rt6i_metric != rt0->rt6i_metric)
595 next = fn->leaf;
596
597 if (next != rt0)
598 fn->rr_ptr = next;
1da177e4 599 }
1da177e4 600
d1918542 601 net = dev_net(rt0->dst.dev);
a02cec21 602 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
603}
604
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option.
 *
 * @dev:    device the option arrived on.
 * @opt:    start of the option, interpreted as struct route_info.
 * @len:    number of bytes available at @opt.
 * @gwaddr: address of the advertising router.
 *
 * Depending on the advertised lifetime the matching route is deleted,
 * added, or has its preference and expiry refreshed.
 *
 * Returns 0 on success, -EINVAL if the option is malformed or carries an
 * invalid preference.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* The option carries a full-size prefix: use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* Build the prefix in a local buffer from prefix_len bits. */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (addrconf_finite_timeout(lifetime))
			rt6_set_expires(rt, jiffies + HZ * lifetime);
		else
			rt6_clean_expires(rt);

		dst_release(&rt->dst);
	}
	return 0;
}
#endif
677
/*
 * Backtrack through the FIB tree after a lookup ended on the null entry.
 *
 * Relies on the expanding function providing the locals "rt" and "fn" and
 * the labels "out" and "restart".  While rt is the null entry, the macro
 * climbs from fn to its parent (or looks @saddr up in the parent's
 * subtree when that subtree exists and is not where fn came from):
 *   - jumps to "out" when fn is a top-level root (RTN_TL_ROOT);
 *   - jumps to "restart" when the new fn carries route info (RTN_RTINFO).
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (!FIB6_SUBTREE(pn) || FIB6_SUBTREE(pn) == fn)	\
				fn = pn;				\
			else						\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);	\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 695
8ed67789
DL
696static struct rt6_info *ip6_pol_route_lookup(struct net *net,
697 struct fib6_table *table,
4c9483b2 698 struct flowi6 *fl6, int flags)
1da177e4
LT
699{
700 struct fib6_node *fn;
701 struct rt6_info *rt;
702
c71099ac 703 read_lock_bh(&table->tb6_lock);
4c9483b2 704 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
705restart:
706 rt = fn->leaf;
4c9483b2
DM
707 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
708 BACKTRACK(net, &fl6->saddr);
c71099ac 709out:
d8d1f30b 710 dst_use(&rt->dst, jiffies);
c71099ac 711 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
712 return rt;
713
714}
715
ea6e574e
FW
716struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
717 int flags)
718{
719 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
720}
721EXPORT_SYMBOL_GPL(ip6_route_lookup);
722
9acd9f3a
YH
723struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
724 const struct in6_addr *saddr, int oif, int strict)
c71099ac 725{
4c9483b2
DM
726 struct flowi6 fl6 = {
727 .flowi6_oif = oif,
728 .daddr = *daddr,
c71099ac
TG
729 };
730 struct dst_entry *dst;
77d16f45 731 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 732
adaa70bb 733 if (saddr) {
4c9483b2 734 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
735 flags |= RT6_LOOKUP_F_HAS_SADDR;
736 }
737
4c9483b2 738 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
739 if (dst->error == 0)
740 return (struct rt6_info *) dst;
741
742 dst_release(dst);
743
1da177e4
LT
744 return NULL;
745}
746
7159039a
YH
747EXPORT_SYMBOL(rt6_lookup);
748
/* ip6_ins_rt must be called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to the route, it may be destroyed.
 */
754
86872cb5 755static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
756{
757 int err;
c71099ac 758 struct fib6_table *table;
1da177e4 759
c71099ac
TG
760 table = rt->rt6i_table;
761 write_lock_bh(&table->tb6_lock);
86872cb5 762 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 763 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
764
765 return err;
766}
767
40e22e8f
TG
768int ip6_ins_rt(struct rt6_info *rt)
769{
4d1169c1 770 struct nl_info info = {
d1918542 771 .nl_net = dev_net(rt->dst.dev),
4d1169c1 772 };
528c4ceb 773 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
774}
775
1716a961 776static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 777 const struct in6_addr *daddr,
b71d1d42 778 const struct in6_addr *saddr)
1da177e4 779{
1da177e4
LT
780 struct rt6_info *rt;
781
782 /*
783 * Clone the route.
784 */
785
21efcfa0 786 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
787
788 if (rt) {
14deae41
DM
789 int attempts = !in_softirq();
790
38308473 791 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 792 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 793 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 794 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 795 rt->rt6i_gateway = *daddr;
58c4fb86 796 }
1da177e4 797
1da177e4 798 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
799
800#ifdef CONFIG_IPV6_SUBTREES
801 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 802 rt->rt6i_src.addr = *saddr;
1da177e4
LT
803 rt->rt6i_src.plen = 128;
804 }
805#endif
806
14deae41 807 retry:
8ade06c6 808 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 809 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
810 int saved_rt_min_interval =
811 net->ipv6.sysctl.ip6_rt_gc_min_interval;
812 int saved_rt_elasticity =
813 net->ipv6.sysctl.ip6_rt_gc_elasticity;
814
815 if (attempts-- > 0) {
816 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
817 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
818
86393e52 819 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
820
821 net->ipv6.sysctl.ip6_rt_gc_elasticity =
822 saved_rt_elasticity;
823 net->ipv6.sysctl.ip6_rt_gc_min_interval =
824 saved_rt_min_interval;
825 goto retry;
826 }
827
f3213831 828 net_warn_ratelimited("Neighbour table overflow\n");
d8d1f30b 829 dst_free(&rt->dst);
14deae41
DM
830 return NULL;
831 }
95a9a5ba 832 }
1da177e4 833
95a9a5ba
YH
834 return rt;
835}
1da177e4 836
21efcfa0
ED
837static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
838 const struct in6_addr *daddr)
299d9939 839{
21efcfa0
ED
840 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
841
299d9939 842 if (rt) {
299d9939 843 rt->rt6i_flags |= RTF_CACHE;
97cac082 844 rt->n = neigh_clone(ort->n);
299d9939
YH
845 }
846 return rt;
847}
848
8ed67789 849static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 850 struct flowi6 *fl6, int flags)
1da177e4
LT
851{
852 struct fib6_node *fn;
519fbd87 853 struct rt6_info *rt, *nrt;
c71099ac 854 int strict = 0;
1da177e4 855 int attempts = 3;
519fbd87 856 int err;
53b7997f 857 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 858
77d16f45 859 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
860
861relookup:
c71099ac 862 read_lock_bh(&table->tb6_lock);
1da177e4 863
8238dd06 864restart_2:
4c9483b2 865 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
866
867restart:
4acad72d 868 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 869
4c9483b2 870 BACKTRACK(net, &fl6->saddr);
8ed67789 871 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 872 rt->rt6i_flags & RTF_CACHE)
1ddef044 873 goto out;
1da177e4 874
d8d1f30b 875 dst_hold(&rt->dst);
c71099ac 876 read_unlock_bh(&table->tb6_lock);
fb9de91e 877
97cac082 878 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 879 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 880 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 881 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
882 else
883 goto out2;
e40cf353 884
d8d1f30b 885 dst_release(&rt->dst);
8ed67789 886 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 887
d8d1f30b 888 dst_hold(&rt->dst);
519fbd87 889 if (nrt) {
40e22e8f 890 err = ip6_ins_rt(nrt);
519fbd87 891 if (!err)
1da177e4 892 goto out2;
1da177e4 893 }
1da177e4 894
519fbd87
YH
895 if (--attempts <= 0)
896 goto out2;
897
898 /*
c71099ac 899 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
900 * released someone could insert this route. Relookup.
901 */
d8d1f30b 902 dst_release(&rt->dst);
519fbd87
YH
903 goto relookup;
904
905out:
8238dd06
YH
906 if (reachable) {
907 reachable = 0;
908 goto restart_2;
909 }
d8d1f30b 910 dst_hold(&rt->dst);
c71099ac 911 read_unlock_bh(&table->tb6_lock);
1da177e4 912out2:
d8d1f30b
CG
913 rt->dst.lastuse = jiffies;
914 rt->dst.__use++;
c71099ac
TG
915
916 return rt;
1da177e4
LT
917}
918
8ed67789 919static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 920 struct flowi6 *fl6, int flags)
4acad72d 921{
4c9483b2 922 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
923}
924
72331bc0
SL
925static struct dst_entry *ip6_route_input_lookup(struct net *net,
926 struct net_device *dev,
927 struct flowi6 *fl6, int flags)
928{
929 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
930 flags |= RT6_LOOKUP_F_IFACE;
931
932 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
933}
934
c71099ac
TG
935void ip6_route_input(struct sk_buff *skb)
936{
b71d1d42 937 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 938 struct net *net = dev_net(skb->dev);
adaa70bb 939 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
940 struct flowi6 fl6 = {
941 .flowi6_iif = skb->dev->ifindex,
942 .daddr = iph->daddr,
943 .saddr = iph->saddr,
38308473 944 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
945 .flowi6_mark = skb->mark,
946 .flowi6_proto = iph->nexthdr,
c71099ac 947 };
adaa70bb 948
72331bc0 949 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
950}
951
8ed67789 952static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 953 struct flowi6 *fl6, int flags)
1da177e4 954{
4c9483b2 955 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
956}
957
9c7a4f9c 958struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 959 struct flowi6 *fl6)
c71099ac
TG
960{
961 int flags = 0;
962
1fb9489b 963 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 964
4c9483b2 965 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 966 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 967
4c9483b2 968 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 969 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
970 else if (sk)
971 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 972
4c9483b2 973 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
974}
975
7159039a 976EXPORT_SYMBOL(ip6_route_output);
1da177e4 977
2774c131 978struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 979{
5c1e6aa3 980 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
981 struct dst_entry *new = NULL;
982
f5b0a874 983 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 984 if (rt) {
d8d1f30b 985 new = &rt->dst;
14e50e57 986
8104891b
SK
987 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
988 rt6_init_peer(rt, net->ipv6.peers);
989
14e50e57 990 new->__use = 1;
352e512c
HX
991 new->input = dst_discard;
992 new->output = dst_discard;
14e50e57 993
21efcfa0
ED
994 if (dst_metrics_read_only(&ort->dst))
995 new->_metrics = ort->dst._metrics;
996 else
997 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
998 rt->rt6i_idev = ort->rt6i_idev;
999 if (rt->rt6i_idev)
1000 in6_dev_hold(rt->rt6i_idev);
14e50e57 1001
4e3fd7a0 1002 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1003 rt->rt6i_flags = ort->rt6i_flags;
1004 rt6_clean_expires(rt);
14e50e57
DM
1005 rt->rt6i_metric = 0;
1006
1007 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1008#ifdef CONFIG_IPV6_SUBTREES
1009 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1010#endif
1011
1012 dst_free(new);
1013 }
1014
69ead7af
DM
1015 dst_release(dst_orig);
1016 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1017}
14e50e57 1018
1da177e4
LT
1019/*
1020 * Destination cache support functions
1021 */
1022
1023static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1024{
1025 struct rt6_info *rt;
1026
1027 rt = (struct rt6_info *) dst;
1028
6431cbc2
DM
1029 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1030 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
97bab73f 1031 if (!rt6_has_peer(rt))
6431cbc2
DM
1032 rt6_bind_peer(rt, 0);
1033 rt->rt6i_peer_genid = rt6_peer_genid();
1034 }
1da177e4 1035 return dst;
6431cbc2 1036 }
1da177e4
LT
1037 return NULL;
1038}
1039
1040static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1041{
1042 struct rt6_info *rt = (struct rt6_info *) dst;
1043
1044 if (rt) {
54c1a859
YH
1045 if (rt->rt6i_flags & RTF_CACHE) {
1046 if (rt6_check_expired(rt)) {
1047 ip6_del_rt(rt);
1048 dst = NULL;
1049 }
1050 } else {
1da177e4 1051 dst_release(dst);
54c1a859
YH
1052 dst = NULL;
1053 }
1da177e4 1054 }
54c1a859 1055 return dst;
1da177e4
LT
1056}
1057
1058static void ip6_link_failure(struct sk_buff *skb)
1059{
1060 struct rt6_info *rt;
1061
3ffe533c 1062 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1063
adf30907 1064 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1065 if (rt) {
1716a961
G
1066 if (rt->rt6i_flags & RTF_CACHE)
1067 rt6_update_expires(rt, 0);
1068 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1da177e4
LT
1069 rt->rt6i_node->fn_sernum = -1;
1070 }
1071}
1072
6700c270
DM
1073static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1074 struct sk_buff *skb, u32 mtu)
1da177e4
LT
1075{
1076 struct rt6_info *rt6 = (struct rt6_info*)dst;
1077
81aded24 1078 dst_confirm(dst);
1da177e4 1079 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
81aded24
DM
1080 struct net *net = dev_net(dst->dev);
1081
1da177e4
LT
1082 rt6->rt6i_flags |= RTF_MODIFIED;
1083 if (mtu < IPV6_MIN_MTU) {
defb3519 1084 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1085 mtu = IPV6_MIN_MTU;
defb3519
DM
1086 features |= RTAX_FEATURE_ALLFRAG;
1087 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1088 }
defb3519 1089 dst_metric_set(dst, RTAX_MTU, mtu);
81aded24 1090 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1091 }
1092}
1093
42ae66c8
DM
1094void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1095 int oif, u32 mark)
81aded24
DM
1096{
1097 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1098 struct dst_entry *dst;
1099 struct flowi6 fl6;
1100
1101 memset(&fl6, 0, sizeof(fl6));
1102 fl6.flowi6_oif = oif;
1103 fl6.flowi6_mark = mark;
3e12939a 1104 fl6.flowi6_flags = 0;
81aded24
DM
1105 fl6.daddr = iph->daddr;
1106 fl6.saddr = iph->saddr;
1107 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1108
1109 dst = ip6_route_output(net, NULL, &fl6);
1110 if (!dst->error)
6700c270 1111 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
81aded24
DM
1112 dst_release(dst);
1113}
1114EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1115
1116void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1117{
1118 ip6_update_pmtu(skb, sock_net(sk), mtu,
1119 sk->sk_bound_dev_if, sk->sk_mark);
1120}
1121EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1122
3a5ad2ee
DM
1123void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1124{
1125 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1126 struct dst_entry *dst;
1127 struct flowi6 fl6;
1128
1129 memset(&fl6, 0, sizeof(fl6));
1130 fl6.flowi6_oif = oif;
1131 fl6.flowi6_mark = mark;
1132 fl6.flowi6_flags = 0;
1133 fl6.daddr = iph->daddr;
1134 fl6.saddr = iph->saddr;
1135 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1136
1137 dst = ip6_route_output(net, NULL, &fl6);
1138 if (!dst->error)
6700c270 1139 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1140 dst_release(dst);
1141}
1142EXPORT_SYMBOL_GPL(ip6_redirect);
1143
1144void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1145{
1146 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1147}
1148EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1149
0dbaee3b 1150static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1151{
0dbaee3b
DM
1152 struct net_device *dev = dst->dev;
1153 unsigned int mtu = dst_mtu(dst);
1154 struct net *net = dev_net(dev);
1155
1da177e4
LT
1156 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1157
5578689a
DL
1158 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1159 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1160
1161 /*
1ab1457c
YH
1162 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1163 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1164 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1165 * rely only on pmtu discovery"
1166 */
1167 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1168 mtu = IPV6_MAXPLEN;
1169 return mtu;
1170}
1171
ebb762f2 1172static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1173{
d33e4553 1174 struct inet6_dev *idev;
618f9bc7
SK
1175 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1176
1177 if (mtu)
1178 return mtu;
1179
1180 mtu = IPV6_MIN_MTU;
d33e4553
DM
1181
1182 rcu_read_lock();
1183 idev = __in6_dev_get(dst->dev);
1184 if (idev)
1185 mtu = idev->cnf.mtu6;
1186 rcu_read_unlock();
1187
1188 return mtu;
1189}
1190
3b00944c
YH
1191static struct dst_entry *icmp6_dst_gc_list;
1192static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1193
3b00944c 1194struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1195 struct neighbour *neigh,
87a11578 1196 struct flowi6 *fl6)
1da177e4 1197{
87a11578 1198 struct dst_entry *dst;
1da177e4
LT
1199 struct rt6_info *rt;
1200 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1201 struct net *net = dev_net(dev);
1da177e4 1202
38308473 1203 if (unlikely(!idev))
122bdf67 1204 return ERR_PTR(-ENODEV);
1da177e4 1205
8b96d22d 1206 rt = ip6_dst_alloc(net, dev, 0, NULL);
38308473 1207 if (unlikely(!rt)) {
1da177e4 1208 in6_dev_put(idev);
87a11578 1209 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1210 goto out;
1211 }
1212
1da177e4
LT
1213 if (neigh)
1214 neigh_hold(neigh);
14deae41 1215 else {
f894cbf8 1216 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
b43faac6 1217 if (IS_ERR(neigh)) {
252c3d84 1218 in6_dev_put(idev);
b43faac6
DM
1219 dst_free(&rt->dst);
1220 return ERR_CAST(neigh);
1221 }
14deae41 1222 }
1da177e4 1223
8e2ec639
YZ
1224 rt->dst.flags |= DST_HOST;
1225 rt->dst.output = ip6_output;
97cac082 1226 rt->n = neigh;
d8d1f30b 1227 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1228 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1229 rt->rt6i_dst.plen = 128;
1230 rt->rt6i_idev = idev;
7011687f 1231 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1232
3b00944c 1233 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1234 rt->dst.next = icmp6_dst_gc_list;
1235 icmp6_dst_gc_list = &rt->dst;
3b00944c 1236 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1237
5578689a 1238 fib6_force_start_gc(net);
1da177e4 1239
87a11578
DM
1240 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1241
1da177e4 1242out:
87a11578 1243 return dst;
1da177e4
LT
1244}
1245
3d0f24a7 1246int icmp6_dst_gc(void)
1da177e4 1247{
e9476e95 1248 struct dst_entry *dst, **pprev;
3d0f24a7 1249 int more = 0;
1da177e4 1250
3b00944c
YH
1251 spin_lock_bh(&icmp6_dst_lock);
1252 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1253
1da177e4
LT
1254 while ((dst = *pprev) != NULL) {
1255 if (!atomic_read(&dst->__refcnt)) {
1256 *pprev = dst->next;
1257 dst_free(dst);
1da177e4
LT
1258 } else {
1259 pprev = &dst->next;
3d0f24a7 1260 ++more;
1da177e4
LT
1261 }
1262 }
1263
3b00944c 1264 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1265
3d0f24a7 1266 return more;
1da177e4
LT
1267}
1268
1e493d19
DM
1269static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1270 void *arg)
1271{
1272 struct dst_entry *dst, **pprev;
1273
1274 spin_lock_bh(&icmp6_dst_lock);
1275 pprev = &icmp6_dst_gc_list;
1276 while ((dst = *pprev) != NULL) {
1277 struct rt6_info *rt = (struct rt6_info *) dst;
1278 if (func(rt, arg)) {
1279 *pprev = dst->next;
1280 dst_free(dst);
1281 } else {
1282 pprev = &dst->next;
1283 }
1284 }
1285 spin_unlock_bh(&icmp6_dst_lock);
1286}
1287
569d3645 1288static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1289{
1da177e4 1290 unsigned long now = jiffies;
86393e52 1291 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1292 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1293 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1294 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1295 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1296 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1297 int entries;
7019b78e 1298
fc66f95c 1299 entries = dst_entries_get_fast(ops);
7019b78e 1300 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1301 entries <= rt_max_size)
1da177e4
LT
1302 goto out;
1303
6891a346
BT
1304 net->ipv6.ip6_rt_gc_expire++;
1305 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1306 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1307 entries = dst_entries_get_slow(ops);
1308 if (entries < ops->gc_thresh)
7019b78e 1309 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1310out:
7019b78e 1311 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1312 return entries > rt_max_size;
1da177e4
LT
1313}
1314
1315/* Clean host part of a prefix. Not necessary in radix tree,
1316 but results in cleaner routing tables.
1317
1318 Remove it only when all the things will work!
1319 */
1320
6b75d090 1321int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1322{
5170ae82 1323 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1324 if (hoplimit == 0) {
6b75d090 1325 struct net_device *dev = dst->dev;
c68f24cc
ED
1326 struct inet6_dev *idev;
1327
1328 rcu_read_lock();
1329 idev = __in6_dev_get(dev);
1330 if (idev)
6b75d090 1331 hoplimit = idev->cnf.hop_limit;
c68f24cc 1332 else
53b7997f 1333 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1334 rcu_read_unlock();
1da177e4
LT
1335 }
1336 return hoplimit;
1337}
abbf46ae 1338EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1339
1340/*
1341 *
1342 */
1343
/*
 * ip6_route_add - build a route from @cfg and insert it into the FIB.
 *
 * Validates the prefix lengths (at most 128; a source prefix is rejected
 * unless CONFIG_IPV6_SUBTREES is set), resolves cfg->fc_ifindex to a held
 * device/inet6_dev pair, picks or creates the FIB table, allocates the
 * rt6_info, fills it in from @cfg and hands it to __ip6_ins_rt().
 *
 * Side effects on @cfg: fc_metric defaults to IP6_RT_PRIO_USER when 0,
 * fc_protocol defaults to RTPROT_BOOT when RTPROT_UNSPEC, and
 * fc_nlinfo.nl_net is overwritten with dev_net(dev) before insertion.
 *
 * Returns the result of __ip6_ins_rt() on the success path, otherwise a
 * negative errno (-EINVAL, -ENODEV, -ENOBUFS, -ENOMEM, -EHOSTUNREACH or
 * the rt6_bind_neighbour() error). On the error path the device, idev and
 * route references taken here are dropped at the "out" label.
 */
86872cb5 1344int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1345{
1346 int err;
5578689a 1347 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1348 struct rt6_info *rt = NULL;
1349 struct net_device *dev = NULL;
1350 struct inet6_dev *idev = NULL;
c71099ac 1351 struct fib6_table *table;
1da177e4
LT
1352 int addr_type;
1353
86872cb5 1354 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1355 return -EINVAL;
1356#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1357 if (cfg->fc_src_len)
1da177e4
LT
1358 return -EINVAL;
1359#endif
86872cb5 1360 if (cfg->fc_ifindex) {
1da177e4 1361 err = -ENODEV;
5578689a 1362 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1363 if (!dev)
1364 goto out;
1365 idev = in6_dev_get(dev);
1366 if (!idev)
1367 goto out;
1368 }
1369
86872cb5
TG
1370 if (cfg->fc_metric == 0)
1371 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1372
/* err stays -ENOBUFS if no table can be found or created below. */
d71314b4 1373 err = -ENOBUFS;
38308473
DM
1374 if (cfg->fc_nlinfo.nlh &&
1375 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1376 table = fib6_get_table(net, cfg->fc_table);
38308473 1377 if (!table) {
f3213831 1378 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1379 table = fib6_new_table(net, cfg->fc_table);
1380 }
1381 } else {
1382 table = fib6_new_table(net, cfg->fc_table);
1383 }
38308473
DM
1384
1385 if (!table)
c71099ac 1386 goto out;
c71099ac 1387
8b96d22d 1388 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1da177e4 1389
38308473 1390 if (!rt) {
1da177e4
LT
1391 err = -ENOMEM;
1392 goto out;
1393 }
1394
d8d1f30b 1395 rt->dst.obsolete = -1;
1716a961
G
1396
1397 if (cfg->fc_flags & RTF_EXPIRES)
1398 rt6_set_expires(rt, jiffies +
1399 clock_t_to_jiffies(cfg->fc_expires));
1400 else
1401 rt6_clean_expires(rt);
1da177e4 1402
86872cb5
TG
1403 if (cfg->fc_protocol == RTPROT_UNSPEC)
1404 cfg->fc_protocol = RTPROT_BOOT;
1405 rt->rt6i_protocol = cfg->fc_protocol;
1406
1407 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1408
/* Input handler: multicast, local delivery, or forwarding. */
1409 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1410 rt->dst.input = ip6_mc_input;
ab79ad14
1411 else if (cfg->fc_flags & RTF_LOCAL)
1412 rt->dst.input = ip6_input;
1da177e4 1413 else
d8d1f30b 1414 rt->dst.input = ip6_forward;
1da177e4 1415
d8d1f30b 1416 rt->dst.output = ip6_output;
1da177e4 1417
86872cb5
TG
1418 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1419 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1420 if (rt->rt6i_dst.plen == 128)
11d53b49 1421 rt->dst.flags |= DST_HOST;
1da177e4 1422
/* Non-host routes with user metrics get their own zeroed metrics array. */
8e2ec639
YZ
1423 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1424 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1425 if (!metrics) {
1426 err = -ENOMEM;
1427 goto out;
1428 }
1429 dst_init_metrics(&rt->dst, metrics, 0);
1430 }
1da177e4 1431#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1432 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1433 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1434#endif
1435
86872cb5 1436 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1437
1438 /* We cannot add true routes via loopback here,
1439 they would result in kernel looping; promote them to reject routes
1440 */
86872cb5 1441 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1442 (dev && (dev->flags & IFF_LOOPBACK) &&
1443 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1444 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1445 /* hold loopback dev/idev if we haven't done so. */
5578689a 1446 if (dev != net->loopback_dev) {
1da177e4
LT
1447 if (dev) {
1448 dev_put(dev);
1449 in6_dev_put(idev);
1450 }
5578689a 1451 dev = net->loopback_dev;
1da177e4
LT
1452 dev_hold(dev);
1453 idev = in6_dev_get(dev);
1454 if (!idev) {
1455 err = -ENODEV;
1456 goto out;
1457 }
1458 }
d8d1f30b
CG
1459 rt->dst.output = ip6_pkt_discard_out;
1460 rt->dst.input = ip6_pkt_discard;
1da177e4 1461 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1462 switch (cfg->fc_type) {
1463 case RTN_BLACKHOLE:
1464 rt->dst.error = -EINVAL;
1465 break;
1466 case RTN_PROHIBIT:
1467 rt->dst.error = -EACCES;
1468 break;
b4949ab2
ND
1469 case RTN_THROW:
1470 rt->dst.error = -EAGAIN;
1471 break;
ef2c7d7b
ND
1472 default:
1473 rt->dst.error = -ENETUNREACH;
1474 break;
1475 }
1da177e4
LT
1476 goto install_route;
1477 }
1478
86872cb5 1479 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1480 const struct in6_addr *gw_addr;
1da177e4
LT
1481 int gwa_type;
1482
86872cb5 1483 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1484 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1485 gwa_type = ipv6_addr_type(gw_addr);
1486
1487 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1488 struct rt6_info *grt;
1489
1490 /* IPv6 strictly inhibits using not link-local
1491 addresses as nexthop address.
1492 Otherwise, router will not able to send redirects.
1493 It is very good, but in some (rare!) circumstances
1494 (SIT, PtP, NBMA NOARP links) it is handy to allow
1495 some exceptions. --ANK
1496 */
1497 err = -EINVAL;
38308473 1498 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1499 goto out;
1500
5578689a 1501 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1502
1503 err = -EHOSTUNREACH;
38308473 1504 if (!grt)
1da177e4
LT
1505 goto out;
1506 if (dev) {
d1918542 1507 if (dev != grt->dst.dev) {
d8d1f30b 1508 dst_release(&grt->dst);
1da177e4
LT
1509 goto out;
1510 }
1511 } else {
d1918542 1512 dev = grt->dst.dev;
1da177e4
LT
1513 idev = grt->rt6i_idev;
1514 dev_hold(dev);
1515 in6_dev_hold(grt->rt6i_idev);
1516 }
/* Accept only if the route to the gateway is itself gateway-less; otherwise err remains -EHOSTUNREACH. */
38308473 1517 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1518 err = 0;
d8d1f30b 1519 dst_release(&grt->dst);
1da177e4
LT
1520
1521 if (err)
1522 goto out;
1523 }
1524 err = -EINVAL;
38308473 1525 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1526 goto out;
1527 }
1528
1529 err = -ENODEV;
38308473 1530 if (!dev)
1da177e4
LT
1531 goto out;
1532
/* A preferred source address must pass ipv6_chk_addr() for dev. */
c3968a85
DW
1533 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1534 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1535 err = -EINVAL;
1536 goto out;
1537 }
4e3fd7a0 1538 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1539 rt->rt6i_prefsrc.plen = 128;
1540 } else
1541 rt->rt6i_prefsrc.plen = 0;
1542
86872cb5 1543 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1544 err = rt6_bind_neighbour(rt, dev);
f83c7790 1545 if (err)
1da177e4 1546 goto out;
1da177e4
LT
1547 }
1548
86872cb5 1549 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1550
1551install_route:
/* Apply user supplied metrics: attribute type 0 is skipped, types above RTAX_MAX are rejected. */
86872cb5
TG
1552 if (cfg->fc_mx) {
1553 struct nlattr *nla;
1554 int remaining;
1555
1556 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1557 int type = nla_type(nla);
86872cb5
TG
1558
1559 if (type) {
1560 if (type > RTAX_MAX) {
1da177e4
LT
1561 err = -EINVAL;
1562 goto out;
1563 }
86872cb5 1564
defb3519 1565 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1566 }
1da177e4
LT
1567 }
1568 }
1569
/* The dev and idev references held here are handed over to the route. */
d8d1f30b 1570 rt->dst.dev = dev;
1da177e4 1571 rt->rt6i_idev = idev;
c71099ac 1572 rt->rt6i_table = table;
63152fc0 1573
c346dca1 1574 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1575
86872cb5 1576 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1577
1578out:
1579 if (dev)
1580 dev_put(dev);
1581 if (idev)
1582 in6_dev_put(idev);
1583 if (rt)
d8d1f30b 1584 dst_free(&rt->dst);
1da177e4
LT
1585 return err;
1586}
1587
86872cb5 1588static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1589{
1590 int err;
c71099ac 1591 struct fib6_table *table;
d1918542 1592 struct net *net = dev_net(rt->dst.dev);
1da177e4 1593
8ed67789 1594 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1595 return -ENOENT;
1596
c71099ac
TG
1597 table = rt->rt6i_table;
1598 write_lock_bh(&table->tb6_lock);
1da177e4 1599
86872cb5 1600 err = fib6_del(rt, info);
d8d1f30b 1601 dst_release(&rt->dst);
1da177e4 1602
c71099ac 1603 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1604
1605 return err;
1606}
1607
e0a1ad73
TG
1608int ip6_del_rt(struct rt6_info *rt)
1609{
4d1169c1 1610 struct nl_info info = {
d1918542 1611 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1612 };
528c4ceb 1613 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1614}
1615
86872cb5 1616static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1617{
c71099ac 1618 struct fib6_table *table;
1da177e4
LT
1619 struct fib6_node *fn;
1620 struct rt6_info *rt;
1621 int err = -ESRCH;
1622
5578689a 1623 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1624 if (!table)
c71099ac
TG
1625 return err;
1626
1627 read_lock_bh(&table->tb6_lock);
1da177e4 1628
c71099ac 1629 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1630 &cfg->fc_dst, cfg->fc_dst_len,
1631 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1632
1da177e4 1633 if (fn) {
d8d1f30b 1634 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1635 if (cfg->fc_ifindex &&
d1918542
DM
1636 (!rt->dst.dev ||
1637 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1638 continue;
86872cb5
TG
1639 if (cfg->fc_flags & RTF_GATEWAY &&
1640 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1641 continue;
86872cb5 1642 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1643 continue;
d8d1f30b 1644 dst_hold(&rt->dst);
c71099ac 1645 read_unlock_bh(&table->tb6_lock);
1da177e4 1646
86872cb5 1647 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1648 }
1649 }
c71099ac 1650 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1651
1652 return err;
1653}
1654
6700c270 1655static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 1656{
e8599ff4 1657 struct net *net = dev_net(skb->dev);
a6279458 1658 struct netevent_redirect netevent;
e8599ff4
DM
1659 struct rt6_info *rt, *nrt = NULL;
1660 const struct in6_addr *target;
e8599ff4 1661 struct ndisc_options ndopts;
6e157b6a
DM
1662 const struct in6_addr *dest;
1663 struct neighbour *old_neigh;
e8599ff4
DM
1664 struct inet6_dev *in6_dev;
1665 struct neighbour *neigh;
1666 struct icmp6hdr *icmph;
6e157b6a
DM
1667 int optlen, on_link;
1668 u8 *lladdr;
e8599ff4
DM
1669
1670 optlen = skb->tail - skb->transport_header;
1671 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1672
1673 if (optlen < 0) {
6e157b6a 1674 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
1675 return;
1676 }
1677
1678 icmph = icmp6_hdr(skb);
1679 target = (const struct in6_addr *) (icmph + 1);
1680 dest = target + 1;
1681
1682 if (ipv6_addr_is_multicast(dest)) {
6e157b6a 1683 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
1684 return;
1685 }
1686
6e157b6a 1687 on_link = 0;
e8599ff4
DM
1688 if (ipv6_addr_equal(dest, target)) {
1689 on_link = 1;
1690 } else if (ipv6_addr_type(target) !=
1691 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 1692 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
1693 return;
1694 }
1695
1696 in6_dev = __in6_dev_get(skb->dev);
1697 if (!in6_dev)
1698 return;
1699 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1700 return;
1701
1702 /* RFC2461 8.1:
1703 * The IP source address of the Redirect MUST be the same as the current
1704 * first-hop router for the specified ICMP Destination Address.
1705 */
1706
1707 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1708 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1709 return;
1710 }
6e157b6a
DM
1711
1712 lladdr = NULL;
e8599ff4
DM
1713 if (ndopts.nd_opts_tgt_lladdr) {
1714 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1715 skb->dev);
1716 if (!lladdr) {
1717 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1718 return;
1719 }
1720 }
1721
6e157b6a
DM
1722 rt = (struct rt6_info *) dst;
1723 if (rt == net->ipv6.ip6_null_entry) {
1724 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 1725 return;
6e157b6a 1726 }
e8599ff4 1727
6e157b6a
DM
1728 /* Redirect received -> path was valid.
1729 * Look, redirects are sent only in response to data packets,
1730 * so that this nexthop apparently is reachable. --ANK
1731 */
1732 dst_confirm(&rt->dst);
a6279458 1733
6e157b6a
DM
1734 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1735 if (!neigh)
1736 return;
a6279458 1737
6e157b6a
DM
1738 /* Duplicate redirect: silently ignore. */
1739 old_neigh = rt->n;
1740 if (neigh == old_neigh)
a6279458 1741 goto out;
1da177e4 1742
1da177e4
LT
1743 /*
1744 * We have finally decided to accept it.
1745 */
1746
1ab1457c 1747 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1748 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1749 NEIGH_UPDATE_F_OVERRIDE|
1750 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1751 NEIGH_UPDATE_F_ISROUTER))
1752 );
1753
21efcfa0 1754 nrt = ip6_rt_copy(rt, dest);
38308473 1755 if (!nrt)
1da177e4
LT
1756 goto out;
1757
1758 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1759 if (on_link)
1760 nrt->rt6i_flags &= ~RTF_GATEWAY;
1761
4e3fd7a0 1762 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
97cac082 1763 nrt->n = neigh_clone(neigh);
1da177e4 1764
40e22e8f 1765 if (ip6_ins_rt(nrt))
1da177e4
LT
1766 goto out;
1767
d8d1f30b 1768 netevent.old = &rt->dst;
1d248b1c 1769 netevent.old_neigh = old_neigh;
d8d1f30b 1770 netevent.new = &nrt->dst;
1d248b1c
DM
1771 netevent.new_neigh = neigh;
1772 netevent.daddr = dest;
8d71740c
TT
1773 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1774
38308473 1775 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 1776 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 1777 ip6_del_rt(rt);
1da177e4
LT
1778 }
1779
1780out:
e8599ff4 1781 neigh_release(neigh);
6e157b6a
DM
1782}
1783
1da177e4
LT
1784/*
1785 * Misc support functions
1786 */
1787
1716a961 1788static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1789 const struct in6_addr *dest)
1da177e4 1790{
d1918542 1791 struct net *net = dev_net(ort->dst.dev);
8b96d22d
DM
1792 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1793 ort->rt6i_table);
1da177e4
LT
1794
1795 if (rt) {
d8d1f30b
CG
1796 rt->dst.input = ort->dst.input;
1797 rt->dst.output = ort->dst.output;
8e2ec639 1798 rt->dst.flags |= DST_HOST;
d8d1f30b 1799
4e3fd7a0 1800 rt->rt6i_dst.addr = *dest;
8e2ec639 1801 rt->rt6i_dst.plen = 128;
defb3519 1802 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1803 rt->dst.error = ort->dst.error;
1da177e4
LT
1804 rt->rt6i_idev = ort->rt6i_idev;
1805 if (rt->rt6i_idev)
1806 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1807 rt->dst.lastuse = jiffies;
1da177e4 1808
4e3fd7a0 1809 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1810 rt->rt6i_flags = ort->rt6i_flags;
1811 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1812 (RTF_DEFAULT | RTF_ADDRCONF))
1813 rt6_set_from(rt, ort);
1814 else
1815 rt6_clean_expires(rt);
1da177e4
LT
1816 rt->rt6i_metric = 0;
1817
1da177e4
LT
1818#ifdef CONFIG_IPV6_SUBTREES
1819 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1820#endif
0f6c6392 1821 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1822 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1823 }
1824 return rt;
1825}
1826
70ceb4f5 1827#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1828static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1829 const struct in6_addr *prefix, int prefixlen,
1830 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1831{
1832 struct fib6_node *fn;
1833 struct rt6_info *rt = NULL;
c71099ac
TG
1834 struct fib6_table *table;
1835
efa2cea0 1836 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1837 if (!table)
c71099ac 1838 return NULL;
70ceb4f5 1839
c71099ac
TG
1840 write_lock_bh(&table->tb6_lock);
1841 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1842 if (!fn)
1843 goto out;
1844
d8d1f30b 1845 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1846 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1847 continue;
1848 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1849 continue;
1850 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1851 continue;
d8d1f30b 1852 dst_hold(&rt->dst);
70ceb4f5
YH
1853 break;
1854 }
1855out:
c71099ac 1856 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1857 return rt;
1858}
1859
efa2cea0 1860static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1861 const struct in6_addr *prefix, int prefixlen,
1862 const struct in6_addr *gwaddr, int ifindex,
95c96174 1863 unsigned int pref)
70ceb4f5 1864{
86872cb5
TG
1865 struct fib6_config cfg = {
1866 .fc_table = RT6_TABLE_INFO,
238fc7ea 1867 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1868 .fc_ifindex = ifindex,
1869 .fc_dst_len = prefixlen,
1870 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1871 RTF_UP | RTF_PREF(pref),
15e47304 1872 .fc_nlinfo.portid = 0,
efa2cea0
DL
1873 .fc_nlinfo.nlh = NULL,
1874 .fc_nlinfo.nl_net = net,
86872cb5
TG
1875 };
1876
4e3fd7a0
AD
1877 cfg.fc_dst = *prefix;
1878 cfg.fc_gateway = *gwaddr;
70ceb4f5 1879
e317da96
YH
1880 /* We should treat it as a default route if prefix length is 0. */
1881 if (!prefixlen)
86872cb5 1882 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1883
86872cb5 1884 ip6_route_add(&cfg);
70ceb4f5 1885
efa2cea0 1886 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1887}
1888#endif
1889
b71d1d42 1890struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1891{
1da177e4 1892 struct rt6_info *rt;
c71099ac 1893 struct fib6_table *table;
1da177e4 1894
c346dca1 1895 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1896 if (!table)
c71099ac 1897 return NULL;
1da177e4 1898
c71099ac 1899 write_lock_bh(&table->tb6_lock);
d8d1f30b 1900 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1901 if (dev == rt->dst.dev &&
045927ff 1902 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1903 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1904 break;
1905 }
1906 if (rt)
d8d1f30b 1907 dst_hold(&rt->dst);
c71099ac 1908 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1909 return rt;
1910}
1911
b71d1d42 1912struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1913 struct net_device *dev,
1914 unsigned int pref)
1da177e4 1915{
86872cb5
TG
1916 struct fib6_config cfg = {
1917 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1918 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1919 .fc_ifindex = dev->ifindex,
1920 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1921 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 1922 .fc_nlinfo.portid = 0,
5578689a 1923 .fc_nlinfo.nlh = NULL,
c346dca1 1924 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1925 };
1da177e4 1926
4e3fd7a0 1927 cfg.fc_gateway = *gwaddr;
1da177e4 1928
86872cb5 1929 ip6_route_add(&cfg);
1da177e4 1930
1da177e4
LT
1931 return rt6_get_dflt_router(gwaddr, dev);
1932}
1933
7b4da532 1934void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1935{
1936 struct rt6_info *rt;
c71099ac
TG
1937 struct fib6_table *table;
1938
1939 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1940 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1941 if (!table)
c71099ac 1942 return;
1da177e4
LT
1943
1944restart:
c71099ac 1945 read_lock_bh(&table->tb6_lock);
d8d1f30b 1946 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1947 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1948 dst_hold(&rt->dst);
c71099ac 1949 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1950 ip6_del_rt(rt);
1da177e4
LT
1951 goto restart;
1952 }
1953 }
c71099ac 1954 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1955}
1956
5578689a
DL
1957static void rtmsg_to_fib6_config(struct net *net,
1958 struct in6_rtmsg *rtmsg,
86872cb5
TG
1959 struct fib6_config *cfg)
1960{
1961 memset(cfg, 0, sizeof(*cfg));
1962
1963 cfg->fc_table = RT6_TABLE_MAIN;
1964 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1965 cfg->fc_metric = rtmsg->rtmsg_metric;
1966 cfg->fc_expires = rtmsg->rtmsg_info;
1967 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1968 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1969 cfg->fc_flags = rtmsg->rtmsg_flags;
1970
5578689a 1971 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1972
4e3fd7a0
AD
1973 cfg->fc_dst = rtmsg->rtmsg_dst;
1974 cfg->fc_src = rtmsg->rtmsg_src;
1975 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1976}
1977
5578689a 1978int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1979{
86872cb5 1980 struct fib6_config cfg;
1da177e4
LT
1981 struct in6_rtmsg rtmsg;
1982 int err;
1983
1984 switch(cmd) {
1985 case SIOCADDRT: /* Add a route */
1986 case SIOCDELRT: /* Delete a route */
1987 if (!capable(CAP_NET_ADMIN))
1988 return -EPERM;
1989 err = copy_from_user(&rtmsg, arg,
1990 sizeof(struct in6_rtmsg));
1991 if (err)
1992 return -EFAULT;
86872cb5 1993
5578689a 1994 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1995
1da177e4
LT
1996 rtnl_lock();
1997 switch (cmd) {
1998 case SIOCADDRT:
86872cb5 1999 err = ip6_route_add(&cfg);
1da177e4
LT
2000 break;
2001 case SIOCDELRT:
86872cb5 2002 err = ip6_route_del(&cfg);
1da177e4
LT
2003 break;
2004 default:
2005 err = -EINVAL;
2006 }
2007 rtnl_unlock();
2008
2009 return err;
3ff50b79 2010 }
1da177e4
LT
2011
2012 return -EINVAL;
2013}
2014
2015/*
2016 * Drop the packet on the floor
2017 */
2018
d5fdd6ba 2019static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2020{
612f09e8 2021 int type;
adf30907 2022 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2023 switch (ipstats_mib_noroutes) {
2024 case IPSTATS_MIB_INNOROUTES:
0660e03f 2025 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2026 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2027 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2028 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2029 break;
2030 }
2031 /* FALLTHROUGH */
2032 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2033 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2034 ipstats_mib_noroutes);
612f09e8
YH
2035 break;
2036 }
3ffe533c 2037 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2038 kfree_skb(skb);
2039 return 0;
2040}
2041
9ce8ade0
TG
2042static int ip6_pkt_discard(struct sk_buff *skb)
2043{
612f09e8 2044 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2045}
2046
20380731 2047static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2048{
adf30907 2049 skb->dev = skb_dst(skb)->dev;
612f09e8 2050 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2051}
2052
6723ab54
DM
2053#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2054
9ce8ade0
TG
2055static int ip6_pkt_prohibit(struct sk_buff *skb)
2056{
612f09e8 2057 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2058}
2059
2060static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2061{
adf30907 2062 skb->dev = skb_dst(skb)->dev;
612f09e8 2063 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2064}
2065
6723ab54
DM
2066#endif
2067
1da177e4
LT
2068/*
2069 * Allocate a dst for local (unicast / anycast) address.
2070 */
2071
2072struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2073 const struct in6_addr *addr,
8f031519 2074 bool anycast)
1da177e4 2075{
c346dca1 2076 struct net *net = dev_net(idev->dev);
8b96d22d 2077 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
f83c7790 2078 int err;
1da177e4 2079
38308473 2080 if (!rt) {
f3213831 2081 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
1da177e4 2082 return ERR_PTR(-ENOMEM);
40385653 2083 }
1da177e4 2084
1da177e4
LT
2085 in6_dev_hold(idev);
2086
11d53b49 2087 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2088 rt->dst.input = ip6_input;
2089 rt->dst.output = ip6_output;
1da177e4 2090 rt->rt6i_idev = idev;
d8d1f30b 2091 rt->dst.obsolete = -1;
1da177e4
LT
2092
2093 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2094 if (anycast)
2095 rt->rt6i_flags |= RTF_ANYCAST;
2096 else
1da177e4 2097 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2098 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2099 if (err) {
d8d1f30b 2100 dst_free(&rt->dst);
f83c7790 2101 return ERR_PTR(err);
1da177e4
LT
2102 }
2103
4e3fd7a0 2104 rt->rt6i_dst.addr = *addr;
1da177e4 2105 rt->rt6i_dst.plen = 128;
5578689a 2106 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2107
d8d1f30b 2108 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2109
2110 return rt;
2111}
2112
c3968a85
DW
2113int ip6_route_get_saddr(struct net *net,
2114 struct rt6_info *rt,
b71d1d42 2115 const struct in6_addr *daddr,
c3968a85
DW
2116 unsigned int prefs,
2117 struct in6_addr *saddr)
2118{
2119 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2120 int err = 0;
2121 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2122 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2123 else
2124 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2125 daddr, prefs, saddr);
2126 return err;
2127}
2128
2129/* remove deleted ip from prefsrc entries */
/*
 * Argument bundle handed to fib6_remove_prefsrc():
 * dev  - device to match routes against (a NULL dev matches every route),
 * net  - namespace whose ip6_null_entry must be left untouched,
 * addr - the address being compared with each route's prefsrc.
 */
2130struct arg_dev_net_ip {
2131 struct net_device *dev;
2132 struct net *net;
2133 struct in6_addr *addr;
2134};
2135
2136static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2137{
2138 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2139 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2140 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2141
d1918542 2142 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2143 rt != net->ipv6.ip6_null_entry &&
2144 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2145 /* remove prefsrc entry */
2146 rt->rt6i_prefsrc.plen = 0;
2147 }
2148 return 0;
2149}
2150
2151void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2152{
2153 struct net *net = dev_net(ifp->idev->dev);
2154 struct arg_dev_net_ip adni = {
2155 .dev = ifp->idev->dev,
2156 .net = net,
2157 .addr = &ifp->addr,
2158 };
2159 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2160}
2161
8ed67789
DL
/*
 * Argument bundle handed to fib6_ifdown():
 * dev - device whose routes are selected (a NULL dev selects every route),
 * net - namespace whose ip6_null_entry is never selected.
 */
2162struct arg_dev_net {
2163 struct net_device *dev;
2164 struct net *net;
2165};
2166
1da177e4
LT
2167static int fib6_ifdown(struct rt6_info *rt, void *arg)
2168{
bc3ef660 2169 const struct arg_dev_net *adn = arg;
2170 const struct net_device *dev = adn->dev;
8ed67789 2171
d1918542 2172 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2173 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2174 return -1;
c159d30c 2175
1da177e4
LT
2176 return 0;
2177}
2178
f3db4851 2179void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2180{
8ed67789
DL
2181 struct arg_dev_net adn = {
2182 .dev = dev,
2183 .net = net,
2184 };
2185
2186 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2187 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2188}
2189
/*
 * Argument bundle handed to rt6_mtu_change_route():
 * dev - device whose MTU changed,
 * mtu - the new MTU value to propagate to matching routes.
 */
95c96174 2190struct rt6_mtu_change_arg {
1da177e4 2191 struct net_device *dev;
95c96174 2192 unsigned int mtu;
1da177e4
LT
2193};
2194
2195static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2196{
2197 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2198 struct inet6_dev *idev;
2199
2200 /* In IPv6 pmtu discovery is not optional,
2201 so that RTAX_MTU lock cannot disable it.
2202 We still use this lock to block changes
2203 caused by addrconf/ndisc.
2204 */
2205
2206 idev = __in6_dev_get(arg->dev);
38308473 2207 if (!idev)
1da177e4
LT
2208 return 0;
2209
2210 /* For administrative MTU increase, there is no way to discover
2211 IPv6 PMTU increase, so PMTU increase should be updated here.
2212 Since RFC 1981 doesn't include administrative MTU increase
2213 update PMTU increase is a MUST. (i.e. jumbo frame)
2214 */
2215 /*
2216 If new MTU is less than route PMTU, this new MTU will be the
2217 lowest MTU in the path, update the route PMTU to reflect PMTU
2218 decreases; if new MTU is greater than route PMTU, and the
2219 old MTU is the lowest MTU in the path, update the route PMTU
2220 to reflect the increase. In this case if the other nodes' MTU
2221 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2222 PMTU discouvery.
2223 */
d1918542 2224 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2225 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2226 (dst_mtu(&rt->dst) >= arg->mtu ||
2227 (dst_mtu(&rt->dst) < arg->mtu &&
2228 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2229 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2230 }
1da177e4
LT
2231 return 0;
2232}
2233
95c96174 2234void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2235{
c71099ac
TG
2236 struct rt6_mtu_change_arg arg = {
2237 .dev = dev,
2238 .mtu = mtu,
2239 };
1da177e4 2240
c346dca1 2241 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2242}
2243
ef7c79ed 2244static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2245 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2246 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2247 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2248 [RTA_PRIORITY] = { .type = NLA_U32 },
2249 [RTA_METRICS] = { .type = NLA_NESTED },
2250};
2251
2252static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2253 struct fib6_config *cfg)
1da177e4 2254{
86872cb5
TG
2255 struct rtmsg *rtm;
2256 struct nlattr *tb[RTA_MAX+1];
2257 int err;
1da177e4 2258
86872cb5
TG
2259 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2260 if (err < 0)
2261 goto errout;
1da177e4 2262
86872cb5
TG
2263 err = -EINVAL;
2264 rtm = nlmsg_data(nlh);
2265 memset(cfg, 0, sizeof(*cfg));
2266
2267 cfg->fc_table = rtm->rtm_table;
2268 cfg->fc_dst_len = rtm->rtm_dst_len;
2269 cfg->fc_src_len = rtm->rtm_src_len;
2270 cfg->fc_flags = RTF_UP;
2271 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2272 cfg->fc_type = rtm->rtm_type;
86872cb5 2273
ef2c7d7b
ND
2274 if (rtm->rtm_type == RTN_UNREACHABLE ||
2275 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2276 rtm->rtm_type == RTN_PROHIBIT ||
2277 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2278 cfg->fc_flags |= RTF_REJECT;
2279
ab79ad14
2280 if (rtm->rtm_type == RTN_LOCAL)
2281 cfg->fc_flags |= RTF_LOCAL;
2282
15e47304 2283 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2284 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2285 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2286
2287 if (tb[RTA_GATEWAY]) {
2288 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2289 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2290 }
86872cb5
TG
2291
2292 if (tb[RTA_DST]) {
2293 int plen = (rtm->rtm_dst_len + 7) >> 3;
2294
2295 if (nla_len(tb[RTA_DST]) < plen)
2296 goto errout;
2297
2298 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2299 }
86872cb5
TG
2300
2301 if (tb[RTA_SRC]) {
2302 int plen = (rtm->rtm_src_len + 7) >> 3;
2303
2304 if (nla_len(tb[RTA_SRC]) < plen)
2305 goto errout;
2306
2307 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2308 }
86872cb5 2309
c3968a85
DW
2310 if (tb[RTA_PREFSRC])
2311 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2312
86872cb5
TG
2313 if (tb[RTA_OIF])
2314 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2315
2316 if (tb[RTA_PRIORITY])
2317 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2318
2319 if (tb[RTA_METRICS]) {
2320 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2321 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2322 }
86872cb5
TG
2323
2324 if (tb[RTA_TABLE])
2325 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2326
2327 err = 0;
2328errout:
2329 return err;
1da177e4
LT
2330}
2331
c127ea2c 2332static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2333{
86872cb5
TG
2334 struct fib6_config cfg;
2335 int err;
1da177e4 2336
86872cb5
TG
2337 err = rtm_to_fib6_config(skb, nlh, &cfg);
2338 if (err < 0)
2339 return err;
2340
2341 return ip6_route_del(&cfg);
1da177e4
LT
2342}
2343
c127ea2c 2344static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2345{
86872cb5
TG
2346 struct fib6_config cfg;
2347 int err;
1da177e4 2348
86872cb5
TG
2349 err = rtm_to_fib6_config(skb, nlh, &cfg);
2350 if (err < 0)
2351 return err;
2352
2353 return ip6_route_add(&cfg);
1da177e4
LT
2354}
2355
339bf98f
TG
2356static inline size_t rt6_nlmsg_size(void)
2357{
2358 return NLMSG_ALIGN(sizeof(struct rtmsg))
2359 + nla_total_size(16) /* RTA_SRC */
2360 + nla_total_size(16) /* RTA_DST */
2361 + nla_total_size(16) /* RTA_GATEWAY */
2362 + nla_total_size(16) /* RTA_PREFSRC */
2363 + nla_total_size(4) /* RTA_TABLE */
2364 + nla_total_size(4) /* RTA_IIF */
2365 + nla_total_size(4) /* RTA_OIF */
2366 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2367 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2368 + nla_total_size(sizeof(struct rta_cacheinfo));
2369}
2370
191cd582
BH
2371static int rt6_fill_node(struct net *net,
2372 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 2373 struct in6_addr *dst, struct in6_addr *src,
15e47304 2374 int iif, int type, u32 portid, u32 seq,
7bc570c8 2375 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2376{
2377 struct rtmsg *rtm;
2d7202bf 2378 struct nlmsghdr *nlh;
e3703b3d 2379 long expires;
9e762a4a 2380 u32 table;
f2c31e32 2381 struct neighbour *n;
1da177e4
LT
2382
2383 if (prefix) { /* user wants prefix routes only */
2384 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2385 /* success since this is not a prefix route */
2386 return 1;
2387 }
2388 }
2389
15e47304 2390 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 2391 if (!nlh)
26932566 2392 return -EMSGSIZE;
2d7202bf
TG
2393
2394 rtm = nlmsg_data(nlh);
1da177e4
LT
2395 rtm->rtm_family = AF_INET6;
2396 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2397 rtm->rtm_src_len = rt->rt6i_src.plen;
2398 rtm->rtm_tos = 0;
c71099ac 2399 if (rt->rt6i_table)
9e762a4a 2400 table = rt->rt6i_table->tb6_id;
c71099ac 2401 else
9e762a4a
PM
2402 table = RT6_TABLE_UNSPEC;
2403 rtm->rtm_table = table;
c78679e8
DM
2404 if (nla_put_u32(skb, RTA_TABLE, table))
2405 goto nla_put_failure;
ef2c7d7b
ND
2406 if (rt->rt6i_flags & RTF_REJECT) {
2407 switch (rt->dst.error) {
2408 case -EINVAL:
2409 rtm->rtm_type = RTN_BLACKHOLE;
2410 break;
2411 case -EACCES:
2412 rtm->rtm_type = RTN_PROHIBIT;
2413 break;
b4949ab2
ND
2414 case -EAGAIN:
2415 rtm->rtm_type = RTN_THROW;
2416 break;
ef2c7d7b
ND
2417 default:
2418 rtm->rtm_type = RTN_UNREACHABLE;
2419 break;
2420 }
2421 }
38308473 2422 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2423 rtm->rtm_type = RTN_LOCAL;
d1918542 2424 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2425 rtm->rtm_type = RTN_LOCAL;
2426 else
2427 rtm->rtm_type = RTN_UNICAST;
2428 rtm->rtm_flags = 0;
2429 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2430 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2431 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 2432 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
2433 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2434 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2435 rtm->rtm_protocol = RTPROT_RA;
2436 else
2437 rtm->rtm_protocol = RTPROT_KERNEL;
2438 }
1da177e4 2439
38308473 2440 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2441 rtm->rtm_flags |= RTM_F_CLONED;
2442
2443 if (dst) {
c78679e8
DM
2444 if (nla_put(skb, RTA_DST, 16, dst))
2445 goto nla_put_failure;
1ab1457c 2446 rtm->rtm_dst_len = 128;
1da177e4 2447 } else if (rtm->rtm_dst_len)
c78679e8
DM
2448 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2449 goto nla_put_failure;
1da177e4
LT
2450#ifdef CONFIG_IPV6_SUBTREES
2451 if (src) {
c78679e8
DM
2452 if (nla_put(skb, RTA_SRC, 16, src))
2453 goto nla_put_failure;
1ab1457c 2454 rtm->rtm_src_len = 128;
c78679e8
DM
2455 } else if (rtm->rtm_src_len &&
2456 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2457 goto nla_put_failure;
1da177e4 2458#endif
7bc570c8
YH
2459 if (iif) {
2460#ifdef CONFIG_IPV6_MROUTE
2461 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2462 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2463 if (err <= 0) {
2464 if (!nowait) {
2465 if (err == 0)
2466 return 0;
2467 goto nla_put_failure;
2468 } else {
2469 if (err == -EMSGSIZE)
2470 goto nla_put_failure;
2471 }
2472 }
2473 } else
2474#endif
c78679e8
DM
2475 if (nla_put_u32(skb, RTA_IIF, iif))
2476 goto nla_put_failure;
7bc570c8 2477 } else if (dst) {
1da177e4 2478 struct in6_addr saddr_buf;
c78679e8
DM
2479 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2480 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2481 goto nla_put_failure;
1da177e4 2482 }
2d7202bf 2483
c3968a85
DW
2484 if (rt->rt6i_prefsrc.plen) {
2485 struct in6_addr saddr_buf;
4e3fd7a0 2486 saddr_buf = rt->rt6i_prefsrc.addr;
c78679e8
DM
2487 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2488 goto nla_put_failure;
c3968a85
DW
2489 }
2490
defb3519 2491 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2492 goto nla_put_failure;
2493
97cac082 2494 n = rt->n;
94f826b8 2495 if (n) {
fdd6681d 2496 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
94f826b8 2497 goto nla_put_failure;
94f826b8 2498 }
2d7202bf 2499
c78679e8
DM
2500 if (rt->dst.dev &&
2501 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2502 goto nla_put_failure;
2503 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2504 goto nla_put_failure;
8253947e
LW
2505
2506 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 2507
87a50699 2508 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 2509 goto nla_put_failure;
2d7202bf
TG
2510
2511 return nlmsg_end(skb, nlh);
2512
2513nla_put_failure:
26932566
PM
2514 nlmsg_cancel(skb, nlh);
2515 return -EMSGSIZE;
1da177e4
LT
2516}
2517
1b43af54 2518int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2519{
2520 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2521 int prefix;
2522
2d7202bf
TG
2523 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2524 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2525 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2526 } else
2527 prefix = 0;
2528
191cd582
BH
2529 return rt6_fill_node(arg->net,
2530 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 2531 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2532 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2533}
2534
c127ea2c 2535static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2536{
3b1e0a65 2537 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2538 struct nlattr *tb[RTA_MAX+1];
2539 struct rt6_info *rt;
1da177e4 2540 struct sk_buff *skb;
ab364a6f 2541 struct rtmsg *rtm;
4c9483b2 2542 struct flowi6 fl6;
72331bc0 2543 int err, iif = 0, oif = 0;
1da177e4 2544
ab364a6f
TG
2545 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2546 if (err < 0)
2547 goto errout;
1da177e4 2548
ab364a6f 2549 err = -EINVAL;
4c9483b2 2550 memset(&fl6, 0, sizeof(fl6));
1da177e4 2551
ab364a6f
TG
2552 if (tb[RTA_SRC]) {
2553 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2554 goto errout;
2555
4e3fd7a0 2556 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2557 }
2558
2559 if (tb[RTA_DST]) {
2560 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2561 goto errout;
2562
4e3fd7a0 2563 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2564 }
2565
2566 if (tb[RTA_IIF])
2567 iif = nla_get_u32(tb[RTA_IIF]);
2568
2569 if (tb[RTA_OIF])
72331bc0 2570 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2571
2572 if (iif) {
2573 struct net_device *dev;
72331bc0
SL
2574 int flags = 0;
2575
5578689a 2576 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2577 if (!dev) {
2578 err = -ENODEV;
ab364a6f 2579 goto errout;
1da177e4 2580 }
72331bc0
SL
2581
2582 fl6.flowi6_iif = iif;
2583
2584 if (!ipv6_addr_any(&fl6.saddr))
2585 flags |= RT6_LOOKUP_F_HAS_SADDR;
2586
2587 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2588 flags);
2589 } else {
2590 fl6.flowi6_oif = oif;
2591
2592 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2593 }
2594
ab364a6f 2595 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2596 if (!skb) {
2173bff5 2597 dst_release(&rt->dst);
ab364a6f
TG
2598 err = -ENOBUFS;
2599 goto errout;
2600 }
1da177e4 2601
ab364a6f
TG
2602 /* Reserve room for dummy headers, this skb can pass
2603 through good chunk of routing engine.
2604 */
459a98ed 2605 skb_reset_mac_header(skb);
ab364a6f 2606 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2607
d8d1f30b 2608 skb_dst_set(skb, &rt->dst);
1da177e4 2609
4c9483b2 2610 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 2611 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 2612 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2613 if (err < 0) {
ab364a6f
TG
2614 kfree_skb(skb);
2615 goto errout;
1da177e4
LT
2616 }
2617
15e47304 2618 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 2619errout:
1da177e4 2620 return err;
1da177e4
LT
2621}
2622
86872cb5 2623void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2624{
2625 struct sk_buff *skb;
5578689a 2626 struct net *net = info->nl_net;
528c4ceb
DL
2627 u32 seq;
2628 int err;
2629
2630 err = -ENOBUFS;
38308473 2631 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2632
339bf98f 2633 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2634 if (!skb)
21713ebc
TG
2635 goto errout;
2636
191cd582 2637 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
15e47304 2638 event, info->portid, seq, 0, 0, 0);
26932566
PM
2639 if (err < 0) {
2640 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2641 WARN_ON(err == -EMSGSIZE);
2642 kfree_skb(skb);
2643 goto errout;
2644 }
15e47304 2645 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
2646 info->nlh, gfp_any());
2647 return;
21713ebc
TG
2648errout:
2649 if (err < 0)
5578689a 2650 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2651}
2652
8ed67789
DL
2653static int ip6_route_dev_notify(struct notifier_block *this,
2654 unsigned long event, void *data)
2655{
2656 struct net_device *dev = (struct net_device *)data;
c346dca1 2657 struct net *net = dev_net(dev);
8ed67789
DL
2658
2659 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2660 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2661 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2662#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2663 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2664 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2665 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2666 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2667#endif
2668 }
2669
2670 return NOTIFY_OK;
2671}
2672
1da177e4
LT
2673/*
2674 * /proc
2675 */
2676
2677#ifdef CONFIG_PROC_FS
2678
1da177e4
LT
/*
 * Buffer/offset bookkeeping for a /proc reader.
 * NOTE(review): nothing in the visible part of this file references
 * this struct (rt6_info_route() takes a seq_file); it may be a leftover
 * - confirm before removing.
 */
2679struct rt6_proc_arg
2680{
2681 char *buffer;
2682 int offset;
2683 int length;
2684 int skip;
2685 int len;
2686};
2687
2688static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2689{
33120b30 2690 struct seq_file *m = p_arg;
69cce1d1 2691 struct neighbour *n;
1da177e4 2692
4b7a4274 2693 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2694
2695#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2696 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2697#else
33120b30 2698 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2699#endif
97cac082 2700 n = rt->n;
69cce1d1
DM
2701 if (n) {
2702 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2703 } else {
33120b30 2704 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2705 }
33120b30 2706 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2707 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2708 rt->dst.__use, rt->rt6i_flags,
d1918542 2709 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2710 return 0;
2711}
2712
33120b30 2713static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2714{
f3db4851 2715 struct net *net = (struct net *)m->private;
32b293a5 2716 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2717 return 0;
2718}
1da177e4 2719
33120b30
AD
2720static int ipv6_route_open(struct inode *inode, struct file *file)
2721{
de05c557 2722 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2723}
2724
33120b30
AD
/*
 * File operations of the per-namespace "ipv6_route" proc entry
 * (created in ip6_route_net_init_late()); the content is produced by
 * ipv6_route_show() through ipv6_route_open().
 */
2725static const struct file_operations ipv6_route_proc_fops = {
2726 .owner = THIS_MODULE,
2727 .open = ipv6_route_open,
2728 .read = seq_read,
2729 .llseek = seq_lseek,
b6fcbdb4 2730 .release = single_release_net,
33120b30
AD
2731};
2732
1da177e4
LT
2733static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2734{
69ddb805 2735 struct net *net = (struct net *)seq->private;
1da177e4 2736 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2737 net->ipv6.rt6_stats->fib_nodes,
2738 net->ipv6.rt6_stats->fib_route_nodes,
2739 net->ipv6.rt6_stats->fib_rt_alloc,
2740 net->ipv6.rt6_stats->fib_rt_entries,
2741 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2742 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2743 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2744
2745 return 0;
2746}
2747
2748static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2749{
de05c557 2750 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2751}
2752
/*
 * File operations of the per-namespace "rt6_stats" proc entry (created
 * in ip6_route_net_init_late()); the content is produced by
 * rt6_stats_seq_show() through rt6_stats_seq_open().
 */
9a32144e 2753static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2754 .owner = THIS_MODULE,
2755 .open = rt6_stats_seq_open,
2756 .read = seq_read,
2757 .llseek = seq_lseek,
b6fcbdb4 2758 .release = single_release_net,
1da177e4
LT
2759};
2760#endif /* CONFIG_PROC_FS */
2761
2762#ifdef CONFIG_SYSCTL
2763
1da177e4 2764static
8d65af78 2765int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2766 void __user *buffer, size_t *lenp, loff_t *ppos)
2767{
c486da34
LAG
2768 struct net *net;
2769 int delay;
2770 if (!write)
1da177e4 2771 return -EINVAL;
c486da34
LAG
2772
2773 net = (struct net *)ctl->extra1;
2774 delay = net->ipv6.sysctl.flush_delay;
2775 proc_dointvec(ctl, write, buffer, lenp, ppos);
2776 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2777 return 0;
1da177e4
LT
2778}
2779
/*
 * Template for the per-namespace IPv6 route sysctl table.
 *
 * ipv6_route_sysctl_init() duplicates this array and rewrites the .data
 * pointer of entries 0..9 by index, so the order of the entries here
 * must stay in sync with that function.
 *
 * "gc_min_interval" and "gc_min_interval_ms" expose the same variable
 * (ip6_rt_gc_min_interval) through different handlers
 * (proc_dointvec_jiffies vs. proc_dointvec_ms_jiffies).
 *
 * "flush" is write-only (mode 0200) and handled by
 * ipv6_sysctl_rtcache_flush().
 */
760f2d01 2780ctl_table ipv6_route_table_template[] = {
1ab1457c 2781 {
1da177e4 2782 .procname = "flush",
4990509f 2783 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2784 .maxlen = sizeof(int),
89c8b3a1 2785 .mode = 0200,
6d9f239a 2786 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2787 },
2788 {
1da177e4 2789 .procname = "gc_thresh",
9a7ec3a9 2790 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2791 .maxlen = sizeof(int),
2792 .mode = 0644,
6d9f239a 2793 .proc_handler = proc_dointvec,
1da177e4
LT
2794 },
2795 {
1da177e4 2796 .procname = "max_size",
4990509f 2797 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2798 .maxlen = sizeof(int),
2799 .mode = 0644,
6d9f239a 2800 .proc_handler = proc_dointvec,
1da177e4
LT
2801 },
2802 {
1da177e4 2803 .procname = "gc_min_interval",
4990509f 2804 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2805 .maxlen = sizeof(int),
2806 .mode = 0644,
6d9f239a 2807 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2808 },
2809 {
1da177e4 2810 .procname = "gc_timeout",
4990509f 2811 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2812 .maxlen = sizeof(int),
2813 .mode = 0644,
6d9f239a 2814 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2815 },
2816 {
1da177e4 2817 .procname = "gc_interval",
4990509f 2818 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2819 .maxlen = sizeof(int),
2820 .mode = 0644,
6d9f239a 2821 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2822 },
2823 {
1da177e4 2824 .procname = "gc_elasticity",
4990509f 2825 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2826 .maxlen = sizeof(int),
2827 .mode = 0644,
f3d3f616 2828 .proc_handler = proc_dointvec,
1da177e4
LT
2829 },
2830 {
1da177e4 2831 .procname = "mtu_expires",
4990509f 2832 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2833 .maxlen = sizeof(int),
2834 .mode = 0644,
6d9f239a 2835 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2836 },
2837 {
1da177e4 2838 .procname = "min_adv_mss",
4990509f 2839 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2840 .maxlen = sizeof(int),
2841 .mode = 0644,
f3d3f616 2842 .proc_handler = proc_dointvec,
1da177e4
LT
2843 },
2844 {
1da177e4 2845 .procname = "gc_min_interval_ms",
4990509f 2846 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2847 .maxlen = sizeof(int),
2848 .mode = 0644,
6d9f239a 2849 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2850 },
f8572d8f 2851 { }
1da177e4
LT
2852};
2853
2c8c1e72 2854struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2855{
2856 struct ctl_table *table;
2857
2858 table = kmemdup(ipv6_route_table_template,
2859 sizeof(ipv6_route_table_template),
2860 GFP_KERNEL);
5ee09105
YH
2861
2862 if (table) {
2863 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2864 table[0].extra1 = net;
86393e52 2865 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2866 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2867 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2868 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2869 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2870 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2871 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2872 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2873 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2874 }
2875
760f2d01
DL
2876 return table;
2877}
1da177e4
LT
2878#endif
2879
2c8c1e72 2880static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2881{
633d424b 2882 int ret = -ENOMEM;
8ed67789 2883
86393e52
AD
2884 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2885 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2886
fc66f95c
ED
2887 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2888 goto out_ip6_dst_ops;
2889
8ed67789
DL
2890 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2891 sizeof(*net->ipv6.ip6_null_entry),
2892 GFP_KERNEL);
2893 if (!net->ipv6.ip6_null_entry)
fc66f95c 2894 goto out_ip6_dst_entries;
d8d1f30b 2895 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2896 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2897 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2898 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2899 ip6_template_metrics, true);
8ed67789
DL
2900
2901#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2902 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2903 sizeof(*net->ipv6.ip6_prohibit_entry),
2904 GFP_KERNEL);
68fffc67
PZ
2905 if (!net->ipv6.ip6_prohibit_entry)
2906 goto out_ip6_null_entry;
d8d1f30b 2907 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2908 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2909 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2910 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2911 ip6_template_metrics, true);
8ed67789
DL
2912
2913 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2914 sizeof(*net->ipv6.ip6_blk_hole_entry),
2915 GFP_KERNEL);
68fffc67
PZ
2916 if (!net->ipv6.ip6_blk_hole_entry)
2917 goto out_ip6_prohibit_entry;
d8d1f30b 2918 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2919 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2920 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2921 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2922 ip6_template_metrics, true);
8ed67789
DL
2923#endif
2924
b339a47c
PZ
2925 net->ipv6.sysctl.flush_delay = 0;
2926 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2927 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2928 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2929 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2930 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2931 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2932 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2933
6891a346
BT
2934 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2935
8ed67789
DL
2936 ret = 0;
2937out:
2938 return ret;
f2fc6a54 2939
68fffc67
PZ
2940#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2941out_ip6_prohibit_entry:
2942 kfree(net->ipv6.ip6_prohibit_entry);
2943out_ip6_null_entry:
2944 kfree(net->ipv6.ip6_null_entry);
2945#endif
fc66f95c
ED
2946out_ip6_dst_entries:
2947 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2948out_ip6_dst_ops:
f2fc6a54 2949 goto out;
cdb18761
DL
2950}
2951
2c8c1e72 2952static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 2953{
8ed67789
DL
2954 kfree(net->ipv6.ip6_null_entry);
2955#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2956 kfree(net->ipv6.ip6_prohibit_entry);
2957 kfree(net->ipv6.ip6_blk_hole_entry);
2958#endif
41bb78b4 2959 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2960}
2961
d189634e
TG
2962static int __net_init ip6_route_net_init_late(struct net *net)
2963{
2964#ifdef CONFIG_PROC_FS
2965 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2966 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2967#endif
2968 return 0;
2969}
2970
2971static void __net_exit ip6_route_net_exit_late(struct net *net)
2972{
2973#ifdef CONFIG_PROC_FS
2974 proc_net_remove(net, "ipv6_route");
2975 proc_net_remove(net, "rt6_stats");
2976#endif
2977}
2978
cdb18761
DL
/*
 * Per-namespace init/exit hooks of the routing core; registered with
 * register_pernet_subsys() in ip6_route_init().
 */
2979static struct pernet_operations ip6_route_net_ops = {
2980 .init = ip6_route_net_init,
2981 .exit = ip6_route_net_exit,
2982};
2983
c3426b47
DM
2984static int __net_init ipv6_inetpeer_init(struct net *net)
2985{
2986 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2987
2988 if (!bp)
2989 return -ENOMEM;
2990 inet_peer_base_init(bp);
2991 net->ipv6.peers = bp;
2992 return 0;
2993}
2994
2995static void __net_exit ipv6_inetpeer_exit(struct net *net)
2996{
2997 struct inet_peer_base *bp = net->ipv6.peers;
2998
2999 net->ipv6.peers = NULL;
56a6b248 3000 inetpeer_invalidate_tree(bp);
c3426b47
DM
3001 kfree(bp);
3002}
3003
/*
 * Per-namespace init/exit hooks of the IPv6 inet_peer base; registered
 * with register_pernet_subsys() in ip6_route_init(), before
 * ip6_route_net_ops.
 */
2b823f72 3004static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3005 .init = ipv6_inetpeer_init,
3006 .exit = ipv6_inetpeer_exit,
3007};
3008
d189634e
TG
/*
 * Per-namespace hooks that create/remove the proc entries; registered
 * in ip6_route_init() after fib6_rules_init() has succeeded.
 */
3009static struct pernet_operations ip6_route_net_late_ops = {
3010 .init = ip6_route_net_init_late,
3011 .exit = ip6_route_net_exit_late,
3012};
3013
8ed67789
DL
/*
 * Notifier block wrapping ip6_route_dev_notify().
 * NOTE(review): its registration is not visible in this part of the
 * file - presumably done later in ip6_route_init(); confirm.
 */
3014static struct notifier_block ip6_route_dev_notifier = {
3015 .notifier_call = ip6_route_dev_notify,
3016 .priority = 0,
3017};
3018
433d49c3 3019int __init ip6_route_init(void)
1da177e4 3020{
433d49c3
DL
3021 int ret;
3022
9a7ec3a9
DL
3023 ret = -ENOMEM;
3024 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3025 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3026 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3027 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3028 goto out;
14e50e57 3029
fc66f95c 3030 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3031 if (ret)
bdb3289f 3032 goto out_kmem_cache;
bdb3289f 3033
c3426b47
DM
3034 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3035 if (ret)
e8803b6c 3036 goto out_dst_entries;
2a0c451a 3037
7e52b33b
DM
3038 ret = register_pernet_subsys(&ip6_route_net_ops);
3039 if (ret)
3040 goto out_register_inetpeer;
c3426b47 3041
5dc121e9
AE
3042 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3043
8ed67789
DL
3044 /* Registering of the loopback is done before this portion of code,
3045 * the loopback reference in rt6_info will not be taken, do it
3046 * manually for init_net */
d8d1f30b 3047 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3048 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3049 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3050 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3051 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3052 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3053 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3054 #endif
e8803b6c 3055 ret = fib6_init();
433d49c3 3056 if (ret)
8ed67789 3057 goto out_register_subsys;
433d49c3 3058
433d49c3
DL
3059 ret = xfrm6_init();
3060 if (ret)
e8803b6c 3061 goto out_fib6_init;
c35b7e72 3062
433d49c3
DL
3063 ret = fib6_rules_init();
3064 if (ret)
3065 goto xfrm6_init;
7e5449c2 3066
d189634e
TG
3067 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3068 if (ret)
3069 goto fib6_rules_init;
3070
433d49c3 3071 ret = -ENOBUFS;
c7ac8679
GR
3072 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3073 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3074 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3075 goto out_register_late_subsys;
c127ea2c 3076
8ed67789 3077 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3078 if (ret)
d189634e 3079 goto out_register_late_subsys;
8ed67789 3080
433d49c3
DL
3081out:
3082 return ret;
3083
d189634e
TG
3084out_register_late_subsys:
3085 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3086fib6_rules_init:
433d49c3
DL
3087 fib6_rules_cleanup();
3088xfrm6_init:
433d49c3 3089 xfrm6_fini();
2a0c451a
TG
3090out_fib6_init:
3091 fib6_gc_cleanup();
8ed67789
DL
3092out_register_subsys:
3093 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3094out_register_inetpeer:
3095 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3096out_dst_entries:
3097 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3098out_kmem_cache:
f2fc6a54 3099 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3100 goto out;
1da177e4
LT
3101}
3102
3103void ip6_route_cleanup(void)
3104{
8ed67789 3105 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3106 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3107 fib6_rules_cleanup();
1da177e4 3108 xfrm6_fini();
1da177e4 3109 fib6_gc_cleanup();
c3426b47 3110 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3111 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3112 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3113 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3114}