netlink: Rename pid to portid to avoid confusion
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
1da177e4
LT
60
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
1716a961 67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 68 const struct in6_addr *dest);
1da177e4 69static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 70static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 71static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73static void ip6_dst_destroy(struct dst_entry *);
74static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
569d3645 76static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
77
78static int ip6_pkt_discard(struct sk_buff *skb);
79static int ip6_pkt_discard_out(struct sk_buff *skb);
80static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
81static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
82 struct sk_buff *skb, u32 mtu);
83static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
84 struct sk_buff *skb);
1da177e4 85
70ceb4f5 86#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 87static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
88 const struct in6_addr *prefix, int prefixlen,
89 const struct in6_addr *gwaddr, int ifindex,
95c96174 90 unsigned int pref);
efa2cea0 91static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
92 const struct in6_addr *prefix, int prefixlen,
93 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
94#endif
95
06582540
DM
96static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
97{
98 struct rt6_info *rt = (struct rt6_info *) dst;
99 struct inet_peer *peer;
100 u32 *p = NULL;
101
8e2ec639
YZ
102 if (!(rt->dst.flags & DST_HOST))
103 return NULL;
104
fbfe95a4 105 peer = rt6_get_peer_create(rt);
06582540
DM
106 if (peer) {
107 u32 *old_p = __DST_METRICS_PTR(old);
108 unsigned long prev, new;
109
110 p = peer->metrics;
111 if (inet_metrics_new(peer))
112 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113
114 new = (unsigned long) p;
115 prev = cmpxchg(&dst->_metrics, old, new);
116
117 if (prev != old) {
118 p = __DST_METRICS_PTR(prev);
119 if (prev & DST_METRICS_READ_ONLY)
120 p = NULL;
121 }
122 }
123 return p;
124}
125
f894cbf8
DM
126static inline const void *choose_neigh_daddr(struct rt6_info *rt,
127 struct sk_buff *skb,
128 const void *daddr)
39232973
DM
129{
130 struct in6_addr *p = &rt->rt6i_gateway;
131
a7563f34 132 if (!ipv6_addr_any(p))
39232973 133 return (const void *) p;
f894cbf8
DM
134 else if (skb)
135 return &ipv6_hdr(skb)->daddr;
39232973
DM
136 return daddr;
137}
138
f894cbf8
DM
139static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
140 struct sk_buff *skb,
141 const void *daddr)
d3aaeb38 142{
39232973
DM
143 struct rt6_info *rt = (struct rt6_info *) dst;
144 struct neighbour *n;
145
f894cbf8 146 daddr = choose_neigh_daddr(rt, skb, daddr);
39232973 147 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
148 if (n)
149 return n;
150 return neigh_create(&nd_tbl, daddr, dst->dev);
151}
152
8ade06c6 153static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 154{
8ade06c6
DM
155 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
156 if (!n) {
157 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
158 if (IS_ERR(n))
159 return PTR_ERR(n);
160 }
97cac082 161 rt->n = n;
f83c7790
DM
162
163 return 0;
d3aaeb38
DM
164}
165
9a7ec3a9 166static struct dst_ops ip6_dst_ops_template = {
1da177e4 167 .family = AF_INET6,
09640e63 168 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
169 .gc = ip6_dst_gc,
170 .gc_thresh = 1024,
171 .check = ip6_dst_check,
0dbaee3b 172 .default_advmss = ip6_default_advmss,
ebb762f2 173 .mtu = ip6_mtu,
06582540 174 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
175 .destroy = ip6_dst_destroy,
176 .ifdown = ip6_dst_ifdown,
177 .negative_advice = ip6_negative_advice,
178 .link_failure = ip6_link_failure,
179 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 180 .redirect = rt6_do_redirect,
1ac06e03 181 .local_out = __ip6_local_out,
d3aaeb38 182 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
183};
184
ebb762f2 185static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 186{
618f9bc7
SK
187 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
188
189 return mtu ? : dst->dev->mtu;
ec831ea7
RD
190}
191
6700c270
DM
192static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
193 struct sk_buff *skb, u32 mtu)
14e50e57
DM
194{
195}
196
6700c270
DM
/* dst_ops->redirect hook for blackhole routes: intentionally empty. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
201
0972ddb2
HB
202static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
203 unsigned long old)
204{
205 return NULL;
206}
207
14e50e57
DM
208static struct dst_ops ip6_dst_blackhole_ops = {
209 .family = AF_INET6,
09640e63 210 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
211 .destroy = ip6_dst_destroy,
212 .check = ip6_dst_check,
ebb762f2 213 .mtu = ip6_blackhole_mtu,
214f45c9 214 .default_advmss = ip6_default_advmss,
14e50e57 215 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 216 .redirect = ip6_rt_blackhole_redirect,
0972ddb2 217 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 218 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
219};
220
62fa8a84
DM
221static const u32 ip6_template_metrics[RTAX_MAX] = {
222 [RTAX_HOPLIMIT - 1] = 255,
223};
224
bdb3289f 225static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
226 .dst = {
227 .__refcnt = ATOMIC_INIT(1),
228 .__use = 1,
229 .obsolete = -1,
230 .error = -ENETUNREACH,
d8d1f30b
CG
231 .input = ip6_pkt_discard,
232 .output = ip6_pkt_discard_out,
1da177e4
LT
233 },
234 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 235 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
236 .rt6i_metric = ~(u32) 0,
237 .rt6i_ref = ATOMIC_INIT(1),
238};
239
101367c2
TG
240#ifdef CONFIG_IPV6_MULTIPLE_TABLES
241
6723ab54
DM
242static int ip6_pkt_prohibit(struct sk_buff *skb);
243static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 244
280a34c8 245static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
246 .dst = {
247 .__refcnt = ATOMIC_INIT(1),
248 .__use = 1,
249 .obsolete = -1,
250 .error = -EACCES,
d8d1f30b
CG
251 .input = ip6_pkt_prohibit,
252 .output = ip6_pkt_prohibit_out,
101367c2
TG
253 },
254 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 255 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
256 .rt6i_metric = ~(u32) 0,
257 .rt6i_ref = ATOMIC_INIT(1),
258};
259
bdb3289f 260static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
261 .dst = {
262 .__refcnt = ATOMIC_INIT(1),
263 .__use = 1,
264 .obsolete = -1,
265 .error = -EINVAL,
d8d1f30b
CG
266 .input = dst_discard,
267 .output = dst_discard,
101367c2
TG
268 },
269 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 270 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
271 .rt6i_metric = ~(u32) 0,
272 .rt6i_ref = ATOMIC_INIT(1),
273};
274
275#endif
276
1da177e4 277/* allocate dst with ip6_dst_ops */
97bab73f 278static inline struct rt6_info *ip6_dst_alloc(struct net *net,
957c665f 279 struct net_device *dev,
8b96d22d
DM
280 int flags,
281 struct fib6_table *table)
1da177e4 282{
97bab73f 283 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
f5b0a874 284 0, DST_OBSOLETE_NONE, flags);
cf911662 285
97bab73f 286 if (rt) {
8104891b
SK
287 struct dst_entry *dst = &rt->dst;
288
289 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
8b96d22d 290 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
97bab73f 291 }
cf911662 292 return rt;
1da177e4
LT
293}
294
295static void ip6_dst_destroy(struct dst_entry *dst)
296{
297 struct rt6_info *rt = (struct rt6_info *)dst;
298 struct inet6_dev *idev = rt->rt6i_idev;
299
97cac082
DM
300 if (rt->n)
301 neigh_release(rt->n);
302
8e2ec639
YZ
303 if (!(rt->dst.flags & DST_HOST))
304 dst_destroy_metrics_generic(dst);
305
38308473 306 if (idev) {
1da177e4
LT
307 rt->rt6i_idev = NULL;
308 in6_dev_put(idev);
1ab1457c 309 }
1716a961
G
310
311 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
312 dst_release(dst->from);
313
97bab73f
DM
314 if (rt6_has_peer(rt)) {
315 struct inet_peer *peer = rt6_peer_ptr(rt);
b3419363
DM
316 inet_putpeer(peer);
317 }
318}
319
6431cbc2
DM
320static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
321
322static u32 rt6_peer_genid(void)
323{
324 return atomic_read(&__rt6_peer_genid);
325}
326
b3419363
DM
327void rt6_bind_peer(struct rt6_info *rt, int create)
328{
97bab73f 329 struct inet_peer_base *base;
b3419363
DM
330 struct inet_peer *peer;
331
97bab73f
DM
332 base = inetpeer_base_ptr(rt->_rt6i_peer);
333 if (!base)
334 return;
335
336 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
7b34ca2a
DM
337 if (peer) {
338 if (!rt6_set_peer(rt, peer))
339 inet_putpeer(peer);
340 else
341 rt->rt6i_peer_genid = rt6_peer_genid();
342 }
1da177e4
LT
343}
344
345static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
346 int how)
347{
348 struct rt6_info *rt = (struct rt6_info *)dst;
349 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 350 struct net_device *loopback_dev =
c346dca1 351 dev_net(dev)->loopback_dev;
1da177e4 352
97cac082
DM
353 if (dev != loopback_dev) {
354 if (idev && idev->dev == dev) {
355 struct inet6_dev *loopback_idev =
356 in6_dev_get(loopback_dev);
357 if (loopback_idev) {
358 rt->rt6i_idev = loopback_idev;
359 in6_dev_put(idev);
360 }
361 }
362 if (rt->n && rt->n->dev == dev) {
363 rt->n->dev = loopback_dev;
364 dev_hold(loopback_dev);
365 dev_put(dev);
1da177e4
LT
366 }
367 }
368}
369
a50feda5 370static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 371{
1716a961
G
372 struct rt6_info *ort = NULL;
373
374 if (rt->rt6i_flags & RTF_EXPIRES) {
375 if (time_after(jiffies, rt->dst.expires))
a50feda5 376 return true;
1716a961
G
377 } else if (rt->dst.from) {
378 ort = (struct rt6_info *) rt->dst.from;
379 return (ort->rt6i_flags & RTF_EXPIRES) &&
380 time_after(jiffies, ort->dst.expires);
381 }
a50feda5 382 return false;
1da177e4
LT
383}
384
a50feda5 385static bool rt6_need_strict(const struct in6_addr *daddr)
c71099ac 386{
a02cec21
ED
387 return ipv6_addr_type(daddr) &
388 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
389}
390
1da177e4 391/*
c71099ac 392 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
393 */
394
8ed67789
DL
395static inline struct rt6_info *rt6_device_match(struct net *net,
396 struct rt6_info *rt,
b71d1d42 397 const struct in6_addr *saddr,
1da177e4 398 int oif,
d420895e 399 int flags)
1da177e4
LT
400{
401 struct rt6_info *local = NULL;
402 struct rt6_info *sprt;
403
dd3abc4e
YH
404 if (!oif && ipv6_addr_any(saddr))
405 goto out;
406
d8d1f30b 407 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 408 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
409
410 if (oif) {
1da177e4
LT
411 if (dev->ifindex == oif)
412 return sprt;
413 if (dev->flags & IFF_LOOPBACK) {
38308473 414 if (!sprt->rt6i_idev ||
1da177e4 415 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 416 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 417 continue;
1ab1457c 418 if (local && (!oif ||
1da177e4
LT
419 local->rt6i_idev->dev->ifindex == oif))
420 continue;
421 }
422 local = sprt;
423 }
dd3abc4e
YH
424 } else {
425 if (ipv6_chk_addr(net, saddr, dev,
426 flags & RT6_LOOKUP_F_IFACE))
427 return sprt;
1da177e4 428 }
dd3abc4e 429 }
1da177e4 430
dd3abc4e 431 if (oif) {
1da177e4
LT
432 if (local)
433 return local;
434
d420895e 435 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 436 return net->ipv6.ip6_null_entry;
1da177e4 437 }
dd3abc4e 438out:
1da177e4
LT
439 return rt;
440}
441
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If @rt has a bound neighbour that is not in a NUD_VALID state and the
 * last update is older than the interface's rtr_probe_interval, send a
 * neighbour solicitation to the neighbour's solicited-node multicast
 * address.  Probing is rate-limited by stamping neigh->updated before
 * the solicitation is sent.
 *
 * neigh->updated is written here, so neigh->lock is taken for writing;
 * holding only the read side would let two CPUs race on the timestamp
 * and both send a probe.
 *
 * @rt may be NULL, in which case nothing is done.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;

	rcu_read_lock();
	neigh = rt ? rt->n : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	write_lock_bh(&neigh->lock);
	/* re-check under the lock: the state may have changed meanwhile */
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		/* drop the lock before transmitting */
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
481
1da177e4 482/*
554cfb7e 483 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 484 */
b6f99a21 485static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 486{
d1918542 487 struct net_device *dev = rt->dst.dev;
161980f4 488 if (!oif || dev->ifindex == oif)
554cfb7e 489 return 2;
161980f4
DM
490 if ((dev->flags & IFF_LOOPBACK) &&
491 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
492 return 1;
493 return 0;
554cfb7e 494}
1da177e4 495
b6f99a21 496static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 497{
f2c31e32 498 struct neighbour *neigh;
398bcbeb 499 int m;
f2c31e32
ED
500
501 rcu_read_lock();
97cac082 502 neigh = rt->n;
4d0c5911
YH
503 if (rt->rt6i_flags & RTF_NONEXTHOP ||
504 !(rt->rt6i_flags & RTF_GATEWAY))
505 m = 1;
506 else if (neigh) {
554cfb7e
YH
507 read_lock_bh(&neigh->lock);
508 if (neigh->nud_state & NUD_VALID)
4d0c5911 509 m = 2;
398bcbeb
YH
510#ifdef CONFIG_IPV6_ROUTER_PREF
511 else if (neigh->nud_state & NUD_FAILED)
512 m = 0;
513#endif
514 else
ea73ee23 515 m = 1;
554cfb7e 516 read_unlock_bh(&neigh->lock);
398bcbeb
YH
517 } else
518 m = 0;
f2c31e32 519 rcu_read_unlock();
554cfb7e 520 return m;
1da177e4
LT
521}
522
554cfb7e
YH
523static int rt6_score_route(struct rt6_info *rt, int oif,
524 int strict)
1da177e4 525{
4d0c5911 526 int m, n;
1ab1457c 527
4d0c5911 528 m = rt6_check_dev(rt, oif);
77d16f45 529 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 530 return -1;
ebacaaa0
YH
531#ifdef CONFIG_IPV6_ROUTER_PREF
532 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
533#endif
4d0c5911 534 n = rt6_check_neigh(rt);
557e92ef 535 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
536 return -1;
537 return m;
538}
539
f11e6659
DM
540static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
541 int *mpri, struct rt6_info *match)
554cfb7e 542{
f11e6659
DM
543 int m;
544
545 if (rt6_check_expired(rt))
546 goto out;
547
548 m = rt6_score_route(rt, oif, strict);
549 if (m < 0)
550 goto out;
551
552 if (m > *mpri) {
553 if (strict & RT6_LOOKUP_F_REACHABLE)
554 rt6_probe(match);
555 *mpri = m;
556 match = rt;
557 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
558 rt6_probe(rt);
559 }
560
561out:
562 return match;
563}
564
565static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
566 struct rt6_info *rr_head,
567 u32 metric, int oif, int strict)
568{
569 struct rt6_info *rt, *match;
554cfb7e 570 int mpri = -1;
1da177e4 571
f11e6659
DM
572 match = NULL;
573 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 574 rt = rt->dst.rt6_next)
f11e6659
DM
575 match = find_match(rt, oif, strict, &mpri, match);
576 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 577 rt = rt->dst.rt6_next)
f11e6659 578 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 579
f11e6659
DM
580 return match;
581}
1da177e4 582
f11e6659
DM
583static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
584{
585 struct rt6_info *match, *rt0;
8ed67789 586 struct net *net;
1da177e4 587
f11e6659
DM
588 rt0 = fn->rr_ptr;
589 if (!rt0)
590 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 591
f11e6659 592 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 593
554cfb7e 594 if (!match &&
f11e6659 595 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 596 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 597
554cfb7e 598 /* no entries matched; do round-robin */
f11e6659
DM
599 if (!next || next->rt6i_metric != rt0->rt6i_metric)
600 next = fn->leaf;
601
602 if (next != rt0)
603 fn->rr_ptr = next;
1da177e4 604 }
1da177e4 605
d1918542 606 net = dev_net(rt0->dst.dev);
a02cec21 607 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
608}
609
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option (@opt, @len bytes) received on @dev
 * from router @gwaddr.
 *
 * After validating the option, the matching route-info route is looked
 * up; it is deleted when the advertised lifetime is zero, created when
 * it does not exist and the lifetime is non-zero, or has its preference
 * bits refreshed otherwise.  The resulting route then gets its expiry
 * set from the lifetime (or cleared when the lifetime is not finite).
 *
 * Returns 0 on success, -EINVAL for a malformed option or an invalid
 * router preference.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* the option carries a full address; use it in place */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	dst_release(&rt->dst);
	return 0;
}
#endif
682
/*
 * BACKTRACK(__net, saddr) - climb the fib6 tree after a failed match.
 *
 * This macro is NOT self-contained.  It reads and writes the caller's
 * local variables "rt" and "fn", and jumps to the caller's labels "out"
 * and "restart".
 *
 * When "rt" is the namespace's null entry, walk up from "fn" towards the
 * root: at each step either descend into the parent's source-address
 * subtree (looked up with @saddr) or move to the parent itself.  Jump to
 * "restart" as soon as a node flagged RTN_RTINFO is reached, or to "out"
 * when the top-level root is hit.
 *
 * Both arguments are parenthesized in the expansion so that arbitrary
 * expressions can be passed safely.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		while (1) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 700
8ed67789
DL
701static struct rt6_info *ip6_pol_route_lookup(struct net *net,
702 struct fib6_table *table,
4c9483b2 703 struct flowi6 *fl6, int flags)
1da177e4
LT
704{
705 struct fib6_node *fn;
706 struct rt6_info *rt;
707
c71099ac 708 read_lock_bh(&table->tb6_lock);
4c9483b2 709 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
710restart:
711 rt = fn->leaf;
4c9483b2
DM
712 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
713 BACKTRACK(net, &fl6->saddr);
c71099ac 714out:
d8d1f30b 715 dst_use(&rt->dst, jiffies);
c71099ac 716 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
717 return rt;
718
719}
720
ea6e574e
FW
721struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
722 int flags)
723{
724 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
725}
726EXPORT_SYMBOL_GPL(ip6_route_lookup);
727
9acd9f3a
YH
728struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
729 const struct in6_addr *saddr, int oif, int strict)
c71099ac 730{
4c9483b2
DM
731 struct flowi6 fl6 = {
732 .flowi6_oif = oif,
733 .daddr = *daddr,
c71099ac
TG
734 };
735 struct dst_entry *dst;
77d16f45 736 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 737
adaa70bb 738 if (saddr) {
4c9483b2 739 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
740 flags |= RT6_LOOKUP_F_HAS_SADDR;
741 }
742
4c9483b2 743 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
744 if (dst->error == 0)
745 return (struct rt6_info *) dst;
746
747 dst_release(dst);
748
1da177e4
LT
749 return NULL;
750}
751
7159039a
YH
752EXPORT_SYMBOL(rt6_lookup);
753
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold a
   reference to the route, it may be destroyed.
 */
759
86872cb5 760static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
761{
762 int err;
c71099ac 763 struct fib6_table *table;
1da177e4 764
c71099ac
TG
765 table = rt->rt6i_table;
766 write_lock_bh(&table->tb6_lock);
86872cb5 767 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 768 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
769
770 return err;
771}
772
40e22e8f
TG
773int ip6_ins_rt(struct rt6_info *rt)
774{
4d1169c1 775 struct nl_info info = {
d1918542 776 .nl_net = dev_net(rt->dst.dev),
4d1169c1 777 };
528c4ceb 778 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
779}
780
1716a961 781static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 782 const struct in6_addr *daddr,
b71d1d42 783 const struct in6_addr *saddr)
1da177e4 784{
1da177e4
LT
785 struct rt6_info *rt;
786
787 /*
788 * Clone the route.
789 */
790
21efcfa0 791 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
792
793 if (rt) {
14deae41
DM
794 int attempts = !in_softirq();
795
38308473 796 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 797 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 798 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 799 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 800 rt->rt6i_gateway = *daddr;
58c4fb86 801 }
1da177e4 802
1da177e4 803 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
804
805#ifdef CONFIG_IPV6_SUBTREES
806 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 807 rt->rt6i_src.addr = *saddr;
1da177e4
LT
808 rt->rt6i_src.plen = 128;
809 }
810#endif
811
14deae41 812 retry:
8ade06c6 813 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 814 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
815 int saved_rt_min_interval =
816 net->ipv6.sysctl.ip6_rt_gc_min_interval;
817 int saved_rt_elasticity =
818 net->ipv6.sysctl.ip6_rt_gc_elasticity;
819
820 if (attempts-- > 0) {
821 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
822 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
823
86393e52 824 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
825
826 net->ipv6.sysctl.ip6_rt_gc_elasticity =
827 saved_rt_elasticity;
828 net->ipv6.sysctl.ip6_rt_gc_min_interval =
829 saved_rt_min_interval;
830 goto retry;
831 }
832
f3213831 833 net_warn_ratelimited("Neighbour table overflow\n");
d8d1f30b 834 dst_free(&rt->dst);
14deae41
DM
835 return NULL;
836 }
95a9a5ba 837 }
1da177e4 838
95a9a5ba
YH
839 return rt;
840}
1da177e4 841
21efcfa0
ED
842static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
843 const struct in6_addr *daddr)
299d9939 844{
21efcfa0
ED
845 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
846
299d9939 847 if (rt) {
299d9939 848 rt->rt6i_flags |= RTF_CACHE;
97cac082 849 rt->n = neigh_clone(ort->n);
299d9939
YH
850 }
851 return rt;
852}
853
8ed67789 854static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 855 struct flowi6 *fl6, int flags)
1da177e4
LT
856{
857 struct fib6_node *fn;
519fbd87 858 struct rt6_info *rt, *nrt;
c71099ac 859 int strict = 0;
1da177e4 860 int attempts = 3;
519fbd87 861 int err;
53b7997f 862 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 863
77d16f45 864 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
865
866relookup:
c71099ac 867 read_lock_bh(&table->tb6_lock);
1da177e4 868
8238dd06 869restart_2:
4c9483b2 870 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
871
872restart:
4acad72d 873 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 874
4c9483b2 875 BACKTRACK(net, &fl6->saddr);
8ed67789 876 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 877 rt->rt6i_flags & RTF_CACHE)
1ddef044 878 goto out;
1da177e4 879
d8d1f30b 880 dst_hold(&rt->dst);
c71099ac 881 read_unlock_bh(&table->tb6_lock);
fb9de91e 882
97cac082 883 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 884 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 885 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 886 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
887 else
888 goto out2;
e40cf353 889
d8d1f30b 890 dst_release(&rt->dst);
8ed67789 891 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 892
d8d1f30b 893 dst_hold(&rt->dst);
519fbd87 894 if (nrt) {
40e22e8f 895 err = ip6_ins_rt(nrt);
519fbd87 896 if (!err)
1da177e4 897 goto out2;
1da177e4 898 }
1da177e4 899
519fbd87
YH
900 if (--attempts <= 0)
901 goto out2;
902
903 /*
c71099ac 904 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
905 * released someone could insert this route. Relookup.
906 */
d8d1f30b 907 dst_release(&rt->dst);
519fbd87
YH
908 goto relookup;
909
910out:
8238dd06
YH
911 if (reachable) {
912 reachable = 0;
913 goto restart_2;
914 }
d8d1f30b 915 dst_hold(&rt->dst);
c71099ac 916 read_unlock_bh(&table->tb6_lock);
1da177e4 917out2:
d8d1f30b
CG
918 rt->dst.lastuse = jiffies;
919 rt->dst.__use++;
c71099ac
TG
920
921 return rt;
1da177e4
LT
922}
923
8ed67789 924static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 925 struct flowi6 *fl6, int flags)
4acad72d 926{
4c9483b2 927 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
928}
929
72331bc0
SL
930static struct dst_entry *ip6_route_input_lookup(struct net *net,
931 struct net_device *dev,
932 struct flowi6 *fl6, int flags)
933{
934 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
935 flags |= RT6_LOOKUP_F_IFACE;
936
937 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
938}
939
c71099ac
TG
940void ip6_route_input(struct sk_buff *skb)
941{
b71d1d42 942 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 943 struct net *net = dev_net(skb->dev);
adaa70bb 944 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
945 struct flowi6 fl6 = {
946 .flowi6_iif = skb->dev->ifindex,
947 .daddr = iph->daddr,
948 .saddr = iph->saddr,
38308473 949 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
950 .flowi6_mark = skb->mark,
951 .flowi6_proto = iph->nexthdr,
c71099ac 952 };
adaa70bb 953
72331bc0 954 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
955}
956
8ed67789 957static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 958 struct flowi6 *fl6, int flags)
1da177e4 959{
4c9483b2 960 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
961}
962
9c7a4f9c 963struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 964 struct flowi6 *fl6)
c71099ac
TG
965{
966 int flags = 0;
967
1fb9489b 968 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 969
4c9483b2 970 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 971 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 972
4c9483b2 973 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 974 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
975 else if (sk)
976 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 977
4c9483b2 978 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
979}
980
7159039a 981EXPORT_SYMBOL(ip6_route_output);
1da177e4 982
2774c131 983struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 984{
5c1e6aa3 985 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
986 struct dst_entry *new = NULL;
987
f5b0a874 988 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 989 if (rt) {
d8d1f30b 990 new = &rt->dst;
14e50e57 991
8104891b
SK
992 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
993 rt6_init_peer(rt, net->ipv6.peers);
994
14e50e57 995 new->__use = 1;
352e512c
HX
996 new->input = dst_discard;
997 new->output = dst_discard;
14e50e57 998
21efcfa0
ED
999 if (dst_metrics_read_only(&ort->dst))
1000 new->_metrics = ort->dst._metrics;
1001 else
1002 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
1003 rt->rt6i_idev = ort->rt6i_idev;
1004 if (rt->rt6i_idev)
1005 in6_dev_hold(rt->rt6i_idev);
14e50e57 1006
4e3fd7a0 1007 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1008 rt->rt6i_flags = ort->rt6i_flags;
1009 rt6_clean_expires(rt);
14e50e57
DM
1010 rt->rt6i_metric = 0;
1011
1012 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1013#ifdef CONFIG_IPV6_SUBTREES
1014 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1015#endif
1016
1017 dst_free(new);
1018 }
1019
69ead7af
DM
1020 dst_release(dst_orig);
1021 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1022}
14e50e57 1023
1da177e4
LT
1024/*
1025 * Destination cache support functions
1026 */
1027
1028static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1029{
1030 struct rt6_info *rt;
1031
1032 rt = (struct rt6_info *) dst;
1033
6431cbc2
DM
1034 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1035 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
97bab73f 1036 if (!rt6_has_peer(rt))
6431cbc2
DM
1037 rt6_bind_peer(rt, 0);
1038 rt->rt6i_peer_genid = rt6_peer_genid();
1039 }
1da177e4 1040 return dst;
6431cbc2 1041 }
1da177e4
LT
1042 return NULL;
1043}
1044
1045static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1046{
1047 struct rt6_info *rt = (struct rt6_info *) dst;
1048
1049 if (rt) {
54c1a859
YH
1050 if (rt->rt6i_flags & RTF_CACHE) {
1051 if (rt6_check_expired(rt)) {
1052 ip6_del_rt(rt);
1053 dst = NULL;
1054 }
1055 } else {
1da177e4 1056 dst_release(dst);
54c1a859
YH
1057 dst = NULL;
1058 }
1da177e4 1059 }
54c1a859 1060 return dst;
1da177e4
LT
1061}
1062
1063static void ip6_link_failure(struct sk_buff *skb)
1064{
1065 struct rt6_info *rt;
1066
3ffe533c 1067 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1068
adf30907 1069 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1070 if (rt) {
1716a961
G
1071 if (rt->rt6i_flags & RTF_CACHE)
1072 rt6_update_expires(rt, 0);
1073 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1da177e4
LT
1074 rt->rt6i_node->fn_sernum = -1;
1075 }
1076}
1077
6700c270
DM
1078static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1079 struct sk_buff *skb, u32 mtu)
1da177e4
LT
1080{
1081 struct rt6_info *rt6 = (struct rt6_info*)dst;
1082
81aded24 1083 dst_confirm(dst);
1da177e4 1084 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
81aded24
DM
1085 struct net *net = dev_net(dst->dev);
1086
1da177e4
LT
1087 rt6->rt6i_flags |= RTF_MODIFIED;
1088 if (mtu < IPV6_MIN_MTU) {
defb3519 1089 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1090 mtu = IPV6_MIN_MTU;
defb3519
DM
1091 features |= RTAX_FEATURE_ALLFRAG;
1092 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1093 }
defb3519 1094 dst_metric_set(dst, RTAX_MTU, mtu);
81aded24 1095 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1096 }
1097}
1098
42ae66c8
DM
1099void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1100 int oif, u32 mark)
81aded24
DM
1101{
1102 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1103 struct dst_entry *dst;
1104 struct flowi6 fl6;
1105
1106 memset(&fl6, 0, sizeof(fl6));
1107 fl6.flowi6_oif = oif;
1108 fl6.flowi6_mark = mark;
3e12939a 1109 fl6.flowi6_flags = 0;
81aded24
DM
1110 fl6.daddr = iph->daddr;
1111 fl6.saddr = iph->saddr;
1112 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1113
1114 dst = ip6_route_output(net, NULL, &fl6);
1115 if (!dst->error)
6700c270 1116 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
81aded24
DM
1117 dst_release(dst);
1118}
1119EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1120
1121void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1122{
1123 ip6_update_pmtu(skb, sock_net(sk), mtu,
1124 sk->sk_bound_dev_if, sk->sk_mark);
1125}
1126EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1127
3a5ad2ee
DM
1128void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1129{
1130 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1131 struct dst_entry *dst;
1132 struct flowi6 fl6;
1133
1134 memset(&fl6, 0, sizeof(fl6));
1135 fl6.flowi6_oif = oif;
1136 fl6.flowi6_mark = mark;
1137 fl6.flowi6_flags = 0;
1138 fl6.daddr = iph->daddr;
1139 fl6.saddr = iph->saddr;
1140 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1141
1142 dst = ip6_route_output(net, NULL, &fl6);
1143 if (!dst->error)
6700c270 1144 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1145 dst_release(dst);
1146}
1147EXPORT_SYMBOL_GPL(ip6_redirect);
1148
1149void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1150{
1151 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1152}
1153EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1154
0dbaee3b 1155static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1156{
0dbaee3b
DM
1157 struct net_device *dev = dst->dev;
1158 unsigned int mtu = dst_mtu(dst);
1159 struct net *net = dev_net(dev);
1160
1da177e4
LT
1161 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1162
5578689a
DL
1163 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1164 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1165
1166 /*
1ab1457c
YH
1167 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1168 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1169 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1170 * rely only on pmtu discovery"
1171 */
1172 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1173 mtu = IPV6_MAXPLEN;
1174 return mtu;
1175}
1176
ebb762f2 1177static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1178{
d33e4553 1179 struct inet6_dev *idev;
618f9bc7
SK
1180 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1181
1182 if (mtu)
1183 return mtu;
1184
1185 mtu = IPV6_MIN_MTU;
d33e4553
DM
1186
1187 rcu_read_lock();
1188 idev = __in6_dev_get(dst->dev);
1189 if (idev)
1190 mtu = idev->cnf.mtu6;
1191 rcu_read_unlock();
1192
1193 return mtu;
1194}
1195
3b00944c
YH
1196static struct dst_entry *icmp6_dst_gc_list;
1197static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1198
3b00944c 1199struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1200 struct neighbour *neigh,
87a11578 1201 struct flowi6 *fl6)
1da177e4 1202{
87a11578 1203 struct dst_entry *dst;
1da177e4
LT
1204 struct rt6_info *rt;
1205 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1206 struct net *net = dev_net(dev);
1da177e4 1207
38308473 1208 if (unlikely(!idev))
122bdf67 1209 return ERR_PTR(-ENODEV);
1da177e4 1210
8b96d22d 1211 rt = ip6_dst_alloc(net, dev, 0, NULL);
38308473 1212 if (unlikely(!rt)) {
1da177e4 1213 in6_dev_put(idev);
87a11578 1214 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1215 goto out;
1216 }
1217
1da177e4
LT
1218 if (neigh)
1219 neigh_hold(neigh);
14deae41 1220 else {
f894cbf8 1221 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
b43faac6 1222 if (IS_ERR(neigh)) {
252c3d84 1223 in6_dev_put(idev);
b43faac6
DM
1224 dst_free(&rt->dst);
1225 return ERR_CAST(neigh);
1226 }
14deae41 1227 }
1da177e4 1228
8e2ec639
YZ
1229 rt->dst.flags |= DST_HOST;
1230 rt->dst.output = ip6_output;
97cac082 1231 rt->n = neigh;
d8d1f30b 1232 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1233 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1234 rt->rt6i_dst.plen = 128;
1235 rt->rt6i_idev = idev;
7011687f 1236 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1237
3b00944c 1238 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1239 rt->dst.next = icmp6_dst_gc_list;
1240 icmp6_dst_gc_list = &rt->dst;
3b00944c 1241 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1242
5578689a 1243 fib6_force_start_gc(net);
1da177e4 1244
87a11578
DM
1245 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1246
1da177e4 1247out:
87a11578 1248 return dst;
1da177e4
LT
1249}
1250
3d0f24a7 1251int icmp6_dst_gc(void)
1da177e4 1252{
e9476e95 1253 struct dst_entry *dst, **pprev;
3d0f24a7 1254 int more = 0;
1da177e4 1255
3b00944c
YH
1256 spin_lock_bh(&icmp6_dst_lock);
1257 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1258
1da177e4
LT
1259 while ((dst = *pprev) != NULL) {
1260 if (!atomic_read(&dst->__refcnt)) {
1261 *pprev = dst->next;
1262 dst_free(dst);
1da177e4
LT
1263 } else {
1264 pprev = &dst->next;
3d0f24a7 1265 ++more;
1da177e4
LT
1266 }
1267 }
1268
3b00944c 1269 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1270
3d0f24a7 1271 return more;
1da177e4
LT
1272}
1273
1e493d19
DM
1274static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1275 void *arg)
1276{
1277 struct dst_entry *dst, **pprev;
1278
1279 spin_lock_bh(&icmp6_dst_lock);
1280 pprev = &icmp6_dst_gc_list;
1281 while ((dst = *pprev) != NULL) {
1282 struct rt6_info *rt = (struct rt6_info *) dst;
1283 if (func(rt, arg)) {
1284 *pprev = dst->next;
1285 dst_free(dst);
1286 } else {
1287 pprev = &dst->next;
1288 }
1289 }
1290 spin_unlock_bh(&icmp6_dst_lock);
1291}
1292
569d3645 1293static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1294{
1da177e4 1295 unsigned long now = jiffies;
86393e52 1296 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1297 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1298 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1299 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1300 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1301 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1302 int entries;
7019b78e 1303
fc66f95c 1304 entries = dst_entries_get_fast(ops);
7019b78e 1305 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1306 entries <= rt_max_size)
1da177e4
LT
1307 goto out;
1308
6891a346
BT
1309 net->ipv6.ip6_rt_gc_expire++;
1310 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1311 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1312 entries = dst_entries_get_slow(ops);
1313 if (entries < ops->gc_thresh)
7019b78e 1314 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1315out:
7019b78e 1316 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1317 return entries > rt_max_size;
1da177e4
LT
1318}
1319
1320/* Clean host part of a prefix. Not necessary in radix tree,
1321 but results in cleaner routing tables.
1322
1323 Remove it only when all the things will work!
1324 */
1325
6b75d090 1326int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1327{
5170ae82 1328 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1329 if (hoplimit == 0) {
6b75d090 1330 struct net_device *dev = dst->dev;
c68f24cc
ED
1331 struct inet6_dev *idev;
1332
1333 rcu_read_lock();
1334 idev = __in6_dev_get(dev);
1335 if (idev)
6b75d090 1336 hoplimit = idev->cnf.hop_limit;
c68f24cc 1337 else
53b7997f 1338 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1339 rcu_read_unlock();
1da177e4
LT
1340 }
1341 return hoplimit;
1342}
abbf46ae 1343EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1344
1345/*
1346 *
1347 */
1348
86872cb5 1349int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1350{
1351 int err;
5578689a 1352 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1353 struct rt6_info *rt = NULL;
1354 struct net_device *dev = NULL;
1355 struct inet6_dev *idev = NULL;
c71099ac 1356 struct fib6_table *table;
1da177e4
LT
1357 int addr_type;
1358
86872cb5 1359 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1360 return -EINVAL;
1361#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1362 if (cfg->fc_src_len)
1da177e4
LT
1363 return -EINVAL;
1364#endif
86872cb5 1365 if (cfg->fc_ifindex) {
1da177e4 1366 err = -ENODEV;
5578689a 1367 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1368 if (!dev)
1369 goto out;
1370 idev = in6_dev_get(dev);
1371 if (!idev)
1372 goto out;
1373 }
1374
86872cb5
TG
1375 if (cfg->fc_metric == 0)
1376 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1377
d71314b4 1378 err = -ENOBUFS;
38308473
DM
1379 if (cfg->fc_nlinfo.nlh &&
1380 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1381 table = fib6_get_table(net, cfg->fc_table);
38308473 1382 if (!table) {
f3213831 1383 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1384 table = fib6_new_table(net, cfg->fc_table);
1385 }
1386 } else {
1387 table = fib6_new_table(net, cfg->fc_table);
1388 }
38308473
DM
1389
1390 if (!table)
c71099ac 1391 goto out;
c71099ac 1392
8b96d22d 1393 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1da177e4 1394
38308473 1395 if (!rt) {
1da177e4
LT
1396 err = -ENOMEM;
1397 goto out;
1398 }
1399
d8d1f30b 1400 rt->dst.obsolete = -1;
1716a961
G
1401
1402 if (cfg->fc_flags & RTF_EXPIRES)
1403 rt6_set_expires(rt, jiffies +
1404 clock_t_to_jiffies(cfg->fc_expires));
1405 else
1406 rt6_clean_expires(rt);
1da177e4 1407
86872cb5
TG
1408 if (cfg->fc_protocol == RTPROT_UNSPEC)
1409 cfg->fc_protocol = RTPROT_BOOT;
1410 rt->rt6i_protocol = cfg->fc_protocol;
1411
1412 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1413
1414 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1415 rt->dst.input = ip6_mc_input;
ab79ad14
1416 else if (cfg->fc_flags & RTF_LOCAL)
1417 rt->dst.input = ip6_input;
1da177e4 1418 else
d8d1f30b 1419 rt->dst.input = ip6_forward;
1da177e4 1420
d8d1f30b 1421 rt->dst.output = ip6_output;
1da177e4 1422
86872cb5
TG
1423 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1424 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1425 if (rt->rt6i_dst.plen == 128)
11d53b49 1426 rt->dst.flags |= DST_HOST;
1da177e4 1427
8e2ec639
YZ
1428 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1429 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1430 if (!metrics) {
1431 err = -ENOMEM;
1432 goto out;
1433 }
1434 dst_init_metrics(&rt->dst, metrics, 0);
1435 }
1da177e4 1436#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1437 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1438 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1439#endif
1440
86872cb5 1441 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1442
1443 /* We cannot add true routes via loopback here,
1444 they would result in kernel looping; promote them to reject routes
1445 */
86872cb5 1446 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1447 (dev && (dev->flags & IFF_LOOPBACK) &&
1448 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1449 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1450 /* hold loopback dev/idev if we haven't done so. */
5578689a 1451 if (dev != net->loopback_dev) {
1da177e4
LT
1452 if (dev) {
1453 dev_put(dev);
1454 in6_dev_put(idev);
1455 }
5578689a 1456 dev = net->loopback_dev;
1da177e4
LT
1457 dev_hold(dev);
1458 idev = in6_dev_get(dev);
1459 if (!idev) {
1460 err = -ENODEV;
1461 goto out;
1462 }
1463 }
d8d1f30b
CG
1464 rt->dst.output = ip6_pkt_discard_out;
1465 rt->dst.input = ip6_pkt_discard;
1da177e4 1466 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1467 switch (cfg->fc_type) {
1468 case RTN_BLACKHOLE:
1469 rt->dst.error = -EINVAL;
1470 break;
1471 case RTN_PROHIBIT:
1472 rt->dst.error = -EACCES;
1473 break;
b4949ab2
ND
1474 case RTN_THROW:
1475 rt->dst.error = -EAGAIN;
1476 break;
ef2c7d7b
ND
1477 default:
1478 rt->dst.error = -ENETUNREACH;
1479 break;
1480 }
1da177e4
LT
1481 goto install_route;
1482 }
1483
86872cb5 1484 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1485 const struct in6_addr *gw_addr;
1da177e4
LT
1486 int gwa_type;
1487
86872cb5 1488 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1489 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1490 gwa_type = ipv6_addr_type(gw_addr);
1491
1492 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1493 struct rt6_info *grt;
1494
1495 /* IPv6 strictly inhibits using not link-local
1496 addresses as nexthop address.
1497 Otherwise, router will not able to send redirects.
1498 It is very good, but in some (rare!) circumstances
1499 (SIT, PtP, NBMA NOARP links) it is handy to allow
1500 some exceptions. --ANK
1501 */
1502 err = -EINVAL;
38308473 1503 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1504 goto out;
1505
5578689a 1506 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1507
1508 err = -EHOSTUNREACH;
38308473 1509 if (!grt)
1da177e4
LT
1510 goto out;
1511 if (dev) {
d1918542 1512 if (dev != grt->dst.dev) {
d8d1f30b 1513 dst_release(&grt->dst);
1da177e4
LT
1514 goto out;
1515 }
1516 } else {
d1918542 1517 dev = grt->dst.dev;
1da177e4
LT
1518 idev = grt->rt6i_idev;
1519 dev_hold(dev);
1520 in6_dev_hold(grt->rt6i_idev);
1521 }
38308473 1522 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1523 err = 0;
d8d1f30b 1524 dst_release(&grt->dst);
1da177e4
LT
1525
1526 if (err)
1527 goto out;
1528 }
1529 err = -EINVAL;
38308473 1530 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1531 goto out;
1532 }
1533
1534 err = -ENODEV;
38308473 1535 if (!dev)
1da177e4
LT
1536 goto out;
1537
c3968a85
DW
1538 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1539 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1540 err = -EINVAL;
1541 goto out;
1542 }
4e3fd7a0 1543 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1544 rt->rt6i_prefsrc.plen = 128;
1545 } else
1546 rt->rt6i_prefsrc.plen = 0;
1547
86872cb5 1548 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1549 err = rt6_bind_neighbour(rt, dev);
f83c7790 1550 if (err)
1da177e4 1551 goto out;
1da177e4
LT
1552 }
1553
86872cb5 1554 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1555
1556install_route:
86872cb5
TG
1557 if (cfg->fc_mx) {
1558 struct nlattr *nla;
1559 int remaining;
1560
1561 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1562 int type = nla_type(nla);
86872cb5
TG
1563
1564 if (type) {
1565 if (type > RTAX_MAX) {
1da177e4
LT
1566 err = -EINVAL;
1567 goto out;
1568 }
86872cb5 1569
defb3519 1570 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1571 }
1da177e4
LT
1572 }
1573 }
1574
d8d1f30b 1575 rt->dst.dev = dev;
1da177e4 1576 rt->rt6i_idev = idev;
c71099ac 1577 rt->rt6i_table = table;
63152fc0 1578
c346dca1 1579 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1580
86872cb5 1581 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1582
1583out:
1584 if (dev)
1585 dev_put(dev);
1586 if (idev)
1587 in6_dev_put(idev);
1588 if (rt)
d8d1f30b 1589 dst_free(&rt->dst);
1da177e4
LT
1590 return err;
1591}
1592
86872cb5 1593static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1594{
1595 int err;
c71099ac 1596 struct fib6_table *table;
d1918542 1597 struct net *net = dev_net(rt->dst.dev);
1da177e4 1598
8ed67789 1599 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1600 return -ENOENT;
1601
c71099ac
TG
1602 table = rt->rt6i_table;
1603 write_lock_bh(&table->tb6_lock);
1da177e4 1604
86872cb5 1605 err = fib6_del(rt, info);
d8d1f30b 1606 dst_release(&rt->dst);
1da177e4 1607
c71099ac 1608 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1609
1610 return err;
1611}
1612
e0a1ad73
TG
1613int ip6_del_rt(struct rt6_info *rt)
1614{
4d1169c1 1615 struct nl_info info = {
d1918542 1616 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1617 };
528c4ceb 1618 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1619}
1620
86872cb5 1621static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1622{
c71099ac 1623 struct fib6_table *table;
1da177e4
LT
1624 struct fib6_node *fn;
1625 struct rt6_info *rt;
1626 int err = -ESRCH;
1627
5578689a 1628 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1629 if (!table)
c71099ac
TG
1630 return err;
1631
1632 read_lock_bh(&table->tb6_lock);
1da177e4 1633
c71099ac 1634 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1635 &cfg->fc_dst, cfg->fc_dst_len,
1636 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1637
1da177e4 1638 if (fn) {
d8d1f30b 1639 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1640 if (cfg->fc_ifindex &&
d1918542
DM
1641 (!rt->dst.dev ||
1642 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1643 continue;
86872cb5
TG
1644 if (cfg->fc_flags & RTF_GATEWAY &&
1645 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1646 continue;
86872cb5 1647 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1648 continue;
d8d1f30b 1649 dst_hold(&rt->dst);
c71099ac 1650 read_unlock_bh(&table->tb6_lock);
1da177e4 1651
86872cb5 1652 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1653 }
1654 }
c71099ac 1655 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1656
1657 return err;
1658}
1659
6700c270 1660static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 1661{
e8599ff4 1662 struct net *net = dev_net(skb->dev);
a6279458 1663 struct netevent_redirect netevent;
e8599ff4
DM
1664 struct rt6_info *rt, *nrt = NULL;
1665 const struct in6_addr *target;
e8599ff4 1666 struct ndisc_options ndopts;
6e157b6a
DM
1667 const struct in6_addr *dest;
1668 struct neighbour *old_neigh;
e8599ff4
DM
1669 struct inet6_dev *in6_dev;
1670 struct neighbour *neigh;
1671 struct icmp6hdr *icmph;
6e157b6a
DM
1672 int optlen, on_link;
1673 u8 *lladdr;
e8599ff4
DM
1674
1675 optlen = skb->tail - skb->transport_header;
1676 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1677
1678 if (optlen < 0) {
6e157b6a 1679 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
1680 return;
1681 }
1682
1683 icmph = icmp6_hdr(skb);
1684 target = (const struct in6_addr *) (icmph + 1);
1685 dest = target + 1;
1686
1687 if (ipv6_addr_is_multicast(dest)) {
6e157b6a 1688 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
1689 return;
1690 }
1691
6e157b6a 1692 on_link = 0;
e8599ff4
DM
1693 if (ipv6_addr_equal(dest, target)) {
1694 on_link = 1;
1695 } else if (ipv6_addr_type(target) !=
1696 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 1697 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
1698 return;
1699 }
1700
1701 in6_dev = __in6_dev_get(skb->dev);
1702 if (!in6_dev)
1703 return;
1704 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1705 return;
1706
1707 /* RFC2461 8.1:
1708 * The IP source address of the Redirect MUST be the same as the current
1709 * first-hop router for the specified ICMP Destination Address.
1710 */
1711
1712 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1713 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1714 return;
1715 }
6e157b6a
DM
1716
1717 lladdr = NULL;
e8599ff4
DM
1718 if (ndopts.nd_opts_tgt_lladdr) {
1719 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1720 skb->dev);
1721 if (!lladdr) {
1722 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1723 return;
1724 }
1725 }
1726
6e157b6a
DM
1727 rt = (struct rt6_info *) dst;
1728 if (rt == net->ipv6.ip6_null_entry) {
1729 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 1730 return;
6e157b6a 1731 }
e8599ff4 1732
6e157b6a
DM
1733 /* Redirect received -> path was valid.
1734 * Look, redirects are sent only in response to data packets,
1735 * so that this nexthop apparently is reachable. --ANK
1736 */
1737 dst_confirm(&rt->dst);
a6279458 1738
6e157b6a
DM
1739 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1740 if (!neigh)
1741 return;
a6279458 1742
6e157b6a
DM
1743 /* Duplicate redirect: silently ignore. */
1744 old_neigh = rt->n;
1745 if (neigh == old_neigh)
a6279458 1746 goto out;
1da177e4 1747
1da177e4
LT
1748 /*
1749 * We have finally decided to accept it.
1750 */
1751
1ab1457c 1752 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1753 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1754 NEIGH_UPDATE_F_OVERRIDE|
1755 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1756 NEIGH_UPDATE_F_ISROUTER))
1757 );
1758
21efcfa0 1759 nrt = ip6_rt_copy(rt, dest);
38308473 1760 if (!nrt)
1da177e4
LT
1761 goto out;
1762
1763 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1764 if (on_link)
1765 nrt->rt6i_flags &= ~RTF_GATEWAY;
1766
4e3fd7a0 1767 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
97cac082 1768 nrt->n = neigh_clone(neigh);
1da177e4 1769
40e22e8f 1770 if (ip6_ins_rt(nrt))
1da177e4
LT
1771 goto out;
1772
d8d1f30b 1773 netevent.old = &rt->dst;
1d248b1c 1774 netevent.old_neigh = old_neigh;
d8d1f30b 1775 netevent.new = &nrt->dst;
1d248b1c
DM
1776 netevent.new_neigh = neigh;
1777 netevent.daddr = dest;
8d71740c
TT
1778 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1779
38308473 1780 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 1781 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 1782 ip6_del_rt(rt);
1da177e4
LT
1783 }
1784
1785out:
e8599ff4 1786 neigh_release(neigh);
6e157b6a
DM
1787}
1788
1da177e4
LT
1789/*
1790 * Misc support functions
1791 */
1792
1716a961 1793static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1794 const struct in6_addr *dest)
1da177e4 1795{
d1918542 1796 struct net *net = dev_net(ort->dst.dev);
8b96d22d
DM
1797 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1798 ort->rt6i_table);
1da177e4
LT
1799
1800 if (rt) {
d8d1f30b
CG
1801 rt->dst.input = ort->dst.input;
1802 rt->dst.output = ort->dst.output;
8e2ec639 1803 rt->dst.flags |= DST_HOST;
d8d1f30b 1804
4e3fd7a0 1805 rt->rt6i_dst.addr = *dest;
8e2ec639 1806 rt->rt6i_dst.plen = 128;
defb3519 1807 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1808 rt->dst.error = ort->dst.error;
1da177e4
LT
1809 rt->rt6i_idev = ort->rt6i_idev;
1810 if (rt->rt6i_idev)
1811 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1812 rt->dst.lastuse = jiffies;
1da177e4 1813
4e3fd7a0 1814 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1815 rt->rt6i_flags = ort->rt6i_flags;
1816 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1817 (RTF_DEFAULT | RTF_ADDRCONF))
1818 rt6_set_from(rt, ort);
1819 else
1820 rt6_clean_expires(rt);
1da177e4
LT
1821 rt->rt6i_metric = 0;
1822
1da177e4
LT
1823#ifdef CONFIG_IPV6_SUBTREES
1824 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1825#endif
0f6c6392 1826 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1827 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1828 }
1829 return rt;
1830}
1831
70ceb4f5 1832#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1833static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1834 const struct in6_addr *prefix, int prefixlen,
1835 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1836{
1837 struct fib6_node *fn;
1838 struct rt6_info *rt = NULL;
c71099ac
TG
1839 struct fib6_table *table;
1840
efa2cea0 1841 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1842 if (!table)
c71099ac 1843 return NULL;
70ceb4f5 1844
c71099ac
TG
1845 write_lock_bh(&table->tb6_lock);
1846 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1847 if (!fn)
1848 goto out;
1849
d8d1f30b 1850 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1851 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1852 continue;
1853 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1854 continue;
1855 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1856 continue;
d8d1f30b 1857 dst_hold(&rt->dst);
70ceb4f5
YH
1858 break;
1859 }
1860out:
c71099ac 1861 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1862 return rt;
1863}
1864
efa2cea0 1865static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1866 const struct in6_addr *prefix, int prefixlen,
1867 const struct in6_addr *gwaddr, int ifindex,
95c96174 1868 unsigned int pref)
70ceb4f5 1869{
86872cb5
TG
1870 struct fib6_config cfg = {
1871 .fc_table = RT6_TABLE_INFO,
238fc7ea 1872 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1873 .fc_ifindex = ifindex,
1874 .fc_dst_len = prefixlen,
1875 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1876 RTF_UP | RTF_PREF(pref),
15e47304 1877 .fc_nlinfo.portid = 0,
efa2cea0
DL
1878 .fc_nlinfo.nlh = NULL,
1879 .fc_nlinfo.nl_net = net,
86872cb5
TG
1880 };
1881
4e3fd7a0
AD
1882 cfg.fc_dst = *prefix;
1883 cfg.fc_gateway = *gwaddr;
70ceb4f5 1884
e317da96
YH
1885 /* We should treat it as a default route if prefix length is 0. */
1886 if (!prefixlen)
86872cb5 1887 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1888
86872cb5 1889 ip6_route_add(&cfg);
70ceb4f5 1890
efa2cea0 1891 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1892}
1893#endif
1894
b71d1d42 1895struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1896{
1da177e4 1897 struct rt6_info *rt;
c71099ac 1898 struct fib6_table *table;
1da177e4 1899
c346dca1 1900 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1901 if (!table)
c71099ac 1902 return NULL;
1da177e4 1903
c71099ac 1904 write_lock_bh(&table->tb6_lock);
d8d1f30b 1905 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1906 if (dev == rt->dst.dev &&
045927ff 1907 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1908 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1909 break;
1910 }
1911 if (rt)
d8d1f30b 1912 dst_hold(&rt->dst);
c71099ac 1913 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1914 return rt;
1915}
1916
b71d1d42 1917struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1918 struct net_device *dev,
1919 unsigned int pref)
1da177e4 1920{
86872cb5
TG
1921 struct fib6_config cfg = {
1922 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1923 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1924 .fc_ifindex = dev->ifindex,
1925 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1926 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 1927 .fc_nlinfo.portid = 0,
5578689a 1928 .fc_nlinfo.nlh = NULL,
c346dca1 1929 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1930 };
1da177e4 1931
4e3fd7a0 1932 cfg.fc_gateway = *gwaddr;
1da177e4 1933
86872cb5 1934 ip6_route_add(&cfg);
1da177e4 1935
1da177e4
LT
1936 return rt6_get_dflt_router(gwaddr, dev);
1937}
1938
7b4da532 1939void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1940{
1941 struct rt6_info *rt;
c71099ac
TG
1942 struct fib6_table *table;
1943
1944 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1945 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1946 if (!table)
c71099ac 1947 return;
1da177e4
LT
1948
1949restart:
c71099ac 1950 read_lock_bh(&table->tb6_lock);
d8d1f30b 1951 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1952 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1953 dst_hold(&rt->dst);
c71099ac 1954 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1955 ip6_del_rt(rt);
1da177e4
LT
1956 goto restart;
1957 }
1958 }
c71099ac 1959 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1960}
1961
5578689a
DL
1962static void rtmsg_to_fib6_config(struct net *net,
1963 struct in6_rtmsg *rtmsg,
86872cb5
TG
1964 struct fib6_config *cfg)
1965{
1966 memset(cfg, 0, sizeof(*cfg));
1967
1968 cfg->fc_table = RT6_TABLE_MAIN;
1969 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1970 cfg->fc_metric = rtmsg->rtmsg_metric;
1971 cfg->fc_expires = rtmsg->rtmsg_info;
1972 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1973 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1974 cfg->fc_flags = rtmsg->rtmsg_flags;
1975
5578689a 1976 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1977
4e3fd7a0
AD
1978 cfg->fc_dst = rtmsg->rtmsg_dst;
1979 cfg->fc_src = rtmsg->rtmsg_src;
1980 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1981}
1982
5578689a 1983int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1984{
86872cb5 1985 struct fib6_config cfg;
1da177e4
LT
1986 struct in6_rtmsg rtmsg;
1987 int err;
1988
1989 switch(cmd) {
1990 case SIOCADDRT: /* Add a route */
1991 case SIOCDELRT: /* Delete a route */
1992 if (!capable(CAP_NET_ADMIN))
1993 return -EPERM;
1994 err = copy_from_user(&rtmsg, arg,
1995 sizeof(struct in6_rtmsg));
1996 if (err)
1997 return -EFAULT;
86872cb5 1998
5578689a 1999 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2000
1da177e4
LT
2001 rtnl_lock();
2002 switch (cmd) {
2003 case SIOCADDRT:
86872cb5 2004 err = ip6_route_add(&cfg);
1da177e4
LT
2005 break;
2006 case SIOCDELRT:
86872cb5 2007 err = ip6_route_del(&cfg);
1da177e4
LT
2008 break;
2009 default:
2010 err = -EINVAL;
2011 }
2012 rtnl_unlock();
2013
2014 return err;
3ff50b79 2015 }
1da177e4
LT
2016
2017 return -EINVAL;
2018}
2019
2020/*
2021 * Drop the packet on the floor
2022 */
2023
d5fdd6ba 2024static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2025{
612f09e8 2026 int type;
adf30907 2027 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2028 switch (ipstats_mib_noroutes) {
2029 case IPSTATS_MIB_INNOROUTES:
0660e03f 2030 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2031 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2032 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2033 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2034 break;
2035 }
2036 /* FALLTHROUGH */
2037 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2038 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2039 ipstats_mib_noroutes);
612f09e8
YH
2040 break;
2041 }
3ffe533c 2042 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2043 kfree_skb(skb);
2044 return 0;
2045}
2046
9ce8ade0
TG
2047static int ip6_pkt_discard(struct sk_buff *skb)
2048{
612f09e8 2049 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2050}
2051
20380731 2052static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2053{
adf30907 2054 skb->dev = skb_dst(skb)->dev;
612f09e8 2055 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2056}
2057
6723ab54
DM
2058#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2059
9ce8ade0
TG
2060static int ip6_pkt_prohibit(struct sk_buff *skb)
2061{
612f09e8 2062 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2063}
2064
2065static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2066{
adf30907 2067 skb->dev = skb_dst(skb)->dev;
612f09e8 2068 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2069}
2070
6723ab54
DM
2071#endif
2072
1da177e4
LT
2073/*
2074 * Allocate a dst for local (unicast / anycast) address.
2075 */
2076
2077struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2078 const struct in6_addr *addr,
8f031519 2079 bool anycast)
1da177e4 2080{
c346dca1 2081 struct net *net = dev_net(idev->dev);
8b96d22d 2082 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
f83c7790 2083 int err;
1da177e4 2084
38308473 2085 if (!rt) {
f3213831 2086 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
1da177e4 2087 return ERR_PTR(-ENOMEM);
40385653 2088 }
1da177e4 2089
1da177e4
LT
2090 in6_dev_hold(idev);
2091
11d53b49 2092 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2093 rt->dst.input = ip6_input;
2094 rt->dst.output = ip6_output;
1da177e4 2095 rt->rt6i_idev = idev;
d8d1f30b 2096 rt->dst.obsolete = -1;
1da177e4
LT
2097
2098 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2099 if (anycast)
2100 rt->rt6i_flags |= RTF_ANYCAST;
2101 else
1da177e4 2102 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2103 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2104 if (err) {
d8d1f30b 2105 dst_free(&rt->dst);
f83c7790 2106 return ERR_PTR(err);
1da177e4
LT
2107 }
2108
4e3fd7a0 2109 rt->rt6i_dst.addr = *addr;
1da177e4 2110 rt->rt6i_dst.plen = 128;
5578689a 2111 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2112
d8d1f30b 2113 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2114
2115 return rt;
2116}
2117
c3968a85
DW
2118int ip6_route_get_saddr(struct net *net,
2119 struct rt6_info *rt,
b71d1d42 2120 const struct in6_addr *daddr,
c3968a85
DW
2121 unsigned int prefs,
2122 struct in6_addr *saddr)
2123{
2124 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2125 int err = 0;
2126 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2127 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2128 else
2129 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2130 daddr, prefs, saddr);
2131 return err;
2132}
2133
2134/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches every device */
	struct net *net;		/* namespace whose ip6_null_entry is left alone */
	struct in6_addr *addr;		/* address compared with each route's prefsrc */
};
2140
2141static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2142{
2143 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2144 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2145 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2146
d1918542 2147 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2148 rt != net->ipv6.ip6_null_entry &&
2149 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2150 /* remove prefsrc entry */
2151 rt->rt6i_prefsrc.plen = 0;
2152 }
2153 return 0;
2154}
2155
2156void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2157{
2158 struct net *net = dev_net(ifp->idev->dev);
2159 struct arg_dev_net_ip adni = {
2160 .dev = ifp->idev->dev,
2161 .net = net,
2162 .addr = &ifp->addr,
2163 };
2164 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2165}
2166
8ed67789
DL
/* Argument passed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going away; NULL matches every device */
	struct net *net;		/* namespace whose ip6_null_entry must be kept */
};
2171
1da177e4
LT
2172static int fib6_ifdown(struct rt6_info *rt, void *arg)
2173{
bc3ef660 2174 const struct arg_dev_net *adn = arg;
2175 const struct net_device *dev = adn->dev;
8ed67789 2176
d1918542 2177 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2178 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2179 return -1;
c159d30c 2180
1da177e4
LT
2181 return 0;
2182}
2183
f3db4851 2184void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2185{
8ed67789
DL
2186 struct arg_dev_net adn = {
2187 .dev = dev,
2188 .net = net,
2189 };
2190
2191 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2192 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2193}
2194
/* Argument passed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU value */
};
2199
2200static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2201{
2202 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2203 struct inet6_dev *idev;
2204
2205 /* In IPv6 pmtu discovery is not optional,
2206 so that RTAX_MTU lock cannot disable it.
2207 We still use this lock to block changes
2208 caused by addrconf/ndisc.
2209 */
2210
2211 idev = __in6_dev_get(arg->dev);
38308473 2212 if (!idev)
1da177e4
LT
2213 return 0;
2214
2215 /* For administrative MTU increase, there is no way to discover
2216 IPv6 PMTU increase, so PMTU increase should be updated here.
2217 Since RFC 1981 doesn't include administrative MTU increase
2218 update PMTU increase is a MUST. (i.e. jumbo frame)
2219 */
2220 /*
2221 If new MTU is less than route PMTU, this new MTU will be the
2222 lowest MTU in the path, update the route PMTU to reflect PMTU
2223 decreases; if new MTU is greater than route PMTU, and the
2224 old MTU is the lowest MTU in the path, update the route PMTU
2225 to reflect the increase. In this case if the other nodes' MTU
2226 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2227 PMTU discouvery.
2228 */
d1918542 2229 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2230 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2231 (dst_mtu(&rt->dst) >= arg->mtu ||
2232 (dst_mtu(&rt->dst) < arg->mtu &&
2233 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2234 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2235 }
1da177e4
LT
2236 return 0;
2237}
2238
95c96174 2239void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2240{
c71099ac
TG
2241 struct rt6_mtu_change_arg arg = {
2242 .dev = dev,
2243 .mtu = mtu,
2244 };
1da177e4 2245
c346dca1 2246 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2247}
2248
ef7c79ed 2249static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2250 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2251 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2252 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2253 [RTA_PRIORITY] = { .type = NLA_U32 },
2254 [RTA_METRICS] = { .type = NLA_NESTED },
2255};
2256
2257static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2258 struct fib6_config *cfg)
1da177e4 2259{
86872cb5
TG
2260 struct rtmsg *rtm;
2261 struct nlattr *tb[RTA_MAX+1];
2262 int err;
1da177e4 2263
86872cb5
TG
2264 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2265 if (err < 0)
2266 goto errout;
1da177e4 2267
86872cb5
TG
2268 err = -EINVAL;
2269 rtm = nlmsg_data(nlh);
2270 memset(cfg, 0, sizeof(*cfg));
2271
2272 cfg->fc_table = rtm->rtm_table;
2273 cfg->fc_dst_len = rtm->rtm_dst_len;
2274 cfg->fc_src_len = rtm->rtm_src_len;
2275 cfg->fc_flags = RTF_UP;
2276 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2277 cfg->fc_type = rtm->rtm_type;
86872cb5 2278
ef2c7d7b
ND
2279 if (rtm->rtm_type == RTN_UNREACHABLE ||
2280 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2281 rtm->rtm_type == RTN_PROHIBIT ||
2282 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2283 cfg->fc_flags |= RTF_REJECT;
2284
ab79ad14
2285 if (rtm->rtm_type == RTN_LOCAL)
2286 cfg->fc_flags |= RTF_LOCAL;
2287
15e47304 2288 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2289 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2290 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2291
2292 if (tb[RTA_GATEWAY]) {
2293 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2294 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2295 }
86872cb5
TG
2296
2297 if (tb[RTA_DST]) {
2298 int plen = (rtm->rtm_dst_len + 7) >> 3;
2299
2300 if (nla_len(tb[RTA_DST]) < plen)
2301 goto errout;
2302
2303 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2304 }
86872cb5
TG
2305
2306 if (tb[RTA_SRC]) {
2307 int plen = (rtm->rtm_src_len + 7) >> 3;
2308
2309 if (nla_len(tb[RTA_SRC]) < plen)
2310 goto errout;
2311
2312 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2313 }
86872cb5 2314
c3968a85
DW
2315 if (tb[RTA_PREFSRC])
2316 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2317
86872cb5
TG
2318 if (tb[RTA_OIF])
2319 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2320
2321 if (tb[RTA_PRIORITY])
2322 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2323
2324 if (tb[RTA_METRICS]) {
2325 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2326 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2327 }
86872cb5
TG
2328
2329 if (tb[RTA_TABLE])
2330 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2331
2332 err = 0;
2333errout:
2334 return err;
1da177e4
LT
2335}
2336
c127ea2c 2337static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2338{
86872cb5
TG
2339 struct fib6_config cfg;
2340 int err;
1da177e4 2341
86872cb5
TG
2342 err = rtm_to_fib6_config(skb, nlh, &cfg);
2343 if (err < 0)
2344 return err;
2345
2346 return ip6_route_del(&cfg);
1da177e4
LT
2347}
2348
c127ea2c 2349static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2350{
86872cb5
TG
2351 struct fib6_config cfg;
2352 int err;
1da177e4 2353
86872cb5
TG
2354 err = rtm_to_fib6_config(skb, nlh, &cfg);
2355 if (err < 0)
2356 return err;
2357
2358 return ip6_route_add(&cfg);
1da177e4
LT
2359}
2360
339bf98f
TG
2361static inline size_t rt6_nlmsg_size(void)
2362{
2363 return NLMSG_ALIGN(sizeof(struct rtmsg))
2364 + nla_total_size(16) /* RTA_SRC */
2365 + nla_total_size(16) /* RTA_DST */
2366 + nla_total_size(16) /* RTA_GATEWAY */
2367 + nla_total_size(16) /* RTA_PREFSRC */
2368 + nla_total_size(4) /* RTA_TABLE */
2369 + nla_total_size(4) /* RTA_IIF */
2370 + nla_total_size(4) /* RTA_OIF */
2371 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2372 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2373 + nla_total_size(sizeof(struct rta_cacheinfo));
2374}
2375
191cd582
BH
2376static int rt6_fill_node(struct net *net,
2377 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 2378 struct in6_addr *dst, struct in6_addr *src,
15e47304 2379 int iif, int type, u32 portid, u32 seq,
7bc570c8 2380 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2381{
2382 struct rtmsg *rtm;
2d7202bf 2383 struct nlmsghdr *nlh;
e3703b3d 2384 long expires;
9e762a4a 2385 u32 table;
f2c31e32 2386 struct neighbour *n;
1da177e4
LT
2387
2388 if (prefix) { /* user wants prefix routes only */
2389 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2390 /* success since this is not a prefix route */
2391 return 1;
2392 }
2393 }
2394
15e47304 2395 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 2396 if (!nlh)
26932566 2397 return -EMSGSIZE;
2d7202bf
TG
2398
2399 rtm = nlmsg_data(nlh);
1da177e4
LT
2400 rtm->rtm_family = AF_INET6;
2401 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2402 rtm->rtm_src_len = rt->rt6i_src.plen;
2403 rtm->rtm_tos = 0;
c71099ac 2404 if (rt->rt6i_table)
9e762a4a 2405 table = rt->rt6i_table->tb6_id;
c71099ac 2406 else
9e762a4a
PM
2407 table = RT6_TABLE_UNSPEC;
2408 rtm->rtm_table = table;
c78679e8
DM
2409 if (nla_put_u32(skb, RTA_TABLE, table))
2410 goto nla_put_failure;
ef2c7d7b
ND
2411 if (rt->rt6i_flags & RTF_REJECT) {
2412 switch (rt->dst.error) {
2413 case -EINVAL:
2414 rtm->rtm_type = RTN_BLACKHOLE;
2415 break;
2416 case -EACCES:
2417 rtm->rtm_type = RTN_PROHIBIT;
2418 break;
b4949ab2
ND
2419 case -EAGAIN:
2420 rtm->rtm_type = RTN_THROW;
2421 break;
ef2c7d7b
ND
2422 default:
2423 rtm->rtm_type = RTN_UNREACHABLE;
2424 break;
2425 }
2426 }
38308473 2427 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2428 rtm->rtm_type = RTN_LOCAL;
d1918542 2429 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2430 rtm->rtm_type = RTN_LOCAL;
2431 else
2432 rtm->rtm_type = RTN_UNICAST;
2433 rtm->rtm_flags = 0;
2434 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2435 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2436 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 2437 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
2438 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2439 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2440 rtm->rtm_protocol = RTPROT_RA;
2441 else
2442 rtm->rtm_protocol = RTPROT_KERNEL;
2443 }
1da177e4 2444
38308473 2445 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2446 rtm->rtm_flags |= RTM_F_CLONED;
2447
2448 if (dst) {
c78679e8
DM
2449 if (nla_put(skb, RTA_DST, 16, dst))
2450 goto nla_put_failure;
1ab1457c 2451 rtm->rtm_dst_len = 128;
1da177e4 2452 } else if (rtm->rtm_dst_len)
c78679e8
DM
2453 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2454 goto nla_put_failure;
1da177e4
LT
2455#ifdef CONFIG_IPV6_SUBTREES
2456 if (src) {
c78679e8
DM
2457 if (nla_put(skb, RTA_SRC, 16, src))
2458 goto nla_put_failure;
1ab1457c 2459 rtm->rtm_src_len = 128;
c78679e8
DM
2460 } else if (rtm->rtm_src_len &&
2461 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2462 goto nla_put_failure;
1da177e4 2463#endif
7bc570c8
YH
2464 if (iif) {
2465#ifdef CONFIG_IPV6_MROUTE
2466 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2467 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2468 if (err <= 0) {
2469 if (!nowait) {
2470 if (err == 0)
2471 return 0;
2472 goto nla_put_failure;
2473 } else {
2474 if (err == -EMSGSIZE)
2475 goto nla_put_failure;
2476 }
2477 }
2478 } else
2479#endif
c78679e8
DM
2480 if (nla_put_u32(skb, RTA_IIF, iif))
2481 goto nla_put_failure;
7bc570c8 2482 } else if (dst) {
1da177e4 2483 struct in6_addr saddr_buf;
c78679e8
DM
2484 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2485 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2486 goto nla_put_failure;
1da177e4 2487 }
2d7202bf 2488
c3968a85
DW
2489 if (rt->rt6i_prefsrc.plen) {
2490 struct in6_addr saddr_buf;
4e3fd7a0 2491 saddr_buf = rt->rt6i_prefsrc.addr;
c78679e8
DM
2492 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2493 goto nla_put_failure;
c3968a85
DW
2494 }
2495
defb3519 2496 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2497 goto nla_put_failure;
2498
f2c31e32 2499 rcu_read_lock();
97cac082 2500 n = rt->n;
94f826b8
ED
2501 if (n) {
2502 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2503 rcu_read_unlock();
2504 goto nla_put_failure;
2505 }
2506 }
f2c31e32 2507 rcu_read_unlock();
2d7202bf 2508
c78679e8
DM
2509 if (rt->dst.dev &&
2510 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2511 goto nla_put_failure;
2512 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2513 goto nla_put_failure;
8253947e
LW
2514
2515 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 2516
87a50699 2517 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 2518 goto nla_put_failure;
2d7202bf
TG
2519
2520 return nlmsg_end(skb, nlh);
2521
2522nla_put_failure:
26932566
PM
2523 nlmsg_cancel(skb, nlh);
2524 return -EMSGSIZE;
1da177e4
LT
2525}
2526
1b43af54 2527int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2528{
2529 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2530 int prefix;
2531
2d7202bf
TG
2532 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2533 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2534 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2535 } else
2536 prefix = 0;
2537
191cd582
BH
2538 return rt6_fill_node(arg->net,
2539 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 2540 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2541 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2542}
2543
c127ea2c 2544static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2545{
3b1e0a65 2546 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2547 struct nlattr *tb[RTA_MAX+1];
2548 struct rt6_info *rt;
1da177e4 2549 struct sk_buff *skb;
ab364a6f 2550 struct rtmsg *rtm;
4c9483b2 2551 struct flowi6 fl6;
72331bc0 2552 int err, iif = 0, oif = 0;
1da177e4 2553
ab364a6f
TG
2554 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2555 if (err < 0)
2556 goto errout;
1da177e4 2557
ab364a6f 2558 err = -EINVAL;
4c9483b2 2559 memset(&fl6, 0, sizeof(fl6));
1da177e4 2560
ab364a6f
TG
2561 if (tb[RTA_SRC]) {
2562 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2563 goto errout;
2564
4e3fd7a0 2565 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2566 }
2567
2568 if (tb[RTA_DST]) {
2569 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2570 goto errout;
2571
4e3fd7a0 2572 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2573 }
2574
2575 if (tb[RTA_IIF])
2576 iif = nla_get_u32(tb[RTA_IIF]);
2577
2578 if (tb[RTA_OIF])
72331bc0 2579 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2580
2581 if (iif) {
2582 struct net_device *dev;
72331bc0
SL
2583 int flags = 0;
2584
5578689a 2585 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2586 if (!dev) {
2587 err = -ENODEV;
ab364a6f 2588 goto errout;
1da177e4 2589 }
72331bc0
SL
2590
2591 fl6.flowi6_iif = iif;
2592
2593 if (!ipv6_addr_any(&fl6.saddr))
2594 flags |= RT6_LOOKUP_F_HAS_SADDR;
2595
2596 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2597 flags);
2598 } else {
2599 fl6.flowi6_oif = oif;
2600
2601 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2602 }
2603
ab364a6f 2604 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2605 if (!skb) {
2173bff5 2606 dst_release(&rt->dst);
ab364a6f
TG
2607 err = -ENOBUFS;
2608 goto errout;
2609 }
1da177e4 2610
ab364a6f
TG
2611 /* Reserve room for dummy headers, this skb can pass
2612 through good chunk of routing engine.
2613 */
459a98ed 2614 skb_reset_mac_header(skb);
ab364a6f 2615 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2616
d8d1f30b 2617 skb_dst_set(skb, &rt->dst);
1da177e4 2618
4c9483b2 2619 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 2620 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 2621 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2622 if (err < 0) {
ab364a6f
TG
2623 kfree_skb(skb);
2624 goto errout;
1da177e4
LT
2625 }
2626
15e47304 2627 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 2628errout:
1da177e4 2629 return err;
1da177e4
LT
2630}
2631
86872cb5 2632void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2633{
2634 struct sk_buff *skb;
5578689a 2635 struct net *net = info->nl_net;
528c4ceb
DL
2636 u32 seq;
2637 int err;
2638
2639 err = -ENOBUFS;
38308473 2640 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2641
339bf98f 2642 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2643 if (!skb)
21713ebc
TG
2644 goto errout;
2645
191cd582 2646 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
15e47304 2647 event, info->portid, seq, 0, 0, 0);
26932566
PM
2648 if (err < 0) {
2649 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2650 WARN_ON(err == -EMSGSIZE);
2651 kfree_skb(skb);
2652 goto errout;
2653 }
15e47304 2654 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
2655 info->nlh, gfp_any());
2656 return;
21713ebc
TG
2657errout:
2658 if (err < 0)
5578689a 2659 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2660}
2661
8ed67789
DL
2662static int ip6_route_dev_notify(struct notifier_block *this,
2663 unsigned long event, void *data)
2664{
2665 struct net_device *dev = (struct net_device *)data;
c346dca1 2666 struct net *net = dev_net(dev);
8ed67789
DL
2667
2668 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2669 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2670 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2671#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2672 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2673 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2674 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2675 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2676#endif
2677 }
2678
2679 return NOTIFY_OK;
2680}
2681
1da177e4
LT
2682/*
2683 * /proc
2684 */
2685
2686#ifdef CONFIG_PROC_FS
2687
1da177e4
LT
/*
 * Buffer-cursor state for /proc output.
 * NOTE(review): no user of this struct is visible in this part of the
 * file (the seq_file handlers below do not use it) - confirm whether
 * it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2696
2697static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2698{
33120b30 2699 struct seq_file *m = p_arg;
69cce1d1 2700 struct neighbour *n;
1da177e4 2701
4b7a4274 2702 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2703
2704#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2705 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2706#else
33120b30 2707 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2708#endif
f2c31e32 2709 rcu_read_lock();
97cac082 2710 n = rt->n;
69cce1d1
DM
2711 if (n) {
2712 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2713 } else {
33120b30 2714 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2715 }
f2c31e32 2716 rcu_read_unlock();
33120b30 2717 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2718 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2719 rt->dst.__use, rt->rt6i_flags,
d1918542 2720 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2721 return 0;
2722}
2723
33120b30 2724static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2725{
f3db4851 2726 struct net *net = (struct net *)m->private;
32b293a5 2727 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2728 return 0;
2729}
1da177e4 2730
33120b30
AD
2731static int ipv6_route_open(struct inode *inode, struct file *file)
2732{
de05c557 2733 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2734}
2735
33120b30
AD
2736static const struct file_operations ipv6_route_proc_fops = {
2737 .owner = THIS_MODULE,
2738 .open = ipv6_route_open,
2739 .read = seq_read,
2740 .llseek = seq_lseek,
b6fcbdb4 2741 .release = single_release_net,
33120b30
AD
2742};
2743
1da177e4
LT
2744static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2745{
69ddb805 2746 struct net *net = (struct net *)seq->private;
1da177e4 2747 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2748 net->ipv6.rt6_stats->fib_nodes,
2749 net->ipv6.rt6_stats->fib_route_nodes,
2750 net->ipv6.rt6_stats->fib_rt_alloc,
2751 net->ipv6.rt6_stats->fib_rt_entries,
2752 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2753 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2754 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2755
2756 return 0;
2757}
2758
2759static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2760{
de05c557 2761 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2762}
2763
9a32144e 2764static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2765 .owner = THIS_MODULE,
2766 .open = rt6_stats_seq_open,
2767 .read = seq_read,
2768 .llseek = seq_lseek,
b6fcbdb4 2769 .release = single_release_net,
1da177e4
LT
2770};
2771#endif /* CONFIG_PROC_FS */
2772
2773#ifdef CONFIG_SYSCTL
2774
1da177e4 2775static
8d65af78 2776int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2777 void __user *buffer, size_t *lenp, loff_t *ppos)
2778{
c486da34
LAG
2779 struct net *net;
2780 int delay;
2781 if (!write)
1da177e4 2782 return -EINVAL;
c486da34
LAG
2783
2784 net = (struct net *)ctl->extra1;
2785 delay = net->ipv6.sysctl.flush_delay;
2786 proc_dointvec(ctl, write, buffer, lenp, ppos);
2787 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2788 return 0;
1da177e4
LT
2789}
2790
760f2d01 2791ctl_table ipv6_route_table_template[] = {
1ab1457c 2792 {
1da177e4 2793 .procname = "flush",
4990509f 2794 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2795 .maxlen = sizeof(int),
89c8b3a1 2796 .mode = 0200,
6d9f239a 2797 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2798 },
2799 {
1da177e4 2800 .procname = "gc_thresh",
9a7ec3a9 2801 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2802 .maxlen = sizeof(int),
2803 .mode = 0644,
6d9f239a 2804 .proc_handler = proc_dointvec,
1da177e4
LT
2805 },
2806 {
1da177e4 2807 .procname = "max_size",
4990509f 2808 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2809 .maxlen = sizeof(int),
2810 .mode = 0644,
6d9f239a 2811 .proc_handler = proc_dointvec,
1da177e4
LT
2812 },
2813 {
1da177e4 2814 .procname = "gc_min_interval",
4990509f 2815 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2816 .maxlen = sizeof(int),
2817 .mode = 0644,
6d9f239a 2818 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2819 },
2820 {
1da177e4 2821 .procname = "gc_timeout",
4990509f 2822 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2823 .maxlen = sizeof(int),
2824 .mode = 0644,
6d9f239a 2825 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2826 },
2827 {
1da177e4 2828 .procname = "gc_interval",
4990509f 2829 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2830 .maxlen = sizeof(int),
2831 .mode = 0644,
6d9f239a 2832 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2833 },
2834 {
1da177e4 2835 .procname = "gc_elasticity",
4990509f 2836 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2837 .maxlen = sizeof(int),
2838 .mode = 0644,
f3d3f616 2839 .proc_handler = proc_dointvec,
1da177e4
LT
2840 },
2841 {
1da177e4 2842 .procname = "mtu_expires",
4990509f 2843 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2844 .maxlen = sizeof(int),
2845 .mode = 0644,
6d9f239a 2846 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2847 },
2848 {
1da177e4 2849 .procname = "min_adv_mss",
4990509f 2850 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2851 .maxlen = sizeof(int),
2852 .mode = 0644,
f3d3f616 2853 .proc_handler = proc_dointvec,
1da177e4
LT
2854 },
2855 {
1da177e4 2856 .procname = "gc_min_interval_ms",
4990509f 2857 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2858 .maxlen = sizeof(int),
2859 .mode = 0644,
6d9f239a 2860 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2861 },
f8572d8f 2862 { }
1da177e4
LT
2863};
2864
2c8c1e72 2865struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2866{
2867 struct ctl_table *table;
2868
2869 table = kmemdup(ipv6_route_table_template,
2870 sizeof(ipv6_route_table_template),
2871 GFP_KERNEL);
5ee09105
YH
2872
2873 if (table) {
2874 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2875 table[0].extra1 = net;
86393e52 2876 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2877 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2878 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2879 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2880 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2881 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2882 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2883 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2884 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2885 }
2886
760f2d01
DL
2887 return table;
2888}
1da177e4
LT
2889#endif
2890
2c8c1e72 2891static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2892{
633d424b 2893 int ret = -ENOMEM;
8ed67789 2894
86393e52
AD
2895 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2896 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2897
fc66f95c
ED
2898 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2899 goto out_ip6_dst_ops;
2900
8ed67789
DL
2901 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2902 sizeof(*net->ipv6.ip6_null_entry),
2903 GFP_KERNEL);
2904 if (!net->ipv6.ip6_null_entry)
fc66f95c 2905 goto out_ip6_dst_entries;
d8d1f30b 2906 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2907 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2908 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2909 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2910 ip6_template_metrics, true);
8ed67789
DL
2911
2912#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2913 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2914 sizeof(*net->ipv6.ip6_prohibit_entry),
2915 GFP_KERNEL);
68fffc67
PZ
2916 if (!net->ipv6.ip6_prohibit_entry)
2917 goto out_ip6_null_entry;
d8d1f30b 2918 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2919 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2920 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2921 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2922 ip6_template_metrics, true);
8ed67789
DL
2923
2924 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2925 sizeof(*net->ipv6.ip6_blk_hole_entry),
2926 GFP_KERNEL);
68fffc67
PZ
2927 if (!net->ipv6.ip6_blk_hole_entry)
2928 goto out_ip6_prohibit_entry;
d8d1f30b 2929 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2930 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2931 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2932 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2933 ip6_template_metrics, true);
8ed67789
DL
2934#endif
2935
b339a47c
PZ
2936 net->ipv6.sysctl.flush_delay = 0;
2937 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2938 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2939 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2940 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2941 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2942 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2943 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2944
6891a346
BT
2945 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2946
8ed67789
DL
2947 ret = 0;
2948out:
2949 return ret;
f2fc6a54 2950
68fffc67
PZ
2951#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2952out_ip6_prohibit_entry:
2953 kfree(net->ipv6.ip6_prohibit_entry);
2954out_ip6_null_entry:
2955 kfree(net->ipv6.ip6_null_entry);
2956#endif
fc66f95c
ED
2957out_ip6_dst_entries:
2958 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2959out_ip6_dst_ops:
f2fc6a54 2960 goto out;
cdb18761
DL
2961}
2962
2c8c1e72 2963static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 2964{
8ed67789
DL
2965 kfree(net->ipv6.ip6_null_entry);
2966#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2967 kfree(net->ipv6.ip6_prohibit_entry);
2968 kfree(net->ipv6.ip6_blk_hole_entry);
2969#endif
41bb78b4 2970 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2971}
2972
d189634e
TG
2973static int __net_init ip6_route_net_init_late(struct net *net)
2974{
2975#ifdef CONFIG_PROC_FS
2976 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2977 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2978#endif
2979 return 0;
2980}
2981
2982static void __net_exit ip6_route_net_exit_late(struct net *net)
2983{
2984#ifdef CONFIG_PROC_FS
2985 proc_net_remove(net, "ipv6_route");
2986 proc_net_remove(net, "rt6_stats");
2987#endif
2988}
2989
cdb18761
DL
2990static struct pernet_operations ip6_route_net_ops = {
2991 .init = ip6_route_net_init,
2992 .exit = ip6_route_net_exit,
2993};
2994
c3426b47
DM
2995static int __net_init ipv6_inetpeer_init(struct net *net)
2996{
2997 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2998
2999 if (!bp)
3000 return -ENOMEM;
3001 inet_peer_base_init(bp);
3002 net->ipv6.peers = bp;
3003 return 0;
3004}
3005
3006static void __net_exit ipv6_inetpeer_exit(struct net *net)
3007{
3008 struct inet_peer_base *bp = net->ipv6.peers;
3009
3010 net->ipv6.peers = NULL;
56a6b248 3011 inetpeer_invalidate_tree(bp);
c3426b47
DM
3012 kfree(bp);
3013}
3014
2b823f72 3015static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3016 .init = ipv6_inetpeer_init,
3017 .exit = ipv6_inetpeer_exit,
3018};
3019
d189634e
TG
3020static struct pernet_operations ip6_route_net_late_ops = {
3021 .init = ip6_route_net_init_late,
3022 .exit = ip6_route_net_exit_late,
3023};
3024
8ed67789
DL
3025static struct notifier_block ip6_route_dev_notifier = {
3026 .notifier_call = ip6_route_dev_notify,
3027 .priority = 0,
3028};
3029
433d49c3 3030int __init ip6_route_init(void)
1da177e4 3031{
433d49c3
DL
3032 int ret;
3033
9a7ec3a9
DL
3034 ret = -ENOMEM;
3035 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3036 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3037 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3038 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3039 goto out;
14e50e57 3040
fc66f95c 3041 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3042 if (ret)
bdb3289f 3043 goto out_kmem_cache;
bdb3289f 3044
c3426b47
DM
3045 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3046 if (ret)
e8803b6c 3047 goto out_dst_entries;
2a0c451a 3048
7e52b33b
DM
3049 ret = register_pernet_subsys(&ip6_route_net_ops);
3050 if (ret)
3051 goto out_register_inetpeer;
c3426b47 3052
5dc121e9
AE
3053 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3054
8ed67789
DL
3055 /* Registering of the loopback is done before this portion of code,
3056 * the loopback reference in rt6_info will not be taken, do it
3057 * manually for init_net */
d8d1f30b 3058 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3059 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3060 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3061 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3062 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3063 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3064 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3065 #endif
e8803b6c 3066 ret = fib6_init();
433d49c3 3067 if (ret)
8ed67789 3068 goto out_register_subsys;
433d49c3 3069
433d49c3
DL
3070 ret = xfrm6_init();
3071 if (ret)
e8803b6c 3072 goto out_fib6_init;
c35b7e72 3073
433d49c3
DL
3074 ret = fib6_rules_init();
3075 if (ret)
3076 goto xfrm6_init;
7e5449c2 3077
d189634e
TG
3078 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3079 if (ret)
3080 goto fib6_rules_init;
3081
433d49c3 3082 ret = -ENOBUFS;
c7ac8679
GR
3083 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3084 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3085 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3086 goto out_register_late_subsys;
c127ea2c 3087
8ed67789 3088 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3089 if (ret)
d189634e 3090 goto out_register_late_subsys;
8ed67789 3091
433d49c3
DL
3092out:
3093 return ret;
3094
d189634e
TG
3095out_register_late_subsys:
3096 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3097fib6_rules_init:
433d49c3
DL
3098 fib6_rules_cleanup();
3099xfrm6_init:
433d49c3 3100 xfrm6_fini();
2a0c451a
TG
3101out_fib6_init:
3102 fib6_gc_cleanup();
8ed67789
DL
3103out_register_subsys:
3104 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3105out_register_inetpeer:
3106 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3107out_dst_entries:
3108 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3109out_kmem_cache:
f2fc6a54 3110 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3111 goto out;
1da177e4
LT
3112}
3113
3114void ip6_route_cleanup(void)
3115{
8ed67789 3116 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3117 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3118 fib6_rules_cleanup();
1da177e4 3119 xfrm6_fini();
1da177e4 3120 fib6_gc_cleanup();
c3426b47 3121 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3122 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3123 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3124 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3125}