Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net...
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
1da177e4
LT
60
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
1716a961 67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 68 const struct in6_addr *dest);
1da177e4 69static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 70static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 71static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73static void ip6_dst_destroy(struct dst_entry *);
74static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
569d3645 76static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
77
78static int ip6_pkt_discard(struct sk_buff *skb);
79static int ip6_pkt_discard_out(struct sk_buff *skb);
80static void ip6_link_failure(struct sk_buff *skb);
81static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
82
70ceb4f5 83#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 84static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
85 const struct in6_addr *prefix, int prefixlen,
86 const struct in6_addr *gwaddr, int ifindex,
95c96174 87 unsigned int pref);
efa2cea0 88static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
89 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
91#endif
92
06582540
DM
93static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
94{
95 struct rt6_info *rt = (struct rt6_info *) dst;
96 struct inet_peer *peer;
97 u32 *p = NULL;
98
8e2ec639
YZ
99 if (!(rt->dst.flags & DST_HOST))
100 return NULL;
101
fbfe95a4 102 peer = rt6_get_peer_create(rt);
06582540
DM
103 if (peer) {
104 u32 *old_p = __DST_METRICS_PTR(old);
105 unsigned long prev, new;
106
107 p = peer->metrics;
108 if (inet_metrics_new(peer))
109 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
110
111 new = (unsigned long) p;
112 prev = cmpxchg(&dst->_metrics, old, new);
113
114 if (prev != old) {
115 p = __DST_METRICS_PTR(prev);
116 if (prev & DST_METRICS_READ_ONLY)
117 p = NULL;
118 }
119 }
120 return p;
121}
122
39232973
DM
123static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
124{
125 struct in6_addr *p = &rt->rt6i_gateway;
126
a7563f34 127 if (!ipv6_addr_any(p))
39232973
DM
128 return (const void *) p;
129 return daddr;
130}
131
d3aaeb38
DM
132static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
133{
39232973
DM
134 struct rt6_info *rt = (struct rt6_info *) dst;
135 struct neighbour *n;
136
137 daddr = choose_neigh_daddr(rt, daddr);
138 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
139 if (n)
140 return n;
141 return neigh_create(&nd_tbl, daddr, dst->dev);
142}
143
8ade06c6 144static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 145{
8ade06c6
DM
146 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
147 if (!n) {
148 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
149 if (IS_ERR(n))
150 return PTR_ERR(n);
151 }
f83c7790
DM
152 dst_set_neighbour(&rt->dst, n);
153
154 return 0;
d3aaeb38
DM
155}
156
9a7ec3a9 157static struct dst_ops ip6_dst_ops_template = {
1da177e4 158 .family = AF_INET6,
09640e63 159 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
160 .gc = ip6_dst_gc,
161 .gc_thresh = 1024,
162 .check = ip6_dst_check,
0dbaee3b 163 .default_advmss = ip6_default_advmss,
ebb762f2 164 .mtu = ip6_mtu,
06582540 165 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
166 .destroy = ip6_dst_destroy,
167 .ifdown = ip6_dst_ifdown,
168 .negative_advice = ip6_negative_advice,
169 .link_failure = ip6_link_failure,
170 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 171 .local_out = __ip6_local_out,
d3aaeb38 172 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
173};
174
ebb762f2 175static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 176{
618f9bc7
SK
177 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
178
179 return mtu ? : dst->dev->mtu;
ec831ea7
RD
180}
181
14e50e57
DM
182static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
183{
184}
185
0972ddb2
HB
186static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
187 unsigned long old)
188{
189 return NULL;
190}
191
14e50e57
DM
192static struct dst_ops ip6_dst_blackhole_ops = {
193 .family = AF_INET6,
09640e63 194 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
195 .destroy = ip6_dst_destroy,
196 .check = ip6_dst_check,
ebb762f2 197 .mtu = ip6_blackhole_mtu,
214f45c9 198 .default_advmss = ip6_default_advmss,
14e50e57 199 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 200 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 201 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
202};
203
62fa8a84
DM
204static const u32 ip6_template_metrics[RTAX_MAX] = {
205 [RTAX_HOPLIMIT - 1] = 255,
206};
207
bdb3289f 208static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
209 .dst = {
210 .__refcnt = ATOMIC_INIT(1),
211 .__use = 1,
212 .obsolete = -1,
213 .error = -ENETUNREACH,
d8d1f30b
CG
214 .input = ip6_pkt_discard,
215 .output = ip6_pkt_discard_out,
1da177e4
LT
216 },
217 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 218 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
219 .rt6i_metric = ~(u32) 0,
220 .rt6i_ref = ATOMIC_INIT(1),
221};
222
101367c2
TG
223#ifdef CONFIG_IPV6_MULTIPLE_TABLES
224
6723ab54
DM
225static int ip6_pkt_prohibit(struct sk_buff *skb);
226static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 227
280a34c8 228static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
229 .dst = {
230 .__refcnt = ATOMIC_INIT(1),
231 .__use = 1,
232 .obsolete = -1,
233 .error = -EACCES,
d8d1f30b
CG
234 .input = ip6_pkt_prohibit,
235 .output = ip6_pkt_prohibit_out,
101367c2
TG
236 },
237 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 238 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
239 .rt6i_metric = ~(u32) 0,
240 .rt6i_ref = ATOMIC_INIT(1),
241};
242
bdb3289f 243static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
244 .dst = {
245 .__refcnt = ATOMIC_INIT(1),
246 .__use = 1,
247 .obsolete = -1,
248 .error = -EINVAL,
d8d1f30b
CG
249 .input = dst_discard,
250 .output = dst_discard,
101367c2
TG
251 },
252 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 253 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
254 .rt6i_metric = ~(u32) 0,
255 .rt6i_ref = ATOMIC_INIT(1),
256};
257
258#endif
259
1da177e4 260/* allocate dst with ip6_dst_ops */
97bab73f 261static inline struct rt6_info *ip6_dst_alloc(struct net *net,
957c665f 262 struct net_device *dev,
8b96d22d
DM
263 int flags,
264 struct fib6_table *table)
1da177e4 265{
97bab73f
DM
266 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
267 0, 0, flags);
cf911662 268
97bab73f 269 if (rt) {
fbe58186 270 memset(&rt->rt6i_table, 0,
38308473 271 sizeof(*rt) - sizeof(struct dst_entry));
8b96d22d 272 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
97bab73f 273 }
cf911662 274 return rt;
1da177e4
LT
275}
276
277static void ip6_dst_destroy(struct dst_entry *dst)
278{
279 struct rt6_info *rt = (struct rt6_info *)dst;
280 struct inet6_dev *idev = rt->rt6i_idev;
281
8e2ec639
YZ
282 if (!(rt->dst.flags & DST_HOST))
283 dst_destroy_metrics_generic(dst);
284
38308473 285 if (idev) {
1da177e4
LT
286 rt->rt6i_idev = NULL;
287 in6_dev_put(idev);
1ab1457c 288 }
1716a961
G
289
290 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
291 dst_release(dst->from);
292
97bab73f
DM
293 if (rt6_has_peer(rt)) {
294 struct inet_peer *peer = rt6_peer_ptr(rt);
b3419363
DM
295 inet_putpeer(peer);
296 }
297}
298
6431cbc2
DM
299static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
300
301static u32 rt6_peer_genid(void)
302{
303 return atomic_read(&__rt6_peer_genid);
304}
305
b3419363
DM
306void rt6_bind_peer(struct rt6_info *rt, int create)
307{
97bab73f 308 struct inet_peer_base *base;
b3419363
DM
309 struct inet_peer *peer;
310
97bab73f
DM
311 base = inetpeer_base_ptr(rt->_rt6i_peer);
312 if (!base)
313 return;
314
315 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
7b34ca2a
DM
316 if (peer) {
317 if (!rt6_set_peer(rt, peer))
318 inet_putpeer(peer);
319 else
320 rt->rt6i_peer_genid = rt6_peer_genid();
321 }
1da177e4
LT
322}
323
324static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
325 int how)
326{
327 struct rt6_info *rt = (struct rt6_info *)dst;
328 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 329 struct net_device *loopback_dev =
c346dca1 330 dev_net(dev)->loopback_dev;
1da177e4 331
38308473 332 if (dev != loopback_dev && idev && idev->dev == dev) {
5a3e55d6
DL
333 struct inet6_dev *loopback_idev =
334 in6_dev_get(loopback_dev);
38308473 335 if (loopback_idev) {
1da177e4
LT
336 rt->rt6i_idev = loopback_idev;
337 in6_dev_put(idev);
338 }
339 }
340}
341
a50feda5 342static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 343{
1716a961
G
344 struct rt6_info *ort = NULL;
345
346 if (rt->rt6i_flags & RTF_EXPIRES) {
347 if (time_after(jiffies, rt->dst.expires))
a50feda5 348 return true;
1716a961
G
349 } else if (rt->dst.from) {
350 ort = (struct rt6_info *) rt->dst.from;
351 return (ort->rt6i_flags & RTF_EXPIRES) &&
352 time_after(jiffies, ort->dst.expires);
353 }
a50feda5 354 return false;
1da177e4
LT
355}
356
a50feda5 357static bool rt6_need_strict(const struct in6_addr *daddr)
c71099ac 358{
a02cec21
ED
359 return ipv6_addr_type(daddr) &
360 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
361}
362
1da177e4 363/*
c71099ac 364 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
365 */
366
8ed67789
DL
367static inline struct rt6_info *rt6_device_match(struct net *net,
368 struct rt6_info *rt,
b71d1d42 369 const struct in6_addr *saddr,
1da177e4 370 int oif,
d420895e 371 int flags)
1da177e4
LT
372{
373 struct rt6_info *local = NULL;
374 struct rt6_info *sprt;
375
dd3abc4e
YH
376 if (!oif && ipv6_addr_any(saddr))
377 goto out;
378
d8d1f30b 379 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 380 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
381
382 if (oif) {
1da177e4
LT
383 if (dev->ifindex == oif)
384 return sprt;
385 if (dev->flags & IFF_LOOPBACK) {
38308473 386 if (!sprt->rt6i_idev ||
1da177e4 387 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 388 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 389 continue;
1ab1457c 390 if (local && (!oif ||
1da177e4
LT
391 local->rt6i_idev->dev->ifindex == oif))
392 continue;
393 }
394 local = sprt;
395 }
dd3abc4e
YH
396 } else {
397 if (ipv6_chk_addr(net, saddr, dev,
398 flags & RT6_LOOKUP_F_IFACE))
399 return sprt;
1da177e4 400 }
dd3abc4e 401 }
1da177e4 402
dd3abc4e 403 if (oif) {
1da177e4
LT
404 if (local)
405 return local;
406
d420895e 407 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 408 return net->ipv6.ip6_null_entry;
1da177e4 409 }
dd3abc4e 410out:
1da177e4
LT
411 return rt;
412}
413
27097255
YH
414#ifdef CONFIG_IPV6_ROUTER_PREF
415static void rt6_probe(struct rt6_info *rt)
416{
f2c31e32 417 struct neighbour *neigh;
27097255
YH
418 /*
419 * Okay, this does not seem to be appropriate
420 * for now, however, we need to check if it
421 * is really so; aka Router Reachability Probing.
422 *
423 * Router Reachability Probe MUST be rate-limited
424 * to no more than one per minute.
425 */
f2c31e32 426 rcu_read_lock();
27217455 427 neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
27097255 428 if (!neigh || (neigh->nud_state & NUD_VALID))
f2c31e32 429 goto out;
27097255
YH
430 read_lock_bh(&neigh->lock);
431 if (!(neigh->nud_state & NUD_VALID) &&
52e16356 432 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
27097255
YH
433 struct in6_addr mcaddr;
434 struct in6_addr *target;
435
436 neigh->updated = jiffies;
437 read_unlock_bh(&neigh->lock);
438
439 target = (struct in6_addr *)&neigh->primary_key;
440 addrconf_addr_solict_mult(target, &mcaddr);
d1918542 441 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
f2c31e32 442 } else {
27097255 443 read_unlock_bh(&neigh->lock);
f2c31e32
ED
444 }
445out:
446 rcu_read_unlock();
27097255
YH
447}
448#else
/* Without CONFIG_IPV6_ROUTER_PREF no reachability probes are sent. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
452#endif
453
1da177e4 454/*
554cfb7e 455 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 456 */
b6f99a21 457static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 458{
d1918542 459 struct net_device *dev = rt->dst.dev;
161980f4 460 if (!oif || dev->ifindex == oif)
554cfb7e 461 return 2;
161980f4
DM
462 if ((dev->flags & IFF_LOOPBACK) &&
463 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
464 return 1;
465 return 0;
554cfb7e 466}
1da177e4 467
b6f99a21 468static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 469{
f2c31e32 470 struct neighbour *neigh;
398bcbeb 471 int m;
f2c31e32
ED
472
473 rcu_read_lock();
27217455 474 neigh = dst_get_neighbour_noref(&rt->dst);
4d0c5911
YH
475 if (rt->rt6i_flags & RTF_NONEXTHOP ||
476 !(rt->rt6i_flags & RTF_GATEWAY))
477 m = 1;
478 else if (neigh) {
554cfb7e
YH
479 read_lock_bh(&neigh->lock);
480 if (neigh->nud_state & NUD_VALID)
4d0c5911 481 m = 2;
398bcbeb
YH
482#ifdef CONFIG_IPV6_ROUTER_PREF
483 else if (neigh->nud_state & NUD_FAILED)
484 m = 0;
485#endif
486 else
ea73ee23 487 m = 1;
554cfb7e 488 read_unlock_bh(&neigh->lock);
398bcbeb
YH
489 } else
490 m = 0;
f2c31e32 491 rcu_read_unlock();
554cfb7e 492 return m;
1da177e4
LT
493}
494
554cfb7e
YH
495static int rt6_score_route(struct rt6_info *rt, int oif,
496 int strict)
1da177e4 497{
4d0c5911 498 int m, n;
1ab1457c 499
4d0c5911 500 m = rt6_check_dev(rt, oif);
77d16f45 501 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 502 return -1;
ebacaaa0
YH
503#ifdef CONFIG_IPV6_ROUTER_PREF
504 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
505#endif
4d0c5911 506 n = rt6_check_neigh(rt);
557e92ef 507 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
508 return -1;
509 return m;
510}
511
f11e6659
DM
512static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
513 int *mpri, struct rt6_info *match)
554cfb7e 514{
f11e6659
DM
515 int m;
516
517 if (rt6_check_expired(rt))
518 goto out;
519
520 m = rt6_score_route(rt, oif, strict);
521 if (m < 0)
522 goto out;
523
524 if (m > *mpri) {
525 if (strict & RT6_LOOKUP_F_REACHABLE)
526 rt6_probe(match);
527 *mpri = m;
528 match = rt;
529 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
530 rt6_probe(rt);
531 }
532
533out:
534 return match;
535}
536
537static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
538 struct rt6_info *rr_head,
539 u32 metric, int oif, int strict)
540{
541 struct rt6_info *rt, *match;
554cfb7e 542 int mpri = -1;
1da177e4 543
f11e6659
DM
544 match = NULL;
545 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 546 rt = rt->dst.rt6_next)
f11e6659
DM
547 match = find_match(rt, oif, strict, &mpri, match);
548 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 549 rt = rt->dst.rt6_next)
f11e6659 550 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 551
f11e6659
DM
552 return match;
553}
1da177e4 554
f11e6659
DM
555static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
556{
557 struct rt6_info *match, *rt0;
8ed67789 558 struct net *net;
1da177e4 559
f11e6659
DM
560 rt0 = fn->rr_ptr;
561 if (!rt0)
562 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 563
f11e6659 564 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 565
554cfb7e 566 if (!match &&
f11e6659 567 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 568 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 569
554cfb7e 570 /* no entries matched; do round-robin */
f11e6659
DM
571 if (!next || next->rt6i_metric != rt0->rt6i_metric)
572 next = fn->leaf;
573
574 if (next != rt0)
575 fn->rr_ptr = next;
1da177e4 576 }
1da177e4 577
d1918542 578 net = dev_net(rt0->dst.dev);
a02cec21 579 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
580}
581
70ceb4f5
YH
582#ifdef CONFIG_IPV6_ROUTE_INFO
583int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 584 const struct in6_addr *gwaddr)
70ceb4f5 585{
c346dca1 586 struct net *net = dev_net(dev);
70ceb4f5
YH
587 struct route_info *rinfo = (struct route_info *) opt;
588 struct in6_addr prefix_buf, *prefix;
589 unsigned int pref;
4bed72e4 590 unsigned long lifetime;
70ceb4f5
YH
591 struct rt6_info *rt;
592
593 if (len < sizeof(struct route_info)) {
594 return -EINVAL;
595 }
596
597 /* Sanity check for prefix_len and length */
598 if (rinfo->length > 3) {
599 return -EINVAL;
600 } else if (rinfo->prefix_len > 128) {
601 return -EINVAL;
602 } else if (rinfo->prefix_len > 64) {
603 if (rinfo->length < 2) {
604 return -EINVAL;
605 }
606 } else if (rinfo->prefix_len > 0) {
607 if (rinfo->length < 1) {
608 return -EINVAL;
609 }
610 }
611
612 pref = rinfo->route_pref;
613 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 614 return -EINVAL;
70ceb4f5 615
4bed72e4 616 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
617
618 if (rinfo->length == 3)
619 prefix = (struct in6_addr *)rinfo->prefix;
620 else {
621 /* this function is safe */
622 ipv6_addr_prefix(&prefix_buf,
623 (struct in6_addr *)rinfo->prefix,
624 rinfo->prefix_len);
625 prefix = &prefix_buf;
626 }
627
efa2cea0
DL
628 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
629 dev->ifindex);
70ceb4f5
YH
630
631 if (rt && !lifetime) {
e0a1ad73 632 ip6_del_rt(rt);
70ceb4f5
YH
633 rt = NULL;
634 }
635
636 if (!rt && lifetime)
efa2cea0 637 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
638 pref);
639 else if (rt)
640 rt->rt6i_flags = RTF_ROUTEINFO |
641 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
642
643 if (rt) {
1716a961
G
644 if (!addrconf_finite_timeout(lifetime))
645 rt6_clean_expires(rt);
646 else
647 rt6_set_expires(rt, jiffies + HZ * lifetime);
648
d8d1f30b 649 dst_release(&rt->dst);
70ceb4f5
YH
650 }
651 return 0;
652}
653#endif
654
/*
 * BACKTRACK - climb the fib6 tree when the lookup ended on the null entry.
 *
 * Relies on the caller's scope: reads 'rt', updates 'fn', and jumps to the
 * caller's 'out' label when the top-level root is reached or to 'restart'
 * once a node flagged RTN_RTINFO is found. A parent's source subtree (other
 * than the one being left) is searched with @saddr before moving up.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 672
8ed67789
DL
673static struct rt6_info *ip6_pol_route_lookup(struct net *net,
674 struct fib6_table *table,
4c9483b2 675 struct flowi6 *fl6, int flags)
1da177e4
LT
676{
677 struct fib6_node *fn;
678 struct rt6_info *rt;
679
c71099ac 680 read_lock_bh(&table->tb6_lock);
4c9483b2 681 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
682restart:
683 rt = fn->leaf;
4c9483b2
DM
684 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
685 BACKTRACK(net, &fl6->saddr);
c71099ac 686out:
d8d1f30b 687 dst_use(&rt->dst, jiffies);
c71099ac 688 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
689 return rt;
690
691}
692
ea6e574e
FW
693struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
694 int flags)
695{
696 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
697}
698EXPORT_SYMBOL_GPL(ip6_route_lookup);
699
9acd9f3a
YH
700struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
701 const struct in6_addr *saddr, int oif, int strict)
c71099ac 702{
4c9483b2
DM
703 struct flowi6 fl6 = {
704 .flowi6_oif = oif,
705 .daddr = *daddr,
c71099ac
TG
706 };
707 struct dst_entry *dst;
77d16f45 708 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 709
adaa70bb 710 if (saddr) {
4c9483b2 711 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
712 flags |= RT6_LOOKUP_F_HAS_SADDR;
713 }
714
4c9483b2 715 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
716 if (dst->error == 0)
717 return (struct rt6_info *) dst;
718
719 dst_release(dst);
720
1da177e4
LT
721 return NULL;
722}
723
7159039a
YH
724EXPORT_SYMBOL(rt6_lookup);
725
c71099ac 726/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
727 It takes new route entry, the addition fails by any reason the
728 route is freed. In any case, if caller does not hold it, it may
729 be destroyed.
730 */
731
86872cb5 732static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
733{
734 int err;
c71099ac 735 struct fib6_table *table;
1da177e4 736
c71099ac
TG
737 table = rt->rt6i_table;
738 write_lock_bh(&table->tb6_lock);
86872cb5 739 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 740 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
741
742 return err;
743}
744
40e22e8f
TG
745int ip6_ins_rt(struct rt6_info *rt)
746{
4d1169c1 747 struct nl_info info = {
d1918542 748 .nl_net = dev_net(rt->dst.dev),
4d1169c1 749 };
528c4ceb 750 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
751}
752
1716a961 753static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 754 const struct in6_addr *daddr,
b71d1d42 755 const struct in6_addr *saddr)
1da177e4 756{
1da177e4
LT
757 struct rt6_info *rt;
758
759 /*
760 * Clone the route.
761 */
762
21efcfa0 763 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
764
765 if (rt) {
14deae41
DM
766 int attempts = !in_softirq();
767
38308473 768 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 769 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 770 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 771 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 772 rt->rt6i_gateway = *daddr;
58c4fb86 773 }
1da177e4 774
1da177e4 775 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
776
777#ifdef CONFIG_IPV6_SUBTREES
778 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 779 rt->rt6i_src.addr = *saddr;
1da177e4
LT
780 rt->rt6i_src.plen = 128;
781 }
782#endif
783
14deae41 784 retry:
8ade06c6 785 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 786 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
787 int saved_rt_min_interval =
788 net->ipv6.sysctl.ip6_rt_gc_min_interval;
789 int saved_rt_elasticity =
790 net->ipv6.sysctl.ip6_rt_gc_elasticity;
791
792 if (attempts-- > 0) {
793 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
794 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
795
86393e52 796 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
797
798 net->ipv6.sysctl.ip6_rt_gc_elasticity =
799 saved_rt_elasticity;
800 net->ipv6.sysctl.ip6_rt_gc_min_interval =
801 saved_rt_min_interval;
802 goto retry;
803 }
804
f3213831 805 net_warn_ratelimited("Neighbour table overflow\n");
d8d1f30b 806 dst_free(&rt->dst);
14deae41
DM
807 return NULL;
808 }
95a9a5ba 809 }
1da177e4 810
95a9a5ba
YH
811 return rt;
812}
1da177e4 813
21efcfa0
ED
814static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
815 const struct in6_addr *daddr)
299d9939 816{
21efcfa0
ED
817 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
818
299d9939 819 if (rt) {
299d9939 820 rt->rt6i_flags |= RTF_CACHE;
27217455 821 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
299d9939
YH
822 }
823 return rt;
824}
825
8ed67789 826static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 827 struct flowi6 *fl6, int flags)
1da177e4
LT
828{
829 struct fib6_node *fn;
519fbd87 830 struct rt6_info *rt, *nrt;
c71099ac 831 int strict = 0;
1da177e4 832 int attempts = 3;
519fbd87 833 int err;
53b7997f 834 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 835
77d16f45 836 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
837
838relookup:
c71099ac 839 read_lock_bh(&table->tb6_lock);
1da177e4 840
8238dd06 841restart_2:
4c9483b2 842 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
843
844restart:
4acad72d 845 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 846
4c9483b2 847 BACKTRACK(net, &fl6->saddr);
8ed67789 848 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 849 rt->rt6i_flags & RTF_CACHE)
1ddef044 850 goto out;
1da177e4 851
d8d1f30b 852 dst_hold(&rt->dst);
c71099ac 853 read_unlock_bh(&table->tb6_lock);
fb9de91e 854
27217455 855 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 856 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 857 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 858 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
859 else
860 goto out2;
e40cf353 861
d8d1f30b 862 dst_release(&rt->dst);
8ed67789 863 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 864
d8d1f30b 865 dst_hold(&rt->dst);
519fbd87 866 if (nrt) {
40e22e8f 867 err = ip6_ins_rt(nrt);
519fbd87 868 if (!err)
1da177e4 869 goto out2;
1da177e4 870 }
1da177e4 871
519fbd87
YH
872 if (--attempts <= 0)
873 goto out2;
874
875 /*
c71099ac 876 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
877 * released someone could insert this route. Relookup.
878 */
d8d1f30b 879 dst_release(&rt->dst);
519fbd87
YH
880 goto relookup;
881
882out:
8238dd06
YH
883 if (reachable) {
884 reachable = 0;
885 goto restart_2;
886 }
d8d1f30b 887 dst_hold(&rt->dst);
c71099ac 888 read_unlock_bh(&table->tb6_lock);
1da177e4 889out2:
d8d1f30b
CG
890 rt->dst.lastuse = jiffies;
891 rt->dst.__use++;
c71099ac
TG
892
893 return rt;
1da177e4
LT
894}
895
8ed67789 896static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 897 struct flowi6 *fl6, int flags)
4acad72d 898{
4c9483b2 899 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
900}
901
72331bc0
SL
902static struct dst_entry *ip6_route_input_lookup(struct net *net,
903 struct net_device *dev,
904 struct flowi6 *fl6, int flags)
905{
906 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
907 flags |= RT6_LOOKUP_F_IFACE;
908
909 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
910}
911
c71099ac
TG
912void ip6_route_input(struct sk_buff *skb)
913{
b71d1d42 914 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 915 struct net *net = dev_net(skb->dev);
adaa70bb 916 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
917 struct flowi6 fl6 = {
918 .flowi6_iif = skb->dev->ifindex,
919 .daddr = iph->daddr,
920 .saddr = iph->saddr,
38308473 921 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
922 .flowi6_mark = skb->mark,
923 .flowi6_proto = iph->nexthdr,
c71099ac 924 };
adaa70bb 925
72331bc0 926 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
927}
928
8ed67789 929static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 930 struct flowi6 *fl6, int flags)
1da177e4 931{
4c9483b2 932 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
933}
934
9c7a4f9c 935struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 936 struct flowi6 *fl6)
c71099ac
TG
937{
938 int flags = 0;
939
4c9483b2 940 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 941 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 942
4c9483b2 943 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 944 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
945 else if (sk)
946 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 947
4c9483b2 948 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
949}
950
7159039a 951EXPORT_SYMBOL(ip6_route_output);
1da177e4 952
2774c131 953struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 954{
5c1e6aa3 955 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
956 struct dst_entry *new = NULL;
957
5c1e6aa3 958 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 959 if (rt) {
cf911662 960 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
97bab73f 961 rt6_init_peer(rt, net->ipv6.peers);
cf911662 962
d8d1f30b 963 new = &rt->dst;
14e50e57 964
14e50e57 965 new->__use = 1;
352e512c
HX
966 new->input = dst_discard;
967 new->output = dst_discard;
14e50e57 968
21efcfa0
ED
969 if (dst_metrics_read_only(&ort->dst))
970 new->_metrics = ort->dst._metrics;
971 else
972 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
973 rt->rt6i_idev = ort->rt6i_idev;
974 if (rt->rt6i_idev)
975 in6_dev_hold(rt->rt6i_idev);
14e50e57 976
4e3fd7a0 977 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
978 rt->rt6i_flags = ort->rt6i_flags;
979 rt6_clean_expires(rt);
14e50e57
DM
980 rt->rt6i_metric = 0;
981
982 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
983#ifdef CONFIG_IPV6_SUBTREES
984 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
985#endif
986
987 dst_free(new);
988 }
989
69ead7af
DM
990 dst_release(dst_orig);
991 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 992}
14e50e57 993
1da177e4
LT
994/*
995 * Destination cache support functions
996 */
997
998static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
999{
1000 struct rt6_info *rt;
1001
1002 rt = (struct rt6_info *) dst;
1003
6431cbc2
DM
1004 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1005 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
97bab73f 1006 if (!rt6_has_peer(rt))
6431cbc2
DM
1007 rt6_bind_peer(rt, 0);
1008 rt->rt6i_peer_genid = rt6_peer_genid();
1009 }
1da177e4 1010 return dst;
6431cbc2 1011 }
1da177e4
LT
1012 return NULL;
1013}
1014
1015static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1016{
1017 struct rt6_info *rt = (struct rt6_info *) dst;
1018
1019 if (rt) {
54c1a859
YH
1020 if (rt->rt6i_flags & RTF_CACHE) {
1021 if (rt6_check_expired(rt)) {
1022 ip6_del_rt(rt);
1023 dst = NULL;
1024 }
1025 } else {
1da177e4 1026 dst_release(dst);
54c1a859
YH
1027 dst = NULL;
1028 }
1da177e4 1029 }
54c1a859 1030 return dst;
1da177e4
LT
1031}
1032
1033static void ip6_link_failure(struct sk_buff *skb)
1034{
1035 struct rt6_info *rt;
1036
3ffe533c 1037 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1038
adf30907 1039 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1040 if (rt) {
1716a961
G
1041 if (rt->rt6i_flags & RTF_CACHE)
1042 rt6_update_expires(rt, 0);
1043 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1da177e4
LT
1044 rt->rt6i_node->fn_sernum = -1;
1045 }
1046}
1047
1048static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1049{
1050 struct rt6_info *rt6 = (struct rt6_info*)dst;
1051
81aded24 1052 dst_confirm(dst);
1da177e4 1053 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
81aded24
DM
1054 struct net *net = dev_net(dst->dev);
1055
1da177e4
LT
1056 rt6->rt6i_flags |= RTF_MODIFIED;
1057 if (mtu < IPV6_MIN_MTU) {
defb3519 1058 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1059 mtu = IPV6_MIN_MTU;
defb3519
DM
1060 features |= RTAX_FEATURE_ALLFRAG;
1061 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1062 }
defb3519 1063 dst_metric_set(dst, RTAX_MTU, mtu);
81aded24 1064 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1065 }
1066}
1067
81aded24
DM
1068void ip6_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
1069 int oif, __be32 mark)
1070{
1071 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1072 struct dst_entry *dst;
1073 struct flowi6 fl6;
1074
1075 memset(&fl6, 0, sizeof(fl6));
1076 fl6.flowi6_oif = oif;
1077 fl6.flowi6_mark = mark;
1078 fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS;
1079 fl6.daddr = iph->daddr;
1080 fl6.saddr = iph->saddr;
1081 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1082
1083 dst = ip6_route_output(net, NULL, &fl6);
1084 if (!dst->error)
1085 ip6_rt_update_pmtu(dst, ntohl(mtu));
1086 dst_release(dst);
1087}
1088EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1089
1090void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1091{
1092 ip6_update_pmtu(skb, sock_net(sk), mtu,
1093 sk->sk_bound_dev_if, sk->sk_mark);
1094}
1095EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1096
0dbaee3b 1097static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1098{
0dbaee3b
DM
1099 struct net_device *dev = dst->dev;
1100 unsigned int mtu = dst_mtu(dst);
1101 struct net *net = dev_net(dev);
1102
1da177e4
LT
1103 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1104
5578689a
DL
1105 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1106 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1107
1108 /*
1ab1457c
YH
1109 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1110 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1111 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1112 * rely only on pmtu discovery"
1113 */
1114 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1115 mtu = IPV6_MAXPLEN;
1116 return mtu;
1117}
1118
ebb762f2 1119static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1120{
d33e4553 1121 struct inet6_dev *idev;
618f9bc7
SK
1122 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1123
1124 if (mtu)
1125 return mtu;
1126
1127 mtu = IPV6_MIN_MTU;
d33e4553
DM
1128
1129 rcu_read_lock();
1130 idev = __in6_dev_get(dst->dev);
1131 if (idev)
1132 mtu = idev->cnf.mtu6;
1133 rcu_read_unlock();
1134
1135 return mtu;
1136}
1137
3b00944c
YH
/*
 * Singly linked list (through dst->next) of the entries created by
 * icmp6_dst_alloc().  icmp6_dst_gc() and icmp6_clean_all() unlink and
 * free entries from it.
 */
1138static struct dst_entry *icmp6_dst_gc_list;
/* Taken with spin_lock_bh() around every access to icmp6_dst_gc_list. */
1139static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1140
3b00944c 1141struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1142 struct neighbour *neigh,
87a11578 1143 struct flowi6 *fl6)
1da177e4 1144{
87a11578 1145 struct dst_entry *dst;
1da177e4
LT
1146 struct rt6_info *rt;
1147 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1148 struct net *net = dev_net(dev);
1da177e4 1149
38308473 1150 if (unlikely(!idev))
122bdf67 1151 return ERR_PTR(-ENODEV);
1da177e4 1152
8b96d22d 1153 rt = ip6_dst_alloc(net, dev, 0, NULL);
38308473 1154 if (unlikely(!rt)) {
1da177e4 1155 in6_dev_put(idev);
87a11578 1156 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1157 goto out;
1158 }
1159
1da177e4
LT
1160 if (neigh)
1161 neigh_hold(neigh);
14deae41 1162 else {
f83c7790 1163 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
b43faac6 1164 if (IS_ERR(neigh)) {
252c3d84 1165 in6_dev_put(idev);
b43faac6
DM
1166 dst_free(&rt->dst);
1167 return ERR_CAST(neigh);
1168 }
14deae41 1169 }
1da177e4 1170
8e2ec639
YZ
1171 rt->dst.flags |= DST_HOST;
1172 rt->dst.output = ip6_output;
69cce1d1 1173 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1174 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1175 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1176 rt->rt6i_dst.plen = 128;
1177 rt->rt6i_idev = idev;
7011687f 1178 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1179
3b00944c 1180 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1181 rt->dst.next = icmp6_dst_gc_list;
1182 icmp6_dst_gc_list = &rt->dst;
3b00944c 1183 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1184
5578689a 1185 fib6_force_start_gc(net);
1da177e4 1186
87a11578
DM
1187 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1188
1da177e4 1189out:
87a11578 1190 return dst;
1da177e4
LT
1191}
1192
3d0f24a7 1193int icmp6_dst_gc(void)
1da177e4 1194{
e9476e95 1195 struct dst_entry *dst, **pprev;
3d0f24a7 1196 int more = 0;
1da177e4 1197
3b00944c
YH
1198 spin_lock_bh(&icmp6_dst_lock);
1199 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1200
1da177e4
LT
1201 while ((dst = *pprev) != NULL) {
1202 if (!atomic_read(&dst->__refcnt)) {
1203 *pprev = dst->next;
1204 dst_free(dst);
1da177e4
LT
1205 } else {
1206 pprev = &dst->next;
3d0f24a7 1207 ++more;
1da177e4
LT
1208 }
1209 }
1210
3b00944c 1211 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1212
3d0f24a7 1213 return more;
1da177e4
LT
1214}
1215
1e493d19
DM
1216static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1217 void *arg)
1218{
1219 struct dst_entry *dst, **pprev;
1220
1221 spin_lock_bh(&icmp6_dst_lock);
1222 pprev = &icmp6_dst_gc_list;
1223 while ((dst = *pprev) != NULL) {
1224 struct rt6_info *rt = (struct rt6_info *) dst;
1225 if (func(rt, arg)) {
1226 *pprev = dst->next;
1227 dst_free(dst);
1228 } else {
1229 pprev = &dst->next;
1230 }
1231 }
1232 spin_unlock_bh(&icmp6_dst_lock);
1233}
1234
569d3645 1235static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1236{
1da177e4 1237 unsigned long now = jiffies;
86393e52 1238 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1239 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1240 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1241 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1242 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1243 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1244 int entries;
7019b78e 1245
fc66f95c 1246 entries = dst_entries_get_fast(ops);
7019b78e 1247 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1248 entries <= rt_max_size)
1da177e4
LT
1249 goto out;
1250
6891a346
BT
1251 net->ipv6.ip6_rt_gc_expire++;
1252 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1253 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1254 entries = dst_entries_get_slow(ops);
1255 if (entries < ops->gc_thresh)
7019b78e 1256 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1257out:
7019b78e 1258 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1259 return entries > rt_max_size;
1da177e4
LT
1260}
1261
1262/* Clean host part of a prefix. Not necessary in radix tree,
1263 but results in cleaner routing tables.
1264
1265 Remove it only when all the things will work!
1266 */
1267
6b75d090 1268int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1269{
5170ae82 1270 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1271 if (hoplimit == 0) {
6b75d090 1272 struct net_device *dev = dst->dev;
c68f24cc
ED
1273 struct inet6_dev *idev;
1274
1275 rcu_read_lock();
1276 idev = __in6_dev_get(dev);
1277 if (idev)
6b75d090 1278 hoplimit = idev->cnf.hop_limit;
c68f24cc 1279 else
53b7997f 1280 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1281 rcu_read_unlock();
1da177e4
LT
1282 }
1283 return hoplimit;
1284}
abbf46ae 1285EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1286
1287/*
1288 *
1289 */
1290
86872cb5 1291int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1292{
1293 int err;
5578689a 1294 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1295 struct rt6_info *rt = NULL;
1296 struct net_device *dev = NULL;
1297 struct inet6_dev *idev = NULL;
c71099ac 1298 struct fib6_table *table;
1da177e4
LT
1299 int addr_type;
1300
86872cb5 1301 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1302 return -EINVAL;
1303#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1304 if (cfg->fc_src_len)
1da177e4
LT
1305 return -EINVAL;
1306#endif
86872cb5 1307 if (cfg->fc_ifindex) {
1da177e4 1308 err = -ENODEV;
5578689a 1309 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1310 if (!dev)
1311 goto out;
1312 idev = in6_dev_get(dev);
1313 if (!idev)
1314 goto out;
1315 }
1316
86872cb5
TG
1317 if (cfg->fc_metric == 0)
1318 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1319
d71314b4 1320 err = -ENOBUFS;
38308473
DM
1321 if (cfg->fc_nlinfo.nlh &&
1322 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1323 table = fib6_get_table(net, cfg->fc_table);
38308473 1324 if (!table) {
f3213831 1325 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1326 table = fib6_new_table(net, cfg->fc_table);
1327 }
1328 } else {
1329 table = fib6_new_table(net, cfg->fc_table);
1330 }
38308473
DM
1331
1332 if (!table)
c71099ac 1333 goto out;
c71099ac 1334
8b96d22d 1335 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1da177e4 1336
38308473 1337 if (!rt) {
1da177e4
LT
1338 err = -ENOMEM;
1339 goto out;
1340 }
1341
d8d1f30b 1342 rt->dst.obsolete = -1;
1716a961
G
1343
1344 if (cfg->fc_flags & RTF_EXPIRES)
1345 rt6_set_expires(rt, jiffies +
1346 clock_t_to_jiffies(cfg->fc_expires));
1347 else
1348 rt6_clean_expires(rt);
1da177e4 1349
86872cb5
TG
1350 if (cfg->fc_protocol == RTPROT_UNSPEC)
1351 cfg->fc_protocol = RTPROT_BOOT;
1352 rt->rt6i_protocol = cfg->fc_protocol;
1353
1354 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1355
1356 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1357 rt->dst.input = ip6_mc_input;
ab79ad14
1358 else if (cfg->fc_flags & RTF_LOCAL)
1359 rt->dst.input = ip6_input;
1da177e4 1360 else
d8d1f30b 1361 rt->dst.input = ip6_forward;
1da177e4 1362
d8d1f30b 1363 rt->dst.output = ip6_output;
1da177e4 1364
86872cb5
TG
1365 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1366 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1367 if (rt->rt6i_dst.plen == 128)
11d53b49 1368 rt->dst.flags |= DST_HOST;
1da177e4 1369
8e2ec639
YZ
1370 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1371 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1372 if (!metrics) {
1373 err = -ENOMEM;
1374 goto out;
1375 }
1376 dst_init_metrics(&rt->dst, metrics, 0);
1377 }
1da177e4 1378#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1379 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1380 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1381#endif
1382
86872cb5 1383 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1384
1385 /* We cannot add true routes via loopback here,
1386 they would result in kernel looping; promote them to reject routes
1387 */
86872cb5 1388 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1389 (dev && (dev->flags & IFF_LOOPBACK) &&
1390 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1391 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1392 /* hold loopback dev/idev if we haven't done so. */
5578689a 1393 if (dev != net->loopback_dev) {
1da177e4
LT
1394 if (dev) {
1395 dev_put(dev);
1396 in6_dev_put(idev);
1397 }
5578689a 1398 dev = net->loopback_dev;
1da177e4
LT
1399 dev_hold(dev);
1400 idev = in6_dev_get(dev);
1401 if (!idev) {
1402 err = -ENODEV;
1403 goto out;
1404 }
1405 }
d8d1f30b
CG
1406 rt->dst.output = ip6_pkt_discard_out;
1407 rt->dst.input = ip6_pkt_discard;
1408 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1409 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1410 goto install_route;
1411 }
1412
86872cb5 1413 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1414 const struct in6_addr *gw_addr;
1da177e4
LT
1415 int gwa_type;
1416
86872cb5 1417 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1418 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1419 gwa_type = ipv6_addr_type(gw_addr);
1420
1421 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1422 struct rt6_info *grt;
1423
1424 /* IPv6 strictly inhibits using not link-local
1425 addresses as nexthop address.
1426 Otherwise, router will not able to send redirects.
1427 It is very good, but in some (rare!) circumstances
1428 (SIT, PtP, NBMA NOARP links) it is handy to allow
1429 some exceptions. --ANK
1430 */
1431 err = -EINVAL;
38308473 1432 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1433 goto out;
1434
5578689a 1435 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1436
1437 err = -EHOSTUNREACH;
38308473 1438 if (!grt)
1da177e4
LT
1439 goto out;
1440 if (dev) {
d1918542 1441 if (dev != grt->dst.dev) {
d8d1f30b 1442 dst_release(&grt->dst);
1da177e4
LT
1443 goto out;
1444 }
1445 } else {
d1918542 1446 dev = grt->dst.dev;
1da177e4
LT
1447 idev = grt->rt6i_idev;
1448 dev_hold(dev);
1449 in6_dev_hold(grt->rt6i_idev);
1450 }
38308473 1451 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1452 err = 0;
d8d1f30b 1453 dst_release(&grt->dst);
1da177e4
LT
1454
1455 if (err)
1456 goto out;
1457 }
1458 err = -EINVAL;
38308473 1459 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1460 goto out;
1461 }
1462
1463 err = -ENODEV;
38308473 1464 if (!dev)
1da177e4
LT
1465 goto out;
1466
c3968a85
DW
1467 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1468 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1469 err = -EINVAL;
1470 goto out;
1471 }
4e3fd7a0 1472 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1473 rt->rt6i_prefsrc.plen = 128;
1474 } else
1475 rt->rt6i_prefsrc.plen = 0;
1476
86872cb5 1477 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1478 err = rt6_bind_neighbour(rt, dev);
f83c7790 1479 if (err)
1da177e4 1480 goto out;
1da177e4
LT
1481 }
1482
86872cb5 1483 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1484
1485install_route:
86872cb5
TG
1486 if (cfg->fc_mx) {
1487 struct nlattr *nla;
1488 int remaining;
1489
1490 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1491 int type = nla_type(nla);
86872cb5
TG
1492
1493 if (type) {
1494 if (type > RTAX_MAX) {
1da177e4
LT
1495 err = -EINVAL;
1496 goto out;
1497 }
86872cb5 1498
defb3519 1499 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1500 }
1da177e4
LT
1501 }
1502 }
1503
d8d1f30b 1504 rt->dst.dev = dev;
1da177e4 1505 rt->rt6i_idev = idev;
c71099ac 1506 rt->rt6i_table = table;
63152fc0 1507
c346dca1 1508 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1509
86872cb5 1510 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1511
1512out:
1513 if (dev)
1514 dev_put(dev);
1515 if (idev)
1516 in6_dev_put(idev);
1517 if (rt)
d8d1f30b 1518 dst_free(&rt->dst);
1da177e4
LT
1519 return err;
1520}
1521
86872cb5 1522static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1523{
1524 int err;
c71099ac 1525 struct fib6_table *table;
d1918542 1526 struct net *net = dev_net(rt->dst.dev);
1da177e4 1527
8ed67789 1528 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1529 return -ENOENT;
1530
c71099ac
TG
1531 table = rt->rt6i_table;
1532 write_lock_bh(&table->tb6_lock);
1da177e4 1533
86872cb5 1534 err = fib6_del(rt, info);
d8d1f30b 1535 dst_release(&rt->dst);
1da177e4 1536
c71099ac 1537 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1538
1539 return err;
1540}
1541
e0a1ad73
TG
1542int ip6_del_rt(struct rt6_info *rt)
1543{
4d1169c1 1544 struct nl_info info = {
d1918542 1545 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1546 };
528c4ceb 1547 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1548}
1549
86872cb5 1550static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1551{
c71099ac 1552 struct fib6_table *table;
1da177e4
LT
1553 struct fib6_node *fn;
1554 struct rt6_info *rt;
1555 int err = -ESRCH;
1556
5578689a 1557 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1558 if (!table)
c71099ac
TG
1559 return err;
1560
1561 read_lock_bh(&table->tb6_lock);
1da177e4 1562
c71099ac 1563 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1564 &cfg->fc_dst, cfg->fc_dst_len,
1565 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1566
1da177e4 1567 if (fn) {
d8d1f30b 1568 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1569 if (cfg->fc_ifindex &&
d1918542
DM
1570 (!rt->dst.dev ||
1571 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1572 continue;
86872cb5
TG
1573 if (cfg->fc_flags & RTF_GATEWAY &&
1574 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1575 continue;
86872cb5 1576 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1577 continue;
d8d1f30b 1578 dst_hold(&rt->dst);
c71099ac 1579 read_unlock_bh(&table->tb6_lock);
1da177e4 1580
86872cb5 1581 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1582 }
1583 }
c71099ac 1584 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1585
1586 return err;
1587}
1588
1589/*
1590 * Handle redirects
1591 */
/*
 * Flow key extended with the address of the router that sent a redirect.
 * ip6_route_redirect() fills it in and passes &rdfl.fl6 to the rule
 * lookup; __ip6_route_redirect() casts that pointer back to this type.
 */
a6279458 1592struct ip6rd_flowi {
/* Must remain the first member: the flowi6 pointer is cast back to the container. */
4c9483b2 1593 struct flowi6 fl6;
a6279458
YH
/* Compared against each candidate route's rt6i_gateway. */
1594 struct in6_addr gateway;
1595};
1596
8ed67789
DL
/*
 * Per-table lookup callback used by ip6_route_redirect().
 *
 * @fl6 is really the fl6 member of a struct ip6rd_flowi and is cast back
 * to reach the redirecting router's address.  Under the table read lock
 * the routes of the matching node are scanned for an unexpired gateway
 * route on the flow's output interface whose gateway equals that
 * address.  When none is found the null entry is selected and BACKTRACK()
 * is invoked.
 * NOTE(review): BACKTRACK() is defined outside this view; it presumably
 * jumps to the "restart" / "out" labels and relies on the locals "rt"
 * and "fn" -- confirm before renaming or restructuring anything here.
 *
 * Returns the selected route with a reference taken via dst_hold().
 * @flags is not used in this function.
 */
1597static struct rt6_info *__ip6_route_redirect(struct net *net,
1598 struct fib6_table *table,
4c9483b2 1599 struct flowi6 *fl6,
a6279458 1600 int flags)
1da177e4 1601{
4c9483b2 1602 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1603 struct rt6_info *rt;
e843b9e1 1604 struct fib6_node *fn;
c71099ac 1605
1da177e4 1606 /*
e843b9e1
YH
1607 * Get the "current" route for this destination and
1608 * check if the redirect has come from the appropriate router.
1609 *
1610 * RFC 2461 specifies that redirects should only be
1611 * accepted if they come from the nexthop to the target.
1612 * Due to the way the routes are chosen, this notion
1613 * is a bit fuzzy and one might need to check all possible
1614 * routes.
1da177e4 1615 */
1da177e4 1616
c71099ac 1617 read_lock_bh(&table->tb6_lock);
4c9483b2 1618 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1619restart:
d8d1f30b 1620 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1621 /*
1622 * Current route is on-link; redirect is always invalid.
1623 *
1624 * Seems, previous statement is not true. It could
1625 * be node, which looks for us as on-link (f.e. proxy ndisc)
1626 * But then router serving it might decide, that we should
1627 * know truth 8)8) --ANK (980726).
1628 */
1629 if (rt6_check_expired(rt))
1630 continue;
1631 if (!(rt->rt6i_flags & RTF_GATEWAY))
1632 continue;
d1918542 1633 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
e843b9e1 1634 continue;
a6279458 1635 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1636 continue;
1637 break;
1638 }
a6279458 1639
/* No candidate in this node: fall back to the null entry before backtracking. */
cb15d9c2 1640 if (!rt)
8ed67789 1641 rt = net->ipv6.ip6_null_entry;
4c9483b2 1642 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1643out:
/* The reference is taken while the table lock is still held. */
d8d1f30b 1644 dst_hold(&rt->dst);
a6279458 1645
c71099ac 1646 read_unlock_bh(&table->tb6_lock);
e843b9e1 1647
a6279458
YH
1648 return rt;
1649};
1650
b71d1d42
ED
1651static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1652 const struct in6_addr *src,
1653 const struct in6_addr *gateway,
a6279458
YH
1654 struct net_device *dev)
1655{
adaa70bb 1656 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1657 struct net *net = dev_net(dev);
a6279458 1658 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1659 .fl6 = {
1660 .flowi6_oif = dev->ifindex,
1661 .daddr = *dest,
1662 .saddr = *src,
a6279458 1663 },
a6279458 1664 };
adaa70bb 1665
4e3fd7a0 1666 rdfl.gateway = *gateway;
86c36ce4 1667
adaa70bb
TG
1668 if (rt6_need_strict(dest))
1669 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1670
4c9483b2 1671 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1672 flags, __ip6_route_redirect);
a6279458
YH
1673}
1674
b71d1d42
ED
1675void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1676 const struct in6_addr *saddr,
a6279458
YH
1677 struct neighbour *neigh, u8 *lladdr, int on_link)
1678{
1679 struct rt6_info *rt, *nrt = NULL;
1680 struct netevent_redirect netevent;
c346dca1 1681 struct net *net = dev_net(neigh->dev);
a6279458
YH
1682
1683 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1684
8ed67789 1685 if (rt == net->ipv6.ip6_null_entry) {
e87cc472 1686 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
a6279458 1687 goto out;
1da177e4
LT
1688 }
1689
1da177e4
LT
1690 /*
1691 * We have finally decided to accept it.
1692 */
1693
1ab1457c 1694 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1695 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1696 NEIGH_UPDATE_F_OVERRIDE|
1697 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1698 NEIGH_UPDATE_F_ISROUTER))
1699 );
1700
1701 /*
1702 * Redirect received -> path was valid.
1703 * Look, redirects are sent only in response to data packets,
1704 * so that this nexthop apparently is reachable. --ANK
1705 */
d8d1f30b 1706 dst_confirm(&rt->dst);
1da177e4
LT
1707
1708 /* Duplicate redirect: silently ignore. */
27217455 1709 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1da177e4
LT
1710 goto out;
1711
21efcfa0 1712 nrt = ip6_rt_copy(rt, dest);
38308473 1713 if (!nrt)
1da177e4
LT
1714 goto out;
1715
1716 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1717 if (on_link)
1718 nrt->rt6i_flags &= ~RTF_GATEWAY;
1719
4e3fd7a0 1720 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
69cce1d1 1721 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1722
40e22e8f 1723 if (ip6_ins_rt(nrt))
1da177e4
LT
1724 goto out;
1725
d8d1f30b
CG
1726 netevent.old = &rt->dst;
1727 netevent.new = &nrt->dst;
8d71740c
TT
1728 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1729
38308473 1730 if (rt->rt6i_flags & RTF_CACHE) {
e0a1ad73 1731 ip6_del_rt(rt);
1da177e4
LT
1732 return;
1733 }
1734
1735out:
d8d1f30b 1736 dst_release(&rt->dst);
1da177e4
LT
1737}
1738
1da177e4
LT
1739/*
1740 * Misc support functions
1741 */
1742
1716a961 1743static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1744 const struct in6_addr *dest)
1da177e4 1745{
d1918542 1746 struct net *net = dev_net(ort->dst.dev);
8b96d22d
DM
1747 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1748 ort->rt6i_table);
1da177e4
LT
1749
1750 if (rt) {
d8d1f30b
CG
1751 rt->dst.input = ort->dst.input;
1752 rt->dst.output = ort->dst.output;
8e2ec639 1753 rt->dst.flags |= DST_HOST;
d8d1f30b 1754
4e3fd7a0 1755 rt->rt6i_dst.addr = *dest;
8e2ec639 1756 rt->rt6i_dst.plen = 128;
defb3519 1757 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1758 rt->dst.error = ort->dst.error;
1da177e4
LT
1759 rt->rt6i_idev = ort->rt6i_idev;
1760 if (rt->rt6i_idev)
1761 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1762 rt->dst.lastuse = jiffies;
1da177e4 1763
4e3fd7a0 1764 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1765 rt->rt6i_flags = ort->rt6i_flags;
1766 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1767 (RTF_DEFAULT | RTF_ADDRCONF))
1768 rt6_set_from(rt, ort);
1769 else
1770 rt6_clean_expires(rt);
1da177e4
LT
1771 rt->rt6i_metric = 0;
1772
1da177e4
LT
1773#ifdef CONFIG_IPV6_SUBTREES
1774 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1775#endif
0f6c6392 1776 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1777 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1778 }
1779 return rt;
1780}
1781
70ceb4f5 1782#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1783static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1784 const struct in6_addr *prefix, int prefixlen,
1785 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1786{
1787 struct fib6_node *fn;
1788 struct rt6_info *rt = NULL;
c71099ac
TG
1789 struct fib6_table *table;
1790
efa2cea0 1791 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1792 if (!table)
c71099ac 1793 return NULL;
70ceb4f5 1794
c71099ac
TG
1795 write_lock_bh(&table->tb6_lock);
1796 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1797 if (!fn)
1798 goto out;
1799
d8d1f30b 1800 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1801 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1802 continue;
1803 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1804 continue;
1805 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1806 continue;
d8d1f30b 1807 dst_hold(&rt->dst);
70ceb4f5
YH
1808 break;
1809 }
1810out:
c71099ac 1811 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1812 return rt;
1813}
1814
efa2cea0 1815static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1816 const struct in6_addr *prefix, int prefixlen,
1817 const struct in6_addr *gwaddr, int ifindex,
95c96174 1818 unsigned int pref)
70ceb4f5 1819{
86872cb5
TG
1820 struct fib6_config cfg = {
1821 .fc_table = RT6_TABLE_INFO,
238fc7ea 1822 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1823 .fc_ifindex = ifindex,
1824 .fc_dst_len = prefixlen,
1825 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1826 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1827 .fc_nlinfo.pid = 0,
1828 .fc_nlinfo.nlh = NULL,
1829 .fc_nlinfo.nl_net = net,
86872cb5
TG
1830 };
1831
4e3fd7a0
AD
1832 cfg.fc_dst = *prefix;
1833 cfg.fc_gateway = *gwaddr;
70ceb4f5 1834
e317da96
YH
1835 /* We should treat it as a default route if prefix length is 0. */
1836 if (!prefixlen)
86872cb5 1837 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1838
86872cb5 1839 ip6_route_add(&cfg);
70ceb4f5 1840
efa2cea0 1841 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1842}
1843#endif
1844
b71d1d42 1845struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1846{
1da177e4 1847 struct rt6_info *rt;
c71099ac 1848 struct fib6_table *table;
1da177e4 1849
c346dca1 1850 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1851 if (!table)
c71099ac 1852 return NULL;
1da177e4 1853
c71099ac 1854 write_lock_bh(&table->tb6_lock);
d8d1f30b 1855 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1856 if (dev == rt->dst.dev &&
045927ff 1857 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1858 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1859 break;
1860 }
1861 if (rt)
d8d1f30b 1862 dst_hold(&rt->dst);
c71099ac 1863 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1864 return rt;
1865}
1866
b71d1d42 1867struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1868 struct net_device *dev,
1869 unsigned int pref)
1da177e4 1870{
86872cb5
TG
1871 struct fib6_config cfg = {
1872 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1873 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1874 .fc_ifindex = dev->ifindex,
1875 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1876 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1877 .fc_nlinfo.pid = 0,
1878 .fc_nlinfo.nlh = NULL,
c346dca1 1879 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1880 };
1da177e4 1881
4e3fd7a0 1882 cfg.fc_gateway = *gwaddr;
1da177e4 1883
86872cb5 1884 ip6_route_add(&cfg);
1da177e4 1885
1da177e4
LT
1886 return rt6_get_dflt_router(gwaddr, dev);
1887}
1888
7b4da532 1889void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1890{
1891 struct rt6_info *rt;
c71099ac
TG
1892 struct fib6_table *table;
1893
1894 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1895 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1896 if (!table)
c71099ac 1897 return;
1da177e4
LT
1898
1899restart:
c71099ac 1900 read_lock_bh(&table->tb6_lock);
d8d1f30b 1901 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1902 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1903 dst_hold(&rt->dst);
c71099ac 1904 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1905 ip6_del_rt(rt);
1da177e4
LT
1906 goto restart;
1907 }
1908 }
c71099ac 1909 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1910}
1911
5578689a
DL
1912static void rtmsg_to_fib6_config(struct net *net,
1913 struct in6_rtmsg *rtmsg,
86872cb5
TG
1914 struct fib6_config *cfg)
1915{
1916 memset(cfg, 0, sizeof(*cfg));
1917
1918 cfg->fc_table = RT6_TABLE_MAIN;
1919 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1920 cfg->fc_metric = rtmsg->rtmsg_metric;
1921 cfg->fc_expires = rtmsg->rtmsg_info;
1922 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1923 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1924 cfg->fc_flags = rtmsg->rtmsg_flags;
1925
5578689a 1926 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1927
4e3fd7a0
AD
1928 cfg->fc_dst = rtmsg->rtmsg_dst;
1929 cfg->fc_src = rtmsg->rtmsg_src;
1930 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1931}
1932
5578689a 1933int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1934{
86872cb5 1935 struct fib6_config cfg;
1da177e4
LT
1936 struct in6_rtmsg rtmsg;
1937 int err;
1938
1939 switch(cmd) {
1940 case SIOCADDRT: /* Add a route */
1941 case SIOCDELRT: /* Delete a route */
1942 if (!capable(CAP_NET_ADMIN))
1943 return -EPERM;
1944 err = copy_from_user(&rtmsg, arg,
1945 sizeof(struct in6_rtmsg));
1946 if (err)
1947 return -EFAULT;
86872cb5 1948
5578689a 1949 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1950
1da177e4
LT
1951 rtnl_lock();
1952 switch (cmd) {
1953 case SIOCADDRT:
86872cb5 1954 err = ip6_route_add(&cfg);
1da177e4
LT
1955 break;
1956 case SIOCDELRT:
86872cb5 1957 err = ip6_route_del(&cfg);
1da177e4
LT
1958 break;
1959 default:
1960 err = -EINVAL;
1961 }
1962 rtnl_unlock();
1963
1964 return err;
3ff50b79 1965 }
1da177e4
LT
1966
1967 return -EINVAL;
1968}
1969
1970/*
1971 * Drop the packet on the floor
1972 */
1973
d5fdd6ba 1974static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1975{
612f09e8 1976 int type;
adf30907 1977 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1978 switch (ipstats_mib_noroutes) {
1979 case IPSTATS_MIB_INNOROUTES:
0660e03f 1980 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1981 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1982 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1983 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1984 break;
1985 }
1986 /* FALLTHROUGH */
1987 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1988 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1989 ipstats_mib_noroutes);
612f09e8
YH
1990 break;
1991 }
3ffe533c 1992 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1993 kfree_skb(skb);
1994 return 0;
1995}
1996
9ce8ade0
TG
1997static int ip6_pkt_discard(struct sk_buff *skb)
1998{
612f09e8 1999 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2000}
2001
20380731 2002static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2003{
adf30907 2004 skb->dev = skb_dst(skb)->dev;
612f09e8 2005 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2006}
2007
6723ab54
DM
2008#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2009
9ce8ade0
TG
2010static int ip6_pkt_prohibit(struct sk_buff *skb)
2011{
612f09e8 2012 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2013}
2014
2015static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2016{
adf30907 2017 skb->dev = skb_dst(skb)->dev;
612f09e8 2018 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2019}
2020
6723ab54
DM
2021#endif
2022
1da177e4
LT
2023/*
2024 * Allocate a dst for local (unicast / anycast) address.
2025 */
2026
2027struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2028 const struct in6_addr *addr,
8f031519 2029 bool anycast)
1da177e4 2030{
c346dca1 2031 struct net *net = dev_net(idev->dev);
8b96d22d 2032 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
f83c7790 2033 int err;
1da177e4 2034
38308473 2035 if (!rt) {
f3213831 2036 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
1da177e4 2037 return ERR_PTR(-ENOMEM);
40385653 2038 }
1da177e4 2039
1da177e4
LT
2040 in6_dev_hold(idev);
2041
11d53b49 2042 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2043 rt->dst.input = ip6_input;
2044 rt->dst.output = ip6_output;
1da177e4 2045 rt->rt6i_idev = idev;
d8d1f30b 2046 rt->dst.obsolete = -1;
1da177e4
LT
2047
2048 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2049 if (anycast)
2050 rt->rt6i_flags |= RTF_ANYCAST;
2051 else
1da177e4 2052 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2053 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2054 if (err) {
d8d1f30b 2055 dst_free(&rt->dst);
f83c7790 2056 return ERR_PTR(err);
1da177e4
LT
2057 }
2058
4e3fd7a0 2059 rt->rt6i_dst.addr = *addr;
1da177e4 2060 rt->rt6i_dst.plen = 128;
5578689a 2061 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2062
d8d1f30b 2063 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2064
2065 return rt;
2066}
2067
c3968a85
DW
2068int ip6_route_get_saddr(struct net *net,
2069 struct rt6_info *rt,
b71d1d42 2070 const struct in6_addr *daddr,
c3968a85
DW
2071 unsigned int prefs,
2072 struct in6_addr *saddr)
2073{
2074 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2075 int err = 0;
2076 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2077 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2078 else
2079 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2080 daddr, prefs, saddr);
2081 return err;
2082}
2083
/*
 * Remove a deleted address from prefsrc entries.
 *
 * Walker argument for fib6_remove_prefsrc(): the device to match (NULL
 * matches every device), the owning namespace, and the address being
 * removed.
 */
struct arg_dev_net_ip {
	struct net_device	*dev;
	struct net		*net;
	struct in6_addr		*addr;
};
2090
2091static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2092{
2093 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2094 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2095 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2096
d1918542 2097 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2098 rt != net->ipv6.ip6_null_entry &&
2099 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2100 /* remove prefsrc entry */
2101 rt->rt6i_prefsrc.plen = 0;
2102 }
2103 return 0;
2104}
2105
2106void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2107{
2108 struct net *net = dev_net(ifp->idev->dev);
2109 struct arg_dev_net_ip adni = {
2110 .dev = ifp->idev->dev,
2111 .net = net,
2112 .addr = &ifp->addr,
2113 };
2114 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2115}
2116
8ed67789
DL
/*
 * Walker argument for fib6_ifdown(): the device going down (NULL matches
 * every device) and the namespace whose null entry must be preserved.
 */
struct arg_dev_net {
	struct net_device	*dev;
	struct net		*net;
};
2121
1da177e4
LT
2122static int fib6_ifdown(struct rt6_info *rt, void *arg)
2123{
bc3ef660 2124 const struct arg_dev_net *adn = arg;
2125 const struct net_device *dev = adn->dev;
8ed67789 2126
d1918542 2127 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2128 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2129 return -1;
c159d30c 2130
1da177e4
LT
2131 return 0;
2132}
2133
f3db4851 2134void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2135{
8ed67789
DL
2136 struct arg_dev_net adn = {
2137 .dev = dev,
2138 .net = net,
2139 };
2140
2141 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2142 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2143}
2144
/* Walker argument for rt6_mtu_change_route(): device and its new MTU. */
struct rt6_mtu_change_arg {
	struct net_device	*dev;
	unsigned int		mtu;
};
2149
2150static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2151{
2152 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2153 struct inet6_dev *idev;
2154
2155 /* In IPv6 pmtu discovery is not optional,
2156 so that RTAX_MTU lock cannot disable it.
2157 We still use this lock to block changes
2158 caused by addrconf/ndisc.
2159 */
2160
2161 idev = __in6_dev_get(arg->dev);
38308473 2162 if (!idev)
1da177e4
LT
2163 return 0;
2164
2165 /* For administrative MTU increase, there is no way to discover
2166 IPv6 PMTU increase, so PMTU increase should be updated here.
2167 Since RFC 1981 doesn't include administrative MTU increase
2168 update PMTU increase is a MUST. (i.e. jumbo frame)
2169 */
2170 /*
2171 If new MTU is less than route PMTU, this new MTU will be the
2172 lowest MTU in the path, update the route PMTU to reflect PMTU
2173 decreases; if new MTU is greater than route PMTU, and the
2174 old MTU is the lowest MTU in the path, update the route PMTU
2175 to reflect the increase. In this case if the other nodes' MTU
2176 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2177 PMTU discouvery.
2178 */
d1918542 2179 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2180 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2181 (dst_mtu(&rt->dst) >= arg->mtu ||
2182 (dst_mtu(&rt->dst) < arg->mtu &&
2183 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2184 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2185 }
1da177e4
LT
2186 return 0;
2187}
2188
95c96174 2189void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2190{
c71099ac
TG
2191 struct rt6_mtu_change_arg arg = {
2192 .dev = dev,
2193 .mtu = mtu,
2194 };
1da177e4 2195
c346dca1 2196 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2197}
2198
ef7c79ed 2199static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2200 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2201 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2202 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2203 [RTA_PRIORITY] = { .type = NLA_U32 },
2204 [RTA_METRICS] = { .type = NLA_NESTED },
2205};
2206
2207static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2208 struct fib6_config *cfg)
1da177e4 2209{
86872cb5
TG
2210 struct rtmsg *rtm;
2211 struct nlattr *tb[RTA_MAX+1];
2212 int err;
1da177e4 2213
86872cb5
TG
2214 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2215 if (err < 0)
2216 goto errout;
1da177e4 2217
86872cb5
TG
2218 err = -EINVAL;
2219 rtm = nlmsg_data(nlh);
2220 memset(cfg, 0, sizeof(*cfg));
2221
2222 cfg->fc_table = rtm->rtm_table;
2223 cfg->fc_dst_len = rtm->rtm_dst_len;
2224 cfg->fc_src_len = rtm->rtm_src_len;
2225 cfg->fc_flags = RTF_UP;
2226 cfg->fc_protocol = rtm->rtm_protocol;
2227
2228 if (rtm->rtm_type == RTN_UNREACHABLE)
2229 cfg->fc_flags |= RTF_REJECT;
2230
ab79ad14
2231 if (rtm->rtm_type == RTN_LOCAL)
2232 cfg->fc_flags |= RTF_LOCAL;
2233
86872cb5
TG
2234 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2235 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2236 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2237
2238 if (tb[RTA_GATEWAY]) {
2239 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2240 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2241 }
86872cb5
TG
2242
2243 if (tb[RTA_DST]) {
2244 int plen = (rtm->rtm_dst_len + 7) >> 3;
2245
2246 if (nla_len(tb[RTA_DST]) < plen)
2247 goto errout;
2248
2249 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2250 }
86872cb5
TG
2251
2252 if (tb[RTA_SRC]) {
2253 int plen = (rtm->rtm_src_len + 7) >> 3;
2254
2255 if (nla_len(tb[RTA_SRC]) < plen)
2256 goto errout;
2257
2258 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2259 }
86872cb5 2260
c3968a85
DW
2261 if (tb[RTA_PREFSRC])
2262 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2263
86872cb5
TG
2264 if (tb[RTA_OIF])
2265 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2266
2267 if (tb[RTA_PRIORITY])
2268 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2269
2270 if (tb[RTA_METRICS]) {
2271 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2272 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2273 }
86872cb5
TG
2274
2275 if (tb[RTA_TABLE])
2276 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2277
2278 err = 0;
2279errout:
2280 return err;
1da177e4
LT
2281}
2282
c127ea2c 2283static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2284{
86872cb5
TG
2285 struct fib6_config cfg;
2286 int err;
1da177e4 2287
86872cb5
TG
2288 err = rtm_to_fib6_config(skb, nlh, &cfg);
2289 if (err < 0)
2290 return err;
2291
2292 return ip6_route_del(&cfg);
1da177e4
LT
2293}
2294
c127ea2c 2295static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2296{
86872cb5
TG
2297 struct fib6_config cfg;
2298 int err;
1da177e4 2299
86872cb5
TG
2300 err = rtm_to_fib6_config(skb, nlh, &cfg);
2301 if (err < 0)
2302 return err;
2303
2304 return ip6_route_add(&cfg);
1da177e4
LT
2305}
2306
339bf98f
TG
2307static inline size_t rt6_nlmsg_size(void)
2308{
2309 return NLMSG_ALIGN(sizeof(struct rtmsg))
2310 + nla_total_size(16) /* RTA_SRC */
2311 + nla_total_size(16) /* RTA_DST */
2312 + nla_total_size(16) /* RTA_GATEWAY */
2313 + nla_total_size(16) /* RTA_PREFSRC */
2314 + nla_total_size(4) /* RTA_TABLE */
2315 + nla_total_size(4) /* RTA_IIF */
2316 + nla_total_size(4) /* RTA_OIF */
2317 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2318 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2319 + nla_total_size(sizeof(struct rta_cacheinfo));
2320}
2321
191cd582
BH
2322static int rt6_fill_node(struct net *net,
2323 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2324 struct in6_addr *dst, struct in6_addr *src,
2325 int iif, int type, u32 pid, u32 seq,
7bc570c8 2326 int prefix, int nowait, unsigned int flags)
1da177e4 2327{
346f870b 2328 const struct inet_peer *peer;
1da177e4 2329 struct rtmsg *rtm;
2d7202bf 2330 struct nlmsghdr *nlh;
e3703b3d 2331 long expires;
9e762a4a 2332 u32 table;
f2c31e32 2333 struct neighbour *n;
346f870b 2334 u32 ts, tsage;
1da177e4
LT
2335
2336 if (prefix) { /* user wants prefix routes only */
2337 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2338 /* success since this is not a prefix route */
2339 return 1;
2340 }
2341 }
2342
2d7202bf 2343 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2344 if (!nlh)
26932566 2345 return -EMSGSIZE;
2d7202bf
TG
2346
2347 rtm = nlmsg_data(nlh);
1da177e4
LT
2348 rtm->rtm_family = AF_INET6;
2349 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2350 rtm->rtm_src_len = rt->rt6i_src.plen;
2351 rtm->rtm_tos = 0;
c71099ac 2352 if (rt->rt6i_table)
9e762a4a 2353 table = rt->rt6i_table->tb6_id;
c71099ac 2354 else
9e762a4a
PM
2355 table = RT6_TABLE_UNSPEC;
2356 rtm->rtm_table = table;
c78679e8
DM
2357 if (nla_put_u32(skb, RTA_TABLE, table))
2358 goto nla_put_failure;
38308473 2359 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2360 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2361 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2362 rtm->rtm_type = RTN_LOCAL;
d1918542 2363 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2364 rtm->rtm_type = RTN_LOCAL;
2365 else
2366 rtm->rtm_type = RTN_UNICAST;
2367 rtm->rtm_flags = 0;
2368 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2369 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2370 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4
LT
2371 rtm->rtm_protocol = RTPROT_REDIRECT;
2372 else if (rt->rt6i_flags & RTF_ADDRCONF)
2373 rtm->rtm_protocol = RTPROT_KERNEL;
38308473 2374 else if (rt->rt6i_flags & RTF_DEFAULT)
1da177e4
LT
2375 rtm->rtm_protocol = RTPROT_RA;
2376
38308473 2377 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2378 rtm->rtm_flags |= RTM_F_CLONED;
2379
2380 if (dst) {
c78679e8
DM
2381 if (nla_put(skb, RTA_DST, 16, dst))
2382 goto nla_put_failure;
1ab1457c 2383 rtm->rtm_dst_len = 128;
1da177e4 2384 } else if (rtm->rtm_dst_len)
c78679e8
DM
2385 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2386 goto nla_put_failure;
1da177e4
LT
2387#ifdef CONFIG_IPV6_SUBTREES
2388 if (src) {
c78679e8
DM
2389 if (nla_put(skb, RTA_SRC, 16, src))
2390 goto nla_put_failure;
1ab1457c 2391 rtm->rtm_src_len = 128;
c78679e8
DM
2392 } else if (rtm->rtm_src_len &&
2393 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2394 goto nla_put_failure;
1da177e4 2395#endif
7bc570c8
YH
2396 if (iif) {
2397#ifdef CONFIG_IPV6_MROUTE
2398 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2399 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2400 if (err <= 0) {
2401 if (!nowait) {
2402 if (err == 0)
2403 return 0;
2404 goto nla_put_failure;
2405 } else {
2406 if (err == -EMSGSIZE)
2407 goto nla_put_failure;
2408 }
2409 }
2410 } else
2411#endif
c78679e8
DM
2412 if (nla_put_u32(skb, RTA_IIF, iif))
2413 goto nla_put_failure;
7bc570c8 2414 } else if (dst) {
1da177e4 2415 struct in6_addr saddr_buf;
c78679e8
DM
2416 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2417 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2418 goto nla_put_failure;
1da177e4 2419 }
2d7202bf 2420
c3968a85
DW
2421 if (rt->rt6i_prefsrc.plen) {
2422 struct in6_addr saddr_buf;
4e3fd7a0 2423 saddr_buf = rt->rt6i_prefsrc.addr;
c78679e8
DM
2424 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2425 goto nla_put_failure;
c3968a85
DW
2426 }
2427
defb3519 2428 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2429 goto nla_put_failure;
2430
f2c31e32 2431 rcu_read_lock();
27217455 2432 n = dst_get_neighbour_noref(&rt->dst);
94f826b8
ED
2433 if (n) {
2434 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2435 rcu_read_unlock();
2436 goto nla_put_failure;
2437 }
2438 }
f2c31e32 2439 rcu_read_unlock();
2d7202bf 2440
c78679e8
DM
2441 if (rt->dst.dev &&
2442 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2443 goto nla_put_failure;
2444 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2445 goto nla_put_failure;
36e3deae
YH
2446 if (!(rt->rt6i_flags & RTF_EXPIRES))
2447 expires = 0;
d1918542
DM
2448 else if (rt->dst.expires - jiffies < INT_MAX)
2449 expires = rt->dst.expires - jiffies;
36e3deae
YH
2450 else
2451 expires = INT_MAX;
69cdf8f9 2452
97bab73f
DM
2453 peer = NULL;
2454 if (rt6_has_peer(rt))
2455 peer = rt6_peer_ptr(rt);
346f870b
DM
2456 ts = tsage = 0;
2457 if (peer && peer->tcp_ts_stamp) {
2458 ts = peer->tcp_ts;
2459 tsage = get_seconds() - peer->tcp_ts_stamp;
2460 }
2461
2462 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
d8d1f30b 2463 expires, rt->dst.error) < 0)
e3703b3d 2464 goto nla_put_failure;
2d7202bf
TG
2465
2466 return nlmsg_end(skb, nlh);
2467
2468nla_put_failure:
26932566
PM
2469 nlmsg_cancel(skb, nlh);
2470 return -EMSGSIZE;
1da177e4
LT
2471}
2472
1b43af54 2473int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2474{
2475 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2476 int prefix;
2477
2d7202bf
TG
2478 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2479 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2480 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2481 } else
2482 prefix = 0;
2483
191cd582
BH
2484 return rt6_fill_node(arg->net,
2485 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2486 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2487 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2488}
2489
c127ea2c 2490static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2491{
3b1e0a65 2492 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2493 struct nlattr *tb[RTA_MAX+1];
2494 struct rt6_info *rt;
1da177e4 2495 struct sk_buff *skb;
ab364a6f 2496 struct rtmsg *rtm;
4c9483b2 2497 struct flowi6 fl6;
72331bc0 2498 int err, iif = 0, oif = 0;
1da177e4 2499
ab364a6f
TG
2500 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2501 if (err < 0)
2502 goto errout;
1da177e4 2503
ab364a6f 2504 err = -EINVAL;
4c9483b2 2505 memset(&fl6, 0, sizeof(fl6));
1da177e4 2506
ab364a6f
TG
2507 if (tb[RTA_SRC]) {
2508 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2509 goto errout;
2510
4e3fd7a0 2511 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2512 }
2513
2514 if (tb[RTA_DST]) {
2515 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2516 goto errout;
2517
4e3fd7a0 2518 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2519 }
2520
2521 if (tb[RTA_IIF])
2522 iif = nla_get_u32(tb[RTA_IIF]);
2523
2524 if (tb[RTA_OIF])
72331bc0 2525 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2526
2527 if (iif) {
2528 struct net_device *dev;
72331bc0
SL
2529 int flags = 0;
2530
5578689a 2531 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2532 if (!dev) {
2533 err = -ENODEV;
ab364a6f 2534 goto errout;
1da177e4 2535 }
72331bc0
SL
2536
2537 fl6.flowi6_iif = iif;
2538
2539 if (!ipv6_addr_any(&fl6.saddr))
2540 flags |= RT6_LOOKUP_F_HAS_SADDR;
2541
2542 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2543 flags);
2544 } else {
2545 fl6.flowi6_oif = oif;
2546
2547 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2548 }
2549
ab364a6f 2550 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2551 if (!skb) {
2173bff5 2552 dst_release(&rt->dst);
ab364a6f
TG
2553 err = -ENOBUFS;
2554 goto errout;
2555 }
1da177e4 2556
ab364a6f
TG
2557 /* Reserve room for dummy headers, this skb can pass
2558 through good chunk of routing engine.
2559 */
459a98ed 2560 skb_reset_mac_header(skb);
ab364a6f 2561 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2562
d8d1f30b 2563 skb_dst_set(skb, &rt->dst);
1da177e4 2564
4c9483b2 2565 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2566 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2567 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2568 if (err < 0) {
ab364a6f
TG
2569 kfree_skb(skb);
2570 goto errout;
1da177e4
LT
2571 }
2572
5578689a 2573 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2574errout:
1da177e4 2575 return err;
1da177e4
LT
2576}
2577
86872cb5 2578void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2579{
2580 struct sk_buff *skb;
5578689a 2581 struct net *net = info->nl_net;
528c4ceb
DL
2582 u32 seq;
2583 int err;
2584
2585 err = -ENOBUFS;
38308473 2586 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2587
339bf98f 2588 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2589 if (!skb)
21713ebc
TG
2590 goto errout;
2591
191cd582 2592 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2593 event, info->pid, seq, 0, 0, 0);
26932566
PM
2594 if (err < 0) {
2595 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2596 WARN_ON(err == -EMSGSIZE);
2597 kfree_skb(skb);
2598 goto errout;
2599 }
1ce85fe4
PNA
2600 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2601 info->nlh, gfp_any());
2602 return;
21713ebc
TG
2603errout:
2604 if (err < 0)
5578689a 2605 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2606}
2607
8ed67789
DL
2608static int ip6_route_dev_notify(struct notifier_block *this,
2609 unsigned long event, void *data)
2610{
2611 struct net_device *dev = (struct net_device *)data;
c346dca1 2612 struct net *net = dev_net(dev);
8ed67789
DL
2613
2614 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2615 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2616 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2617#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2618 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2619 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2620 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2621 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2622#endif
2623 }
2624
2625 return NOTIFY_OK;
2626}
2627
1da177e4
LT
2628/*
2629 * /proc
2630 */
2631
2632#ifdef CONFIG_PROC_FS
2633
1da177e4
LT
/*
 * Buffer-window bookkeeping for a /proc read.
 * NOTE(review): not referenced by the seq_file based handlers below;
 * possibly a leftover - confirm before removing.
 */
struct rt6_proc_arg {
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2642
2643static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2644{
33120b30 2645 struct seq_file *m = p_arg;
69cce1d1 2646 struct neighbour *n;
1da177e4 2647
4b7a4274 2648 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2649
2650#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2651 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2652#else
33120b30 2653 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2654#endif
f2c31e32 2655 rcu_read_lock();
27217455 2656 n = dst_get_neighbour_noref(&rt->dst);
69cce1d1
DM
2657 if (n) {
2658 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2659 } else {
33120b30 2660 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2661 }
f2c31e32 2662 rcu_read_unlock();
33120b30 2663 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2664 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2665 rt->dst.__use, rt->rt6i_flags,
d1918542 2666 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2667 return 0;
2668}
2669
33120b30 2670static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2671{
f3db4851 2672 struct net *net = (struct net *)m->private;
32b293a5 2673 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2674 return 0;
2675}
1da177e4 2676
33120b30
AD
2677static int ipv6_route_open(struct inode *inode, struct file *file)
2678{
de05c557 2679 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2680}
2681
33120b30
AD
2682static const struct file_operations ipv6_route_proc_fops = {
2683 .owner = THIS_MODULE,
2684 .open = ipv6_route_open,
2685 .read = seq_read,
2686 .llseek = seq_lseek,
b6fcbdb4 2687 .release = single_release_net,
33120b30
AD
2688};
2689
1da177e4
LT
2690static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2691{
69ddb805 2692 struct net *net = (struct net *)seq->private;
1da177e4 2693 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2694 net->ipv6.rt6_stats->fib_nodes,
2695 net->ipv6.rt6_stats->fib_route_nodes,
2696 net->ipv6.rt6_stats->fib_rt_alloc,
2697 net->ipv6.rt6_stats->fib_rt_entries,
2698 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2699 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2700 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2701
2702 return 0;
2703}
2704
2705static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2706{
de05c557 2707 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2708}
2709
9a32144e 2710static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2711 .owner = THIS_MODULE,
2712 .open = rt6_stats_seq_open,
2713 .read = seq_read,
2714 .llseek = seq_lseek,
b6fcbdb4 2715 .release = single_release_net,
1da177e4
LT
2716};
2717#endif /* CONFIG_PROC_FS */
2718
2719#ifdef CONFIG_SYSCTL
2720
1da177e4 2721static
8d65af78 2722int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2723 void __user *buffer, size_t *lenp, loff_t *ppos)
2724{
c486da34
LAG
2725 struct net *net;
2726 int delay;
2727 if (!write)
1da177e4 2728 return -EINVAL;
c486da34
LAG
2729
2730 net = (struct net *)ctl->extra1;
2731 delay = net->ipv6.sysctl.flush_delay;
2732 proc_dointvec(ctl, write, buffer, lenp, ppos);
2733 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2734 return 0;
1da177e4
LT
2735}
2736
760f2d01 2737ctl_table ipv6_route_table_template[] = {
1ab1457c 2738 {
1da177e4 2739 .procname = "flush",
4990509f 2740 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2741 .maxlen = sizeof(int),
89c8b3a1 2742 .mode = 0200,
6d9f239a 2743 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2744 },
2745 {
1da177e4 2746 .procname = "gc_thresh",
9a7ec3a9 2747 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2748 .maxlen = sizeof(int),
2749 .mode = 0644,
6d9f239a 2750 .proc_handler = proc_dointvec,
1da177e4
LT
2751 },
2752 {
1da177e4 2753 .procname = "max_size",
4990509f 2754 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2755 .maxlen = sizeof(int),
2756 .mode = 0644,
6d9f239a 2757 .proc_handler = proc_dointvec,
1da177e4
LT
2758 },
2759 {
1da177e4 2760 .procname = "gc_min_interval",
4990509f 2761 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2762 .maxlen = sizeof(int),
2763 .mode = 0644,
6d9f239a 2764 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2765 },
2766 {
1da177e4 2767 .procname = "gc_timeout",
4990509f 2768 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2769 .maxlen = sizeof(int),
2770 .mode = 0644,
6d9f239a 2771 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2772 },
2773 {
1da177e4 2774 .procname = "gc_interval",
4990509f 2775 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2776 .maxlen = sizeof(int),
2777 .mode = 0644,
6d9f239a 2778 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2779 },
2780 {
1da177e4 2781 .procname = "gc_elasticity",
4990509f 2782 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2783 .maxlen = sizeof(int),
2784 .mode = 0644,
f3d3f616 2785 .proc_handler = proc_dointvec,
1da177e4
LT
2786 },
2787 {
1da177e4 2788 .procname = "mtu_expires",
4990509f 2789 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2790 .maxlen = sizeof(int),
2791 .mode = 0644,
6d9f239a 2792 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2793 },
2794 {
1da177e4 2795 .procname = "min_adv_mss",
4990509f 2796 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2797 .maxlen = sizeof(int),
2798 .mode = 0644,
f3d3f616 2799 .proc_handler = proc_dointvec,
1da177e4
LT
2800 },
2801 {
1da177e4 2802 .procname = "gc_min_interval_ms",
4990509f 2803 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2804 .maxlen = sizeof(int),
2805 .mode = 0644,
6d9f239a 2806 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2807 },
f8572d8f 2808 { }
1da177e4
LT
2809};
2810
2c8c1e72 2811struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2812{
2813 struct ctl_table *table;
2814
2815 table = kmemdup(ipv6_route_table_template,
2816 sizeof(ipv6_route_table_template),
2817 GFP_KERNEL);
5ee09105
YH
2818
2819 if (table) {
2820 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2821 table[0].extra1 = net;
86393e52 2822 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2823 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2824 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2825 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2826 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2827 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2828 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2829 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2830 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2831 }
2832
760f2d01
DL
2833 return table;
2834}
1da177e4
LT
2835#endif
2836
2c8c1e72 2837static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2838{
633d424b 2839 int ret = -ENOMEM;
8ed67789 2840
86393e52
AD
2841 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2842 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2843
fc66f95c
ED
2844 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2845 goto out_ip6_dst_ops;
2846
8ed67789
DL
2847 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2848 sizeof(*net->ipv6.ip6_null_entry),
2849 GFP_KERNEL);
2850 if (!net->ipv6.ip6_null_entry)
fc66f95c 2851 goto out_ip6_dst_entries;
d8d1f30b 2852 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2853 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2854 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2855 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2856 ip6_template_metrics, true);
8ed67789
DL
2857
2858#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2859 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2860 sizeof(*net->ipv6.ip6_prohibit_entry),
2861 GFP_KERNEL);
68fffc67
PZ
2862 if (!net->ipv6.ip6_prohibit_entry)
2863 goto out_ip6_null_entry;
d8d1f30b 2864 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2865 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2866 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2867 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2868 ip6_template_metrics, true);
8ed67789
DL
2869
2870 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2871 sizeof(*net->ipv6.ip6_blk_hole_entry),
2872 GFP_KERNEL);
68fffc67
PZ
2873 if (!net->ipv6.ip6_blk_hole_entry)
2874 goto out_ip6_prohibit_entry;
d8d1f30b 2875 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2876 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2877 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2878 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2879 ip6_template_metrics, true);
8ed67789
DL
2880#endif
2881
b339a47c
PZ
2882 net->ipv6.sysctl.flush_delay = 0;
2883 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2884 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2885 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2886 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2887 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2888 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2889 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2890
cdb18761
DL
2891#ifdef CONFIG_PROC_FS
2892 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2893 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2894#endif
6891a346
BT
2895 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2896
8ed67789
DL
2897 ret = 0;
2898out:
2899 return ret;
f2fc6a54 2900
68fffc67
PZ
2901#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2902out_ip6_prohibit_entry:
2903 kfree(net->ipv6.ip6_prohibit_entry);
2904out_ip6_null_entry:
2905 kfree(net->ipv6.ip6_null_entry);
2906#endif
fc66f95c
ED
2907out_ip6_dst_entries:
2908 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2909out_ip6_dst_ops:
f2fc6a54 2910 goto out;
cdb18761
DL
2911}
2912
2c8c1e72 2913static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2914{
2915#ifdef CONFIG_PROC_FS
2916 proc_net_remove(net, "ipv6_route");
2917 proc_net_remove(net, "rt6_stats");
2918#endif
8ed67789
DL
2919 kfree(net->ipv6.ip6_null_entry);
2920#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2921 kfree(net->ipv6.ip6_prohibit_entry);
2922 kfree(net->ipv6.ip6_blk_hole_entry);
2923#endif
41bb78b4 2924 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2925}
2926
2927static struct pernet_operations ip6_route_net_ops = {
2928 .init = ip6_route_net_init,
2929 .exit = ip6_route_net_exit,
2930};
2931
c3426b47
DM
2932static int __net_init ipv6_inetpeer_init(struct net *net)
2933{
2934 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2935
2936 if (!bp)
2937 return -ENOMEM;
2938 inet_peer_base_init(bp);
2939 net->ipv6.peers = bp;
2940 return 0;
2941}
2942
2943static void __net_exit ipv6_inetpeer_exit(struct net *net)
2944{
2945 struct inet_peer_base *bp = net->ipv6.peers;
2946
2947 net->ipv6.peers = NULL;
56a6b248 2948 inetpeer_invalidate_tree(bp);
c3426b47
DM
2949 kfree(bp);
2950}
2951
2b823f72 2952static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
2953 .init = ipv6_inetpeer_init,
2954 .exit = ipv6_inetpeer_exit,
2955};
2956
8ed67789
DL
2957static struct notifier_block ip6_route_dev_notifier = {
2958 .notifier_call = ip6_route_dev_notify,
2959 .priority = 0,
2960};
2961
433d49c3 2962int __init ip6_route_init(void)
1da177e4 2963{
433d49c3
DL
2964 int ret;
2965
9a7ec3a9
DL
2966 ret = -ENOMEM;
2967 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2968 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2969 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2970 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2971 goto out;
14e50e57 2972
fc66f95c 2973 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2974 if (ret)
bdb3289f 2975 goto out_kmem_cache;
bdb3289f 2976
fc66f95c
ED
2977 ret = register_pernet_subsys(&ip6_route_net_ops);
2978 if (ret)
2979 goto out_dst_entries;
2980
c3426b47
DM
2981 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
2982 if (ret)
2983 goto out_register_subsys;
2984
5dc121e9
AE
2985 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2986
8ed67789
DL
2987 /* Registering of the loopback is done before this portion of code,
2988 * the loopback reference in rt6_info will not be taken, do it
2989 * manually for init_net */
d8d1f30b 2990 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2991 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2992 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2993 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2994 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2995 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2996 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2997 #endif
433d49c3
DL
2998 ret = fib6_init();
2999 if (ret)
c3426b47 3000 goto out_register_inetpeer;
433d49c3 3001
433d49c3
DL
3002 ret = xfrm6_init();
3003 if (ret)
cdb18761 3004 goto out_fib6_init;
c35b7e72 3005
433d49c3
DL
3006 ret = fib6_rules_init();
3007 if (ret)
3008 goto xfrm6_init;
7e5449c2 3009
433d49c3 3010 ret = -ENOBUFS;
c7ac8679
GR
3011 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3012 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3013 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 3014 goto fib6_rules_init;
c127ea2c 3015
8ed67789 3016 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
3017 if (ret)
3018 goto fib6_rules_init;
8ed67789 3019
433d49c3
DL
3020out:
3021 return ret;
3022
3023fib6_rules_init:
433d49c3
DL
3024 fib6_rules_cleanup();
3025xfrm6_init:
433d49c3 3026 xfrm6_fini();
433d49c3 3027out_fib6_init:
433d49c3 3028 fib6_gc_cleanup();
c3426b47
DM
3029out_register_inetpeer:
3030 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789
DL
3031out_register_subsys:
3032 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
3033out_dst_entries:
3034 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3035out_kmem_cache:
f2fc6a54 3036 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3037 goto out;
1da177e4
LT
3038}
3039
3040void ip6_route_cleanup(void)
3041{
8ed67789 3042 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 3043 fib6_rules_cleanup();
1da177e4 3044 xfrm6_fini();
1da177e4 3045 fib6_gc_cleanup();
c3426b47 3046 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3047 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3048 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3049 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3050}