inet: Add inetpeer tree roots to the FIB tables.
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
1da177e4
LT
60
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
1716a961 67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 68 const struct in6_addr *dest);
1da177e4 69static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 70static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 71static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73static void ip6_dst_destroy(struct dst_entry *);
74static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
569d3645 76static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
77
78static int ip6_pkt_discard(struct sk_buff *skb);
79static int ip6_pkt_discard_out(struct sk_buff *skb);
80static void ip6_link_failure(struct sk_buff *skb);
81static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
82
70ceb4f5 83#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 84static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
85 const struct in6_addr *prefix, int prefixlen,
86 const struct in6_addr *gwaddr, int ifindex,
95c96174 87 unsigned int pref);
efa2cea0 88static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
89 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
91#endif
92
06582540
DM
93static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
94{
95 struct rt6_info *rt = (struct rt6_info *) dst;
96 struct inet_peer *peer;
97 u32 *p = NULL;
98
8e2ec639
YZ
99 if (!(rt->dst.flags & DST_HOST))
100 return NULL;
101
fbfe95a4 102 peer = rt6_get_peer_create(rt);
06582540
DM
103 if (peer) {
104 u32 *old_p = __DST_METRICS_PTR(old);
105 unsigned long prev, new;
106
107 p = peer->metrics;
108 if (inet_metrics_new(peer))
109 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
110
111 new = (unsigned long) p;
112 prev = cmpxchg(&dst->_metrics, old, new);
113
114 if (prev != old) {
115 p = __DST_METRICS_PTR(prev);
116 if (prev & DST_METRICS_READ_ONLY)
117 p = NULL;
118 }
119 }
120 return p;
121}
122
39232973
DM
123static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
124{
125 struct in6_addr *p = &rt->rt6i_gateway;
126
a7563f34 127 if (!ipv6_addr_any(p))
39232973
DM
128 return (const void *) p;
129 return daddr;
130}
131
d3aaeb38
DM
132static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
133{
39232973
DM
134 struct rt6_info *rt = (struct rt6_info *) dst;
135 struct neighbour *n;
136
137 daddr = choose_neigh_daddr(rt, daddr);
138 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
139 if (n)
140 return n;
141 return neigh_create(&nd_tbl, daddr, dst->dev);
142}
143
8ade06c6 144static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 145{
8ade06c6
DM
146 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
147 if (!n) {
148 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
149 if (IS_ERR(n))
150 return PTR_ERR(n);
151 }
f83c7790
DM
152 dst_set_neighbour(&rt->dst, n);
153
154 return 0;
d3aaeb38
DM
155}
156
9a7ec3a9 157static struct dst_ops ip6_dst_ops_template = {
1da177e4 158 .family = AF_INET6,
09640e63 159 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
160 .gc = ip6_dst_gc,
161 .gc_thresh = 1024,
162 .check = ip6_dst_check,
0dbaee3b 163 .default_advmss = ip6_default_advmss,
ebb762f2 164 .mtu = ip6_mtu,
06582540 165 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
166 .destroy = ip6_dst_destroy,
167 .ifdown = ip6_dst_ifdown,
168 .negative_advice = ip6_negative_advice,
169 .link_failure = ip6_link_failure,
170 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 171 .local_out = __ip6_local_out,
d3aaeb38 172 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
173};
174
ebb762f2 175static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 176{
618f9bc7
SK
177 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
178
179 return mtu ? : dst->dev->mtu;
ec831ea7
RD
180}
181
14e50e57
DM
182static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
183{
184}
185
0972ddb2
HB
186static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
187 unsigned long old)
188{
189 return NULL;
190}
191
14e50e57
DM
192static struct dst_ops ip6_dst_blackhole_ops = {
193 .family = AF_INET6,
09640e63 194 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
195 .destroy = ip6_dst_destroy,
196 .check = ip6_dst_check,
ebb762f2 197 .mtu = ip6_blackhole_mtu,
214f45c9 198 .default_advmss = ip6_default_advmss,
14e50e57 199 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 200 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 201 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
202};
203
62fa8a84
DM
204static const u32 ip6_template_metrics[RTAX_MAX] = {
205 [RTAX_HOPLIMIT - 1] = 255,
206};
207
bdb3289f 208static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
209 .dst = {
210 .__refcnt = ATOMIC_INIT(1),
211 .__use = 1,
212 .obsolete = -1,
213 .error = -ENETUNREACH,
d8d1f30b
CG
214 .input = ip6_pkt_discard,
215 .output = ip6_pkt_discard_out,
1da177e4
LT
216 },
217 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 218 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
219 .rt6i_metric = ~(u32) 0,
220 .rt6i_ref = ATOMIC_INIT(1),
221};
222
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* Template for the "prohibit" entry: reject route reporting -EACCES. */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" entry: reject route reporting -EINVAL
 * whose input and output handlers are both dst_discard.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
259
1da177e4 260/* allocate dst with ip6_dst_ops */
97bab73f 261static inline struct rt6_info *ip6_dst_alloc(struct net *net,
957c665f
DM
262 struct net_device *dev,
263 int flags)
1da177e4 264{
97bab73f
DM
265 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
266 0, 0, flags);
cf911662 267
97bab73f 268 if (rt) {
fbe58186 269 memset(&rt->rt6i_table, 0,
38308473 270 sizeof(*rt) - sizeof(struct dst_entry));
97bab73f
DM
271 rt6_init_peer(rt, net->ipv6.peers);
272 }
cf911662 273 return rt;
1da177e4
LT
274}
275
276static void ip6_dst_destroy(struct dst_entry *dst)
277{
278 struct rt6_info *rt = (struct rt6_info *)dst;
279 struct inet6_dev *idev = rt->rt6i_idev;
280
8e2ec639
YZ
281 if (!(rt->dst.flags & DST_HOST))
282 dst_destroy_metrics_generic(dst);
283
38308473 284 if (idev) {
1da177e4
LT
285 rt->rt6i_idev = NULL;
286 in6_dev_put(idev);
1ab1457c 287 }
1716a961
G
288
289 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
290 dst_release(dst->from);
291
97bab73f
DM
292 if (rt6_has_peer(rt)) {
293 struct inet_peer *peer = rt6_peer_ptr(rt);
b3419363
DM
294 inet_putpeer(peer);
295 }
296}
297
6431cbc2
DM
298static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
299
300static u32 rt6_peer_genid(void)
301{
302 return atomic_read(&__rt6_peer_genid);
303}
304
b3419363
DM
305void rt6_bind_peer(struct rt6_info *rt, int create)
306{
97bab73f 307 struct inet_peer_base *base;
b3419363
DM
308 struct inet_peer *peer;
309
97bab73f
DM
310 base = inetpeer_base_ptr(rt->_rt6i_peer);
311 if (!base)
312 return;
313
314 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
315 if (!rt6_set_peer(rt, peer))
b3419363 316 inet_putpeer(peer);
6431cbc2
DM
317 else
318 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
319}
320
321static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
322 int how)
323{
324 struct rt6_info *rt = (struct rt6_info *)dst;
325 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 326 struct net_device *loopback_dev =
c346dca1 327 dev_net(dev)->loopback_dev;
1da177e4 328
38308473 329 if (dev != loopback_dev && idev && idev->dev == dev) {
5a3e55d6
DL
330 struct inet6_dev *loopback_idev =
331 in6_dev_get(loopback_dev);
38308473 332 if (loopback_idev) {
1da177e4
LT
333 rt->rt6i_idev = loopback_idev;
334 in6_dev_put(idev);
335 }
336 }
337}
338
a50feda5 339static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 340{
1716a961
G
341 struct rt6_info *ort = NULL;
342
343 if (rt->rt6i_flags & RTF_EXPIRES) {
344 if (time_after(jiffies, rt->dst.expires))
a50feda5 345 return true;
1716a961
G
346 } else if (rt->dst.from) {
347 ort = (struct rt6_info *) rt->dst.from;
348 return (ort->rt6i_flags & RTF_EXPIRES) &&
349 time_after(jiffies, ort->dst.expires);
350 }
a50feda5 351 return false;
1da177e4
LT
352}
353
a50feda5 354static bool rt6_need_strict(const struct in6_addr *daddr)
c71099ac 355{
a02cec21
ED
356 return ipv6_addr_type(daddr) &
357 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
358}
359
1da177e4 360/*
c71099ac 361 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
362 */
363
8ed67789
DL
364static inline struct rt6_info *rt6_device_match(struct net *net,
365 struct rt6_info *rt,
b71d1d42 366 const struct in6_addr *saddr,
1da177e4 367 int oif,
d420895e 368 int flags)
1da177e4
LT
369{
370 struct rt6_info *local = NULL;
371 struct rt6_info *sprt;
372
dd3abc4e
YH
373 if (!oif && ipv6_addr_any(saddr))
374 goto out;
375
d8d1f30b 376 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 377 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
378
379 if (oif) {
1da177e4
LT
380 if (dev->ifindex == oif)
381 return sprt;
382 if (dev->flags & IFF_LOOPBACK) {
38308473 383 if (!sprt->rt6i_idev ||
1da177e4 384 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 385 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 386 continue;
1ab1457c 387 if (local && (!oif ||
1da177e4
LT
388 local->rt6i_idev->dev->ifindex == oif))
389 continue;
390 }
391 local = sprt;
392 }
dd3abc4e
YH
393 } else {
394 if (ipv6_chk_addr(net, saddr, dev,
395 flags & RT6_LOOKUP_F_IFACE))
396 return sprt;
1da177e4 397 }
dd3abc4e 398 }
1da177e4 399
dd3abc4e 400 if (oif) {
1da177e4
LT
401 if (local)
402 return local;
403
d420895e 404 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 405 return net->ipv6.ip6_null_entry;
1da177e4 406 }
dd3abc4e 407out:
1da177e4
LT
408 return rt;
409}
410
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the route's neighbour is not in a NUD_VALID state and the per-device
 * rtr_probe_interval has elapsed since neigh->updated, stamp the neighbour
 * and send a Neighbour Solicitation to the solicited-node multicast
 * address of the neighbour's address.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	if (neigh && !(neigh->nud_state & NUD_VALID)) {
		bool probe_due;

		/* Re-check the state and the rate limit under the lock. */
		read_lock_bh(&neigh->lock);
		probe_due = !(neigh->nud_state & NUD_VALID) &&
			    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
		if (probe_due)
			neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		if (probe_due) {
			struct in6_addr mcaddr;
			struct in6_addr *target;

			target = (struct in6_addr *)&neigh->primary_key;
			addrconf_addr_solict_mult(target, &mcaddr);
			ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
		}
	}
	rcu_read_unlock();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
450
1da177e4 451/*
554cfb7e 452 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 453 */
b6f99a21 454static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 455{
d1918542 456 struct net_device *dev = rt->dst.dev;
161980f4 457 if (!oif || dev->ifindex == oif)
554cfb7e 458 return 2;
161980f4
DM
459 if ((dev->flags & IFF_LOOPBACK) &&
460 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
461 return 1;
462 return 0;
554cfb7e 463}
1da177e4 464
b6f99a21 465static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 466{
f2c31e32 467 struct neighbour *neigh;
398bcbeb 468 int m;
f2c31e32
ED
469
470 rcu_read_lock();
27217455 471 neigh = dst_get_neighbour_noref(&rt->dst);
4d0c5911
YH
472 if (rt->rt6i_flags & RTF_NONEXTHOP ||
473 !(rt->rt6i_flags & RTF_GATEWAY))
474 m = 1;
475 else if (neigh) {
554cfb7e
YH
476 read_lock_bh(&neigh->lock);
477 if (neigh->nud_state & NUD_VALID)
4d0c5911 478 m = 2;
398bcbeb
YH
479#ifdef CONFIG_IPV6_ROUTER_PREF
480 else if (neigh->nud_state & NUD_FAILED)
481 m = 0;
482#endif
483 else
ea73ee23 484 m = 1;
554cfb7e 485 read_unlock_bh(&neigh->lock);
398bcbeb
YH
486 } else
487 m = 0;
f2c31e32 488 rcu_read_unlock();
554cfb7e 489 return m;
1da177e4
LT
490}
491
554cfb7e
YH
492static int rt6_score_route(struct rt6_info *rt, int oif,
493 int strict)
1da177e4 494{
4d0c5911 495 int m, n;
1ab1457c 496
4d0c5911 497 m = rt6_check_dev(rt, oif);
77d16f45 498 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 499 return -1;
ebacaaa0
YH
500#ifdef CONFIG_IPV6_ROUTER_PREF
501 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
502#endif
4d0c5911 503 n = rt6_check_neigh(rt);
557e92ef 504 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
505 return -1;
506 return m;
507}
508
f11e6659
DM
509static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
510 int *mpri, struct rt6_info *match)
554cfb7e 511{
f11e6659
DM
512 int m;
513
514 if (rt6_check_expired(rt))
515 goto out;
516
517 m = rt6_score_route(rt, oif, strict);
518 if (m < 0)
519 goto out;
520
521 if (m > *mpri) {
522 if (strict & RT6_LOOKUP_F_REACHABLE)
523 rt6_probe(match);
524 *mpri = m;
525 match = rt;
526 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
527 rt6_probe(rt);
528 }
529
530out:
531 return match;
532}
533
534static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
535 struct rt6_info *rr_head,
536 u32 metric, int oif, int strict)
537{
538 struct rt6_info *rt, *match;
554cfb7e 539 int mpri = -1;
1da177e4 540
f11e6659
DM
541 match = NULL;
542 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 543 rt = rt->dst.rt6_next)
f11e6659
DM
544 match = find_match(rt, oif, strict, &mpri, match);
545 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 546 rt = rt->dst.rt6_next)
f11e6659 547 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 548
f11e6659
DM
549 return match;
550}
1da177e4 551
f11e6659
DM
552static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
553{
554 struct rt6_info *match, *rt0;
8ed67789 555 struct net *net;
1da177e4 556
f11e6659
DM
557 rt0 = fn->rr_ptr;
558 if (!rt0)
559 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 560
f11e6659 561 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 562
554cfb7e 563 if (!match &&
f11e6659 564 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 565 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 566
554cfb7e 567 /* no entries matched; do round-robin */
f11e6659
DM
568 if (!next || next->rt6i_metric != rt0->rt6i_metric)
569 next = fn->leaf;
570
571 if (next != rt0)
572 fn->rr_ptr = next;
1da177e4 573 }
1da177e4 574
d1918542 575 net = dev_net(rt0->dst.dev);
a02cec21 576 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
577}
578
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option received on @dev.
 *
 * @opt/@len: the option and its length in bytes
 * @gwaddr:   gateway address to use for the route
 *
 * Validates the option, then deletes, adds or refreshes the matching
 * route-info route depending on the advertised lifetime.
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	} else if (rt) {
		/* Existing route: refresh the preference bits. */
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	}

	if (rt) {
		if (addrconf_finite_timeout(lifetime))
			rt6_set_expires(rt, jiffies + HZ * lifetime);
		else
			rt6_clean_expires(rt);

		dst_release(&rt->dst);
	}
	return 0;
}
#endif
651
/*
 * BACKTRACK - climb the fib6 tree when the lookup produced the null entry.
 *
 * Expects the enclosing function to provide the locals `rt` and `fn` and
 * the labels `out` and `restart`. While `rt` is the namespace's
 * ip6_null_entry, moves `fn` to its parent (or into the parent's source
 * subtree via fib6_lookup() when there is one that is not `fn` itself).
 * Jumps to `out` on reaching a node flagged RTN_TL_ROOT and to `restart`
 * on reaching a node flagged RTN_RTINFO.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 669
8ed67789
DL
670static struct rt6_info *ip6_pol_route_lookup(struct net *net,
671 struct fib6_table *table,
4c9483b2 672 struct flowi6 *fl6, int flags)
1da177e4
LT
673{
674 struct fib6_node *fn;
675 struct rt6_info *rt;
676
c71099ac 677 read_lock_bh(&table->tb6_lock);
4c9483b2 678 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
679restart:
680 rt = fn->leaf;
4c9483b2
DM
681 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
682 BACKTRACK(net, &fl6->saddr);
c71099ac 683out:
d8d1f30b 684 dst_use(&rt->dst, jiffies);
c71099ac 685 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
686 return rt;
687
688}
689
ea6e574e
FW
690struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
691 int flags)
692{
693 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
694}
695EXPORT_SYMBOL_GPL(ip6_route_lookup);
696
9acd9f3a
YH
697struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
698 const struct in6_addr *saddr, int oif, int strict)
c71099ac 699{
4c9483b2
DM
700 struct flowi6 fl6 = {
701 .flowi6_oif = oif,
702 .daddr = *daddr,
c71099ac
TG
703 };
704 struct dst_entry *dst;
77d16f45 705 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 706
adaa70bb 707 if (saddr) {
4c9483b2 708 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
709 flags |= RT6_LOOKUP_F_HAS_SADDR;
710 }
711
4c9483b2 712 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
713 if (dst->error == 0)
714 return (struct rt6_info *) dst;
715
716 dst_release(dst);
717
1da177e4
LT
718 return NULL;
719}
720
7159039a
YH
721EXPORT_SYMBOL(rt6_lookup);
722
/* ip6_ins_rt is called with table->tb6_lock NOT held.
 * It takes a new route entry; if the addition fails for any reason,
 * the route is freed. In any case, if the caller does not hold a
 * reference to it, it may be destroyed.
 */
728
86872cb5 729static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
730{
731 int err;
c71099ac 732 struct fib6_table *table;
1da177e4 733
c71099ac
TG
734 table = rt->rt6i_table;
735 write_lock_bh(&table->tb6_lock);
86872cb5 736 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 737 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
738
739 return err;
740}
741
40e22e8f
TG
742int ip6_ins_rt(struct rt6_info *rt)
743{
4d1169c1 744 struct nl_info info = {
d1918542 745 .nl_net = dev_net(rt->dst.dev),
4d1169c1 746 };
528c4ceb 747 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
748}
749
1716a961 750static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 751 const struct in6_addr *daddr,
b71d1d42 752 const struct in6_addr *saddr)
1da177e4 753{
1da177e4
LT
754 struct rt6_info *rt;
755
756 /*
757 * Clone the route.
758 */
759
21efcfa0 760 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
761
762 if (rt) {
14deae41
DM
763 int attempts = !in_softirq();
764
38308473 765 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 766 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 767 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 768 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 769 rt->rt6i_gateway = *daddr;
58c4fb86 770 }
1da177e4 771
1da177e4 772 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
773
774#ifdef CONFIG_IPV6_SUBTREES
775 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 776 rt->rt6i_src.addr = *saddr;
1da177e4
LT
777 rt->rt6i_src.plen = 128;
778 }
779#endif
780
14deae41 781 retry:
8ade06c6 782 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 783 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
784 int saved_rt_min_interval =
785 net->ipv6.sysctl.ip6_rt_gc_min_interval;
786 int saved_rt_elasticity =
787 net->ipv6.sysctl.ip6_rt_gc_elasticity;
788
789 if (attempts-- > 0) {
790 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
791 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
792
86393e52 793 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
794
795 net->ipv6.sysctl.ip6_rt_gc_elasticity =
796 saved_rt_elasticity;
797 net->ipv6.sysctl.ip6_rt_gc_min_interval =
798 saved_rt_min_interval;
799 goto retry;
800 }
801
f3213831 802 net_warn_ratelimited("Neighbour table overflow\n");
d8d1f30b 803 dst_free(&rt->dst);
14deae41
DM
804 return NULL;
805 }
95a9a5ba 806 }
1da177e4 807
95a9a5ba
YH
808 return rt;
809}
1da177e4 810
21efcfa0
ED
811static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
812 const struct in6_addr *daddr)
299d9939 813{
21efcfa0
ED
814 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
815
299d9939 816 if (rt) {
299d9939 817 rt->rt6i_flags |= RTF_CACHE;
27217455 818 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
299d9939
YH
819 }
820 return rt;
821}
822
8ed67789 823static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 824 struct flowi6 *fl6, int flags)
1da177e4
LT
825{
826 struct fib6_node *fn;
519fbd87 827 struct rt6_info *rt, *nrt;
c71099ac 828 int strict = 0;
1da177e4 829 int attempts = 3;
519fbd87 830 int err;
53b7997f 831 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 832
77d16f45 833 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
834
835relookup:
c71099ac 836 read_lock_bh(&table->tb6_lock);
1da177e4 837
8238dd06 838restart_2:
4c9483b2 839 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
840
841restart:
4acad72d 842 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 843
4c9483b2 844 BACKTRACK(net, &fl6->saddr);
8ed67789 845 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 846 rt->rt6i_flags & RTF_CACHE)
1ddef044 847 goto out;
1da177e4 848
d8d1f30b 849 dst_hold(&rt->dst);
c71099ac 850 read_unlock_bh(&table->tb6_lock);
fb9de91e 851
27217455 852 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 853 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 854 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 855 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
856 else
857 goto out2;
e40cf353 858
d8d1f30b 859 dst_release(&rt->dst);
8ed67789 860 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 861
d8d1f30b 862 dst_hold(&rt->dst);
519fbd87 863 if (nrt) {
40e22e8f 864 err = ip6_ins_rt(nrt);
519fbd87 865 if (!err)
1da177e4 866 goto out2;
1da177e4 867 }
1da177e4 868
519fbd87
YH
869 if (--attempts <= 0)
870 goto out2;
871
872 /*
c71099ac 873 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
874 * released someone could insert this route. Relookup.
875 */
d8d1f30b 876 dst_release(&rt->dst);
519fbd87
YH
877 goto relookup;
878
879out:
8238dd06
YH
880 if (reachable) {
881 reachable = 0;
882 goto restart_2;
883 }
d8d1f30b 884 dst_hold(&rt->dst);
c71099ac 885 read_unlock_bh(&table->tb6_lock);
1da177e4 886out2:
d8d1f30b
CG
887 rt->dst.lastuse = jiffies;
888 rt->dst.__use++;
c71099ac
TG
889
890 return rt;
1da177e4
LT
891}
892
8ed67789 893static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 894 struct flowi6 *fl6, int flags)
4acad72d 895{
4c9483b2 896 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
897}
898
72331bc0
SL
899static struct dst_entry *ip6_route_input_lookup(struct net *net,
900 struct net_device *dev,
901 struct flowi6 *fl6, int flags)
902{
903 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
904 flags |= RT6_LOOKUP_F_IFACE;
905
906 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
907}
908
c71099ac
TG
909void ip6_route_input(struct sk_buff *skb)
910{
b71d1d42 911 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 912 struct net *net = dev_net(skb->dev);
adaa70bb 913 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
914 struct flowi6 fl6 = {
915 .flowi6_iif = skb->dev->ifindex,
916 .daddr = iph->daddr,
917 .saddr = iph->saddr,
38308473 918 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
919 .flowi6_mark = skb->mark,
920 .flowi6_proto = iph->nexthdr,
c71099ac 921 };
adaa70bb 922
72331bc0 923 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
924}
925
8ed67789 926static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 927 struct flowi6 *fl6, int flags)
1da177e4 928{
4c9483b2 929 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
930}
931
9c7a4f9c 932struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 933 struct flowi6 *fl6)
c71099ac
TG
934{
935 int flags = 0;
936
4c9483b2 937 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 938 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 939
4c9483b2 940 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 941 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
942 else if (sk)
943 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 944
4c9483b2 945 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
946}
947
7159039a 948EXPORT_SYMBOL(ip6_route_output);
1da177e4 949
2774c131 950struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 951{
5c1e6aa3 952 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
953 struct dst_entry *new = NULL;
954
5c1e6aa3 955 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 956 if (rt) {
cf911662 957 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
97bab73f 958 rt6_init_peer(rt, net->ipv6.peers);
cf911662 959
d8d1f30b 960 new = &rt->dst;
14e50e57 961
14e50e57 962 new->__use = 1;
352e512c
HX
963 new->input = dst_discard;
964 new->output = dst_discard;
14e50e57 965
21efcfa0
ED
966 if (dst_metrics_read_only(&ort->dst))
967 new->_metrics = ort->dst._metrics;
968 else
969 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
970 rt->rt6i_idev = ort->rt6i_idev;
971 if (rt->rt6i_idev)
972 in6_dev_hold(rt->rt6i_idev);
14e50e57 973
4e3fd7a0 974 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
975 rt->rt6i_flags = ort->rt6i_flags;
976 rt6_clean_expires(rt);
14e50e57
DM
977 rt->rt6i_metric = 0;
978
979 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
980#ifdef CONFIG_IPV6_SUBTREES
981 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
982#endif
983
984 dst_free(new);
985 }
986
69ead7af
DM
987 dst_release(dst_orig);
988 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 989}
14e50e57 990
1da177e4
LT
991/*
992 * Destination cache support functions
993 */
994
995static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
996{
997 struct rt6_info *rt;
998
999 rt = (struct rt6_info *) dst;
1000
6431cbc2
DM
1001 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1002 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
97bab73f 1003 if (!rt6_has_peer(rt))
6431cbc2
DM
1004 rt6_bind_peer(rt, 0);
1005 rt->rt6i_peer_genid = rt6_peer_genid();
1006 }
1da177e4 1007 return dst;
6431cbc2 1008 }
1da177e4
LT
1009 return NULL;
1010}
1011
1012static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1013{
1014 struct rt6_info *rt = (struct rt6_info *) dst;
1015
1016 if (rt) {
54c1a859
YH
1017 if (rt->rt6i_flags & RTF_CACHE) {
1018 if (rt6_check_expired(rt)) {
1019 ip6_del_rt(rt);
1020 dst = NULL;
1021 }
1022 } else {
1da177e4 1023 dst_release(dst);
54c1a859
YH
1024 dst = NULL;
1025 }
1da177e4 1026 }
54c1a859 1027 return dst;
1da177e4
LT
1028}
1029
1030static void ip6_link_failure(struct sk_buff *skb)
1031{
1032 struct rt6_info *rt;
1033
3ffe533c 1034 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1035
adf30907 1036 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1037 if (rt) {
1716a961
G
1038 if (rt->rt6i_flags & RTF_CACHE)
1039 rt6_update_expires(rt, 0);
1040 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1da177e4
LT
1041 rt->rt6i_node->fn_sernum = -1;
1042 }
1043}
1044
1045static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1046{
1047 struct rt6_info *rt6 = (struct rt6_info*)dst;
1048
1049 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1050 rt6->rt6i_flags |= RTF_MODIFIED;
1051 if (mtu < IPV6_MIN_MTU) {
defb3519 1052 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1053 mtu = IPV6_MIN_MTU;
defb3519
DM
1054 features |= RTAX_FEATURE_ALLFRAG;
1055 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1056 }
defb3519 1057 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
1058 }
1059}
1060
0dbaee3b 1061static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1062{
0dbaee3b
DM
1063 struct net_device *dev = dst->dev;
1064 unsigned int mtu = dst_mtu(dst);
1065 struct net *net = dev_net(dev);
1066
1da177e4
LT
1067 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1068
5578689a
DL
1069 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1070 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1071
1072 /*
1ab1457c
YH
1073 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1074 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1075 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1076 * rely only on pmtu discovery"
1077 */
1078 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1079 mtu = IPV6_MAXPLEN;
1080 return mtu;
1081}
1082
ebb762f2 1083static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1084{
d33e4553 1085 struct inet6_dev *idev;
618f9bc7
SK
1086 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1087
1088 if (mtu)
1089 return mtu;
1090
1091 mtu = IPV6_MIN_MTU;
d33e4553
DM
1092
1093 rcu_read_lock();
1094 idev = __in6_dev_get(dst->dev);
1095 if (idev)
1096 mtu = idev->cnf.mtu6;
1097 rcu_read_unlock();
1098
1099 return mtu;
1100}
1101
3b00944c
YH
/* Lock taken (BH-safe) around accesses to icmp6_dst_gc_list. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
/* Singly linked list (via dst->next) of dsts made by icmp6_dst_alloc(). */
static struct dst_entry *icmp6_dst_gc_list;
5d0bbeeb 1104
3b00944c 1105struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1106 struct neighbour *neigh,
87a11578 1107 struct flowi6 *fl6)
1da177e4 1108{
87a11578 1109 struct dst_entry *dst;
1da177e4
LT
1110 struct rt6_info *rt;
1111 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1112 struct net *net = dev_net(dev);
1da177e4 1113
38308473 1114 if (unlikely(!idev))
122bdf67 1115 return ERR_PTR(-ENODEV);
1da177e4 1116
97bab73f 1117 rt = ip6_dst_alloc(net, dev, 0);
38308473 1118 if (unlikely(!rt)) {
1da177e4 1119 in6_dev_put(idev);
87a11578 1120 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1121 goto out;
1122 }
1123
1da177e4
LT
1124 if (neigh)
1125 neigh_hold(neigh);
14deae41 1126 else {
f83c7790 1127 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
b43faac6 1128 if (IS_ERR(neigh)) {
252c3d84 1129 in6_dev_put(idev);
b43faac6
DM
1130 dst_free(&rt->dst);
1131 return ERR_CAST(neigh);
1132 }
14deae41 1133 }
1da177e4 1134
8e2ec639
YZ
1135 rt->dst.flags |= DST_HOST;
1136 rt->dst.output = ip6_output;
69cce1d1 1137 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1138 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1139 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1140 rt->rt6i_dst.plen = 128;
1141 rt->rt6i_idev = idev;
7011687f 1142 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1143
3b00944c 1144 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1145 rt->dst.next = icmp6_dst_gc_list;
1146 icmp6_dst_gc_list = &rt->dst;
3b00944c 1147 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1148
5578689a 1149 fib6_force_start_gc(net);
1da177e4 1150
87a11578
DM
1151 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1152
1da177e4 1153out:
87a11578 1154 return dst;
1da177e4
LT
1155}
1156
3d0f24a7 1157int icmp6_dst_gc(void)
1da177e4 1158{
e9476e95 1159 struct dst_entry *dst, **pprev;
3d0f24a7 1160 int more = 0;
1da177e4 1161
3b00944c
YH
1162 spin_lock_bh(&icmp6_dst_lock);
1163 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1164
1da177e4
LT
1165 while ((dst = *pprev) != NULL) {
1166 if (!atomic_read(&dst->__refcnt)) {
1167 *pprev = dst->next;
1168 dst_free(dst);
1da177e4
LT
1169 } else {
1170 pprev = &dst->next;
3d0f24a7 1171 ++more;
1da177e4
LT
1172 }
1173 }
1174
3b00944c 1175 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1176
3d0f24a7 1177 return more;
1da177e4
LT
1178}
1179
1e493d19
DM
1180static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1181 void *arg)
1182{
1183 struct dst_entry *dst, **pprev;
1184
1185 spin_lock_bh(&icmp6_dst_lock);
1186 pprev = &icmp6_dst_gc_list;
1187 while ((dst = *pprev) != NULL) {
1188 struct rt6_info *rt = (struct rt6_info *) dst;
1189 if (func(rt, arg)) {
1190 *pprev = dst->next;
1191 dst_free(dst);
1192 } else {
1193 pprev = &dst->next;
1194 }
1195 }
1196 spin_unlock_bh(&icmp6_dst_lock);
1197}
1198
569d3645 1199static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1200{
1da177e4 1201 unsigned long now = jiffies;
86393e52 1202 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1203 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1204 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1205 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1206 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1207 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1208 int entries;
7019b78e 1209
fc66f95c 1210 entries = dst_entries_get_fast(ops);
7019b78e 1211 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1212 entries <= rt_max_size)
1da177e4
LT
1213 goto out;
1214
6891a346
BT
1215 net->ipv6.ip6_rt_gc_expire++;
1216 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1217 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1218 entries = dst_entries_get_slow(ops);
1219 if (entries < ops->gc_thresh)
7019b78e 1220 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1221out:
7019b78e 1222 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1223 return entries > rt_max_size;
1da177e4
LT
1224}
1225
1226/* Clean host part of a prefix. Not necessary in radix tree,
1227 but results in cleaner routing tables.
1228
1229 Remove it only when all the things will work!
1230 */
1231
6b75d090 1232int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1233{
5170ae82 1234 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1235 if (hoplimit == 0) {
6b75d090 1236 struct net_device *dev = dst->dev;
c68f24cc
ED
1237 struct inet6_dev *idev;
1238
1239 rcu_read_lock();
1240 idev = __in6_dev_get(dev);
1241 if (idev)
6b75d090 1242 hoplimit = idev->cnf.hop_limit;
c68f24cc 1243 else
53b7997f 1244 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1245 rcu_read_unlock();
1da177e4
LT
1246 }
1247 return hoplimit;
1248}
abbf46ae 1249EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1250
1251/*
1252 *
1253 */
1254
86872cb5 1255int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1256{
1257 int err;
5578689a 1258 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1259 struct rt6_info *rt = NULL;
1260 struct net_device *dev = NULL;
1261 struct inet6_dev *idev = NULL;
c71099ac 1262 struct fib6_table *table;
1da177e4
LT
1263 int addr_type;
1264
86872cb5 1265 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1266 return -EINVAL;
1267#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1268 if (cfg->fc_src_len)
1da177e4
LT
1269 return -EINVAL;
1270#endif
86872cb5 1271 if (cfg->fc_ifindex) {
1da177e4 1272 err = -ENODEV;
5578689a 1273 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1274 if (!dev)
1275 goto out;
1276 idev = in6_dev_get(dev);
1277 if (!idev)
1278 goto out;
1279 }
1280
86872cb5
TG
1281 if (cfg->fc_metric == 0)
1282 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1283
d71314b4 1284 err = -ENOBUFS;
38308473
DM
1285 if (cfg->fc_nlinfo.nlh &&
1286 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1287 table = fib6_get_table(net, cfg->fc_table);
38308473 1288 if (!table) {
f3213831 1289 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1290 table = fib6_new_table(net, cfg->fc_table);
1291 }
1292 } else {
1293 table = fib6_new_table(net, cfg->fc_table);
1294 }
38308473
DM
1295
1296 if (!table)
c71099ac 1297 goto out;
c71099ac 1298
97bab73f 1299 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT);
1da177e4 1300
38308473 1301 if (!rt) {
1da177e4
LT
1302 err = -ENOMEM;
1303 goto out;
1304 }
1305
d8d1f30b 1306 rt->dst.obsolete = -1;
1716a961
G
1307
1308 if (cfg->fc_flags & RTF_EXPIRES)
1309 rt6_set_expires(rt, jiffies +
1310 clock_t_to_jiffies(cfg->fc_expires));
1311 else
1312 rt6_clean_expires(rt);
1da177e4 1313
86872cb5
TG
1314 if (cfg->fc_protocol == RTPROT_UNSPEC)
1315 cfg->fc_protocol = RTPROT_BOOT;
1316 rt->rt6i_protocol = cfg->fc_protocol;
1317
1318 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1319
1320 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1321 rt->dst.input = ip6_mc_input;
ab79ad14
1322 else if (cfg->fc_flags & RTF_LOCAL)
1323 rt->dst.input = ip6_input;
1da177e4 1324 else
d8d1f30b 1325 rt->dst.input = ip6_forward;
1da177e4 1326
d8d1f30b 1327 rt->dst.output = ip6_output;
1da177e4 1328
86872cb5
TG
1329 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1330 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1331 if (rt->rt6i_dst.plen == 128)
11d53b49 1332 rt->dst.flags |= DST_HOST;
1da177e4 1333
8e2ec639
YZ
1334 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1335 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1336 if (!metrics) {
1337 err = -ENOMEM;
1338 goto out;
1339 }
1340 dst_init_metrics(&rt->dst, metrics, 0);
1341 }
1da177e4 1342#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1343 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1344 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1345#endif
1346
86872cb5 1347 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1348
1349 /* We cannot add true routes via loopback here,
1350 they would result in kernel looping; promote them to reject routes
1351 */
86872cb5 1352 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1353 (dev && (dev->flags & IFF_LOOPBACK) &&
1354 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1355 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1356 /* hold loopback dev/idev if we haven't done so. */
5578689a 1357 if (dev != net->loopback_dev) {
1da177e4
LT
1358 if (dev) {
1359 dev_put(dev);
1360 in6_dev_put(idev);
1361 }
5578689a 1362 dev = net->loopback_dev;
1da177e4
LT
1363 dev_hold(dev);
1364 idev = in6_dev_get(dev);
1365 if (!idev) {
1366 err = -ENODEV;
1367 goto out;
1368 }
1369 }
d8d1f30b
CG
1370 rt->dst.output = ip6_pkt_discard_out;
1371 rt->dst.input = ip6_pkt_discard;
1372 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1373 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1374 goto install_route;
1375 }
1376
86872cb5 1377 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1378 const struct in6_addr *gw_addr;
1da177e4
LT
1379 int gwa_type;
1380
86872cb5 1381 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1382 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1383 gwa_type = ipv6_addr_type(gw_addr);
1384
1385 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1386 struct rt6_info *grt;
1387
1388 /* IPv6 strictly inhibits using not link-local
1389 addresses as nexthop address.
1390 Otherwise, router will not able to send redirects.
1391 It is very good, but in some (rare!) circumstances
1392 (SIT, PtP, NBMA NOARP links) it is handy to allow
1393 some exceptions. --ANK
1394 */
1395 err = -EINVAL;
38308473 1396 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1397 goto out;
1398
5578689a 1399 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1400
1401 err = -EHOSTUNREACH;
38308473 1402 if (!grt)
1da177e4
LT
1403 goto out;
1404 if (dev) {
d1918542 1405 if (dev != grt->dst.dev) {
d8d1f30b 1406 dst_release(&grt->dst);
1da177e4
LT
1407 goto out;
1408 }
1409 } else {
d1918542 1410 dev = grt->dst.dev;
1da177e4
LT
1411 idev = grt->rt6i_idev;
1412 dev_hold(dev);
1413 in6_dev_hold(grt->rt6i_idev);
1414 }
38308473 1415 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1416 err = 0;
d8d1f30b 1417 dst_release(&grt->dst);
1da177e4
LT
1418
1419 if (err)
1420 goto out;
1421 }
1422 err = -EINVAL;
38308473 1423 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1424 goto out;
1425 }
1426
1427 err = -ENODEV;
38308473 1428 if (!dev)
1da177e4
LT
1429 goto out;
1430
c3968a85
DW
1431 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1432 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1433 err = -EINVAL;
1434 goto out;
1435 }
4e3fd7a0 1436 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1437 rt->rt6i_prefsrc.plen = 128;
1438 } else
1439 rt->rt6i_prefsrc.plen = 0;
1440
86872cb5 1441 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1442 err = rt6_bind_neighbour(rt, dev);
f83c7790 1443 if (err)
1da177e4 1444 goto out;
1da177e4
LT
1445 }
1446
86872cb5 1447 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1448
1449install_route:
86872cb5
TG
1450 if (cfg->fc_mx) {
1451 struct nlattr *nla;
1452 int remaining;
1453
1454 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1455 int type = nla_type(nla);
86872cb5
TG
1456
1457 if (type) {
1458 if (type > RTAX_MAX) {
1da177e4
LT
1459 err = -EINVAL;
1460 goto out;
1461 }
86872cb5 1462
defb3519 1463 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1464 }
1da177e4
LT
1465 }
1466 }
1467
d8d1f30b 1468 rt->dst.dev = dev;
1da177e4 1469 rt->rt6i_idev = idev;
c71099ac 1470 rt->rt6i_table = table;
63152fc0 1471
c346dca1 1472 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1473
86872cb5 1474 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1475
1476out:
1477 if (dev)
1478 dev_put(dev);
1479 if (idev)
1480 in6_dev_put(idev);
1481 if (rt)
d8d1f30b 1482 dst_free(&rt->dst);
1da177e4
LT
1483 return err;
1484}
1485
86872cb5 1486static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1487{
1488 int err;
c71099ac 1489 struct fib6_table *table;
d1918542 1490 struct net *net = dev_net(rt->dst.dev);
1da177e4 1491
8ed67789 1492 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1493 return -ENOENT;
1494
c71099ac
TG
1495 table = rt->rt6i_table;
1496 write_lock_bh(&table->tb6_lock);
1da177e4 1497
86872cb5 1498 err = fib6_del(rt, info);
d8d1f30b 1499 dst_release(&rt->dst);
1da177e4 1500
c71099ac 1501 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1502
1503 return err;
1504}
1505
e0a1ad73
TG
1506int ip6_del_rt(struct rt6_info *rt)
1507{
4d1169c1 1508 struct nl_info info = {
d1918542 1509 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1510 };
528c4ceb 1511 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1512}
1513
86872cb5 1514static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1515{
c71099ac 1516 struct fib6_table *table;
1da177e4
LT
1517 struct fib6_node *fn;
1518 struct rt6_info *rt;
1519 int err = -ESRCH;
1520
5578689a 1521 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1522 if (!table)
c71099ac
TG
1523 return err;
1524
1525 read_lock_bh(&table->tb6_lock);
1da177e4 1526
c71099ac 1527 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1528 &cfg->fc_dst, cfg->fc_dst_len,
1529 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1530
1da177e4 1531 if (fn) {
d8d1f30b 1532 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1533 if (cfg->fc_ifindex &&
d1918542
DM
1534 (!rt->dst.dev ||
1535 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1536 continue;
86872cb5
TG
1537 if (cfg->fc_flags & RTF_GATEWAY &&
1538 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1539 continue;
86872cb5 1540 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1541 continue;
d8d1f30b 1542 dst_hold(&rt->dst);
c71099ac 1543 read_unlock_bh(&table->tb6_lock);
1da177e4 1544
86872cb5 1545 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1546 }
1547 }
c71099ac 1548 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1549
1550 return err;
1551}
1552
1553/*
1554 * Handle redirects
1555 */
a6279458 1556struct ip6rd_flowi {
4c9483b2 1557 struct flowi6 fl6;
a6279458
YH
1558 struct in6_addr gateway;
1559};
1560
8ed67789
DL
1561static struct rt6_info *__ip6_route_redirect(struct net *net,
1562 struct fib6_table *table,
4c9483b2 1563 struct flowi6 *fl6,
a6279458 1564 int flags)
1da177e4 1565{
4c9483b2 1566 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1567 struct rt6_info *rt;
e843b9e1 1568 struct fib6_node *fn;
c71099ac 1569
1da177e4 1570 /*
e843b9e1
YH
1571 * Get the "current" route for this destination and
1572 * check if the redirect has come from approriate router.
1573 *
1574 * RFC 2461 specifies that redirects should only be
1575 * accepted if they come from the nexthop to the target.
1576 * Due to the way the routes are chosen, this notion
1577 * is a bit fuzzy and one might need to check all possible
1578 * routes.
1da177e4 1579 */
1da177e4 1580
c71099ac 1581 read_lock_bh(&table->tb6_lock);
4c9483b2 1582 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1583restart:
d8d1f30b 1584 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1585 /*
1586 * Current route is on-link; redirect is always invalid.
1587 *
1588 * Seems, previous statement is not true. It could
1589 * be node, which looks for us as on-link (f.e. proxy ndisc)
1590 * But then router serving it might decide, that we should
1591 * know truth 8)8) --ANK (980726).
1592 */
1593 if (rt6_check_expired(rt))
1594 continue;
1595 if (!(rt->rt6i_flags & RTF_GATEWAY))
1596 continue;
d1918542 1597 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
e843b9e1 1598 continue;
a6279458 1599 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1600 continue;
1601 break;
1602 }
a6279458 1603
cb15d9c2 1604 if (!rt)
8ed67789 1605 rt = net->ipv6.ip6_null_entry;
4c9483b2 1606 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1607out:
d8d1f30b 1608 dst_hold(&rt->dst);
a6279458 1609
c71099ac 1610 read_unlock_bh(&table->tb6_lock);
e843b9e1 1611
a6279458
YH
1612 return rt;
1613};
1614
b71d1d42
ED
1615static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1616 const struct in6_addr *src,
1617 const struct in6_addr *gateway,
a6279458
YH
1618 struct net_device *dev)
1619{
adaa70bb 1620 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1621 struct net *net = dev_net(dev);
a6279458 1622 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1623 .fl6 = {
1624 .flowi6_oif = dev->ifindex,
1625 .daddr = *dest,
1626 .saddr = *src,
a6279458 1627 },
a6279458 1628 };
adaa70bb 1629
4e3fd7a0 1630 rdfl.gateway = *gateway;
86c36ce4 1631
adaa70bb
TG
1632 if (rt6_need_strict(dest))
1633 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1634
4c9483b2 1635 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1636 flags, __ip6_route_redirect);
a6279458
YH
1637}
1638
b71d1d42
ED
1639void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1640 const struct in6_addr *saddr,
a6279458
YH
1641 struct neighbour *neigh, u8 *lladdr, int on_link)
1642{
1643 struct rt6_info *rt, *nrt = NULL;
1644 struct netevent_redirect netevent;
c346dca1 1645 struct net *net = dev_net(neigh->dev);
a6279458
YH
1646
1647 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1648
8ed67789 1649 if (rt == net->ipv6.ip6_null_entry) {
e87cc472 1650 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
a6279458 1651 goto out;
1da177e4
LT
1652 }
1653
1da177e4
LT
1654 /*
1655 * We have finally decided to accept it.
1656 */
1657
1ab1457c 1658 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1659 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1660 NEIGH_UPDATE_F_OVERRIDE|
1661 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1662 NEIGH_UPDATE_F_ISROUTER))
1663 );
1664
1665 /*
1666 * Redirect received -> path was valid.
1667 * Look, redirects are sent only in response to data packets,
1668 * so that this nexthop apparently is reachable. --ANK
1669 */
d8d1f30b 1670 dst_confirm(&rt->dst);
1da177e4
LT
1671
1672 /* Duplicate redirect: silently ignore. */
27217455 1673 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1da177e4
LT
1674 goto out;
1675
21efcfa0 1676 nrt = ip6_rt_copy(rt, dest);
38308473 1677 if (!nrt)
1da177e4
LT
1678 goto out;
1679
1680 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1681 if (on_link)
1682 nrt->rt6i_flags &= ~RTF_GATEWAY;
1683
4e3fd7a0 1684 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
69cce1d1 1685 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1686
40e22e8f 1687 if (ip6_ins_rt(nrt))
1da177e4
LT
1688 goto out;
1689
d8d1f30b
CG
1690 netevent.old = &rt->dst;
1691 netevent.new = &nrt->dst;
8d71740c
TT
1692 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1693
38308473 1694 if (rt->rt6i_flags & RTF_CACHE) {
e0a1ad73 1695 ip6_del_rt(rt);
1da177e4
LT
1696 return;
1697 }
1698
1699out:
d8d1f30b 1700 dst_release(&rt->dst);
1da177e4
LT
1701}
1702
1703/*
1704 * Handle ICMP "packet too big" messages
1705 * i.e. Path MTU discovery
1706 */
1707
b71d1d42 1708static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1709 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1710{
1711 struct rt6_info *rt, *nrt;
1712 int allfrag = 0;
d3052b55 1713again:
ae878ae2 1714 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
38308473 1715 if (!rt)
1da177e4
LT
1716 return;
1717
d3052b55
AV
1718 if (rt6_check_expired(rt)) {
1719 ip6_del_rt(rt);
1720 goto again;
1721 }
1722
d8d1f30b 1723 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1724 goto out;
1725
1726 if (pmtu < IPV6_MIN_MTU) {
1727 /*
1ab1457c 1728 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1729 * MTU (1280) and a fragment header should always be included
1730 * after a node receiving Too Big message reporting PMTU is
1731 * less than the IPv6 Minimum Link MTU.
1732 */
1733 pmtu = IPV6_MIN_MTU;
1734 allfrag = 1;
1735 }
1736
1737 /* New mtu received -> path was valid.
1738 They are sent only in response to data packets,
1739 so that this nexthop apparently is reachable. --ANK
1740 */
d8d1f30b 1741 dst_confirm(&rt->dst);
1da177e4
LT
1742
1743 /* Host route. If it is static, it would be better
1744 not to override it, but add new one, so that
1745 when cache entry will expire old pmtu
1746 would return automatically.
1747 */
1748 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1749 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1750 if (allfrag) {
1751 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1752 features |= RTAX_FEATURE_ALLFRAG;
1753 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1754 }
1716a961
G
1755 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1756 rt->rt6i_flags |= RTF_MODIFIED;
1da177e4
LT
1757 goto out;
1758 }
1759
1760 /* Network route.
1761 Two cases are possible:
1762 1. It is connected route. Action: COW
1763 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1764 */
27217455 1765 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1766 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1767 else
1768 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1769
d5315b50 1770 if (nrt) {
defb3519
DM
1771 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1772 if (allfrag) {
1773 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1774 features |= RTAX_FEATURE_ALLFRAG;
1775 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1776 }
a1e78363
YH
1777
1778 /* According to RFC 1981, detecting PMTU increase shouldn't be
1779 * happened within 5 mins, the recommended timer is 10 mins.
1780 * Here this route expiration time is set to ip6_rt_mtu_expires
1781 * which is 10 mins. After 10 mins the decreased pmtu is expired
1782 * and detecting PMTU increase will be automatically happened.
1783 */
1716a961
G
1784 rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1785 nrt->rt6i_flags |= RTF_DYNAMIC;
40e22e8f 1786 ip6_ins_rt(nrt);
1da177e4 1787 }
1da177e4 1788out:
d8d1f30b 1789 dst_release(&rt->dst);
1da177e4
LT
1790}
1791
b71d1d42 1792void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1793 struct net_device *dev, u32 pmtu)
1794{
1795 struct net *net = dev_net(dev);
1796
1797 /*
1798 * RFC 1981 states that a node "MUST reduce the size of the packets it
1799 * is sending along the path" that caused the Packet Too Big message.
1800 * Since it's not possible in the general case to determine which
1801 * interface was used to send the original packet, we update the MTU
1802 * on the interface that will be used to send future packets. We also
1803 * update the MTU on the interface that received the Packet Too Big in
1804 * case the original packet was forced out that interface with
1805 * SO_BINDTODEVICE or similar. This is the next best thing to the
1806 * correct behaviour, which would be to update the MTU on all
1807 * interfaces.
1808 */
1809 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1810 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1811}
1812
1da177e4
LT
1813/*
1814 * Misc support functions
1815 */
1816
1716a961 1817static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1818 const struct in6_addr *dest)
1da177e4 1819{
d1918542 1820 struct net *net = dev_net(ort->dst.dev);
97bab73f 1821 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0);
1da177e4
LT
1822
1823 if (rt) {
d8d1f30b
CG
1824 rt->dst.input = ort->dst.input;
1825 rt->dst.output = ort->dst.output;
8e2ec639 1826 rt->dst.flags |= DST_HOST;
d8d1f30b 1827
4e3fd7a0 1828 rt->rt6i_dst.addr = *dest;
8e2ec639 1829 rt->rt6i_dst.plen = 128;
defb3519 1830 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1831 rt->dst.error = ort->dst.error;
1da177e4
LT
1832 rt->rt6i_idev = ort->rt6i_idev;
1833 if (rt->rt6i_idev)
1834 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1835 rt->dst.lastuse = jiffies;
1da177e4 1836
4e3fd7a0 1837 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1838 rt->rt6i_flags = ort->rt6i_flags;
1839 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1840 (RTF_DEFAULT | RTF_ADDRCONF))
1841 rt6_set_from(rt, ort);
1842 else
1843 rt6_clean_expires(rt);
1da177e4
LT
1844 rt->rt6i_metric = 0;
1845
1da177e4
LT
1846#ifdef CONFIG_IPV6_SUBTREES
1847 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1848#endif
0f6c6392 1849 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1850 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1851 }
1852 return rt;
1853}
1854
70ceb4f5 1855#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1856static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1857 const struct in6_addr *prefix, int prefixlen,
1858 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1859{
1860 struct fib6_node *fn;
1861 struct rt6_info *rt = NULL;
c71099ac
TG
1862 struct fib6_table *table;
1863
efa2cea0 1864 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1865 if (!table)
c71099ac 1866 return NULL;
70ceb4f5 1867
c71099ac
TG
1868 write_lock_bh(&table->tb6_lock);
1869 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1870 if (!fn)
1871 goto out;
1872
d8d1f30b 1873 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1874 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1875 continue;
1876 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1877 continue;
1878 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1879 continue;
d8d1f30b 1880 dst_hold(&rt->dst);
70ceb4f5
YH
1881 break;
1882 }
1883out:
c71099ac 1884 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1885 return rt;
1886}
1887
efa2cea0 1888static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1889 const struct in6_addr *prefix, int prefixlen,
1890 const struct in6_addr *gwaddr, int ifindex,
95c96174 1891 unsigned int pref)
70ceb4f5 1892{
86872cb5
TG
1893 struct fib6_config cfg = {
1894 .fc_table = RT6_TABLE_INFO,
238fc7ea 1895 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1896 .fc_ifindex = ifindex,
1897 .fc_dst_len = prefixlen,
1898 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1899 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1900 .fc_nlinfo.pid = 0,
1901 .fc_nlinfo.nlh = NULL,
1902 .fc_nlinfo.nl_net = net,
86872cb5
TG
1903 };
1904
4e3fd7a0
AD
1905 cfg.fc_dst = *prefix;
1906 cfg.fc_gateway = *gwaddr;
70ceb4f5 1907
e317da96
YH
1908 /* We should treat it as a default route if prefix length is 0. */
1909 if (!prefixlen)
86872cb5 1910 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1911
86872cb5 1912 ip6_route_add(&cfg);
70ceb4f5 1913
efa2cea0 1914 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1915}
1916#endif
1917
b71d1d42 1918struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1919{
1da177e4 1920 struct rt6_info *rt;
c71099ac 1921 struct fib6_table *table;
1da177e4 1922
c346dca1 1923 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1924 if (!table)
c71099ac 1925 return NULL;
1da177e4 1926
c71099ac 1927 write_lock_bh(&table->tb6_lock);
d8d1f30b 1928 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1929 if (dev == rt->dst.dev &&
045927ff 1930 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1931 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1932 break;
1933 }
1934 if (rt)
d8d1f30b 1935 dst_hold(&rt->dst);
c71099ac 1936 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1937 return rt;
1938}
1939
b71d1d42 1940struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1941 struct net_device *dev,
1942 unsigned int pref)
1da177e4 1943{
86872cb5
TG
1944 struct fib6_config cfg = {
1945 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1946 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1947 .fc_ifindex = dev->ifindex,
1948 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1949 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1950 .fc_nlinfo.pid = 0,
1951 .fc_nlinfo.nlh = NULL,
c346dca1 1952 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1953 };
1da177e4 1954
4e3fd7a0 1955 cfg.fc_gateway = *gwaddr;
1da177e4 1956
86872cb5 1957 ip6_route_add(&cfg);
1da177e4 1958
1da177e4
LT
1959 return rt6_get_dflt_router(gwaddr, dev);
1960}
1961
7b4da532 1962void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1963{
1964 struct rt6_info *rt;
c71099ac
TG
1965 struct fib6_table *table;
1966
1967 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1968 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1969 if (!table)
c71099ac 1970 return;
1da177e4
LT
1971
1972restart:
c71099ac 1973 read_lock_bh(&table->tb6_lock);
d8d1f30b 1974 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1975 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1976 dst_hold(&rt->dst);
c71099ac 1977 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1978 ip6_del_rt(rt);
1da177e4
LT
1979 goto restart;
1980 }
1981 }
c71099ac 1982 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1983}
1984
5578689a
DL
1985static void rtmsg_to_fib6_config(struct net *net,
1986 struct in6_rtmsg *rtmsg,
86872cb5
TG
1987 struct fib6_config *cfg)
1988{
1989 memset(cfg, 0, sizeof(*cfg));
1990
1991 cfg->fc_table = RT6_TABLE_MAIN;
1992 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1993 cfg->fc_metric = rtmsg->rtmsg_metric;
1994 cfg->fc_expires = rtmsg->rtmsg_info;
1995 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1996 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1997 cfg->fc_flags = rtmsg->rtmsg_flags;
1998
5578689a 1999 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2000
4e3fd7a0
AD
2001 cfg->fc_dst = rtmsg->rtmsg_dst;
2002 cfg->fc_src = rtmsg->rtmsg_src;
2003 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2004}
2005
5578689a 2006int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2007{
86872cb5 2008 struct fib6_config cfg;
1da177e4
LT
2009 struct in6_rtmsg rtmsg;
2010 int err;
2011
2012 switch(cmd) {
2013 case SIOCADDRT: /* Add a route */
2014 case SIOCDELRT: /* Delete a route */
2015 if (!capable(CAP_NET_ADMIN))
2016 return -EPERM;
2017 err = copy_from_user(&rtmsg, arg,
2018 sizeof(struct in6_rtmsg));
2019 if (err)
2020 return -EFAULT;
86872cb5 2021
5578689a 2022 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2023
1da177e4
LT
2024 rtnl_lock();
2025 switch (cmd) {
2026 case SIOCADDRT:
86872cb5 2027 err = ip6_route_add(&cfg);
1da177e4
LT
2028 break;
2029 case SIOCDELRT:
86872cb5 2030 err = ip6_route_del(&cfg);
1da177e4
LT
2031 break;
2032 default:
2033 err = -EINVAL;
2034 }
2035 rtnl_unlock();
2036
2037 return err;
3ff50b79 2038 }
1da177e4
LT
2039
2040 return -EINVAL;
2041}
2042
2043/*
2044 * Drop the packet on the floor
2045 */
2046
d5fdd6ba 2047static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2048{
612f09e8 2049 int type;
adf30907 2050 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2051 switch (ipstats_mib_noroutes) {
2052 case IPSTATS_MIB_INNOROUTES:
0660e03f 2053 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2054 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2055 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2056 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2057 break;
2058 }
2059 /* FALLTHROUGH */
2060 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2061 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2062 ipstats_mib_noroutes);
612f09e8
YH
2063 break;
2064 }
3ffe533c 2065 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2066 kfree_skb(skb);
2067 return 0;
2068}
2069
9ce8ade0
TG
2070static int ip6_pkt_discard(struct sk_buff *skb)
2071{
612f09e8 2072 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2073}
2074
20380731 2075static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2076{
adf30907 2077 skb->dev = skb_dst(skb)->dev;
612f09e8 2078 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2079}
2080
6723ab54
DM
2081#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2082
9ce8ade0
TG
2083static int ip6_pkt_prohibit(struct sk_buff *skb)
2084{
612f09e8 2085 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2086}
2087
2088static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2089{
adf30907 2090 skb->dev = skb_dst(skb)->dev;
612f09e8 2091 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2092}
2093
6723ab54
DM
2094#endif
2095
1da177e4
LT
2096/*
2097 * Allocate a dst for local (unicast / anycast) address.
2098 */
2099
2100struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2101 const struct in6_addr *addr,
8f031519 2102 bool anycast)
1da177e4 2103{
c346dca1 2104 struct net *net = dev_net(idev->dev);
97bab73f 2105 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0);
f83c7790 2106 int err;
1da177e4 2107
38308473 2108 if (!rt) {
f3213831 2109 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
1da177e4 2110 return ERR_PTR(-ENOMEM);
40385653 2111 }
1da177e4 2112
1da177e4
LT
2113 in6_dev_hold(idev);
2114
11d53b49 2115 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2116 rt->dst.input = ip6_input;
2117 rt->dst.output = ip6_output;
1da177e4 2118 rt->rt6i_idev = idev;
d8d1f30b 2119 rt->dst.obsolete = -1;
1da177e4
LT
2120
2121 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2122 if (anycast)
2123 rt->rt6i_flags |= RTF_ANYCAST;
2124 else
1da177e4 2125 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2126 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2127 if (err) {
d8d1f30b 2128 dst_free(&rt->dst);
f83c7790 2129 return ERR_PTR(err);
1da177e4
LT
2130 }
2131
4e3fd7a0 2132 rt->rt6i_dst.addr = *addr;
1da177e4 2133 rt->rt6i_dst.plen = 128;
5578689a 2134 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2135
d8d1f30b 2136 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2137
2138 return rt;
2139}
2140
c3968a85
DW
2141int ip6_route_get_saddr(struct net *net,
2142 struct rt6_info *rt,
b71d1d42 2143 const struct in6_addr *daddr,
c3968a85
DW
2144 unsigned int prefs,
2145 struct in6_addr *saddr)
2146{
2147 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2148 int err = 0;
2149 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2150 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2151 else
2152 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2153 daddr, prefs, saddr);
2154 return err;
2155}
2156
2157/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address compared with each prefsrc */
};
2163
2164static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2165{
2166 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2167 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2168 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2169
d1918542 2170 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2171 rt != net->ipv6.ip6_null_entry &&
2172 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2173 /* remove prefsrc entry */
2174 rt->rt6i_prefsrc.plen = 0;
2175 }
2176 return 0;
2177}
2178
2179void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2180{
2181 struct net *net = dev_net(ifp->idev->dev);
2182 struct arg_dev_net_ip adni = {
2183 .dev = ifp->idev->dev,
2184 .net = net,
2185 .addr = &ifp->addr,
2186 };
2187 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2188}
2189
8ed67789
DL
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is kept */
};
2194
1da177e4
LT
2195static int fib6_ifdown(struct rt6_info *rt, void *arg)
2196{
bc3ef660 2197 const struct arg_dev_net *adn = arg;
2198 const struct net_device *dev = adn->dev;
8ed67789 2199
d1918542 2200 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2201 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2202 return -1;
c159d30c 2203
1da177e4
LT
2204 return 0;
2205}
2206
f3db4851 2207void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2208{
8ed67789
DL
2209 struct arg_dev_net adn = {
2210 .dev = dev,
2211 .net = net,
2212 };
2213
2214 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2215 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2216}
2217
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU value */
};
2222
2223static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2224{
2225 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2226 struct inet6_dev *idev;
2227
2228 /* In IPv6 pmtu discovery is not optional,
2229 so that RTAX_MTU lock cannot disable it.
2230 We still use this lock to block changes
2231 caused by addrconf/ndisc.
2232 */
2233
2234 idev = __in6_dev_get(arg->dev);
38308473 2235 if (!idev)
1da177e4
LT
2236 return 0;
2237
2238 /* For administrative MTU increase, there is no way to discover
2239 IPv6 PMTU increase, so PMTU increase should be updated here.
2240 Since RFC 1981 doesn't include administrative MTU increase
2241 update PMTU increase is a MUST. (i.e. jumbo frame)
2242 */
2243 /*
2244 If new MTU is less than route PMTU, this new MTU will be the
2245 lowest MTU in the path, update the route PMTU to reflect PMTU
2246 decreases; if new MTU is greater than route PMTU, and the
2247 old MTU is the lowest MTU in the path, update the route PMTU
2248 to reflect the increase. In this case if the other nodes' MTU
2249 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2250 PMTU discouvery.
2251 */
d1918542 2252 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2253 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2254 (dst_mtu(&rt->dst) >= arg->mtu ||
2255 (dst_mtu(&rt->dst) < arg->mtu &&
2256 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2257 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2258 }
1da177e4
LT
2259 return 0;
2260}
2261
95c96174 2262void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2263{
c71099ac
TG
2264 struct rt6_mtu_change_arg arg = {
2265 .dev = dev,
2266 .mtu = mtu,
2267 };
1da177e4 2268
c346dca1 2269 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2270}
2271
ef7c79ed 2272static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2273 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2274 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2275 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2276 [RTA_PRIORITY] = { .type = NLA_U32 },
2277 [RTA_METRICS] = { .type = NLA_NESTED },
2278};
2279
2280static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2281 struct fib6_config *cfg)
1da177e4 2282{
86872cb5
TG
2283 struct rtmsg *rtm;
2284 struct nlattr *tb[RTA_MAX+1];
2285 int err;
1da177e4 2286
86872cb5
TG
2287 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2288 if (err < 0)
2289 goto errout;
1da177e4 2290
86872cb5
TG
2291 err = -EINVAL;
2292 rtm = nlmsg_data(nlh);
2293 memset(cfg, 0, sizeof(*cfg));
2294
2295 cfg->fc_table = rtm->rtm_table;
2296 cfg->fc_dst_len = rtm->rtm_dst_len;
2297 cfg->fc_src_len = rtm->rtm_src_len;
2298 cfg->fc_flags = RTF_UP;
2299 cfg->fc_protocol = rtm->rtm_protocol;
2300
2301 if (rtm->rtm_type == RTN_UNREACHABLE)
2302 cfg->fc_flags |= RTF_REJECT;
2303
ab79ad14
2304 if (rtm->rtm_type == RTN_LOCAL)
2305 cfg->fc_flags |= RTF_LOCAL;
2306
86872cb5
TG
2307 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2308 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2309 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2310
2311 if (tb[RTA_GATEWAY]) {
2312 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2313 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2314 }
86872cb5
TG
2315
2316 if (tb[RTA_DST]) {
2317 int plen = (rtm->rtm_dst_len + 7) >> 3;
2318
2319 if (nla_len(tb[RTA_DST]) < plen)
2320 goto errout;
2321
2322 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2323 }
86872cb5
TG
2324
2325 if (tb[RTA_SRC]) {
2326 int plen = (rtm->rtm_src_len + 7) >> 3;
2327
2328 if (nla_len(tb[RTA_SRC]) < plen)
2329 goto errout;
2330
2331 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2332 }
86872cb5 2333
c3968a85
DW
2334 if (tb[RTA_PREFSRC])
2335 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2336
86872cb5
TG
2337 if (tb[RTA_OIF])
2338 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2339
2340 if (tb[RTA_PRIORITY])
2341 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2342
2343 if (tb[RTA_METRICS]) {
2344 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2345 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2346 }
86872cb5
TG
2347
2348 if (tb[RTA_TABLE])
2349 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2350
2351 err = 0;
2352errout:
2353 return err;
1da177e4
LT
2354}
2355
c127ea2c 2356static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2357{
86872cb5
TG
2358 struct fib6_config cfg;
2359 int err;
1da177e4 2360
86872cb5
TG
2361 err = rtm_to_fib6_config(skb, nlh, &cfg);
2362 if (err < 0)
2363 return err;
2364
2365 return ip6_route_del(&cfg);
1da177e4
LT
2366}
2367
c127ea2c 2368static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2369{
86872cb5
TG
2370 struct fib6_config cfg;
2371 int err;
1da177e4 2372
86872cb5
TG
2373 err = rtm_to_fib6_config(skb, nlh, &cfg);
2374 if (err < 0)
2375 return err;
2376
2377 return ip6_route_add(&cfg);
1da177e4
LT
2378}
2379
339bf98f
TG
2380static inline size_t rt6_nlmsg_size(void)
2381{
2382 return NLMSG_ALIGN(sizeof(struct rtmsg))
2383 + nla_total_size(16) /* RTA_SRC */
2384 + nla_total_size(16) /* RTA_DST */
2385 + nla_total_size(16) /* RTA_GATEWAY */
2386 + nla_total_size(16) /* RTA_PREFSRC */
2387 + nla_total_size(4) /* RTA_TABLE */
2388 + nla_total_size(4) /* RTA_IIF */
2389 + nla_total_size(4) /* RTA_OIF */
2390 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2391 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2392 + nla_total_size(sizeof(struct rta_cacheinfo));
2393}
2394
191cd582
BH
2395static int rt6_fill_node(struct net *net,
2396 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2397 struct in6_addr *dst, struct in6_addr *src,
2398 int iif, int type, u32 pid, u32 seq,
7bc570c8 2399 int prefix, int nowait, unsigned int flags)
1da177e4 2400{
346f870b 2401 const struct inet_peer *peer;
1da177e4 2402 struct rtmsg *rtm;
2d7202bf 2403 struct nlmsghdr *nlh;
e3703b3d 2404 long expires;
9e762a4a 2405 u32 table;
f2c31e32 2406 struct neighbour *n;
346f870b 2407 u32 ts, tsage;
1da177e4
LT
2408
2409 if (prefix) { /* user wants prefix routes only */
2410 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2411 /* success since this is not a prefix route */
2412 return 1;
2413 }
2414 }
2415
2d7202bf 2416 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2417 if (!nlh)
26932566 2418 return -EMSGSIZE;
2d7202bf
TG
2419
2420 rtm = nlmsg_data(nlh);
1da177e4
LT
2421 rtm->rtm_family = AF_INET6;
2422 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2423 rtm->rtm_src_len = rt->rt6i_src.plen;
2424 rtm->rtm_tos = 0;
c71099ac 2425 if (rt->rt6i_table)
9e762a4a 2426 table = rt->rt6i_table->tb6_id;
c71099ac 2427 else
9e762a4a
PM
2428 table = RT6_TABLE_UNSPEC;
2429 rtm->rtm_table = table;
c78679e8
DM
2430 if (nla_put_u32(skb, RTA_TABLE, table))
2431 goto nla_put_failure;
38308473 2432 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2433 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2434 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2435 rtm->rtm_type = RTN_LOCAL;
d1918542 2436 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2437 rtm->rtm_type = RTN_LOCAL;
2438 else
2439 rtm->rtm_type = RTN_UNICAST;
2440 rtm->rtm_flags = 0;
2441 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2442 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2443 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4
LT
2444 rtm->rtm_protocol = RTPROT_REDIRECT;
2445 else if (rt->rt6i_flags & RTF_ADDRCONF)
2446 rtm->rtm_protocol = RTPROT_KERNEL;
38308473 2447 else if (rt->rt6i_flags & RTF_DEFAULT)
1da177e4
LT
2448 rtm->rtm_protocol = RTPROT_RA;
2449
38308473 2450 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2451 rtm->rtm_flags |= RTM_F_CLONED;
2452
2453 if (dst) {
c78679e8
DM
2454 if (nla_put(skb, RTA_DST, 16, dst))
2455 goto nla_put_failure;
1ab1457c 2456 rtm->rtm_dst_len = 128;
1da177e4 2457 } else if (rtm->rtm_dst_len)
c78679e8
DM
2458 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2459 goto nla_put_failure;
1da177e4
LT
2460#ifdef CONFIG_IPV6_SUBTREES
2461 if (src) {
c78679e8
DM
2462 if (nla_put(skb, RTA_SRC, 16, src))
2463 goto nla_put_failure;
1ab1457c 2464 rtm->rtm_src_len = 128;
c78679e8
DM
2465 } else if (rtm->rtm_src_len &&
2466 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2467 goto nla_put_failure;
1da177e4 2468#endif
7bc570c8
YH
2469 if (iif) {
2470#ifdef CONFIG_IPV6_MROUTE
2471 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2472 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2473 if (err <= 0) {
2474 if (!nowait) {
2475 if (err == 0)
2476 return 0;
2477 goto nla_put_failure;
2478 } else {
2479 if (err == -EMSGSIZE)
2480 goto nla_put_failure;
2481 }
2482 }
2483 } else
2484#endif
c78679e8
DM
2485 if (nla_put_u32(skb, RTA_IIF, iif))
2486 goto nla_put_failure;
7bc570c8 2487 } else if (dst) {
1da177e4 2488 struct in6_addr saddr_buf;
c78679e8
DM
2489 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2490 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2491 goto nla_put_failure;
1da177e4 2492 }
2d7202bf 2493
c3968a85
DW
2494 if (rt->rt6i_prefsrc.plen) {
2495 struct in6_addr saddr_buf;
4e3fd7a0 2496 saddr_buf = rt->rt6i_prefsrc.addr;
c78679e8
DM
2497 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2498 goto nla_put_failure;
c3968a85
DW
2499 }
2500
defb3519 2501 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2502 goto nla_put_failure;
2503
f2c31e32 2504 rcu_read_lock();
27217455 2505 n = dst_get_neighbour_noref(&rt->dst);
94f826b8
ED
2506 if (n) {
2507 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2508 rcu_read_unlock();
2509 goto nla_put_failure;
2510 }
2511 }
f2c31e32 2512 rcu_read_unlock();
2d7202bf 2513
c78679e8
DM
2514 if (rt->dst.dev &&
2515 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2516 goto nla_put_failure;
2517 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2518 goto nla_put_failure;
36e3deae
YH
2519 if (!(rt->rt6i_flags & RTF_EXPIRES))
2520 expires = 0;
d1918542
DM
2521 else if (rt->dst.expires - jiffies < INT_MAX)
2522 expires = rt->dst.expires - jiffies;
36e3deae
YH
2523 else
2524 expires = INT_MAX;
69cdf8f9 2525
97bab73f
DM
2526 peer = NULL;
2527 if (rt6_has_peer(rt))
2528 peer = rt6_peer_ptr(rt);
346f870b
DM
2529 ts = tsage = 0;
2530 if (peer && peer->tcp_ts_stamp) {
2531 ts = peer->tcp_ts;
2532 tsage = get_seconds() - peer->tcp_ts_stamp;
2533 }
2534
2535 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
d8d1f30b 2536 expires, rt->dst.error) < 0)
e3703b3d 2537 goto nla_put_failure;
2d7202bf
TG
2538
2539 return nlmsg_end(skb, nlh);
2540
2541nla_put_failure:
26932566
PM
2542 nlmsg_cancel(skb, nlh);
2543 return -EMSGSIZE;
1da177e4
LT
2544}
2545
1b43af54 2546int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2547{
2548 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2549 int prefix;
2550
2d7202bf
TG
2551 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2552 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2553 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2554 } else
2555 prefix = 0;
2556
191cd582
BH
2557 return rt6_fill_node(arg->net,
2558 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2559 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2560 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2561}
2562
c127ea2c 2563static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2564{
3b1e0a65 2565 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2566 struct nlattr *tb[RTA_MAX+1];
2567 struct rt6_info *rt;
1da177e4 2568 struct sk_buff *skb;
ab364a6f 2569 struct rtmsg *rtm;
4c9483b2 2570 struct flowi6 fl6;
72331bc0 2571 int err, iif = 0, oif = 0;
1da177e4 2572
ab364a6f
TG
2573 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2574 if (err < 0)
2575 goto errout;
1da177e4 2576
ab364a6f 2577 err = -EINVAL;
4c9483b2 2578 memset(&fl6, 0, sizeof(fl6));
1da177e4 2579
ab364a6f
TG
2580 if (tb[RTA_SRC]) {
2581 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2582 goto errout;
2583
4e3fd7a0 2584 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2585 }
2586
2587 if (tb[RTA_DST]) {
2588 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2589 goto errout;
2590
4e3fd7a0 2591 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2592 }
2593
2594 if (tb[RTA_IIF])
2595 iif = nla_get_u32(tb[RTA_IIF]);
2596
2597 if (tb[RTA_OIF])
72331bc0 2598 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2599
2600 if (iif) {
2601 struct net_device *dev;
72331bc0
SL
2602 int flags = 0;
2603
5578689a 2604 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2605 if (!dev) {
2606 err = -ENODEV;
ab364a6f 2607 goto errout;
1da177e4 2608 }
72331bc0
SL
2609
2610 fl6.flowi6_iif = iif;
2611
2612 if (!ipv6_addr_any(&fl6.saddr))
2613 flags |= RT6_LOOKUP_F_HAS_SADDR;
2614
2615 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2616 flags);
2617 } else {
2618 fl6.flowi6_oif = oif;
2619
2620 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2621 }
2622
ab364a6f 2623 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2624 if (!skb) {
2173bff5 2625 dst_release(&rt->dst);
ab364a6f
TG
2626 err = -ENOBUFS;
2627 goto errout;
2628 }
1da177e4 2629
ab364a6f
TG
2630 /* Reserve room for dummy headers, this skb can pass
2631 through good chunk of routing engine.
2632 */
459a98ed 2633 skb_reset_mac_header(skb);
ab364a6f 2634 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2635
d8d1f30b 2636 skb_dst_set(skb, &rt->dst);
1da177e4 2637
4c9483b2 2638 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2639 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2640 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2641 if (err < 0) {
ab364a6f
TG
2642 kfree_skb(skb);
2643 goto errout;
1da177e4
LT
2644 }
2645
5578689a 2646 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2647errout:
1da177e4 2648 return err;
1da177e4
LT
2649}
2650
86872cb5 2651void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2652{
2653 struct sk_buff *skb;
5578689a 2654 struct net *net = info->nl_net;
528c4ceb
DL
2655 u32 seq;
2656 int err;
2657
2658 err = -ENOBUFS;
38308473 2659 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2660
339bf98f 2661 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2662 if (!skb)
21713ebc
TG
2663 goto errout;
2664
191cd582 2665 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2666 event, info->pid, seq, 0, 0, 0);
26932566
PM
2667 if (err < 0) {
2668 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2669 WARN_ON(err == -EMSGSIZE);
2670 kfree_skb(skb);
2671 goto errout;
2672 }
1ce85fe4
PNA
2673 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2674 info->nlh, gfp_any());
2675 return;
21713ebc
TG
2676errout:
2677 if (err < 0)
5578689a 2678 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2679}
2680
8ed67789
DL
2681static int ip6_route_dev_notify(struct notifier_block *this,
2682 unsigned long event, void *data)
2683{
2684 struct net_device *dev = (struct net_device *)data;
c346dca1 2685 struct net *net = dev_net(dev);
8ed67789
DL
2686
2687 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2688 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2689 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2690#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2691 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2692 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2693 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2694 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2695#endif
2696 }
2697
2698 return NOTIFY_OK;
2699}
2700
1da177e4
LT
2701/*
2702 * /proc
2703 */
2704
2705#ifdef CONFIG_PROC_FS
2706
1da177e4
LT
/*
 * Buffer-tracking state for a /proc read.
 * NOTE(review): nothing in the visible part of this file uses this
 * structure; it looks like a leftover from before the seq_file
 * conversion - confirm before relying on it.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2715
2716static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2717{
33120b30 2718 struct seq_file *m = p_arg;
69cce1d1 2719 struct neighbour *n;
1da177e4 2720
4b7a4274 2721 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2722
2723#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2724 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2725#else
33120b30 2726 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2727#endif
f2c31e32 2728 rcu_read_lock();
27217455 2729 n = dst_get_neighbour_noref(&rt->dst);
69cce1d1
DM
2730 if (n) {
2731 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2732 } else {
33120b30 2733 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2734 }
f2c31e32 2735 rcu_read_unlock();
33120b30 2736 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2737 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2738 rt->dst.__use, rt->rt6i_flags,
d1918542 2739 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2740 return 0;
2741}
2742
33120b30 2743static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2744{
f3db4851 2745 struct net *net = (struct net *)m->private;
32b293a5 2746 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2747 return 0;
2748}
1da177e4 2749
33120b30
AD
2750static int ipv6_route_open(struct inode *inode, struct file *file)
2751{
de05c557 2752 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2753}
2754
33120b30
AD
2755static const struct file_operations ipv6_route_proc_fops = {
2756 .owner = THIS_MODULE,
2757 .open = ipv6_route_open,
2758 .read = seq_read,
2759 .llseek = seq_lseek,
b6fcbdb4 2760 .release = single_release_net,
33120b30
AD
2761};
2762
1da177e4
LT
2763static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2764{
69ddb805 2765 struct net *net = (struct net *)seq->private;
1da177e4 2766 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2767 net->ipv6.rt6_stats->fib_nodes,
2768 net->ipv6.rt6_stats->fib_route_nodes,
2769 net->ipv6.rt6_stats->fib_rt_alloc,
2770 net->ipv6.rt6_stats->fib_rt_entries,
2771 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2772 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2773 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2774
2775 return 0;
2776}
2777
2778static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2779{
de05c557 2780 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2781}
2782
9a32144e 2783static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2784 .owner = THIS_MODULE,
2785 .open = rt6_stats_seq_open,
2786 .read = seq_read,
2787 .llseek = seq_lseek,
b6fcbdb4 2788 .release = single_release_net,
1da177e4
LT
2789};
2790#endif /* CONFIG_PROC_FS */
2791
2792#ifdef CONFIG_SYSCTL
2793
1da177e4 2794static
8d65af78 2795int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2796 void __user *buffer, size_t *lenp, loff_t *ppos)
2797{
c486da34
LAG
2798 struct net *net;
2799 int delay;
2800 if (!write)
1da177e4 2801 return -EINVAL;
c486da34
LAG
2802
2803 net = (struct net *)ctl->extra1;
2804 delay = net->ipv6.sysctl.flush_delay;
2805 proc_dointvec(ctl, write, buffer, lenp, ppos);
2806 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2807 return 0;
1da177e4
LT
2808}
2809
760f2d01 2810ctl_table ipv6_route_table_template[] = {
1ab1457c 2811 {
1da177e4 2812 .procname = "flush",
4990509f 2813 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2814 .maxlen = sizeof(int),
89c8b3a1 2815 .mode = 0200,
6d9f239a 2816 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2817 },
2818 {
1da177e4 2819 .procname = "gc_thresh",
9a7ec3a9 2820 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2821 .maxlen = sizeof(int),
2822 .mode = 0644,
6d9f239a 2823 .proc_handler = proc_dointvec,
1da177e4
LT
2824 },
2825 {
1da177e4 2826 .procname = "max_size",
4990509f 2827 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2828 .maxlen = sizeof(int),
2829 .mode = 0644,
6d9f239a 2830 .proc_handler = proc_dointvec,
1da177e4
LT
2831 },
2832 {
1da177e4 2833 .procname = "gc_min_interval",
4990509f 2834 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2835 .maxlen = sizeof(int),
2836 .mode = 0644,
6d9f239a 2837 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2838 },
2839 {
1da177e4 2840 .procname = "gc_timeout",
4990509f 2841 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2842 .maxlen = sizeof(int),
2843 .mode = 0644,
6d9f239a 2844 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2845 },
2846 {
1da177e4 2847 .procname = "gc_interval",
4990509f 2848 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2849 .maxlen = sizeof(int),
2850 .mode = 0644,
6d9f239a 2851 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2852 },
2853 {
1da177e4 2854 .procname = "gc_elasticity",
4990509f 2855 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2856 .maxlen = sizeof(int),
2857 .mode = 0644,
f3d3f616 2858 .proc_handler = proc_dointvec,
1da177e4
LT
2859 },
2860 {
1da177e4 2861 .procname = "mtu_expires",
4990509f 2862 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2863 .maxlen = sizeof(int),
2864 .mode = 0644,
6d9f239a 2865 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2866 },
2867 {
1da177e4 2868 .procname = "min_adv_mss",
4990509f 2869 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2870 .maxlen = sizeof(int),
2871 .mode = 0644,
f3d3f616 2872 .proc_handler = proc_dointvec,
1da177e4
LT
2873 },
2874 {
1da177e4 2875 .procname = "gc_min_interval_ms",
4990509f 2876 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2877 .maxlen = sizeof(int),
2878 .mode = 0644,
6d9f239a 2879 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2880 },
f8572d8f 2881 { }
1da177e4
LT
2882};
2883
2c8c1e72 2884struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2885{
2886 struct ctl_table *table;
2887
2888 table = kmemdup(ipv6_route_table_template,
2889 sizeof(ipv6_route_table_template),
2890 GFP_KERNEL);
5ee09105
YH
2891
2892 if (table) {
2893 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2894 table[0].extra1 = net;
86393e52 2895 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2896 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2897 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2898 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2899 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2900 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2901 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2902 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2903 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2904 }
2905
760f2d01
DL
2906 return table;
2907}
1da177e4
LT
2908#endif
2909
2c8c1e72 2910static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2911{
633d424b 2912 int ret = -ENOMEM;
8ed67789 2913
86393e52
AD
2914 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2915 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2916
fc66f95c
ED
2917 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2918 goto out_ip6_dst_ops;
2919
8ed67789
DL
2920 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2921 sizeof(*net->ipv6.ip6_null_entry),
2922 GFP_KERNEL);
2923 if (!net->ipv6.ip6_null_entry)
fc66f95c 2924 goto out_ip6_dst_entries;
d8d1f30b 2925 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2926 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2927 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2928 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2929 ip6_template_metrics, true);
8ed67789
DL
2930
2931#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2932 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2933 sizeof(*net->ipv6.ip6_prohibit_entry),
2934 GFP_KERNEL);
68fffc67
PZ
2935 if (!net->ipv6.ip6_prohibit_entry)
2936 goto out_ip6_null_entry;
d8d1f30b 2937 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2938 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2939 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2940 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2941 ip6_template_metrics, true);
8ed67789
DL
2942
2943 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2944 sizeof(*net->ipv6.ip6_blk_hole_entry),
2945 GFP_KERNEL);
68fffc67
PZ
2946 if (!net->ipv6.ip6_blk_hole_entry)
2947 goto out_ip6_prohibit_entry;
d8d1f30b 2948 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2949 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2950 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2951 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2952 ip6_template_metrics, true);
8ed67789
DL
2953#endif
2954
b339a47c
PZ
2955 net->ipv6.sysctl.flush_delay = 0;
2956 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2957 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2958 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2959 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2960 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2961 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2962 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2963
cdb18761
DL
2964#ifdef CONFIG_PROC_FS
2965 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2966 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2967#endif
6891a346
BT
2968 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2969
8ed67789
DL
2970 ret = 0;
2971out:
2972 return ret;
f2fc6a54 2973
68fffc67
PZ
2974#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2975out_ip6_prohibit_entry:
2976 kfree(net->ipv6.ip6_prohibit_entry);
2977out_ip6_null_entry:
2978 kfree(net->ipv6.ip6_null_entry);
2979#endif
fc66f95c
ED
2980out_ip6_dst_entries:
2981 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2982out_ip6_dst_ops:
f2fc6a54 2983 goto out;
cdb18761
DL
2984}
2985
2c8c1e72 2986static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2987{
2988#ifdef CONFIG_PROC_FS
2989 proc_net_remove(net, "ipv6_route");
2990 proc_net_remove(net, "rt6_stats");
2991#endif
8ed67789
DL
2992 kfree(net->ipv6.ip6_null_entry);
2993#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2994 kfree(net->ipv6.ip6_prohibit_entry);
2995 kfree(net->ipv6.ip6_blk_hole_entry);
2996#endif
41bb78b4 2997 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2998}
2999
3000static struct pernet_operations ip6_route_net_ops = {
3001 .init = ip6_route_net_init,
3002 .exit = ip6_route_net_exit,
3003};
3004
c3426b47
DM
3005static int __net_init ipv6_inetpeer_init(struct net *net)
3006{
3007 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3008
3009 if (!bp)
3010 return -ENOMEM;
3011 inet_peer_base_init(bp);
3012 net->ipv6.peers = bp;
3013 return 0;
3014}
3015
3016static void __net_exit ipv6_inetpeer_exit(struct net *net)
3017{
3018 struct inet_peer_base *bp = net->ipv6.peers;
3019
3020 net->ipv6.peers = NULL;
56a6b248 3021 inetpeer_invalidate_tree(bp);
c3426b47
DM
3022 kfree(bp);
3023}
3024
2b823f72 3025static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3026 .init = ipv6_inetpeer_init,
3027 .exit = ipv6_inetpeer_exit,
3028};
3029
8ed67789
DL
3030static struct notifier_block ip6_route_dev_notifier = {
3031 .notifier_call = ip6_route_dev_notify,
3032 .priority = 0,
3033};
3034
433d49c3 3035int __init ip6_route_init(void)
1da177e4 3036{
433d49c3
DL
3037 int ret;
3038
9a7ec3a9
DL
3039 ret = -ENOMEM;
3040 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3041 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3042 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3043 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3044 goto out;
14e50e57 3045
fc66f95c 3046 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3047 if (ret)
bdb3289f 3048 goto out_kmem_cache;
bdb3289f 3049
fc66f95c
ED
3050 ret = register_pernet_subsys(&ip6_route_net_ops);
3051 if (ret)
3052 goto out_dst_entries;
3053
c3426b47
DM
3054 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3055 if (ret)
3056 goto out_register_subsys;
3057
5dc121e9
AE
3058 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3059
8ed67789
DL
3060 /* Registering of the loopback is done before this portion of code,
3061 * the loopback reference in rt6_info will not be taken, do it
3062 * manually for init_net */
d8d1f30b 3063 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3064 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3065 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3066 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3067 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3068 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3069 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3070 #endif
433d49c3
DL
3071 ret = fib6_init();
3072 if (ret)
c3426b47 3073 goto out_register_inetpeer;
433d49c3 3074
433d49c3
DL
3075 ret = xfrm6_init();
3076 if (ret)
cdb18761 3077 goto out_fib6_init;
c35b7e72 3078
433d49c3
DL
3079 ret = fib6_rules_init();
3080 if (ret)
3081 goto xfrm6_init;
7e5449c2 3082
433d49c3 3083 ret = -ENOBUFS;
c7ac8679
GR
3084 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3085 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3086 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 3087 goto fib6_rules_init;
c127ea2c 3088
8ed67789 3089 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
3090 if (ret)
3091 goto fib6_rules_init;
8ed67789 3092
433d49c3
DL
3093out:
3094 return ret;
3095
3096fib6_rules_init:
433d49c3
DL
3097 fib6_rules_cleanup();
3098xfrm6_init:
433d49c3 3099 xfrm6_fini();
433d49c3 3100out_fib6_init:
433d49c3 3101 fib6_gc_cleanup();
c3426b47
DM
3102out_register_inetpeer:
3103 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789
DL
3104out_register_subsys:
3105 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
3106out_dst_entries:
3107 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3108out_kmem_cache:
f2fc6a54 3109 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3110 goto out;
1da177e4
LT
3111}
3112
3113void ip6_route_cleanup(void)
3114{
8ed67789 3115 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 3116 fib6_rules_cleanup();
1da177e4 3117 xfrm6_fini();
1da177e4 3118 fib6_gc_cleanup();
c3426b47 3119 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3120 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3121 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3122 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3123}