Merge tag 'batman-adv-for-davem' of git://git.open-mesh.org/linux-merge
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
51ebd318 60#include <net/nexthop.h>
1da177e4
LT
61
62#include <asm/uaccess.h>
63
64#ifdef CONFIG_SYSCTL
65#include <linux/sysctl.h>
66#endif
67
1716a961 68static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 69 const struct in6_addr *dest);
1da177e4 70static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 71static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 72static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
73static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74static void ip6_dst_destroy(struct dst_entry *);
75static void ip6_dst_ifdown(struct dst_entry *,
76 struct net_device *dev, int how);
569d3645 77static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
78
79static int ip6_pkt_discard(struct sk_buff *skb);
80static int ip6_pkt_discard_out(struct sk_buff *skb);
81static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
82static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83 struct sk_buff *skb, u32 mtu);
84static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85 struct sk_buff *skb);
1da177e4 86
70ceb4f5 87#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 88static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
89 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex,
95c96174 91 unsigned int pref);
efa2cea0 92static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
93 const struct in6_addr *prefix, int prefixlen,
94 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
95#endif
96
06582540
DM
97static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
98{
99 struct rt6_info *rt = (struct rt6_info *) dst;
100 struct inet_peer *peer;
101 u32 *p = NULL;
102
8e2ec639
YZ
103 if (!(rt->dst.flags & DST_HOST))
104 return NULL;
105
fbfe95a4 106 peer = rt6_get_peer_create(rt);
06582540
DM
107 if (peer) {
108 u32 *old_p = __DST_METRICS_PTR(old);
109 unsigned long prev, new;
110
111 p = peer->metrics;
112 if (inet_metrics_new(peer))
113 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
114
115 new = (unsigned long) p;
116 prev = cmpxchg(&dst->_metrics, old, new);
117
118 if (prev != old) {
119 p = __DST_METRICS_PTR(prev);
120 if (prev & DST_METRICS_READ_ONLY)
121 p = NULL;
122 }
123 }
124 return p;
125}
126
f894cbf8
DM
127static inline const void *choose_neigh_daddr(struct rt6_info *rt,
128 struct sk_buff *skb,
129 const void *daddr)
39232973
DM
130{
131 struct in6_addr *p = &rt->rt6i_gateway;
132
a7563f34 133 if (!ipv6_addr_any(p))
39232973 134 return (const void *) p;
f894cbf8
DM
135 else if (skb)
136 return &ipv6_hdr(skb)->daddr;
39232973
DM
137 return daddr;
138}
139
f894cbf8
DM
140static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
141 struct sk_buff *skb,
142 const void *daddr)
d3aaeb38 143{
39232973
DM
144 struct rt6_info *rt = (struct rt6_info *) dst;
145 struct neighbour *n;
146
f894cbf8 147 daddr = choose_neigh_daddr(rt, skb, daddr);
39232973 148 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
149 if (n)
150 return n;
151 return neigh_create(&nd_tbl, daddr, dst->dev);
152}
153
8ade06c6 154static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 155{
8ade06c6
DM
156 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
157 if (!n) {
158 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
159 if (IS_ERR(n))
160 return PTR_ERR(n);
161 }
97cac082 162 rt->n = n;
f83c7790
DM
163
164 return 0;
d3aaeb38
DM
165}
166
9a7ec3a9 167static struct dst_ops ip6_dst_ops_template = {
1da177e4 168 .family = AF_INET6,
09640e63 169 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
170 .gc = ip6_dst_gc,
171 .gc_thresh = 1024,
172 .check = ip6_dst_check,
0dbaee3b 173 .default_advmss = ip6_default_advmss,
ebb762f2 174 .mtu = ip6_mtu,
06582540 175 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
176 .destroy = ip6_dst_destroy,
177 .ifdown = ip6_dst_ifdown,
178 .negative_advice = ip6_negative_advice,
179 .link_failure = ip6_link_failure,
180 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 181 .redirect = rt6_do_redirect,
1ac06e03 182 .local_out = __ip6_local_out,
d3aaeb38 183 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
184};
185
ebb762f2 186static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 187{
618f9bc7
SK
188 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
189
190 return mtu ? : dst->dev->mtu;
ec831ea7
RD
191}
192
6700c270
DM
193static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
194 struct sk_buff *skb, u32 mtu)
14e50e57
DM
195{
196}
197
6700c270
DM
/* dst_ops->redirect for blackhole routes: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
202
0972ddb2
HB
203static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
204 unsigned long old)
205{
206 return NULL;
207}
208
14e50e57
DM
209static struct dst_ops ip6_dst_blackhole_ops = {
210 .family = AF_INET6,
09640e63 211 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
212 .destroy = ip6_dst_destroy,
213 .check = ip6_dst_check,
ebb762f2 214 .mtu = ip6_blackhole_mtu,
214f45c9 215 .default_advmss = ip6_default_advmss,
14e50e57 216 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 217 .redirect = ip6_rt_blackhole_redirect,
0972ddb2 218 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 219 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
220};
221
62fa8a84
DM
222static const u32 ip6_template_metrics[RTAX_MAX] = {
223 [RTAX_HOPLIMIT - 1] = 255,
224};
225
fb0af4c7 226static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
227 .dst = {
228 .__refcnt = ATOMIC_INIT(1),
229 .__use = 1,
2c20cbd7 230 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 231 .error = -ENETUNREACH,
d8d1f30b
CG
232 .input = ip6_pkt_discard,
233 .output = ip6_pkt_discard_out,
1da177e4
LT
234 },
235 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 236 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
237 .rt6i_metric = ~(u32) 0,
238 .rt6i_ref = ATOMIC_INIT(1),
239};
240
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route entry: a reject route whose dst reports
 * -EACCES and hands packets to the ip6_pkt_prohibit handlers.
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
};

/*
 * Template for the "blackhole" route entry: a reject route whose dst
 * reports -EINVAL and passes packets to dst_discard in both directions.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
};

#endif
277
1da177e4 278/* allocate dst with ip6_dst_ops */
97bab73f 279static inline struct rt6_info *ip6_dst_alloc(struct net *net,
957c665f 280 struct net_device *dev,
8b96d22d
DM
281 int flags,
282 struct fib6_table *table)
1da177e4 283{
97bab73f 284 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 285 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 286
97bab73f 287 if (rt) {
8104891b
SK
288 struct dst_entry *dst = &rt->dst;
289
290 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
8b96d22d 291 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
6f3118b5 292 rt->rt6i_genid = rt_genid(net);
51ebd318
ND
293 INIT_LIST_HEAD(&rt->rt6i_siblings);
294 rt->rt6i_nsiblings = 0;
97bab73f 295 }
cf911662 296 return rt;
1da177e4
LT
297}
298
299static void ip6_dst_destroy(struct dst_entry *dst)
300{
301 struct rt6_info *rt = (struct rt6_info *)dst;
302 struct inet6_dev *idev = rt->rt6i_idev;
303
97cac082
DM
304 if (rt->n)
305 neigh_release(rt->n);
306
8e2ec639
YZ
307 if (!(rt->dst.flags & DST_HOST))
308 dst_destroy_metrics_generic(dst);
309
38308473 310 if (idev) {
1da177e4
LT
311 rt->rt6i_idev = NULL;
312 in6_dev_put(idev);
1ab1457c 313 }
1716a961
G
314
315 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
316 dst_release(dst->from);
317
97bab73f
DM
318 if (rt6_has_peer(rt)) {
319 struct inet_peer *peer = rt6_peer_ptr(rt);
b3419363
DM
320 inet_putpeer(peer);
321 }
322}
323
6431cbc2
DM
324static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
325
326static u32 rt6_peer_genid(void)
327{
328 return atomic_read(&__rt6_peer_genid);
329}
330
b3419363
DM
331void rt6_bind_peer(struct rt6_info *rt, int create)
332{
97bab73f 333 struct inet_peer_base *base;
b3419363
DM
334 struct inet_peer *peer;
335
97bab73f
DM
336 base = inetpeer_base_ptr(rt->_rt6i_peer);
337 if (!base)
338 return;
339
340 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
7b34ca2a
DM
341 if (peer) {
342 if (!rt6_set_peer(rt, peer))
343 inet_putpeer(peer);
344 else
345 rt->rt6i_peer_genid = rt6_peer_genid();
346 }
1da177e4
LT
347}
348
349static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
350 int how)
351{
352 struct rt6_info *rt = (struct rt6_info *)dst;
353 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 354 struct net_device *loopback_dev =
c346dca1 355 dev_net(dev)->loopback_dev;
1da177e4 356
97cac082
DM
357 if (dev != loopback_dev) {
358 if (idev && idev->dev == dev) {
359 struct inet6_dev *loopback_idev =
360 in6_dev_get(loopback_dev);
361 if (loopback_idev) {
362 rt->rt6i_idev = loopback_idev;
363 in6_dev_put(idev);
364 }
365 }
366 if (rt->n && rt->n->dev == dev) {
367 rt->n->dev = loopback_dev;
368 dev_hold(loopback_dev);
369 dev_put(dev);
1da177e4
LT
370 }
371 }
372}
373
a50feda5 374static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 375{
1716a961
G
376 if (rt->rt6i_flags & RTF_EXPIRES) {
377 if (time_after(jiffies, rt->dst.expires))
a50feda5 378 return true;
1716a961 379 } else if (rt->dst.from) {
3fd91fb3 380 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 381 }
a50feda5 382 return false;
1da177e4
LT
383}
384
a50feda5 385static bool rt6_need_strict(const struct in6_addr *daddr)
c71099ac 386{
a02cec21
ED
387 return ipv6_addr_type(daddr) &
388 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
389}
390
51ebd318
ND
391/* Multipath route selection:
392 * Hash based function using packet header and flowlabel.
393 * Adapted from fib_info_hashfn()
394 */
395static int rt6_info_hash_nhsfn(unsigned int candidate_count,
396 const struct flowi6 *fl6)
397{
398 unsigned int val = fl6->flowi6_proto;
399
b3ce5ae1
ND
400 val ^= (__force u32)fl6->daddr.s6_addr32[0];
401 val ^= (__force u32)fl6->daddr.s6_addr32[1];
402 val ^= (__force u32)fl6->daddr.s6_addr32[2];
403 val ^= (__force u32)fl6->daddr.s6_addr32[3];
51ebd318 404
b3ce5ae1
ND
405 val ^= (__force u32)fl6->saddr.s6_addr32[0];
406 val ^= (__force u32)fl6->saddr.s6_addr32[1];
407 val ^= (__force u32)fl6->saddr.s6_addr32[2];
408 val ^= (__force u32)fl6->saddr.s6_addr32[3];
51ebd318
ND
409
410 /* Work only if this not encapsulated */
411 switch (fl6->flowi6_proto) {
412 case IPPROTO_UDP:
413 case IPPROTO_TCP:
414 case IPPROTO_SCTP:
b3ce5ae1
ND
415 val ^= (__force u16)fl6->fl6_sport;
416 val ^= (__force u16)fl6->fl6_dport;
51ebd318
ND
417 break;
418
419 case IPPROTO_ICMPV6:
b3ce5ae1
ND
420 val ^= (__force u16)fl6->fl6_icmp_type;
421 val ^= (__force u16)fl6->fl6_icmp_code;
51ebd318
ND
422 break;
423 }
424 /* RFC6438 recommands to use flowlabel */
b3ce5ae1 425 val ^= (__force u32)fl6->flowlabel;
51ebd318
ND
426
427 /* Perhaps, we need to tune, this function? */
428 val = val ^ (val >> 7) ^ (val >> 12);
429 return val % candidate_count;
430}
431
432static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
433 struct flowi6 *fl6)
434{
435 struct rt6_info *sibling, *next_sibling;
436 int route_choosen;
437
438 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
439 /* Don't change the route, if route_choosen == 0
440 * (siblings does not include ourself)
441 */
442 if (route_choosen)
443 list_for_each_entry_safe(sibling, next_sibling,
444 &match->rt6i_siblings, rt6i_siblings) {
445 route_choosen--;
446 if (route_choosen == 0) {
447 match = sibling;
448 break;
449 }
450 }
451 return match;
452}
453
1da177e4 454/*
c71099ac 455 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
456 */
457
8ed67789
DL
458static inline struct rt6_info *rt6_device_match(struct net *net,
459 struct rt6_info *rt,
b71d1d42 460 const struct in6_addr *saddr,
1da177e4 461 int oif,
d420895e 462 int flags)
1da177e4
LT
463{
464 struct rt6_info *local = NULL;
465 struct rt6_info *sprt;
466
dd3abc4e
YH
467 if (!oif && ipv6_addr_any(saddr))
468 goto out;
469
d8d1f30b 470 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 471 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
472
473 if (oif) {
1da177e4
LT
474 if (dev->ifindex == oif)
475 return sprt;
476 if (dev->flags & IFF_LOOPBACK) {
38308473 477 if (!sprt->rt6i_idev ||
1da177e4 478 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 479 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 480 continue;
1ab1457c 481 if (local && (!oif ||
1da177e4
LT
482 local->rt6i_idev->dev->ifindex == oif))
483 continue;
484 }
485 local = sprt;
486 }
dd3abc4e
YH
487 } else {
488 if (ipv6_chk_addr(net, saddr, dev,
489 flags & RT6_LOOKUP_F_IFACE))
490 return sprt;
1da177e4 491 }
dd3abc4e 492 }
1da177e4 493
dd3abc4e 494 if (oif) {
1da177e4
LT
495 if (local)
496 return local;
497
d420895e 498 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 499 return net->ipv6.ip6_null_entry;
1da177e4 500 }
dd3abc4e 501out:
1da177e4
LT
502 return rt;
503}
504
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbor Solicitation towards the
 * neighbour bound to @rt if that neighbour is not in a NUD_VALID state and
 * the last update is older than the interface's rtr_probe_interval.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute.  The rate limit is enforced through neigh->updated, which this
 * function rewrites.  That store therefore has to happen under the WRITE
 * side of neigh->lock: with only the read side held, several CPUs could
 * pass the time check concurrently, race on the store and each send a
 * probe.
 *
 * @rt may be NULL, in which case nothing is done.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	neigh = rt ? rt->n : NULL;
	/* Cheap unlocked pre-check; re-validated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* Send the solicitation outside the lock. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
541
1da177e4 542/*
554cfb7e 543 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 544 */
b6f99a21 545static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 546{
d1918542 547 struct net_device *dev = rt->dst.dev;
161980f4 548 if (!oif || dev->ifindex == oif)
554cfb7e 549 return 2;
161980f4
DM
550 if ((dev->flags & IFF_LOOPBACK) &&
551 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
552 return 1;
553 return 0;
554cfb7e 554}
1da177e4 555
b6f99a21 556static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 557{
f2c31e32 558 struct neighbour *neigh;
398bcbeb 559 int m;
f2c31e32 560
97cac082 561 neigh = rt->n;
4d0c5911
YH
562 if (rt->rt6i_flags & RTF_NONEXTHOP ||
563 !(rt->rt6i_flags & RTF_GATEWAY))
564 m = 1;
565 else if (neigh) {
554cfb7e
YH
566 read_lock_bh(&neigh->lock);
567 if (neigh->nud_state & NUD_VALID)
4d0c5911 568 m = 2;
398bcbeb
YH
569#ifdef CONFIG_IPV6_ROUTER_PREF
570 else if (neigh->nud_state & NUD_FAILED)
571 m = 0;
572#endif
573 else
ea73ee23 574 m = 1;
554cfb7e 575 read_unlock_bh(&neigh->lock);
398bcbeb
YH
576 } else
577 m = 0;
554cfb7e 578 return m;
1da177e4
LT
579}
580
554cfb7e
YH
581static int rt6_score_route(struct rt6_info *rt, int oif,
582 int strict)
1da177e4 583{
4d0c5911 584 int m, n;
1ab1457c 585
4d0c5911 586 m = rt6_check_dev(rt, oif);
77d16f45 587 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 588 return -1;
ebacaaa0
YH
589#ifdef CONFIG_IPV6_ROUTER_PREF
590 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
591#endif
4d0c5911 592 n = rt6_check_neigh(rt);
557e92ef 593 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
594 return -1;
595 return m;
596}
597
f11e6659
DM
598static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
599 int *mpri, struct rt6_info *match)
554cfb7e 600{
f11e6659
DM
601 int m;
602
603 if (rt6_check_expired(rt))
604 goto out;
605
606 m = rt6_score_route(rt, oif, strict);
607 if (m < 0)
608 goto out;
609
610 if (m > *mpri) {
611 if (strict & RT6_LOOKUP_F_REACHABLE)
612 rt6_probe(match);
613 *mpri = m;
614 match = rt;
615 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
616 rt6_probe(rt);
617 }
618
619out:
620 return match;
621}
622
623static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
624 struct rt6_info *rr_head,
625 u32 metric, int oif, int strict)
626{
627 struct rt6_info *rt, *match;
554cfb7e 628 int mpri = -1;
1da177e4 629
f11e6659
DM
630 match = NULL;
631 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 632 rt = rt->dst.rt6_next)
f11e6659
DM
633 match = find_match(rt, oif, strict, &mpri, match);
634 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 635 rt = rt->dst.rt6_next)
f11e6659 636 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 637
f11e6659
DM
638 return match;
639}
1da177e4 640
f11e6659
DM
641static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
642{
643 struct rt6_info *match, *rt0;
8ed67789 644 struct net *net;
1da177e4 645
f11e6659
DM
646 rt0 = fn->rr_ptr;
647 if (!rt0)
648 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 649
f11e6659 650 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 651
554cfb7e 652 if (!match &&
f11e6659 653 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 654 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 655
554cfb7e 656 /* no entries matched; do round-robin */
f11e6659
DM
657 if (!next || next->rt6i_metric != rt0->rt6i_metric)
658 next = fn->leaf;
659
660 if (next != rt0)
661 fn->rr_ptr = next;
1da177e4 662 }
1da177e4 663
d1918542 664 net = dev_net(rt0->dst.dev);
a02cec21 665 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
666}
667
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option (@opt, @len bytes) received on @dev
 * from gateway @gwaddr.  (Presumably the RFC 4191 Route Information
 * Option; confirm against the caller.)
 *
 * After validating the option, the matching route-info route is:
 *   - deleted when the advertised lifetime is zero;
 *   - created when it does not exist and the lifetime is non-zero;
 *   - otherwise updated with the advertised preference;
 * and its expiry is set from the lifetime (cleared for an infinite one).
 *
 * Returns 0 on success, -EINVAL for a malformed option or an invalid
 * preference value.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned long lifetime;
	unsigned int pref;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* The option carries the full address: use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	ip6_rt_put(rt);
	return 0;
}
#endif
740
/*
 * BACKTRACK - climb the fib6 tree when the lookup yielded the null entry.
 *
 * Relies on the expanding function providing the locals `rt` and `fn` and
 * the labels `out` and `restart`.  While `rt` is the null entry: stop at
 * the tree root (goto out); otherwise move to the parent, descending into
 * the parent's subtree via fib6_lookup() on @saddr when the parent has a
 * subtree that is not the node we came from, and retry (goto restart) as
 * soon as a node carrying route info is reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *parent;				\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			parent = fn->parent;				\
			if (FIB6_SUBTREE(parent) &&			\
			    FIB6_SUBTREE(parent) != fn)			\
				fn = fib6_lookup(FIB6_SUBTREE(parent),	\
						 NULL, saddr);		\
			else						\
				fn = parent;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 758
8ed67789
DL
759static struct rt6_info *ip6_pol_route_lookup(struct net *net,
760 struct fib6_table *table,
4c9483b2 761 struct flowi6 *fl6, int flags)
1da177e4
LT
762{
763 struct fib6_node *fn;
764 struct rt6_info *rt;
765
c71099ac 766 read_lock_bh(&table->tb6_lock);
4c9483b2 767 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
768restart:
769 rt = fn->leaf;
4c9483b2 770 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318
ND
771 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
772 rt = rt6_multipath_select(rt, fl6);
4c9483b2 773 BACKTRACK(net, &fl6->saddr);
c71099ac 774out:
d8d1f30b 775 dst_use(&rt->dst, jiffies);
c71099ac 776 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
777 return rt;
778
779}
780
ea6e574e
FW
781struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
782 int flags)
783{
784 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
785}
786EXPORT_SYMBOL_GPL(ip6_route_lookup);
787
9acd9f3a
YH
788struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
789 const struct in6_addr *saddr, int oif, int strict)
c71099ac 790{
4c9483b2
DM
791 struct flowi6 fl6 = {
792 .flowi6_oif = oif,
793 .daddr = *daddr,
c71099ac
TG
794 };
795 struct dst_entry *dst;
77d16f45 796 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 797
adaa70bb 798 if (saddr) {
4c9483b2 799 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
800 flags |= RT6_LOOKUP_F_HAS_SADDR;
801 }
802
4c9483b2 803 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
804 if (dst->error == 0)
805 return (struct rt6_info *) dst;
806
807 dst_release(dst);
808
1da177e4
LT
809 return NULL;
810}
811
7159039a
YH
812EXPORT_SYMBOL(rt6_lookup);
813
c71099ac 814/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
815 It takes new route entry, the addition fails by any reason the
816 route is freed. In any case, if caller does not hold it, it may
817 be destroyed.
818 */
819
86872cb5 820static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
821{
822 int err;
c71099ac 823 struct fib6_table *table;
1da177e4 824
c71099ac
TG
825 table = rt->rt6i_table;
826 write_lock_bh(&table->tb6_lock);
86872cb5 827 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 828 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
829
830 return err;
831}
832
40e22e8f
TG
833int ip6_ins_rt(struct rt6_info *rt)
834{
4d1169c1 835 struct nl_info info = {
d1918542 836 .nl_net = dev_net(rt->dst.dev),
4d1169c1 837 };
528c4ceb 838 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
839}
840
1716a961 841static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 842 const struct in6_addr *daddr,
b71d1d42 843 const struct in6_addr *saddr)
1da177e4 844{
1da177e4
LT
845 struct rt6_info *rt;
846
847 /*
848 * Clone the route.
849 */
850
21efcfa0 851 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
852
853 if (rt) {
14deae41
DM
854 int attempts = !in_softirq();
855
38308473 856 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 857 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 858 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 859 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 860 rt->rt6i_gateway = *daddr;
58c4fb86 861 }
1da177e4 862
1da177e4 863 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
864
865#ifdef CONFIG_IPV6_SUBTREES
866 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 867 rt->rt6i_src.addr = *saddr;
1da177e4
LT
868 rt->rt6i_src.plen = 128;
869 }
870#endif
871
14deae41 872 retry:
8ade06c6 873 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 874 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
875 int saved_rt_min_interval =
876 net->ipv6.sysctl.ip6_rt_gc_min_interval;
877 int saved_rt_elasticity =
878 net->ipv6.sysctl.ip6_rt_gc_elasticity;
879
880 if (attempts-- > 0) {
881 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
882 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
883
86393e52 884 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
885
886 net->ipv6.sysctl.ip6_rt_gc_elasticity =
887 saved_rt_elasticity;
888 net->ipv6.sysctl.ip6_rt_gc_min_interval =
889 saved_rt_min_interval;
890 goto retry;
891 }
892
f3213831 893 net_warn_ratelimited("Neighbour table overflow\n");
d8d1f30b 894 dst_free(&rt->dst);
14deae41
DM
895 return NULL;
896 }
95a9a5ba 897 }
1da177e4 898
95a9a5ba
YH
899 return rt;
900}
1da177e4 901
21efcfa0
ED
902static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
903 const struct in6_addr *daddr)
299d9939 904{
21efcfa0
ED
905 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
906
299d9939 907 if (rt) {
299d9939 908 rt->rt6i_flags |= RTF_CACHE;
97cac082 909 rt->n = neigh_clone(ort->n);
299d9939
YH
910 }
911 return rt;
912}
913
8ed67789 914static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 915 struct flowi6 *fl6, int flags)
1da177e4
LT
916{
917 struct fib6_node *fn;
519fbd87 918 struct rt6_info *rt, *nrt;
c71099ac 919 int strict = 0;
1da177e4 920 int attempts = 3;
519fbd87 921 int err;
53b7997f 922 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 923
77d16f45 924 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
925
926relookup:
c71099ac 927 read_lock_bh(&table->tb6_lock);
1da177e4 928
8238dd06 929restart_2:
4c9483b2 930 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
931
932restart:
4acad72d 933 rt = rt6_select(fn, oif, strict | reachable);
51ebd318
ND
934 if (rt->rt6i_nsiblings && oif == 0)
935 rt = rt6_multipath_select(rt, fl6);
4c9483b2 936 BACKTRACK(net, &fl6->saddr);
8ed67789 937 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 938 rt->rt6i_flags & RTF_CACHE)
1ddef044 939 goto out;
1da177e4 940
d8d1f30b 941 dst_hold(&rt->dst);
c71099ac 942 read_unlock_bh(&table->tb6_lock);
fb9de91e 943
97cac082 944 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 945 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 946 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 947 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
948 else
949 goto out2;
e40cf353 950
94e187c0 951 ip6_rt_put(rt);
8ed67789 952 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 953
d8d1f30b 954 dst_hold(&rt->dst);
519fbd87 955 if (nrt) {
40e22e8f 956 err = ip6_ins_rt(nrt);
519fbd87 957 if (!err)
1da177e4 958 goto out2;
1da177e4 959 }
1da177e4 960
519fbd87
YH
961 if (--attempts <= 0)
962 goto out2;
963
964 /*
c71099ac 965 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
966 * released someone could insert this route. Relookup.
967 */
94e187c0 968 ip6_rt_put(rt);
519fbd87
YH
969 goto relookup;
970
971out:
8238dd06
YH
972 if (reachable) {
973 reachable = 0;
974 goto restart_2;
975 }
d8d1f30b 976 dst_hold(&rt->dst);
c71099ac 977 read_unlock_bh(&table->tb6_lock);
1da177e4 978out2:
d8d1f30b
CG
979 rt->dst.lastuse = jiffies;
980 rt->dst.__use++;
c71099ac
TG
981
982 return rt;
1da177e4
LT
983}
984
8ed67789 985static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 986 struct flowi6 *fl6, int flags)
4acad72d 987{
4c9483b2 988 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
989}
990
72331bc0
SL
991static struct dst_entry *ip6_route_input_lookup(struct net *net,
992 struct net_device *dev,
993 struct flowi6 *fl6, int flags)
994{
995 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
996 flags |= RT6_LOOKUP_F_IFACE;
997
998 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
999}
1000
c71099ac
TG
1001void ip6_route_input(struct sk_buff *skb)
1002{
b71d1d42 1003 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1004 struct net *net = dev_net(skb->dev);
adaa70bb 1005 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
1006 struct flowi6 fl6 = {
1007 .flowi6_iif = skb->dev->ifindex,
1008 .daddr = iph->daddr,
1009 .saddr = iph->saddr,
38308473 1010 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
1011 .flowi6_mark = skb->mark,
1012 .flowi6_proto = iph->nexthdr,
c71099ac 1013 };
adaa70bb 1014
72331bc0 1015 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1016}
1017
8ed67789 1018static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1019 struct flowi6 *fl6, int flags)
1da177e4 1020{
4c9483b2 1021 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1022}
1023
9c7a4f9c 1024struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 1025 struct flowi6 *fl6)
c71099ac
TG
1026{
1027 int flags = 0;
1028
1fb9489b 1029 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1030
4c9483b2 1031 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 1032 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1033
4c9483b2 1034 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 1035 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1036 else if (sk)
1037 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1038
4c9483b2 1039 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
1040}
1041
7159039a 1042EXPORT_SYMBOL(ip6_route_output);
1da177e4 1043
2774c131 1044struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1045{
5c1e6aa3 1046 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
1047 struct dst_entry *new = NULL;
1048
f5b0a874 1049 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 1050 if (rt) {
d8d1f30b 1051 new = &rt->dst;
14e50e57 1052
8104891b
SK
1053 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1054 rt6_init_peer(rt, net->ipv6.peers);
1055
14e50e57 1056 new->__use = 1;
352e512c
HX
1057 new->input = dst_discard;
1058 new->output = dst_discard;
14e50e57 1059
21efcfa0
ED
1060 if (dst_metrics_read_only(&ort->dst))
1061 new->_metrics = ort->dst._metrics;
1062 else
1063 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
1064 rt->rt6i_idev = ort->rt6i_idev;
1065 if (rt->rt6i_idev)
1066 in6_dev_hold(rt->rt6i_idev);
14e50e57 1067
4e3fd7a0 1068 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1069 rt->rt6i_flags = ort->rt6i_flags;
1070 rt6_clean_expires(rt);
14e50e57
DM
1071 rt->rt6i_metric = 0;
1072
1073 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1074#ifdef CONFIG_IPV6_SUBTREES
1075 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1076#endif
1077
1078 dst_free(new);
1079 }
1080
69ead7af
DM
1081 dst_release(dst_orig);
1082 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1083}
14e50e57 1084
1da177e4
LT
1085/*
1086 * Destination cache support functions
1087 */
1088
1089static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1090{
1091 struct rt6_info *rt;
1092
1093 rt = (struct rt6_info *) dst;
1094
6f3118b5
ND
1095 /* All IPV6 dsts are created with ->obsolete set to the value
1096 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1097 * into this function always.
1098 */
1099 if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1100 return NULL;
1101
6431cbc2
DM
1102 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1103 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
97bab73f 1104 if (!rt6_has_peer(rt))
6431cbc2
DM
1105 rt6_bind_peer(rt, 0);
1106 rt->rt6i_peer_genid = rt6_peer_genid();
1107 }
1da177e4 1108 return dst;
6431cbc2 1109 }
1da177e4
LT
1110 return NULL;
1111}
1112
1113static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1114{
1115 struct rt6_info *rt = (struct rt6_info *) dst;
1116
1117 if (rt) {
54c1a859
YH
1118 if (rt->rt6i_flags & RTF_CACHE) {
1119 if (rt6_check_expired(rt)) {
1120 ip6_del_rt(rt);
1121 dst = NULL;
1122 }
1123 } else {
1da177e4 1124 dst_release(dst);
54c1a859
YH
1125 dst = NULL;
1126 }
1da177e4 1127 }
54c1a859 1128 return dst;
1da177e4
LT
1129}
1130
1131static void ip6_link_failure(struct sk_buff *skb)
1132{
1133 struct rt6_info *rt;
1134
3ffe533c 1135 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1136
adf30907 1137 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1138 if (rt) {
1716a961
G
1139 if (rt->rt6i_flags & RTF_CACHE)
1140 rt6_update_expires(rt, 0);
1141 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1da177e4
LT
1142 rt->rt6i_node->fn_sernum = -1;
1143 }
1144}
1145
6700c270
DM
1146static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1147 struct sk_buff *skb, u32 mtu)
1da177e4
LT
1148{
1149 struct rt6_info *rt6 = (struct rt6_info*)dst;
1150
81aded24 1151 dst_confirm(dst);
1da177e4 1152 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
81aded24
DM
1153 struct net *net = dev_net(dst->dev);
1154
1da177e4
LT
1155 rt6->rt6i_flags |= RTF_MODIFIED;
1156 if (mtu < IPV6_MIN_MTU) {
defb3519 1157 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1158 mtu = IPV6_MIN_MTU;
defb3519
DM
1159 features |= RTAX_FEATURE_ALLFRAG;
1160 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1161 }
defb3519 1162 dst_metric_set(dst, RTAX_MTU, mtu);
81aded24 1163 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1164 }
1165}
1166
42ae66c8
DM
1167void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1168 int oif, u32 mark)
81aded24
DM
1169{
1170 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1171 struct dst_entry *dst;
1172 struct flowi6 fl6;
1173
1174 memset(&fl6, 0, sizeof(fl6));
1175 fl6.flowi6_oif = oif;
1176 fl6.flowi6_mark = mark;
3e12939a 1177 fl6.flowi6_flags = 0;
81aded24
DM
1178 fl6.daddr = iph->daddr;
1179 fl6.saddr = iph->saddr;
1180 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1181
1182 dst = ip6_route_output(net, NULL, &fl6);
1183 if (!dst->error)
6700c270 1184 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
81aded24
DM
1185 dst_release(dst);
1186}
1187EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1188
1189void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1190{
1191 ip6_update_pmtu(skb, sock_net(sk), mtu,
1192 sk->sk_bound_dev_if, sk->sk_mark);
1193}
1194EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1195
3a5ad2ee
DM
1196void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1197{
1198 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1199 struct dst_entry *dst;
1200 struct flowi6 fl6;
1201
1202 memset(&fl6, 0, sizeof(fl6));
1203 fl6.flowi6_oif = oif;
1204 fl6.flowi6_mark = mark;
1205 fl6.flowi6_flags = 0;
1206 fl6.daddr = iph->daddr;
1207 fl6.saddr = iph->saddr;
1208 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1209
1210 dst = ip6_route_output(net, NULL, &fl6);
1211 if (!dst->error)
6700c270 1212 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1213 dst_release(dst);
1214}
1215EXPORT_SYMBOL_GPL(ip6_redirect);
1216
1217void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1218{
1219 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1220}
1221EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1222
0dbaee3b 1223static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1224{
0dbaee3b
DM
1225 struct net_device *dev = dst->dev;
1226 unsigned int mtu = dst_mtu(dst);
1227 struct net *net = dev_net(dev);
1228
1da177e4
LT
1229 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1230
5578689a
DL
1231 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1232 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1233
1234 /*
1ab1457c
YH
1235 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1236 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1237 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1238 * rely only on pmtu discovery"
1239 */
1240 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1241 mtu = IPV6_MAXPLEN;
1242 return mtu;
1243}
1244
ebb762f2 1245static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1246{
d33e4553 1247 struct inet6_dev *idev;
618f9bc7
SK
1248 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1249
1250 if (mtu)
1251 return mtu;
1252
1253 mtu = IPV6_MIN_MTU;
d33e4553
DM
1254
1255 rcu_read_lock();
1256 idev = __in6_dev_get(dst->dev);
1257 if (idev)
1258 mtu = idev->cnf.mtu6;
1259 rcu_read_unlock();
1260
1261 return mtu;
1262}
1263
3b00944c
YH
/*
 * List of dsts created by icmp6_dst_alloc(), chained through dst->next.
 * Every access in this file is made with icmp6_dst_lock held.
 */
static DEFINE_SPINLOCK(icmp6_dst_lock);
static struct dst_entry *icmp6_dst_gc_list;
5d0bbeeb 1266
3b00944c 1267struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1268 struct neighbour *neigh,
87a11578 1269 struct flowi6 *fl6)
1da177e4 1270{
87a11578 1271 struct dst_entry *dst;
1da177e4
LT
1272 struct rt6_info *rt;
1273 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1274 struct net *net = dev_net(dev);
1da177e4 1275
38308473 1276 if (unlikely(!idev))
122bdf67 1277 return ERR_PTR(-ENODEV);
1da177e4 1278
8b96d22d 1279 rt = ip6_dst_alloc(net, dev, 0, NULL);
38308473 1280 if (unlikely(!rt)) {
1da177e4 1281 in6_dev_put(idev);
87a11578 1282 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1283 goto out;
1284 }
1285
1da177e4
LT
1286 if (neigh)
1287 neigh_hold(neigh);
14deae41 1288 else {
f894cbf8 1289 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
b43faac6 1290 if (IS_ERR(neigh)) {
252c3d84 1291 in6_dev_put(idev);
b43faac6
DM
1292 dst_free(&rt->dst);
1293 return ERR_CAST(neigh);
1294 }
14deae41 1295 }
1da177e4 1296
8e2ec639
YZ
1297 rt->dst.flags |= DST_HOST;
1298 rt->dst.output = ip6_output;
97cac082 1299 rt->n = neigh;
d8d1f30b 1300 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1301 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1302 rt->rt6i_dst.plen = 128;
1303 rt->rt6i_idev = idev;
7011687f 1304 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1305
3b00944c 1306 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1307 rt->dst.next = icmp6_dst_gc_list;
1308 icmp6_dst_gc_list = &rt->dst;
3b00944c 1309 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1310
5578689a 1311 fib6_force_start_gc(net);
1da177e4 1312
87a11578
DM
1313 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1314
1da177e4 1315out:
87a11578 1316 return dst;
1da177e4
LT
1317}
1318
3d0f24a7 1319int icmp6_dst_gc(void)
1da177e4 1320{
e9476e95 1321 struct dst_entry *dst, **pprev;
3d0f24a7 1322 int more = 0;
1da177e4 1323
3b00944c
YH
1324 spin_lock_bh(&icmp6_dst_lock);
1325 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1326
1da177e4
LT
1327 while ((dst = *pprev) != NULL) {
1328 if (!atomic_read(&dst->__refcnt)) {
1329 *pprev = dst->next;
1330 dst_free(dst);
1da177e4
LT
1331 } else {
1332 pprev = &dst->next;
3d0f24a7 1333 ++more;
1da177e4
LT
1334 }
1335 }
1336
3b00944c 1337 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1338
3d0f24a7 1339 return more;
1da177e4
LT
1340}
1341
1e493d19
DM
1342static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1343 void *arg)
1344{
1345 struct dst_entry *dst, **pprev;
1346
1347 spin_lock_bh(&icmp6_dst_lock);
1348 pprev = &icmp6_dst_gc_list;
1349 while ((dst = *pprev) != NULL) {
1350 struct rt6_info *rt = (struct rt6_info *) dst;
1351 if (func(rt, arg)) {
1352 *pprev = dst->next;
1353 dst_free(dst);
1354 } else {
1355 pprev = &dst->next;
1356 }
1357 }
1358 spin_unlock_bh(&icmp6_dst_lock);
1359}
1360
569d3645 1361static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1362{
1da177e4 1363 unsigned long now = jiffies;
86393e52 1364 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1365 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1366 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1367 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1368 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1369 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1370 int entries;
7019b78e 1371
fc66f95c 1372 entries = dst_entries_get_fast(ops);
7019b78e 1373 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1374 entries <= rt_max_size)
1da177e4
LT
1375 goto out;
1376
6891a346
BT
1377 net->ipv6.ip6_rt_gc_expire++;
1378 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1379 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1380 entries = dst_entries_get_slow(ops);
1381 if (entries < ops->gc_thresh)
7019b78e 1382 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1383out:
7019b78e 1384 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1385 return entries > rt_max_size;
1da177e4
LT
1386}
1387
1388/* Clean host part of a prefix. Not necessary in radix tree,
1389 but results in cleaner routing tables.
1390
1391 Remove it only when all the things will work!
1392 */
1393
6b75d090 1394int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1395{
5170ae82 1396 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1397 if (hoplimit == 0) {
6b75d090 1398 struct net_device *dev = dst->dev;
c68f24cc
ED
1399 struct inet6_dev *idev;
1400
1401 rcu_read_lock();
1402 idev = __in6_dev_get(dev);
1403 if (idev)
6b75d090 1404 hoplimit = idev->cnf.hop_limit;
c68f24cc 1405 else
53b7997f 1406 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1407 rcu_read_unlock();
1da177e4
LT
1408 }
1409 return hoplimit;
1410}
abbf46ae 1411EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1412
1413/*
1414 *
1415 */
1416
86872cb5 1417int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1418{
1419 int err;
5578689a 1420 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1421 struct rt6_info *rt = NULL;
1422 struct net_device *dev = NULL;
1423 struct inet6_dev *idev = NULL;
c71099ac 1424 struct fib6_table *table;
1da177e4
LT
1425 int addr_type;
1426
86872cb5 1427 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1428 return -EINVAL;
1429#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1430 if (cfg->fc_src_len)
1da177e4
LT
1431 return -EINVAL;
1432#endif
86872cb5 1433 if (cfg->fc_ifindex) {
1da177e4 1434 err = -ENODEV;
5578689a 1435 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1436 if (!dev)
1437 goto out;
1438 idev = in6_dev_get(dev);
1439 if (!idev)
1440 goto out;
1441 }
1442
86872cb5
TG
1443 if (cfg->fc_metric == 0)
1444 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1445
d71314b4 1446 err = -ENOBUFS;
38308473
DM
1447 if (cfg->fc_nlinfo.nlh &&
1448 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1449 table = fib6_get_table(net, cfg->fc_table);
38308473 1450 if (!table) {
f3213831 1451 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1452 table = fib6_new_table(net, cfg->fc_table);
1453 }
1454 } else {
1455 table = fib6_new_table(net, cfg->fc_table);
1456 }
38308473
DM
1457
1458 if (!table)
c71099ac 1459 goto out;
c71099ac 1460
8b96d22d 1461 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1da177e4 1462
38308473 1463 if (!rt) {
1da177e4
LT
1464 err = -ENOMEM;
1465 goto out;
1466 }
1467
1716a961
G
1468 if (cfg->fc_flags & RTF_EXPIRES)
1469 rt6_set_expires(rt, jiffies +
1470 clock_t_to_jiffies(cfg->fc_expires));
1471 else
1472 rt6_clean_expires(rt);
1da177e4 1473
86872cb5
TG
1474 if (cfg->fc_protocol == RTPROT_UNSPEC)
1475 cfg->fc_protocol = RTPROT_BOOT;
1476 rt->rt6i_protocol = cfg->fc_protocol;
1477
1478 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1479
1480 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1481 rt->dst.input = ip6_mc_input;
ab79ad14
1482 else if (cfg->fc_flags & RTF_LOCAL)
1483 rt->dst.input = ip6_input;
1da177e4 1484 else
d8d1f30b 1485 rt->dst.input = ip6_forward;
1da177e4 1486
d8d1f30b 1487 rt->dst.output = ip6_output;
1da177e4 1488
86872cb5
TG
1489 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1490 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1491 if (rt->rt6i_dst.plen == 128)
11d53b49 1492 rt->dst.flags |= DST_HOST;
1da177e4 1493
8e2ec639
YZ
1494 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1495 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1496 if (!metrics) {
1497 err = -ENOMEM;
1498 goto out;
1499 }
1500 dst_init_metrics(&rt->dst, metrics, 0);
1501 }
1da177e4 1502#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1503 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1504 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1505#endif
1506
86872cb5 1507 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1508
1509 /* We cannot add true routes via loopback here,
1510 they would result in kernel looping; promote them to reject routes
1511 */
86872cb5 1512 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1513 (dev && (dev->flags & IFF_LOOPBACK) &&
1514 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1515 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1516 /* hold loopback dev/idev if we haven't done so. */
5578689a 1517 if (dev != net->loopback_dev) {
1da177e4
LT
1518 if (dev) {
1519 dev_put(dev);
1520 in6_dev_put(idev);
1521 }
5578689a 1522 dev = net->loopback_dev;
1da177e4
LT
1523 dev_hold(dev);
1524 idev = in6_dev_get(dev);
1525 if (!idev) {
1526 err = -ENODEV;
1527 goto out;
1528 }
1529 }
d8d1f30b
CG
1530 rt->dst.output = ip6_pkt_discard_out;
1531 rt->dst.input = ip6_pkt_discard;
1da177e4 1532 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1533 switch (cfg->fc_type) {
1534 case RTN_BLACKHOLE:
1535 rt->dst.error = -EINVAL;
1536 break;
1537 case RTN_PROHIBIT:
1538 rt->dst.error = -EACCES;
1539 break;
b4949ab2
ND
1540 case RTN_THROW:
1541 rt->dst.error = -EAGAIN;
1542 break;
ef2c7d7b
ND
1543 default:
1544 rt->dst.error = -ENETUNREACH;
1545 break;
1546 }
1da177e4
LT
1547 goto install_route;
1548 }
1549
86872cb5 1550 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1551 const struct in6_addr *gw_addr;
1da177e4
LT
1552 int gwa_type;
1553
86872cb5 1554 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1555 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1556 gwa_type = ipv6_addr_type(gw_addr);
1557
1558 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1559 struct rt6_info *grt;
1560
1561 /* IPv6 strictly inhibits using not link-local
1562 addresses as nexthop address.
1563 Otherwise, router will not able to send redirects.
1564 It is very good, but in some (rare!) circumstances
1565 (SIT, PtP, NBMA NOARP links) it is handy to allow
1566 some exceptions. --ANK
1567 */
1568 err = -EINVAL;
38308473 1569 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1570 goto out;
1571
5578689a 1572 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1573
1574 err = -EHOSTUNREACH;
38308473 1575 if (!grt)
1da177e4
LT
1576 goto out;
1577 if (dev) {
d1918542 1578 if (dev != grt->dst.dev) {
94e187c0 1579 ip6_rt_put(grt);
1da177e4
LT
1580 goto out;
1581 }
1582 } else {
d1918542 1583 dev = grt->dst.dev;
1da177e4
LT
1584 idev = grt->rt6i_idev;
1585 dev_hold(dev);
1586 in6_dev_hold(grt->rt6i_idev);
1587 }
38308473 1588 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1589 err = 0;
94e187c0 1590 ip6_rt_put(grt);
1da177e4
LT
1591
1592 if (err)
1593 goto out;
1594 }
1595 err = -EINVAL;
38308473 1596 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1597 goto out;
1598 }
1599
1600 err = -ENODEV;
38308473 1601 if (!dev)
1da177e4
LT
1602 goto out;
1603
c3968a85
DW
1604 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1605 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1606 err = -EINVAL;
1607 goto out;
1608 }
4e3fd7a0 1609 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1610 rt->rt6i_prefsrc.plen = 128;
1611 } else
1612 rt->rt6i_prefsrc.plen = 0;
1613
86872cb5 1614 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1615 err = rt6_bind_neighbour(rt, dev);
f83c7790 1616 if (err)
1da177e4 1617 goto out;
1da177e4
LT
1618 }
1619
86872cb5 1620 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1621
1622install_route:
86872cb5
TG
1623 if (cfg->fc_mx) {
1624 struct nlattr *nla;
1625 int remaining;
1626
1627 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1628 int type = nla_type(nla);
86872cb5
TG
1629
1630 if (type) {
1631 if (type > RTAX_MAX) {
1da177e4
LT
1632 err = -EINVAL;
1633 goto out;
1634 }
86872cb5 1635
defb3519 1636 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1637 }
1da177e4
LT
1638 }
1639 }
1640
d8d1f30b 1641 rt->dst.dev = dev;
1da177e4 1642 rt->rt6i_idev = idev;
c71099ac 1643 rt->rt6i_table = table;
63152fc0 1644
c346dca1 1645 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1646
86872cb5 1647 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1648
1649out:
1650 if (dev)
1651 dev_put(dev);
1652 if (idev)
1653 in6_dev_put(idev);
1654 if (rt)
d8d1f30b 1655 dst_free(&rt->dst);
1da177e4
LT
1656 return err;
1657}
1658
86872cb5 1659static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1660{
1661 int err;
c71099ac 1662 struct fib6_table *table;
d1918542 1663 struct net *net = dev_net(rt->dst.dev);
1da177e4 1664
6825a26c
G
1665 if (rt == net->ipv6.ip6_null_entry) {
1666 err = -ENOENT;
1667 goto out;
1668 }
6c813a72 1669
c71099ac
TG
1670 table = rt->rt6i_table;
1671 write_lock_bh(&table->tb6_lock);
86872cb5 1672 err = fib6_del(rt, info);
c71099ac 1673 write_unlock_bh(&table->tb6_lock);
1da177e4 1674
6825a26c 1675out:
94e187c0 1676 ip6_rt_put(rt);
1da177e4
LT
1677 return err;
1678}
1679
e0a1ad73
TG
1680int ip6_del_rt(struct rt6_info *rt)
1681{
4d1169c1 1682 struct nl_info info = {
d1918542 1683 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1684 };
528c4ceb 1685 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1686}
1687
86872cb5 1688static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1689{
c71099ac 1690 struct fib6_table *table;
1da177e4
LT
1691 struct fib6_node *fn;
1692 struct rt6_info *rt;
1693 int err = -ESRCH;
1694
5578689a 1695 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1696 if (!table)
c71099ac
TG
1697 return err;
1698
1699 read_lock_bh(&table->tb6_lock);
1da177e4 1700
c71099ac 1701 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1702 &cfg->fc_dst, cfg->fc_dst_len,
1703 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1704
1da177e4 1705 if (fn) {
d8d1f30b 1706 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1707 if (cfg->fc_ifindex &&
d1918542
DM
1708 (!rt->dst.dev ||
1709 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1710 continue;
86872cb5
TG
1711 if (cfg->fc_flags & RTF_GATEWAY &&
1712 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1713 continue;
86872cb5 1714 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1715 continue;
d8d1f30b 1716 dst_hold(&rt->dst);
c71099ac 1717 read_unlock_bh(&table->tb6_lock);
1da177e4 1718
86872cb5 1719 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1720 }
1721 }
c71099ac 1722 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1723
1724 return err;
1725}
1726
6700c270 1727static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 1728{
e8599ff4 1729 struct net *net = dev_net(skb->dev);
a6279458 1730 struct netevent_redirect netevent;
e8599ff4
DM
1731 struct rt6_info *rt, *nrt = NULL;
1732 const struct in6_addr *target;
e8599ff4 1733 struct ndisc_options ndopts;
6e157b6a
DM
1734 const struct in6_addr *dest;
1735 struct neighbour *old_neigh;
e8599ff4
DM
1736 struct inet6_dev *in6_dev;
1737 struct neighbour *neigh;
1738 struct icmp6hdr *icmph;
6e157b6a
DM
1739 int optlen, on_link;
1740 u8 *lladdr;
e8599ff4
DM
1741
1742 optlen = skb->tail - skb->transport_header;
1743 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1744
1745 if (optlen < 0) {
6e157b6a 1746 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
1747 return;
1748 }
1749
1750 icmph = icmp6_hdr(skb);
1751 target = (const struct in6_addr *) (icmph + 1);
1752 dest = target + 1;
1753
1754 if (ipv6_addr_is_multicast(dest)) {
6e157b6a 1755 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
1756 return;
1757 }
1758
6e157b6a 1759 on_link = 0;
e8599ff4
DM
1760 if (ipv6_addr_equal(dest, target)) {
1761 on_link = 1;
1762 } else if (ipv6_addr_type(target) !=
1763 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 1764 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
1765 return;
1766 }
1767
1768 in6_dev = __in6_dev_get(skb->dev);
1769 if (!in6_dev)
1770 return;
1771 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1772 return;
1773
1774 /* RFC2461 8.1:
1775 * The IP source address of the Redirect MUST be the same as the current
1776 * first-hop router for the specified ICMP Destination Address.
1777 */
1778
1779 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1780 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1781 return;
1782 }
6e157b6a
DM
1783
1784 lladdr = NULL;
e8599ff4
DM
1785 if (ndopts.nd_opts_tgt_lladdr) {
1786 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1787 skb->dev);
1788 if (!lladdr) {
1789 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1790 return;
1791 }
1792 }
1793
6e157b6a
DM
1794 rt = (struct rt6_info *) dst;
1795 if (rt == net->ipv6.ip6_null_entry) {
1796 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 1797 return;
6e157b6a 1798 }
e8599ff4 1799
6e157b6a
DM
1800 /* Redirect received -> path was valid.
1801 * Look, redirects are sent only in response to data packets,
1802 * so that this nexthop apparently is reachable. --ANK
1803 */
1804 dst_confirm(&rt->dst);
a6279458 1805
6e157b6a
DM
1806 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1807 if (!neigh)
1808 return;
a6279458 1809
6e157b6a
DM
1810 /* Duplicate redirect: silently ignore. */
1811 old_neigh = rt->n;
1812 if (neigh == old_neigh)
a6279458 1813 goto out;
1da177e4 1814
1da177e4
LT
1815 /*
1816 * We have finally decided to accept it.
1817 */
1818
1ab1457c 1819 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1820 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1821 NEIGH_UPDATE_F_OVERRIDE|
1822 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1823 NEIGH_UPDATE_F_ISROUTER))
1824 );
1825
21efcfa0 1826 nrt = ip6_rt_copy(rt, dest);
38308473 1827 if (!nrt)
1da177e4
LT
1828 goto out;
1829
1830 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1831 if (on_link)
1832 nrt->rt6i_flags &= ~RTF_GATEWAY;
1833
4e3fd7a0 1834 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
97cac082 1835 nrt->n = neigh_clone(neigh);
1da177e4 1836
40e22e8f 1837 if (ip6_ins_rt(nrt))
1da177e4
LT
1838 goto out;
1839
d8d1f30b 1840 netevent.old = &rt->dst;
1d248b1c 1841 netevent.old_neigh = old_neigh;
d8d1f30b 1842 netevent.new = &nrt->dst;
1d248b1c
DM
1843 netevent.new_neigh = neigh;
1844 netevent.daddr = dest;
8d71740c
TT
1845 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1846
38308473 1847 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 1848 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 1849 ip6_del_rt(rt);
1da177e4
LT
1850 }
1851
1852out:
e8599ff4 1853 neigh_release(neigh);
6e157b6a
DM
1854}
1855
1da177e4
LT
1856/*
1857 * Misc support functions
1858 */
1859
1716a961 1860static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1861 const struct in6_addr *dest)
1da177e4 1862{
d1918542 1863 struct net *net = dev_net(ort->dst.dev);
8b96d22d
DM
1864 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1865 ort->rt6i_table);
1da177e4
LT
1866
1867 if (rt) {
d8d1f30b
CG
1868 rt->dst.input = ort->dst.input;
1869 rt->dst.output = ort->dst.output;
8e2ec639 1870 rt->dst.flags |= DST_HOST;
d8d1f30b 1871
4e3fd7a0 1872 rt->rt6i_dst.addr = *dest;
8e2ec639 1873 rt->rt6i_dst.plen = 128;
defb3519 1874 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1875 rt->dst.error = ort->dst.error;
1da177e4
LT
1876 rt->rt6i_idev = ort->rt6i_idev;
1877 if (rt->rt6i_idev)
1878 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1879 rt->dst.lastuse = jiffies;
1da177e4 1880
4e3fd7a0 1881 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1882 rt->rt6i_flags = ort->rt6i_flags;
1883 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1884 (RTF_DEFAULT | RTF_ADDRCONF))
1885 rt6_set_from(rt, ort);
1886 else
1887 rt6_clean_expires(rt);
1da177e4
LT
1888 rt->rt6i_metric = 0;
1889
1da177e4
LT
1890#ifdef CONFIG_IPV6_SUBTREES
1891 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1892#endif
0f6c6392 1893 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1894 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1895 }
1896 return rt;
1897}
1898
70ceb4f5 1899#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1900static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1901 const struct in6_addr *prefix, int prefixlen,
1902 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1903{
1904 struct fib6_node *fn;
1905 struct rt6_info *rt = NULL;
c71099ac
TG
1906 struct fib6_table *table;
1907
efa2cea0 1908 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1909 if (!table)
c71099ac 1910 return NULL;
70ceb4f5 1911
5744dd9b 1912 read_lock_bh(&table->tb6_lock);
c71099ac 1913 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1914 if (!fn)
1915 goto out;
1916
d8d1f30b 1917 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1918 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1919 continue;
1920 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1921 continue;
1922 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1923 continue;
d8d1f30b 1924 dst_hold(&rt->dst);
70ceb4f5
YH
1925 break;
1926 }
1927out:
5744dd9b 1928 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1929 return rt;
1930}
1931
efa2cea0 1932static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1933 const struct in6_addr *prefix, int prefixlen,
1934 const struct in6_addr *gwaddr, int ifindex,
95c96174 1935 unsigned int pref)
70ceb4f5 1936{
86872cb5
TG
1937 struct fib6_config cfg = {
1938 .fc_table = RT6_TABLE_INFO,
238fc7ea 1939 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1940 .fc_ifindex = ifindex,
1941 .fc_dst_len = prefixlen,
1942 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1943 RTF_UP | RTF_PREF(pref),
15e47304 1944 .fc_nlinfo.portid = 0,
efa2cea0
DL
1945 .fc_nlinfo.nlh = NULL,
1946 .fc_nlinfo.nl_net = net,
86872cb5
TG
1947 };
1948
4e3fd7a0
AD
1949 cfg.fc_dst = *prefix;
1950 cfg.fc_gateway = *gwaddr;
70ceb4f5 1951
e317da96
YH
1952 /* We should treat it as a default route if prefix length is 0. */
1953 if (!prefixlen)
86872cb5 1954 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1955
86872cb5 1956 ip6_route_add(&cfg);
70ceb4f5 1957
efa2cea0 1958 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1959}
1960#endif
1961
b71d1d42 1962struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1963{
1da177e4 1964 struct rt6_info *rt;
c71099ac 1965 struct fib6_table *table;
1da177e4 1966
c346dca1 1967 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1968 if (!table)
c71099ac 1969 return NULL;
1da177e4 1970
5744dd9b 1971 read_lock_bh(&table->tb6_lock);
d8d1f30b 1972 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1973 if (dev == rt->dst.dev &&
045927ff 1974 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1975 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1976 break;
1977 }
1978 if (rt)
d8d1f30b 1979 dst_hold(&rt->dst);
5744dd9b 1980 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1981 return rt;
1982}
1983
b71d1d42 1984struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1985 struct net_device *dev,
1986 unsigned int pref)
1da177e4 1987{
86872cb5
TG
1988 struct fib6_config cfg = {
1989 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1990 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1991 .fc_ifindex = dev->ifindex,
1992 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1993 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 1994 .fc_nlinfo.portid = 0,
5578689a 1995 .fc_nlinfo.nlh = NULL,
c346dca1 1996 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1997 };
1da177e4 1998
4e3fd7a0 1999 cfg.fc_gateway = *gwaddr;
1da177e4 2000
86872cb5 2001 ip6_route_add(&cfg);
1da177e4 2002
1da177e4
LT
2003 return rt6_get_dflt_router(gwaddr, dev);
2004}
2005
7b4da532 2006void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
2007{
2008 struct rt6_info *rt;
c71099ac
TG
2009 struct fib6_table *table;
2010
2011 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 2012 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 2013 if (!table)
c71099ac 2014 return;
1da177e4
LT
2015
2016restart:
c71099ac 2017 read_lock_bh(&table->tb6_lock);
d8d1f30b 2018 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 2019 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 2020 dst_hold(&rt->dst);
c71099ac 2021 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2022 ip6_del_rt(rt);
1da177e4
LT
2023 goto restart;
2024 }
2025 }
c71099ac 2026 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2027}
2028
5578689a
DL
2029static void rtmsg_to_fib6_config(struct net *net,
2030 struct in6_rtmsg *rtmsg,
86872cb5
TG
2031 struct fib6_config *cfg)
2032{
2033 memset(cfg, 0, sizeof(*cfg));
2034
2035 cfg->fc_table = RT6_TABLE_MAIN;
2036 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2037 cfg->fc_metric = rtmsg->rtmsg_metric;
2038 cfg->fc_expires = rtmsg->rtmsg_info;
2039 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2040 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2041 cfg->fc_flags = rtmsg->rtmsg_flags;
2042
5578689a 2043 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2044
4e3fd7a0
AD
2045 cfg->fc_dst = rtmsg->rtmsg_dst;
2046 cfg->fc_src = rtmsg->rtmsg_src;
2047 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2048}
2049
5578689a 2050int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2051{
86872cb5 2052 struct fib6_config cfg;
1da177e4
LT
2053 struct in6_rtmsg rtmsg;
2054 int err;
2055
2056 switch(cmd) {
2057 case SIOCADDRT: /* Add a route */
2058 case SIOCDELRT: /* Delete a route */
2059 if (!capable(CAP_NET_ADMIN))
2060 return -EPERM;
2061 err = copy_from_user(&rtmsg, arg,
2062 sizeof(struct in6_rtmsg));
2063 if (err)
2064 return -EFAULT;
86872cb5 2065
5578689a 2066 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2067
1da177e4
LT
2068 rtnl_lock();
2069 switch (cmd) {
2070 case SIOCADDRT:
86872cb5 2071 err = ip6_route_add(&cfg);
1da177e4
LT
2072 break;
2073 case SIOCDELRT:
86872cb5 2074 err = ip6_route_del(&cfg);
1da177e4
LT
2075 break;
2076 default:
2077 err = -EINVAL;
2078 }
2079 rtnl_unlock();
2080
2081 return err;
3ff50b79 2082 }
1da177e4
LT
2083
2084 return -EINVAL;
2085}
2086
2087/*
2088 * Drop the packet on the floor
2089 */
2090
d5fdd6ba 2091static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2092{
612f09e8 2093 int type;
adf30907 2094 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2095 switch (ipstats_mib_noroutes) {
2096 case IPSTATS_MIB_INNOROUTES:
0660e03f 2097 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2098 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2099 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2100 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2101 break;
2102 }
2103 /* FALLTHROUGH */
2104 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2105 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2106 ipstats_mib_noroutes);
612f09e8
YH
2107 break;
2108 }
3ffe533c 2109 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2110 kfree_skb(skb);
2111 return 0;
2112}
2113
9ce8ade0
TG
2114static int ip6_pkt_discard(struct sk_buff *skb)
2115{
612f09e8 2116 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2117}
2118
20380731 2119static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2120{
adf30907 2121 skb->dev = skb_dst(skb)->dev;
612f09e8 2122 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2123}
2124
6723ab54
DM
2125#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2126
9ce8ade0
TG
2127static int ip6_pkt_prohibit(struct sk_buff *skb)
2128{
612f09e8 2129 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2130}
2131
2132static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2133{
adf30907 2134 skb->dev = skb_dst(skb)->dev;
612f09e8 2135 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2136}
2137
6723ab54
DM
2138#endif
2139
1da177e4
LT
2140/*
2141 * Allocate a dst for local (unicast / anycast) address.
2142 */
2143
2144struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2145 const struct in6_addr *addr,
8f031519 2146 bool anycast)
1da177e4 2147{
c346dca1 2148 struct net *net = dev_net(idev->dev);
8b96d22d 2149 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
f83c7790 2150 int err;
1da177e4 2151
38308473 2152 if (!rt) {
f3213831 2153 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
1da177e4 2154 return ERR_PTR(-ENOMEM);
40385653 2155 }
1da177e4 2156
1da177e4
LT
2157 in6_dev_hold(idev);
2158
11d53b49 2159 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2160 rt->dst.input = ip6_input;
2161 rt->dst.output = ip6_output;
1da177e4 2162 rt->rt6i_idev = idev;
1da177e4
LT
2163
2164 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2165 if (anycast)
2166 rt->rt6i_flags |= RTF_ANYCAST;
2167 else
1da177e4 2168 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2169 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2170 if (err) {
d8d1f30b 2171 dst_free(&rt->dst);
f83c7790 2172 return ERR_PTR(err);
1da177e4
LT
2173 }
2174
4e3fd7a0 2175 rt->rt6i_dst.addr = *addr;
1da177e4 2176 rt->rt6i_dst.plen = 128;
5578689a 2177 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2178
d8d1f30b 2179 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2180
2181 return rt;
2182}
2183
c3968a85
DW
2184int ip6_route_get_saddr(struct net *net,
2185 struct rt6_info *rt,
b71d1d42 2186 const struct in6_addr *daddr,
c3968a85
DW
2187 unsigned int prefs,
2188 struct in6_addr *saddr)
2189{
2190 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2191 int err = 0;
2192 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2193 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2194 else
2195 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2196 daddr, prefs, saddr);
2197 return err;
2198}
2199
/*
 * Walker argument for fib6_remove_prefsrc(): identifies the address being
 * removed so matching prefsrc entries can be cleared.
 */
struct arg_dev_net_ip {
	struct net_device	*dev;	/* device to match; NULL matches any */
	struct net		*net;	/* namespace owning ip6_null_entry */
	struct in6_addr		*addr;	/* address to purge from prefsrc */
};
2206
2207static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2208{
2209 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2210 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2211 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2212
d1918542 2213 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2214 rt != net->ipv6.ip6_null_entry &&
2215 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2216 /* remove prefsrc entry */
2217 rt->rt6i_prefsrc.plen = 0;
2218 }
2219 return 0;
2220}
2221
2222void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2223{
2224 struct net *net = dev_net(ifp->idev->dev);
2225 struct arg_dev_net_ip adni = {
2226 .dev = ifp->idev->dev,
2227 .net = net,
2228 .addr = &ifp->addr,
2229 };
2230 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2231}
2232
8ed67789
DL
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device	*dev;	/* device going down; NULL matches any */
	struct net		*net;	/* namespace owning ip6_null_entry */
};
2237
1da177e4
LT
2238static int fib6_ifdown(struct rt6_info *rt, void *arg)
2239{
bc3ef660 2240 const struct arg_dev_net *adn = arg;
2241 const struct net_device *dev = adn->dev;
8ed67789 2242
d1918542 2243 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2244 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2245 return -1;
c159d30c 2246
1da177e4
LT
2247 return 0;
2248}
2249
f3db4851 2250void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2251{
8ed67789
DL
2252 struct arg_dev_net adn = {
2253 .dev = dev,
2254 .net = net,
2255 };
2256
2257 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2258 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2259}
2260
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* the new MTU */
};
2265
2266static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2267{
2268 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2269 struct inet6_dev *idev;
2270
2271 /* In IPv6 pmtu discovery is not optional,
2272 so that RTAX_MTU lock cannot disable it.
2273 We still use this lock to block changes
2274 caused by addrconf/ndisc.
2275 */
2276
2277 idev = __in6_dev_get(arg->dev);
38308473 2278 if (!idev)
1da177e4
LT
2279 return 0;
2280
2281 /* For administrative MTU increase, there is no way to discover
2282 IPv6 PMTU increase, so PMTU increase should be updated here.
2283 Since RFC 1981 doesn't include administrative MTU increase
2284 update PMTU increase is a MUST. (i.e. jumbo frame)
2285 */
2286 /*
2287 If new MTU is less than route PMTU, this new MTU will be the
2288 lowest MTU in the path, update the route PMTU to reflect PMTU
2289 decreases; if new MTU is greater than route PMTU, and the
2290 old MTU is the lowest MTU in the path, update the route PMTU
2291 to reflect the increase. In this case if the other nodes' MTU
2292 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2293 PMTU discouvery.
2294 */
d1918542 2295 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2296 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2297 (dst_mtu(&rt->dst) >= arg->mtu ||
2298 (dst_mtu(&rt->dst) < arg->mtu &&
2299 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2300 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2301 }
1da177e4
LT
2302 return 0;
2303}
2304
95c96174 2305void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2306{
c71099ac
TG
2307 struct rt6_mtu_change_arg arg = {
2308 .dev = dev,
2309 .mtu = mtu,
2310 };
1da177e4 2311
c346dca1 2312 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2313}
2314
ef7c79ed 2315static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2316 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2317 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2318 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2319 [RTA_PRIORITY] = { .type = NLA_U32 },
2320 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2321 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
86872cb5
TG
2322};
2323
2324static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2325 struct fib6_config *cfg)
1da177e4 2326{
86872cb5
TG
2327 struct rtmsg *rtm;
2328 struct nlattr *tb[RTA_MAX+1];
2329 int err;
1da177e4 2330
86872cb5
TG
2331 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2332 if (err < 0)
2333 goto errout;
1da177e4 2334
86872cb5
TG
2335 err = -EINVAL;
2336 rtm = nlmsg_data(nlh);
2337 memset(cfg, 0, sizeof(*cfg));
2338
2339 cfg->fc_table = rtm->rtm_table;
2340 cfg->fc_dst_len = rtm->rtm_dst_len;
2341 cfg->fc_src_len = rtm->rtm_src_len;
2342 cfg->fc_flags = RTF_UP;
2343 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2344 cfg->fc_type = rtm->rtm_type;
86872cb5 2345
ef2c7d7b
ND
2346 if (rtm->rtm_type == RTN_UNREACHABLE ||
2347 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2348 rtm->rtm_type == RTN_PROHIBIT ||
2349 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2350 cfg->fc_flags |= RTF_REJECT;
2351
ab79ad14
2352 if (rtm->rtm_type == RTN_LOCAL)
2353 cfg->fc_flags |= RTF_LOCAL;
2354
15e47304 2355 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2356 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2357 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2358
2359 if (tb[RTA_GATEWAY]) {
2360 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2361 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2362 }
86872cb5
TG
2363
2364 if (tb[RTA_DST]) {
2365 int plen = (rtm->rtm_dst_len + 7) >> 3;
2366
2367 if (nla_len(tb[RTA_DST]) < plen)
2368 goto errout;
2369
2370 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2371 }
86872cb5
TG
2372
2373 if (tb[RTA_SRC]) {
2374 int plen = (rtm->rtm_src_len + 7) >> 3;
2375
2376 if (nla_len(tb[RTA_SRC]) < plen)
2377 goto errout;
2378
2379 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2380 }
86872cb5 2381
c3968a85
DW
2382 if (tb[RTA_PREFSRC])
2383 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2384
86872cb5
TG
2385 if (tb[RTA_OIF])
2386 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2387
2388 if (tb[RTA_PRIORITY])
2389 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2390
2391 if (tb[RTA_METRICS]) {
2392 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2393 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2394 }
86872cb5
TG
2395
2396 if (tb[RTA_TABLE])
2397 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2398
51ebd318
ND
2399 if (tb[RTA_MULTIPATH]) {
2400 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2401 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2402 }
2403
86872cb5
TG
2404 err = 0;
2405errout:
2406 return err;
1da177e4
LT
2407}
2408
51ebd318
ND
2409static int ip6_route_multipath(struct fib6_config *cfg, int add)
2410{
2411 struct fib6_config r_cfg;
2412 struct rtnexthop *rtnh;
2413 int remaining;
2414 int attrlen;
2415 int err = 0, last_err = 0;
2416
2417beginning:
2418 rtnh = (struct rtnexthop *)cfg->fc_mp;
2419 remaining = cfg->fc_mp_len;
2420
2421 /* Parse a Multipath Entry */
2422 while (rtnh_ok(rtnh, remaining)) {
2423 memcpy(&r_cfg, cfg, sizeof(*cfg));
2424 if (rtnh->rtnh_ifindex)
2425 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2426
2427 attrlen = rtnh_attrlen(rtnh);
2428 if (attrlen > 0) {
2429 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2430
2431 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2432 if (nla) {
2433 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2434 r_cfg.fc_flags |= RTF_GATEWAY;
2435 }
2436 }
2437 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2438 if (err) {
2439 last_err = err;
2440 /* If we are trying to remove a route, do not stop the
2441 * loop when ip6_route_del() fails (because next hop is
2442 * already gone), we should try to remove all next hops.
2443 */
2444 if (add) {
2445 /* If add fails, we should try to delete all
2446 * next hops that have been already added.
2447 */
2448 add = 0;
2449 goto beginning;
2450 }
2451 }
1a72418b
ND
2452 /* Because each route is added like a single route we remove
2453 * this flag after the first nexthop (if there is a collision,
2454 * we have already fail to add the first nexthop:
2455 * fib6_add_rt2node() has reject it).
2456 */
2457 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
51ebd318
ND
2458 rtnh = rtnh_next(rtnh, &remaining);
2459 }
2460
2461 return last_err;
2462}
2463
c127ea2c 2464static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2465{
86872cb5
TG
2466 struct fib6_config cfg;
2467 int err;
1da177e4 2468
86872cb5
TG
2469 err = rtm_to_fib6_config(skb, nlh, &cfg);
2470 if (err < 0)
2471 return err;
2472
51ebd318
ND
2473 if (cfg.fc_mp)
2474 return ip6_route_multipath(&cfg, 0);
2475 else
2476 return ip6_route_del(&cfg);
1da177e4
LT
2477}
2478
c127ea2c 2479static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2480{
86872cb5
TG
2481 struct fib6_config cfg;
2482 int err;
1da177e4 2483
86872cb5
TG
2484 err = rtm_to_fib6_config(skb, nlh, &cfg);
2485 if (err < 0)
2486 return err;
2487
51ebd318
ND
2488 if (cfg.fc_mp)
2489 return ip6_route_multipath(&cfg, 1);
2490 else
2491 return ip6_route_add(&cfg);
1da177e4
LT
2492}
2493
339bf98f
TG
2494static inline size_t rt6_nlmsg_size(void)
2495{
2496 return NLMSG_ALIGN(sizeof(struct rtmsg))
2497 + nla_total_size(16) /* RTA_SRC */
2498 + nla_total_size(16) /* RTA_DST */
2499 + nla_total_size(16) /* RTA_GATEWAY */
2500 + nla_total_size(16) /* RTA_PREFSRC */
2501 + nla_total_size(4) /* RTA_TABLE */
2502 + nla_total_size(4) /* RTA_IIF */
2503 + nla_total_size(4) /* RTA_OIF */
2504 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2505 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2506 + nla_total_size(sizeof(struct rta_cacheinfo));
2507}
2508
191cd582
BH
2509static int rt6_fill_node(struct net *net,
2510 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 2511 struct in6_addr *dst, struct in6_addr *src,
15e47304 2512 int iif, int type, u32 portid, u32 seq,
7bc570c8 2513 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2514{
2515 struct rtmsg *rtm;
2d7202bf 2516 struct nlmsghdr *nlh;
e3703b3d 2517 long expires;
9e762a4a 2518 u32 table;
f2c31e32 2519 struct neighbour *n;
1da177e4
LT
2520
2521 if (prefix) { /* user wants prefix routes only */
2522 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2523 /* success since this is not a prefix route */
2524 return 1;
2525 }
2526 }
2527
15e47304 2528 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 2529 if (!nlh)
26932566 2530 return -EMSGSIZE;
2d7202bf
TG
2531
2532 rtm = nlmsg_data(nlh);
1da177e4
LT
2533 rtm->rtm_family = AF_INET6;
2534 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2535 rtm->rtm_src_len = rt->rt6i_src.plen;
2536 rtm->rtm_tos = 0;
c71099ac 2537 if (rt->rt6i_table)
9e762a4a 2538 table = rt->rt6i_table->tb6_id;
c71099ac 2539 else
9e762a4a
PM
2540 table = RT6_TABLE_UNSPEC;
2541 rtm->rtm_table = table;
c78679e8
DM
2542 if (nla_put_u32(skb, RTA_TABLE, table))
2543 goto nla_put_failure;
ef2c7d7b
ND
2544 if (rt->rt6i_flags & RTF_REJECT) {
2545 switch (rt->dst.error) {
2546 case -EINVAL:
2547 rtm->rtm_type = RTN_BLACKHOLE;
2548 break;
2549 case -EACCES:
2550 rtm->rtm_type = RTN_PROHIBIT;
2551 break;
b4949ab2
ND
2552 case -EAGAIN:
2553 rtm->rtm_type = RTN_THROW;
2554 break;
ef2c7d7b
ND
2555 default:
2556 rtm->rtm_type = RTN_UNREACHABLE;
2557 break;
2558 }
2559 }
38308473 2560 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2561 rtm->rtm_type = RTN_LOCAL;
d1918542 2562 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2563 rtm->rtm_type = RTN_LOCAL;
2564 else
2565 rtm->rtm_type = RTN_UNICAST;
2566 rtm->rtm_flags = 0;
2567 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2568 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2569 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 2570 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
2571 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2572 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2573 rtm->rtm_protocol = RTPROT_RA;
2574 else
2575 rtm->rtm_protocol = RTPROT_KERNEL;
2576 }
1da177e4 2577
38308473 2578 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2579 rtm->rtm_flags |= RTM_F_CLONED;
2580
2581 if (dst) {
c78679e8
DM
2582 if (nla_put(skb, RTA_DST, 16, dst))
2583 goto nla_put_failure;
1ab1457c 2584 rtm->rtm_dst_len = 128;
1da177e4 2585 } else if (rtm->rtm_dst_len)
c78679e8
DM
2586 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2587 goto nla_put_failure;
1da177e4
LT
2588#ifdef CONFIG_IPV6_SUBTREES
2589 if (src) {
c78679e8
DM
2590 if (nla_put(skb, RTA_SRC, 16, src))
2591 goto nla_put_failure;
1ab1457c 2592 rtm->rtm_src_len = 128;
c78679e8
DM
2593 } else if (rtm->rtm_src_len &&
2594 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2595 goto nla_put_failure;
1da177e4 2596#endif
7bc570c8
YH
2597 if (iif) {
2598#ifdef CONFIG_IPV6_MROUTE
2599 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2600 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2601 if (err <= 0) {
2602 if (!nowait) {
2603 if (err == 0)
2604 return 0;
2605 goto nla_put_failure;
2606 } else {
2607 if (err == -EMSGSIZE)
2608 goto nla_put_failure;
2609 }
2610 }
2611 } else
2612#endif
c78679e8
DM
2613 if (nla_put_u32(skb, RTA_IIF, iif))
2614 goto nla_put_failure;
7bc570c8 2615 } else if (dst) {
1da177e4 2616 struct in6_addr saddr_buf;
c78679e8
DM
2617 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2618 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2619 goto nla_put_failure;
1da177e4 2620 }
2d7202bf 2621
c3968a85
DW
2622 if (rt->rt6i_prefsrc.plen) {
2623 struct in6_addr saddr_buf;
4e3fd7a0 2624 saddr_buf = rt->rt6i_prefsrc.addr;
c78679e8
DM
2625 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2626 goto nla_put_failure;
c3968a85
DW
2627 }
2628
defb3519 2629 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2630 goto nla_put_failure;
2631
97cac082 2632 n = rt->n;
94f826b8 2633 if (n) {
fdd6681d 2634 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
94f826b8 2635 goto nla_put_failure;
94f826b8 2636 }
2d7202bf 2637
c78679e8
DM
2638 if (rt->dst.dev &&
2639 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2640 goto nla_put_failure;
2641 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2642 goto nla_put_failure;
8253947e
LW
2643
2644 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 2645
87a50699 2646 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 2647 goto nla_put_failure;
2d7202bf
TG
2648
2649 return nlmsg_end(skb, nlh);
2650
2651nla_put_failure:
26932566
PM
2652 nlmsg_cancel(skb, nlh);
2653 return -EMSGSIZE;
1da177e4
LT
2654}
2655
1b43af54 2656int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2657{
2658 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2659 int prefix;
2660
2d7202bf
TG
2661 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2662 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2663 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2664 } else
2665 prefix = 0;
2666
191cd582
BH
2667 return rt6_fill_node(arg->net,
2668 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 2669 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2670 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2671}
2672
c127ea2c 2673static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2674{
3b1e0a65 2675 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2676 struct nlattr *tb[RTA_MAX+1];
2677 struct rt6_info *rt;
1da177e4 2678 struct sk_buff *skb;
ab364a6f 2679 struct rtmsg *rtm;
4c9483b2 2680 struct flowi6 fl6;
72331bc0 2681 int err, iif = 0, oif = 0;
1da177e4 2682
ab364a6f
TG
2683 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2684 if (err < 0)
2685 goto errout;
1da177e4 2686
ab364a6f 2687 err = -EINVAL;
4c9483b2 2688 memset(&fl6, 0, sizeof(fl6));
1da177e4 2689
ab364a6f
TG
2690 if (tb[RTA_SRC]) {
2691 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2692 goto errout;
2693
4e3fd7a0 2694 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2695 }
2696
2697 if (tb[RTA_DST]) {
2698 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2699 goto errout;
2700
4e3fd7a0 2701 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2702 }
2703
2704 if (tb[RTA_IIF])
2705 iif = nla_get_u32(tb[RTA_IIF]);
2706
2707 if (tb[RTA_OIF])
72331bc0 2708 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2709
2710 if (iif) {
2711 struct net_device *dev;
72331bc0
SL
2712 int flags = 0;
2713
5578689a 2714 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2715 if (!dev) {
2716 err = -ENODEV;
ab364a6f 2717 goto errout;
1da177e4 2718 }
72331bc0
SL
2719
2720 fl6.flowi6_iif = iif;
2721
2722 if (!ipv6_addr_any(&fl6.saddr))
2723 flags |= RT6_LOOKUP_F_HAS_SADDR;
2724
2725 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2726 flags);
2727 } else {
2728 fl6.flowi6_oif = oif;
2729
2730 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2731 }
2732
ab364a6f 2733 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2734 if (!skb) {
94e187c0 2735 ip6_rt_put(rt);
ab364a6f
TG
2736 err = -ENOBUFS;
2737 goto errout;
2738 }
1da177e4 2739
ab364a6f
TG
2740 /* Reserve room for dummy headers, this skb can pass
2741 through good chunk of routing engine.
2742 */
459a98ed 2743 skb_reset_mac_header(skb);
ab364a6f 2744 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2745
d8d1f30b 2746 skb_dst_set(skb, &rt->dst);
1da177e4 2747
4c9483b2 2748 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 2749 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 2750 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2751 if (err < 0) {
ab364a6f
TG
2752 kfree_skb(skb);
2753 goto errout;
1da177e4
LT
2754 }
2755
15e47304 2756 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 2757errout:
1da177e4 2758 return err;
1da177e4
LT
2759}
2760
86872cb5 2761void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2762{
2763 struct sk_buff *skb;
5578689a 2764 struct net *net = info->nl_net;
528c4ceb
DL
2765 u32 seq;
2766 int err;
2767
2768 err = -ENOBUFS;
38308473 2769 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2770
339bf98f 2771 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2772 if (!skb)
21713ebc
TG
2773 goto errout;
2774
191cd582 2775 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
15e47304 2776 event, info->portid, seq, 0, 0, 0);
26932566
PM
2777 if (err < 0) {
2778 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2779 WARN_ON(err == -EMSGSIZE);
2780 kfree_skb(skb);
2781 goto errout;
2782 }
15e47304 2783 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
2784 info->nlh, gfp_any());
2785 return;
21713ebc
TG
2786errout:
2787 if (err < 0)
5578689a 2788 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2789}
2790
8ed67789
DL
2791static int ip6_route_dev_notify(struct notifier_block *this,
2792 unsigned long event, void *data)
2793{
2794 struct net_device *dev = (struct net_device *)data;
c346dca1 2795 struct net *net = dev_net(dev);
8ed67789
DL
2796
2797 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2798 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2799 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2800#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2801 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2802 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2803 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2804 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2805#endif
2806 }
2807
2808 return NOTIFY_OK;
2809}
2810
1da177e4
LT
2811/*
2812 * /proc
2813 */
2814
2815#ifdef CONFIG_PROC_FS
2816
1da177e4
LT
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): nothing in the visible part of this file references this
 * structure; it looks like a leftover - confirm before removing.
 */
struct rt6_proc_arg {
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2825
2826static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2827{
33120b30 2828 struct seq_file *m = p_arg;
69cce1d1 2829 struct neighbour *n;
1da177e4 2830
4b7a4274 2831 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2832
2833#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2834 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2835#else
33120b30 2836 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2837#endif
97cac082 2838 n = rt->n;
69cce1d1
DM
2839 if (n) {
2840 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2841 } else {
33120b30 2842 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2843 }
33120b30 2844 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2845 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2846 rt->dst.__use, rt->rt6i_flags,
d1918542 2847 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2848 return 0;
2849}
2850
33120b30 2851static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2852{
f3db4851 2853 struct net *net = (struct net *)m->private;
32b293a5 2854 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2855 return 0;
2856}
1da177e4 2857
33120b30
AD
2858static int ipv6_route_open(struct inode *inode, struct file *file)
2859{
de05c557 2860 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2861}
2862
33120b30
AD
2863static const struct file_operations ipv6_route_proc_fops = {
2864 .owner = THIS_MODULE,
2865 .open = ipv6_route_open,
2866 .read = seq_read,
2867 .llseek = seq_lseek,
b6fcbdb4 2868 .release = single_release_net,
33120b30
AD
2869};
2870
1da177e4
LT
2871static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2872{
69ddb805 2873 struct net *net = (struct net *)seq->private;
1da177e4 2874 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2875 net->ipv6.rt6_stats->fib_nodes,
2876 net->ipv6.rt6_stats->fib_route_nodes,
2877 net->ipv6.rt6_stats->fib_rt_alloc,
2878 net->ipv6.rt6_stats->fib_rt_entries,
2879 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2880 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2881 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2882
2883 return 0;
2884}
2885
2886static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2887{
de05c557 2888 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2889}
2890
9a32144e 2891static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2892 .owner = THIS_MODULE,
2893 .open = rt6_stats_seq_open,
2894 .read = seq_read,
2895 .llseek = seq_lseek,
b6fcbdb4 2896 .release = single_release_net,
1da177e4
LT
2897};
2898#endif /* CONFIG_PROC_FS */
2899
2900#ifdef CONFIG_SYSCTL
2901
1da177e4 2902static
8d65af78 2903int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2904 void __user *buffer, size_t *lenp, loff_t *ppos)
2905{
c486da34
LAG
2906 struct net *net;
2907 int delay;
2908 if (!write)
1da177e4 2909 return -EINVAL;
c486da34
LAG
2910
2911 net = (struct net *)ctl->extra1;
2912 delay = net->ipv6.sysctl.flush_delay;
2913 proc_dointvec(ctl, write, buffer, lenp, ppos);
2914 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2915 return 0;
1da177e4
LT
2916}
2917
760f2d01 2918ctl_table ipv6_route_table_template[] = {
1ab1457c 2919 {
1da177e4 2920 .procname = "flush",
4990509f 2921 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2922 .maxlen = sizeof(int),
89c8b3a1 2923 .mode = 0200,
6d9f239a 2924 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2925 },
2926 {
1da177e4 2927 .procname = "gc_thresh",
9a7ec3a9 2928 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2929 .maxlen = sizeof(int),
2930 .mode = 0644,
6d9f239a 2931 .proc_handler = proc_dointvec,
1da177e4
LT
2932 },
2933 {
1da177e4 2934 .procname = "max_size",
4990509f 2935 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2936 .maxlen = sizeof(int),
2937 .mode = 0644,
6d9f239a 2938 .proc_handler = proc_dointvec,
1da177e4
LT
2939 },
2940 {
1da177e4 2941 .procname = "gc_min_interval",
4990509f 2942 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2943 .maxlen = sizeof(int),
2944 .mode = 0644,
6d9f239a 2945 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2946 },
2947 {
1da177e4 2948 .procname = "gc_timeout",
4990509f 2949 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2950 .maxlen = sizeof(int),
2951 .mode = 0644,
6d9f239a 2952 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2953 },
2954 {
1da177e4 2955 .procname = "gc_interval",
4990509f 2956 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2957 .maxlen = sizeof(int),
2958 .mode = 0644,
6d9f239a 2959 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2960 },
2961 {
1da177e4 2962 .procname = "gc_elasticity",
4990509f 2963 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2964 .maxlen = sizeof(int),
2965 .mode = 0644,
f3d3f616 2966 .proc_handler = proc_dointvec,
1da177e4
LT
2967 },
2968 {
1da177e4 2969 .procname = "mtu_expires",
4990509f 2970 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2971 .maxlen = sizeof(int),
2972 .mode = 0644,
6d9f239a 2973 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2974 },
2975 {
1da177e4 2976 .procname = "min_adv_mss",
4990509f 2977 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2978 .maxlen = sizeof(int),
2979 .mode = 0644,
f3d3f616 2980 .proc_handler = proc_dointvec,
1da177e4
LT
2981 },
2982 {
1da177e4 2983 .procname = "gc_min_interval_ms",
4990509f 2984 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2985 .maxlen = sizeof(int),
2986 .mode = 0644,
6d9f239a 2987 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2988 },
f8572d8f 2989 { }
1da177e4
LT
2990};
2991
2c8c1e72 2992struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2993{
2994 struct ctl_table *table;
2995
2996 table = kmemdup(ipv6_route_table_template,
2997 sizeof(ipv6_route_table_template),
2998 GFP_KERNEL);
5ee09105
YH
2999
3000 if (table) {
3001 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 3002 table[0].extra1 = net;
86393e52 3003 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
3004 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3005 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3006 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3007 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3008 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3009 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3010 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 3011 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
3012 }
3013
760f2d01
DL
3014 return table;
3015}
1da177e4
LT
3016#endif
3017
2c8c1e72 3018static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3019{
633d424b 3020 int ret = -ENOMEM;
8ed67789 3021
86393e52
AD
3022 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3023 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3024
fc66f95c
ED
3025 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3026 goto out_ip6_dst_ops;
3027
8ed67789
DL
3028 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3029 sizeof(*net->ipv6.ip6_null_entry),
3030 GFP_KERNEL);
3031 if (!net->ipv6.ip6_null_entry)
fc66f95c 3032 goto out_ip6_dst_entries;
d8d1f30b 3033 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3034 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3035 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3036 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3037 ip6_template_metrics, true);
8ed67789
DL
3038
3039#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3040 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3041 sizeof(*net->ipv6.ip6_prohibit_entry),
3042 GFP_KERNEL);
68fffc67
PZ
3043 if (!net->ipv6.ip6_prohibit_entry)
3044 goto out_ip6_null_entry;
d8d1f30b 3045 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 3046 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 3047 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3048 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3049 ip6_template_metrics, true);
8ed67789
DL
3050
3051 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3052 sizeof(*net->ipv6.ip6_blk_hole_entry),
3053 GFP_KERNEL);
68fffc67
PZ
3054 if (!net->ipv6.ip6_blk_hole_entry)
3055 goto out_ip6_prohibit_entry;
d8d1f30b 3056 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 3057 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 3058 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3059 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3060 ip6_template_metrics, true);
8ed67789
DL
3061#endif
3062
b339a47c
PZ
3063 net->ipv6.sysctl.flush_delay = 0;
3064 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3065 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3066 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3067 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3068 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3069 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3070 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3071
6891a346
BT
3072 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3073
8ed67789
DL
3074 ret = 0;
3075out:
3076 return ret;
f2fc6a54 3077
68fffc67
PZ
3078#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3079out_ip6_prohibit_entry:
3080 kfree(net->ipv6.ip6_prohibit_entry);
3081out_ip6_null_entry:
3082 kfree(net->ipv6.ip6_null_entry);
3083#endif
fc66f95c
ED
3084out_ip6_dst_entries:
3085 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 3086out_ip6_dst_ops:
f2fc6a54 3087 goto out;
cdb18761
DL
3088}
3089
2c8c1e72 3090static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 3091{
8ed67789
DL
3092 kfree(net->ipv6.ip6_null_entry);
3093#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3094 kfree(net->ipv6.ip6_prohibit_entry);
3095 kfree(net->ipv6.ip6_blk_hole_entry);
3096#endif
41bb78b4 3097 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3098}
3099
d189634e
TG
3100static int __net_init ip6_route_net_init_late(struct net *net)
3101{
3102#ifdef CONFIG_PROC_FS
3103 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
3104 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
3105#endif
3106 return 0;
3107}
3108
3109static void __net_exit ip6_route_net_exit_late(struct net *net)
3110{
3111#ifdef CONFIG_PROC_FS
3112 proc_net_remove(net, "ipv6_route");
3113 proc_net_remove(net, "rt6_stats");
3114#endif
3115}
3116
cdb18761
DL
3117static struct pernet_operations ip6_route_net_ops = {
3118 .init = ip6_route_net_init,
3119 .exit = ip6_route_net_exit,
3120};
3121
c3426b47
DM
3122static int __net_init ipv6_inetpeer_init(struct net *net)
3123{
3124 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3125
3126 if (!bp)
3127 return -ENOMEM;
3128 inet_peer_base_init(bp);
3129 net->ipv6.peers = bp;
3130 return 0;
3131}
3132
3133static void __net_exit ipv6_inetpeer_exit(struct net *net)
3134{
3135 struct inet_peer_base *bp = net->ipv6.peers;
3136
3137 net->ipv6.peers = NULL;
56a6b248 3138 inetpeer_invalidate_tree(bp);
c3426b47
DM
3139 kfree(bp);
3140}
3141
2b823f72 3142static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3143 .init = ipv6_inetpeer_init,
3144 .exit = ipv6_inetpeer_exit,
3145};
3146
d189634e
TG
3147static struct pernet_operations ip6_route_net_late_ops = {
3148 .init = ip6_route_net_init_late,
3149 .exit = ip6_route_net_exit_late,
3150};
3151
8ed67789
DL
3152static struct notifier_block ip6_route_dev_notifier = {
3153 .notifier_call = ip6_route_dev_notify,
3154 .priority = 0,
3155};
3156
433d49c3 3157int __init ip6_route_init(void)
1da177e4 3158{
433d49c3
DL
3159 int ret;
3160
9a7ec3a9
DL
3161 ret = -ENOMEM;
3162 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3163 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3164 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3165 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3166 goto out;
14e50e57 3167
fc66f95c 3168 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3169 if (ret)
bdb3289f 3170 goto out_kmem_cache;
bdb3289f 3171
c3426b47
DM
3172 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3173 if (ret)
e8803b6c 3174 goto out_dst_entries;
2a0c451a 3175
7e52b33b
DM
3176 ret = register_pernet_subsys(&ip6_route_net_ops);
3177 if (ret)
3178 goto out_register_inetpeer;
c3426b47 3179
5dc121e9
AE
3180 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3181
8ed67789
DL
3182 /* Registering of the loopback is done before this portion of code,
3183 * the loopback reference in rt6_info will not be taken, do it
3184 * manually for init_net */
d8d1f30b 3185 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3186 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3187 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3188 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3189 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3190 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3191 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3192 #endif
e8803b6c 3193 ret = fib6_init();
433d49c3 3194 if (ret)
8ed67789 3195 goto out_register_subsys;
433d49c3 3196
433d49c3
DL
3197 ret = xfrm6_init();
3198 if (ret)
e8803b6c 3199 goto out_fib6_init;
c35b7e72 3200
433d49c3
DL
3201 ret = fib6_rules_init();
3202 if (ret)
3203 goto xfrm6_init;
7e5449c2 3204
d189634e
TG
3205 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3206 if (ret)
3207 goto fib6_rules_init;
3208
433d49c3 3209 ret = -ENOBUFS;
c7ac8679
GR
3210 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3211 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3212 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3213 goto out_register_late_subsys;
c127ea2c 3214
8ed67789 3215 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3216 if (ret)
d189634e 3217 goto out_register_late_subsys;
8ed67789 3218
433d49c3
DL
3219out:
3220 return ret;
3221
d189634e
TG
3222out_register_late_subsys:
3223 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3224fib6_rules_init:
433d49c3
DL
3225 fib6_rules_cleanup();
3226xfrm6_init:
433d49c3 3227 xfrm6_fini();
2a0c451a
TG
3228out_fib6_init:
3229 fib6_gc_cleanup();
8ed67789
DL
3230out_register_subsys:
3231 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3232out_register_inetpeer:
3233 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3234out_dst_entries:
3235 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3236out_kmem_cache:
f2fc6a54 3237 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3238 goto out;
1da177e4
LT
3239}
3240
3241void ip6_route_cleanup(void)
3242{
8ed67789 3243 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3244 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3245 fib6_rules_cleanup();
1da177e4 3246 xfrm6_fini();
1da177e4 3247 fib6_gc_cleanup();
c3426b47 3248 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3249 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3250 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3251 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3252}