pktgen: fix errata in show results
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
5a0e3ad6 43#include <linux/slab.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
64/* Set to 3 to get tracing. */
65#define RT6_DEBUG 2
66
67#if RT6_DEBUG >= 3
68#define RDBG(x) printk x
69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
70#else
71#define RDBG(x)
72#define RT6_TRACE(x...) do { ; } while (0)
73#endif
74
1da177e4
LT
75static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 77static unsigned int ip6_default_advmss(const struct dst_entry *dst);
d33e4553 78static unsigned int ip6_default_mtu(const struct dst_entry *dst);
1da177e4
LT
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
70ceb4f5 90#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
91static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
efa2cea0
DL
95static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
97 struct in6_addr *gwaddr, int ifindex);
98#endif
99
9a7ec3a9 100static struct dst_ops ip6_dst_ops_template = {
1da177e4 101 .family = AF_INET6,
09640e63 102 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
0dbaee3b 106 .default_advmss = ip6_default_advmss,
d33e4553 107 .default_mtu = ip6_default_mtu,
1da177e4
LT
108 .destroy = ip6_dst_destroy,
109 .ifdown = ip6_dst_ifdown,
110 .negative_advice = ip6_negative_advice,
111 .link_failure = ip6_link_failure,
112 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 113 .local_out = __ip6_local_out,
1da177e4
LT
114};
115
ec831ea7
RD
/*
 * default_mtu callback used by ip6_dst_blackhole_ops: reports 0 for
 * every dst, the argument is not inspected.
 */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	const unsigned int no_mtu = 0;

	return no_mtu;
}
120
14e50e57
DM
121static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
122{
123}
124
125static struct dst_ops ip6_dst_blackhole_ops = {
126 .family = AF_INET6,
09640e63 127 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
128 .destroy = ip6_dst_destroy,
129 .check = ip6_dst_check,
ec831ea7 130 .default_mtu = ip6_blackhole_default_mtu,
214f45c9 131 .default_advmss = ip6_default_advmss,
14e50e57 132 .update_pmtu = ip6_rt_blackhole_update_pmtu,
14e50e57
DM
133};
134
bdb3289f 135static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
136 .dst = {
137 .__refcnt = ATOMIC_INIT(1),
138 .__use = 1,
139 .obsolete = -1,
140 .error = -ENETUNREACH,
d8d1f30b
CG
141 .input = ip6_pkt_discard,
142 .output = ip6_pkt_discard_out,
1da177e4
LT
143 },
144 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 145 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
146 .rt6i_metric = ~(u32) 0,
147 .rt6i_ref = ATOMIC_INIT(1),
148};
149
101367c2
TG
150#ifdef CONFIG_IPV6_MULTIPLE_TABLES
151
6723ab54
DM
152static int ip6_pkt_prohibit(struct sk_buff *skb);
153static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 154
280a34c8 155static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
156 .dst = {
157 .__refcnt = ATOMIC_INIT(1),
158 .__use = 1,
159 .obsolete = -1,
160 .error = -EACCES,
d8d1f30b
CG
161 .input = ip6_pkt_prohibit,
162 .output = ip6_pkt_prohibit_out,
101367c2
TG
163 },
164 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 165 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
166 .rt6i_metric = ~(u32) 0,
167 .rt6i_ref = ATOMIC_INIT(1),
168};
169
bdb3289f 170static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
171 .dst = {
172 .__refcnt = ATOMIC_INIT(1),
173 .__use = 1,
174 .obsolete = -1,
175 .error = -EINVAL,
d8d1f30b
CG
176 .input = dst_discard,
177 .output = dst_discard,
101367c2
TG
178 },
179 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 180 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
181 .rt6i_metric = ~(u32) 0,
182 .rt6i_ref = ATOMIC_INIT(1),
183};
184
185#endif
186
/*
 * Allocate a dst from @ops via dst_alloc() and return it typed as an
 * rt6_info.  The result is whatever dst_alloc() produced, merely cast.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	void *entry = dst_alloc(ops);

	return (struct rt6_info *)entry;
}
192
193static void ip6_dst_destroy(struct dst_entry *dst)
194{
195 struct rt6_info *rt = (struct rt6_info *)dst;
196 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 197 struct inet_peer *peer = rt->rt6i_peer;
1da177e4
LT
198
199 if (idev != NULL) {
200 rt->rt6i_idev = NULL;
201 in6_dev_put(idev);
1ab1457c 202 }
b3419363 203 if (peer) {
b3419363
DM
204 rt->rt6i_peer = NULL;
205 inet_putpeer(peer);
206 }
207}
208
209void rt6_bind_peer(struct rt6_info *rt, int create)
210{
211 struct inet_peer *peer;
212
b3419363
DM
213 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
214 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
215 inet_putpeer(peer);
1da177e4
LT
216}
217
218static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
219 int how)
220{
221 struct rt6_info *rt = (struct rt6_info *)dst;
222 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 223 struct net_device *loopback_dev =
c346dca1 224 dev_net(dev)->loopback_dev;
1da177e4 225
5a3e55d6
DL
226 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
227 struct inet6_dev *loopback_idev =
228 in6_dev_get(loopback_dev);
1da177e4
LT
229 if (loopback_idev != NULL) {
230 rt->rt6i_idev = loopback_idev;
231 in6_dev_put(idev);
232 }
233 }
234}
235
236static __inline__ int rt6_check_expired(const struct rt6_info *rt)
237{
a02cec21
ED
238 return (rt->rt6i_flags & RTF_EXPIRES) &&
239 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
240}
241
c71099ac
TG
242static inline int rt6_need_strict(struct in6_addr *daddr)
243{
a02cec21
ED
244 return ipv6_addr_type(daddr) &
245 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
246}
247
1da177e4 248/*
c71099ac 249 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
250 */
251
8ed67789
DL
252static inline struct rt6_info *rt6_device_match(struct net *net,
253 struct rt6_info *rt,
dd3abc4e 254 struct in6_addr *saddr,
1da177e4 255 int oif,
d420895e 256 int flags)
1da177e4
LT
257{
258 struct rt6_info *local = NULL;
259 struct rt6_info *sprt;
260
dd3abc4e
YH
261 if (!oif && ipv6_addr_any(saddr))
262 goto out;
263
d8d1f30b 264 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
265 struct net_device *dev = sprt->rt6i_dev;
266
267 if (oif) {
1da177e4
LT
268 if (dev->ifindex == oif)
269 return sprt;
270 if (dev->flags & IFF_LOOPBACK) {
271 if (sprt->rt6i_idev == NULL ||
272 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 273 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 274 continue;
1ab1457c 275 if (local && (!oif ||
1da177e4
LT
276 local->rt6i_idev->dev->ifindex == oif))
277 continue;
278 }
279 local = sprt;
280 }
dd3abc4e
YH
281 } else {
282 if (ipv6_chk_addr(net, saddr, dev,
283 flags & RT6_LOOKUP_F_IFACE))
284 return sprt;
1da177e4 285 }
dd3abc4e 286 }
1da177e4 287
dd3abc4e 288 if (oif) {
1da177e4
LT
289 if (local)
290 return local;
291
d420895e 292 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 293 return net->ipv6.ip6_null_entry;
1da177e4 294 }
dd3abc4e 295out:
1da177e4
LT
296 return rt;
297}
298
27097255
YH
299#ifdef CONFIG_IPV6_ROUTER_PREF
300static void rt6_probe(struct rt6_info *rt)
301{
302 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
303 /*
304 * Okay, this does not seem to be appropriate
305 * for now, however, we need to check if it
306 * is really so; aka Router Reachability Probing.
307 *
308 * Router Reachability Probe MUST be rate-limited
309 * to no more than one per minute.
310 */
311 if (!neigh || (neigh->nud_state & NUD_VALID))
312 return;
313 read_lock_bh(&neigh->lock);
314 if (!(neigh->nud_state & NUD_VALID) &&
52e16356 315 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
27097255
YH
316 struct in6_addr mcaddr;
317 struct in6_addr *target;
318
319 neigh->updated = jiffies;
320 read_unlock_bh(&neigh->lock);
321
322 target = (struct in6_addr *)&neigh->primary_key;
323 addrconf_addr_solict_mult(target, &mcaddr);
324 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
325 } else
326 read_unlock_bh(&neigh->lock);
327}
328#else
/* Without CONFIG_IPV6_ROUTER_PREF no probing is done: empty stub. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
332#endif
333
1da177e4 334/*
554cfb7e 335 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 336 */
b6f99a21 337static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
338{
339 struct net_device *dev = rt->rt6i_dev;
161980f4 340 if (!oif || dev->ifindex == oif)
554cfb7e 341 return 2;
161980f4
DM
342 if ((dev->flags & IFF_LOOPBACK) &&
343 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
344 return 1;
345 return 0;
554cfb7e 346}
1da177e4 347
b6f99a21 348static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 349{
554cfb7e 350 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 351 int m;
4d0c5911
YH
352 if (rt->rt6i_flags & RTF_NONEXTHOP ||
353 !(rt->rt6i_flags & RTF_GATEWAY))
354 m = 1;
355 else if (neigh) {
554cfb7e
YH
356 read_lock_bh(&neigh->lock);
357 if (neigh->nud_state & NUD_VALID)
4d0c5911 358 m = 2;
398bcbeb
YH
359#ifdef CONFIG_IPV6_ROUTER_PREF
360 else if (neigh->nud_state & NUD_FAILED)
361 m = 0;
362#endif
363 else
ea73ee23 364 m = 1;
554cfb7e 365 read_unlock_bh(&neigh->lock);
398bcbeb
YH
366 } else
367 m = 0;
554cfb7e 368 return m;
1da177e4
LT
369}
370
554cfb7e
YH
371static int rt6_score_route(struct rt6_info *rt, int oif,
372 int strict)
1da177e4 373{
4d0c5911 374 int m, n;
1ab1457c 375
4d0c5911 376 m = rt6_check_dev(rt, oif);
77d16f45 377 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 378 return -1;
ebacaaa0
YH
379#ifdef CONFIG_IPV6_ROUTER_PREF
380 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
381#endif
4d0c5911 382 n = rt6_check_neigh(rt);
557e92ef 383 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
384 return -1;
385 return m;
386}
387
f11e6659
DM
388static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
389 int *mpri, struct rt6_info *match)
554cfb7e 390{
f11e6659
DM
391 int m;
392
393 if (rt6_check_expired(rt))
394 goto out;
395
396 m = rt6_score_route(rt, oif, strict);
397 if (m < 0)
398 goto out;
399
400 if (m > *mpri) {
401 if (strict & RT6_LOOKUP_F_REACHABLE)
402 rt6_probe(match);
403 *mpri = m;
404 match = rt;
405 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
406 rt6_probe(rt);
407 }
408
409out:
410 return match;
411}
412
413static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
414 struct rt6_info *rr_head,
415 u32 metric, int oif, int strict)
416{
417 struct rt6_info *rt, *match;
554cfb7e 418 int mpri = -1;
1da177e4 419
f11e6659
DM
420 match = NULL;
421 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 422 rt = rt->dst.rt6_next)
f11e6659
DM
423 match = find_match(rt, oif, strict, &mpri, match);
424 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 425 rt = rt->dst.rt6_next)
f11e6659 426 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 427
f11e6659
DM
428 return match;
429}
1da177e4 430
f11e6659
DM
431static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
432{
433 struct rt6_info *match, *rt0;
8ed67789 434 struct net *net;
1da177e4 435
f11e6659 436 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 437 __func__, fn->leaf, oif);
554cfb7e 438
f11e6659
DM
439 rt0 = fn->rr_ptr;
440 if (!rt0)
441 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 442
f11e6659 443 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 444
554cfb7e 445 if (!match &&
f11e6659 446 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 447 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 448
554cfb7e 449 /* no entries matched; do round-robin */
f11e6659
DM
450 if (!next || next->rt6i_metric != rt0->rt6i_metric)
451 next = fn->leaf;
452
453 if (next != rt0)
454 fn->rr_ptr = next;
1da177e4 455 }
1da177e4 456
f11e6659 457 RT6_TRACE("%s() => %p\n",
0dc47877 458 __func__, match);
1da177e4 459
c346dca1 460 net = dev_net(rt0->rt6i_dev);
a02cec21 461 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
462}
463
70ceb4f5
YH
464#ifdef CONFIG_IPV6_ROUTE_INFO
465int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
466 struct in6_addr *gwaddr)
467{
c346dca1 468 struct net *net = dev_net(dev);
70ceb4f5
YH
469 struct route_info *rinfo = (struct route_info *) opt;
470 struct in6_addr prefix_buf, *prefix;
471 unsigned int pref;
4bed72e4 472 unsigned long lifetime;
70ceb4f5
YH
473 struct rt6_info *rt;
474
475 if (len < sizeof(struct route_info)) {
476 return -EINVAL;
477 }
478
479 /* Sanity check for prefix_len and length */
480 if (rinfo->length > 3) {
481 return -EINVAL;
482 } else if (rinfo->prefix_len > 128) {
483 return -EINVAL;
484 } else if (rinfo->prefix_len > 64) {
485 if (rinfo->length < 2) {
486 return -EINVAL;
487 }
488 } else if (rinfo->prefix_len > 0) {
489 if (rinfo->length < 1) {
490 return -EINVAL;
491 }
492 }
493
494 pref = rinfo->route_pref;
495 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 496 return -EINVAL;
70ceb4f5 497
4bed72e4 498 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
499
500 if (rinfo->length == 3)
501 prefix = (struct in6_addr *)rinfo->prefix;
502 else {
503 /* this function is safe */
504 ipv6_addr_prefix(&prefix_buf,
505 (struct in6_addr *)rinfo->prefix,
506 rinfo->prefix_len);
507 prefix = &prefix_buf;
508 }
509
efa2cea0
DL
510 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
511 dev->ifindex);
70ceb4f5
YH
512
513 if (rt && !lifetime) {
e0a1ad73 514 ip6_del_rt(rt);
70ceb4f5
YH
515 rt = NULL;
516 }
517
518 if (!rt && lifetime)
efa2cea0 519 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
520 pref);
521 else if (rt)
522 rt->rt6i_flags = RTF_ROUTEINFO |
523 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
524
525 if (rt) {
4bed72e4 526 if (!addrconf_finite_timeout(lifetime)) {
70ceb4f5
YH
527 rt->rt6i_flags &= ~RTF_EXPIRES;
528 } else {
529 rt->rt6i_expires = jiffies + HZ * lifetime;
530 rt->rt6i_flags |= RTF_EXPIRES;
531 }
d8d1f30b 532 dst_release(&rt->dst);
70ceb4f5
YH
533 }
534 return 0;
535}
536#endif
537
/*
 * BACKTRACK - retry a failed lookup further up the tree.
 *
 * Must be expanded inside a function that has locals named 'rt' and 'fn'
 * and labels named 'out' and 'restart'.  If 'rt' is the namespace's null
 * entry, walk from 'fn' towards the root: step into the parent's subtree
 * (looked up by @saddr) when there is one that is not 'fn' itself, else
 * to the parent.  Jumps to 'restart' at the first node flagged
 * RTN_RTINFO, or to 'out' when a node flagged RTN_TL_ROOT is reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *up;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			up = fn->parent;				\
			if (FIB6_SUBTREE(up) && FIB6_SUBTREE(up) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(up), NULL, saddr); \
			else						\
				fn = up;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 555
8ed67789
DL
556static struct rt6_info *ip6_pol_route_lookup(struct net *net,
557 struct fib6_table *table,
c71099ac 558 struct flowi *fl, int flags)
1da177e4
LT
559{
560 struct fib6_node *fn;
561 struct rt6_info *rt;
562
c71099ac
TG
563 read_lock_bh(&table->tb6_lock);
564 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
565restart:
566 rt = fn->leaf;
dd3abc4e 567 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
8ed67789 568 BACKTRACK(net, &fl->fl6_src);
c71099ac 569out:
d8d1f30b 570 dst_use(&rt->dst, jiffies);
c71099ac 571 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
572 return rt;
573
574}
575
9acd9f3a
YH
576struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
577 const struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
578{
579 struct flowi fl = {
580 .oif = oif,
5811662b 581 .fl6_dst = *daddr,
c71099ac
TG
582 };
583 struct dst_entry *dst;
77d16f45 584 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 585
adaa70bb
TG
586 if (saddr) {
587 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
588 flags |= RT6_LOOKUP_F_HAS_SADDR;
589 }
590
606a2b48 591 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
592 if (dst->error == 0)
593 return (struct rt6_info *) dst;
594
595 dst_release(dst);
596
1da177e4
LT
597 return NULL;
598}
599
7159039a
YH
600EXPORT_SYMBOL(rt6_lookup);
601
c71099ac 602/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
603 It takes new route entry, the addition fails by any reason the
604 route is freed. In any case, if caller does not hold it, it may
605 be destroyed.
606 */
607
86872cb5 608static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
609{
610 int err;
c71099ac 611 struct fib6_table *table;
1da177e4 612
c71099ac
TG
613 table = rt->rt6i_table;
614 write_lock_bh(&table->tb6_lock);
86872cb5 615 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 616 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
617
618 return err;
619}
620
40e22e8f
TG
621int ip6_ins_rt(struct rt6_info *rt)
622{
4d1169c1 623 struct nl_info info = {
c346dca1 624 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 625 };
528c4ceb 626 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
627}
628
95a9a5ba
YH
629static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
630 struct in6_addr *saddr)
1da177e4 631{
1da177e4
LT
632 struct rt6_info *rt;
633
634 /*
635 * Clone the route.
636 */
637
638 rt = ip6_rt_copy(ort);
639
640 if (rt) {
14deae41
DM
641 struct neighbour *neigh;
642 int attempts = !in_softirq();
643
58c4fb86
YH
644 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
645 if (rt->rt6i_dst.plen != 128 &&
646 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
647 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 648 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 649 }
1da177e4 650
58c4fb86 651 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
652 rt->rt6i_dst.plen = 128;
653 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 654 rt->dst.flags |= DST_HOST;
1da177e4
LT
655
656#ifdef CONFIG_IPV6_SUBTREES
657 if (rt->rt6i_src.plen && saddr) {
658 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
659 rt->rt6i_src.plen = 128;
660 }
661#endif
662
14deae41
DM
663 retry:
664 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
665 if (IS_ERR(neigh)) {
666 struct net *net = dev_net(rt->rt6i_dev);
667 int saved_rt_min_interval =
668 net->ipv6.sysctl.ip6_rt_gc_min_interval;
669 int saved_rt_elasticity =
670 net->ipv6.sysctl.ip6_rt_gc_elasticity;
671
672 if (attempts-- > 0) {
673 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
674 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
675
86393e52 676 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
677
678 net->ipv6.sysctl.ip6_rt_gc_elasticity =
679 saved_rt_elasticity;
680 net->ipv6.sysctl.ip6_rt_gc_min_interval =
681 saved_rt_min_interval;
682 goto retry;
683 }
684
685 if (net_ratelimit())
686 printk(KERN_WARNING
7e1b33e5 687 "ipv6: Neighbour table overflow.\n");
d8d1f30b 688 dst_free(&rt->dst);
14deae41
DM
689 return NULL;
690 }
691 rt->rt6i_nexthop = neigh;
1da177e4 692
95a9a5ba 693 }
1da177e4 694
95a9a5ba
YH
695 return rt;
696}
1da177e4 697
299d9939
YH
698static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
699{
700 struct rt6_info *rt = ip6_rt_copy(ort);
701 if (rt) {
702 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
703 rt->rt6i_dst.plen = 128;
704 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 705 rt->dst.flags |= DST_HOST;
299d9939
YH
706 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
707 }
708 return rt;
709}
710
8ed67789
DL
711static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
712 struct flowi *fl, int flags)
1da177e4
LT
713{
714 struct fib6_node *fn;
519fbd87 715 struct rt6_info *rt, *nrt;
c71099ac 716 int strict = 0;
1da177e4 717 int attempts = 3;
519fbd87 718 int err;
53b7997f 719 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 720
77d16f45 721 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
722
723relookup:
c71099ac 724 read_lock_bh(&table->tb6_lock);
1da177e4 725
8238dd06 726restart_2:
c71099ac 727 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
728
729restart:
4acad72d 730 rt = rt6_select(fn, oif, strict | reachable);
8ed67789
DL
731
732 BACKTRACK(net, &fl->fl6_src);
733 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 734 rt->rt6i_flags & RTF_CACHE)
1ddef044 735 goto out;
1da177e4 736
d8d1f30b 737 dst_hold(&rt->dst);
c71099ac 738 read_unlock_bh(&table->tb6_lock);
fb9de91e 739
519fbd87 740 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 741 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
d80bc0fd 742 else
c71099ac 743 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
e40cf353 744
d8d1f30b 745 dst_release(&rt->dst);
8ed67789 746 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 747
d8d1f30b 748 dst_hold(&rt->dst);
519fbd87 749 if (nrt) {
40e22e8f 750 err = ip6_ins_rt(nrt);
519fbd87 751 if (!err)
1da177e4 752 goto out2;
1da177e4 753 }
1da177e4 754
519fbd87
YH
755 if (--attempts <= 0)
756 goto out2;
757
758 /*
c71099ac 759 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
760 * released someone could insert this route. Relookup.
761 */
d8d1f30b 762 dst_release(&rt->dst);
519fbd87
YH
763 goto relookup;
764
765out:
8238dd06
YH
766 if (reachable) {
767 reachable = 0;
768 goto restart_2;
769 }
d8d1f30b 770 dst_hold(&rt->dst);
c71099ac 771 read_unlock_bh(&table->tb6_lock);
1da177e4 772out2:
d8d1f30b
CG
773 rt->dst.lastuse = jiffies;
774 rt->dst.__use++;
c71099ac
TG
775
776 return rt;
1da177e4
LT
777}
778
8ed67789 779static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4acad72d
PE
780 struct flowi *fl, int flags)
781{
8ed67789 782 return ip6_pol_route(net, table, fl->iif, fl, flags);
4acad72d
PE
783}
784
c71099ac
TG
785void ip6_route_input(struct sk_buff *skb)
786{
0660e03f 787 struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 788 struct net *net = dev_net(skb->dev);
adaa70bb 789 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
790 struct flowi fl = {
791 .iif = skb->dev->ifindex,
5811662b
CG
792 .fl6_dst = iph->daddr,
793 .fl6_src = iph->saddr,
794 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
1ab1457c 795 .mark = skb->mark,
c71099ac
TG
796 .proto = iph->nexthdr,
797 };
adaa70bb 798
1d6e55f1 799 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 800 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 801
adf30907 802 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
c71099ac
TG
803}
804
8ed67789 805static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
c71099ac 806 struct flowi *fl, int flags)
1da177e4 807{
8ed67789 808 return ip6_pol_route(net, table, fl->oif, fl, flags);
c71099ac
TG
809}
810
4591db4f
DL
811struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
812 struct flowi *fl)
c71099ac
TG
813{
814 int flags = 0;
815
6057fd78 816 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
77d16f45 817 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 818
adaa70bb
TG
819 if (!ipv6_addr_any(&fl->fl6_src))
820 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
821 else if (sk)
822 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 823
4591db4f 824 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
1da177e4
LT
825}
826
7159039a 827EXPORT_SYMBOL(ip6_route_output);
1da177e4 828
14e50e57
DM
829int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
830{
831 struct rt6_info *ort = (struct rt6_info *) *dstp;
832 struct rt6_info *rt = (struct rt6_info *)
833 dst_alloc(&ip6_dst_blackhole_ops);
834 struct dst_entry *new = NULL;
835
836 if (rt) {
d8d1f30b 837 new = &rt->dst;
14e50e57
DM
838
839 atomic_set(&new->__refcnt, 1);
840 new->__use = 1;
352e512c
HX
841 new->input = dst_discard;
842 new->output = dst_discard;
14e50e57 843
defb3519 844 dst_copy_metrics(new, &ort->dst);
d8d1f30b 845 new->dev = ort->dst.dev;
14e50e57
DM
846 if (new->dev)
847 dev_hold(new->dev);
848 rt->rt6i_idev = ort->rt6i_idev;
849 if (rt->rt6i_idev)
850 in6_dev_hold(rt->rt6i_idev);
851 rt->rt6i_expires = 0;
852
853 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
854 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
855 rt->rt6i_metric = 0;
856
857 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
858#ifdef CONFIG_IPV6_SUBTREES
859 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
860#endif
861
862 dst_free(new);
863 }
864
865 dst_release(*dstp);
866 *dstp = new;
a02cec21 867 return new ? 0 : -ENOMEM;
14e50e57
DM
868}
869EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
870
1da177e4
LT
871/*
872 * Destination cache support functions
873 */
874
875static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
876{
877 struct rt6_info *rt;
878
879 rt = (struct rt6_info *) dst;
880
10414444 881 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1da177e4
LT
882 return dst;
883
884 return NULL;
885}
886
887static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
888{
889 struct rt6_info *rt = (struct rt6_info *) dst;
890
891 if (rt) {
54c1a859
YH
892 if (rt->rt6i_flags & RTF_CACHE) {
893 if (rt6_check_expired(rt)) {
894 ip6_del_rt(rt);
895 dst = NULL;
896 }
897 } else {
1da177e4 898 dst_release(dst);
54c1a859
YH
899 dst = NULL;
900 }
1da177e4 901 }
54c1a859 902 return dst;
1da177e4
LT
903}
904
905static void ip6_link_failure(struct sk_buff *skb)
906{
907 struct rt6_info *rt;
908
3ffe533c 909 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 910
adf30907 911 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
912 if (rt) {
913 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 914 dst_set_expires(&rt->dst, 0);
1da177e4
LT
915 rt->rt6i_flags |= RTF_EXPIRES;
916 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
917 rt->rt6i_node->fn_sernum = -1;
918 }
919}
920
921static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
922{
923 struct rt6_info *rt6 = (struct rt6_info*)dst;
924
925 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
926 rt6->rt6i_flags |= RTF_MODIFIED;
927 if (mtu < IPV6_MIN_MTU) {
defb3519 928 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 929 mtu = IPV6_MIN_MTU;
defb3519
DM
930 features |= RTAX_FEATURE_ALLFRAG;
931 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 932 }
defb3519 933 dst_metric_set(dst, RTAX_MTU, mtu);
8d71740c 934 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
935 }
936}
937
0dbaee3b 938static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 939{
0dbaee3b
DM
940 struct net_device *dev = dst->dev;
941 unsigned int mtu = dst_mtu(dst);
942 struct net *net = dev_net(dev);
943
1da177e4
LT
944 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
945
5578689a
DL
946 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
947 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
948
949 /*
1ab1457c
YH
950 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
951 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
952 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
953 * rely only on pmtu discovery"
954 */
955 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
956 mtu = IPV6_MAXPLEN;
957 return mtu;
958}
959
d33e4553
DM
960static unsigned int ip6_default_mtu(const struct dst_entry *dst)
961{
962 unsigned int mtu = IPV6_MIN_MTU;
963 struct inet6_dev *idev;
964
965 rcu_read_lock();
966 idev = __in6_dev_get(dst->dev);
967 if (idev)
968 mtu = idev->cnf.mtu6;
969 rcu_read_unlock();
970
971 return mtu;
972}
973
3b00944c
YH
974static struct dst_entry *icmp6_dst_gc_list;
975static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 976
3b00944c 977struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 978 struct neighbour *neigh,
9acd9f3a 979 const struct in6_addr *addr)
1da177e4
LT
980{
981 struct rt6_info *rt;
982 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 983 struct net *net = dev_net(dev);
1da177e4
LT
984
985 if (unlikely(idev == NULL))
986 return NULL;
987
86393e52 988 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
989 if (unlikely(rt == NULL)) {
990 in6_dev_put(idev);
991 goto out;
992 }
993
994 dev_hold(dev);
995 if (neigh)
996 neigh_hold(neigh);
14deae41 997 else {
1da177e4 998 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
999 if (IS_ERR(neigh))
1000 neigh = NULL;
1001 }
1da177e4
LT
1002
1003 rt->rt6i_dev = dev;
1004 rt->rt6i_idev = idev;
1005 rt->rt6i_nexthop = neigh;
d8d1f30b 1006 atomic_set(&rt->dst.__refcnt, 1);
defb3519 1007 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
d8d1f30b 1008 rt->dst.output = ip6_output;
1da177e4
LT
1009
1010#if 0 /* there's no chance to use these for ndisc */
d8d1f30b 1011 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1ab1457c 1012 ? DST_HOST
1da177e4
LT
1013 : 0;
1014 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1015 rt->rt6i_dst.plen = 128;
1016#endif
1017
3b00944c 1018 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1019 rt->dst.next = icmp6_dst_gc_list;
1020 icmp6_dst_gc_list = &rt->dst;
3b00944c 1021 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1022
5578689a 1023 fib6_force_start_gc(net);
1da177e4
LT
1024
1025out:
d8d1f30b 1026 return &rt->dst;
1da177e4
LT
1027}
1028
3d0f24a7 1029int icmp6_dst_gc(void)
1da177e4
LT
1030{
1031 struct dst_entry *dst, *next, **pprev;
3d0f24a7 1032 int more = 0;
1da177e4
LT
1033
1034 next = NULL;
5d0bbeeb 1035
3b00944c
YH
1036 spin_lock_bh(&icmp6_dst_lock);
1037 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1038
1da177e4
LT
1039 while ((dst = *pprev) != NULL) {
1040 if (!atomic_read(&dst->__refcnt)) {
1041 *pprev = dst->next;
1042 dst_free(dst);
1da177e4
LT
1043 } else {
1044 pprev = &dst->next;
3d0f24a7 1045 ++more;
1da177e4
LT
1046 }
1047 }
1048
3b00944c 1049 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1050
3d0f24a7 1051 return more;
1da177e4
LT
1052}
1053
1e493d19
DM
1054static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1055 void *arg)
1056{
1057 struct dst_entry *dst, **pprev;
1058
1059 spin_lock_bh(&icmp6_dst_lock);
1060 pprev = &icmp6_dst_gc_list;
1061 while ((dst = *pprev) != NULL) {
1062 struct rt6_info *rt = (struct rt6_info *) dst;
1063 if (func(rt, arg)) {
1064 *pprev = dst->next;
1065 dst_free(dst);
1066 } else {
1067 pprev = &dst->next;
1068 }
1069 }
1070 spin_unlock_bh(&icmp6_dst_lock);
1071}
1072
569d3645 1073static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1074{
1da177e4 1075 unsigned long now = jiffies;
86393e52 1076 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1077 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1078 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1079 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1080 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1081 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1082 int entries;
7019b78e 1083
fc66f95c 1084 entries = dst_entries_get_fast(ops);
7019b78e 1085 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1086 entries <= rt_max_size)
1da177e4
LT
1087 goto out;
1088
6891a346
BT
1089 net->ipv6.ip6_rt_gc_expire++;
1090 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1091 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1092 entries = dst_entries_get_slow(ops);
1093 if (entries < ops->gc_thresh)
7019b78e 1094 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1095out:
7019b78e 1096 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1097 return entries > rt_max_size;
1da177e4
LT
1098}
1099
1100/* Clean host part of a prefix. Not necessary in radix tree,
1101 but results in cleaner routing tables.
1102
1103 Remove it only when all the things will work!
1104 */
1105
6b75d090 1106int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1107{
5170ae82 1108 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1109 if (hoplimit == 0) {
6b75d090 1110 struct net_device *dev = dst->dev;
c68f24cc
ED
1111 struct inet6_dev *idev;
1112
1113 rcu_read_lock();
1114 idev = __in6_dev_get(dev);
1115 if (idev)
6b75d090 1116 hoplimit = idev->cnf.hop_limit;
c68f24cc 1117 else
53b7997f 1118 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1119 rcu_read_unlock();
1da177e4
LT
1120 }
1121 return hoplimit;
1122}
abbf46ae 1123EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1124
1125/*
1126 *
1127 */
1128
86872cb5 1129int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1130{
1131 int err;
5578689a 1132 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1133 struct rt6_info *rt = NULL;
1134 struct net_device *dev = NULL;
1135 struct inet6_dev *idev = NULL;
c71099ac 1136 struct fib6_table *table;
1da177e4
LT
1137 int addr_type;
1138
86872cb5 1139 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1140 return -EINVAL;
1141#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1142 if (cfg->fc_src_len)
1da177e4
LT
1143 return -EINVAL;
1144#endif
86872cb5 1145 if (cfg->fc_ifindex) {
1da177e4 1146 err = -ENODEV;
5578689a 1147 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1148 if (!dev)
1149 goto out;
1150 idev = in6_dev_get(dev);
1151 if (!idev)
1152 goto out;
1153 }
1154
86872cb5
TG
1155 if (cfg->fc_metric == 0)
1156 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1157
5578689a 1158 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1159 if (table == NULL) {
1160 err = -ENOBUFS;
1161 goto out;
1162 }
1163
86393e52 1164 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1165
1166 if (rt == NULL) {
1167 err = -ENOMEM;
1168 goto out;
1169 }
1170
d8d1f30b 1171 rt->dst.obsolete = -1;
6f704992
YH
1172 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1173 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1174 0;
1da177e4 1175
86872cb5
TG
1176 if (cfg->fc_protocol == RTPROT_UNSPEC)
1177 cfg->fc_protocol = RTPROT_BOOT;
1178 rt->rt6i_protocol = cfg->fc_protocol;
1179
1180 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1181
1182 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1183 rt->dst.input = ip6_mc_input;
ab79ad14
1184 else if (cfg->fc_flags & RTF_LOCAL)
1185 rt->dst.input = ip6_input;
1da177e4 1186 else
d8d1f30b 1187 rt->dst.input = ip6_forward;
1da177e4 1188
d8d1f30b 1189 rt->dst.output = ip6_output;
1da177e4 1190
86872cb5
TG
1191 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1192 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1193 if (rt->rt6i_dst.plen == 128)
d8d1f30b 1194 rt->dst.flags = DST_HOST;
1da177e4
LT
1195
1196#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1197 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1198 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1199#endif
1200
86872cb5 1201 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1202
1203 /* We cannot add true routes via loopback here,
1204 they would result in kernel looping; promote them to reject routes
1205 */
86872cb5 1206 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1207 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1208 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1209 /* hold loopback dev/idev if we haven't done so. */
5578689a 1210 if (dev != net->loopback_dev) {
1da177e4
LT
1211 if (dev) {
1212 dev_put(dev);
1213 in6_dev_put(idev);
1214 }
5578689a 1215 dev = net->loopback_dev;
1da177e4
LT
1216 dev_hold(dev);
1217 idev = in6_dev_get(dev);
1218 if (!idev) {
1219 err = -ENODEV;
1220 goto out;
1221 }
1222 }
d8d1f30b
CG
1223 rt->dst.output = ip6_pkt_discard_out;
1224 rt->dst.input = ip6_pkt_discard;
1225 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1226 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1227 goto install_route;
1228 }
1229
86872cb5 1230 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1231 struct in6_addr *gw_addr;
1232 int gwa_type;
1233
86872cb5
TG
1234 gw_addr = &cfg->fc_gateway;
1235 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1236 gwa_type = ipv6_addr_type(gw_addr);
1237
1238 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1239 struct rt6_info *grt;
1240
1241 /* IPv6 strictly inhibits using not link-local
1242 addresses as nexthop address.
1243 Otherwise, router will not able to send redirects.
1244 It is very good, but in some (rare!) circumstances
1245 (SIT, PtP, NBMA NOARP links) it is handy to allow
1246 some exceptions. --ANK
1247 */
1248 err = -EINVAL;
1249 if (!(gwa_type&IPV6_ADDR_UNICAST))
1250 goto out;
1251
5578689a 1252 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1253
1254 err = -EHOSTUNREACH;
1255 if (grt == NULL)
1256 goto out;
1257 if (dev) {
1258 if (dev != grt->rt6i_dev) {
d8d1f30b 1259 dst_release(&grt->dst);
1da177e4
LT
1260 goto out;
1261 }
1262 } else {
1263 dev = grt->rt6i_dev;
1264 idev = grt->rt6i_idev;
1265 dev_hold(dev);
1266 in6_dev_hold(grt->rt6i_idev);
1267 }
1268 if (!(grt->rt6i_flags&RTF_GATEWAY))
1269 err = 0;
d8d1f30b 1270 dst_release(&grt->dst);
1da177e4
LT
1271
1272 if (err)
1273 goto out;
1274 }
1275 err = -EINVAL;
1276 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1277 goto out;
1278 }
1279
1280 err = -ENODEV;
1281 if (dev == NULL)
1282 goto out;
1283
86872cb5 1284 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1285 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1286 if (IS_ERR(rt->rt6i_nexthop)) {
1287 err = PTR_ERR(rt->rt6i_nexthop);
1288 rt->rt6i_nexthop = NULL;
1289 goto out;
1290 }
1291 }
1292
86872cb5 1293 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1294
1295install_route:
86872cb5
TG
1296 if (cfg->fc_mx) {
1297 struct nlattr *nla;
1298 int remaining;
1299
1300 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1301 int type = nla_type(nla);
86872cb5
TG
1302
1303 if (type) {
1304 if (type > RTAX_MAX) {
1da177e4
LT
1305 err = -EINVAL;
1306 goto out;
1307 }
86872cb5 1308
defb3519 1309 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1310 }
1da177e4
LT
1311 }
1312 }
1313
d8d1f30b 1314 rt->dst.dev = dev;
1da177e4 1315 rt->rt6i_idev = idev;
c71099ac 1316 rt->rt6i_table = table;
63152fc0 1317
c346dca1 1318 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1319
86872cb5 1320 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1321
1322out:
1323 if (dev)
1324 dev_put(dev);
1325 if (idev)
1326 in6_dev_put(idev);
1327 if (rt)
d8d1f30b 1328 dst_free(&rt->dst);
1da177e4
LT
1329 return err;
1330}
1331
86872cb5 1332static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1333{
1334 int err;
c71099ac 1335 struct fib6_table *table;
c346dca1 1336 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1337
8ed67789 1338 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1339 return -ENOENT;
1340
c71099ac
TG
1341 table = rt->rt6i_table;
1342 write_lock_bh(&table->tb6_lock);
1da177e4 1343
86872cb5 1344 err = fib6_del(rt, info);
d8d1f30b 1345 dst_release(&rt->dst);
1da177e4 1346
c71099ac 1347 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1348
1349 return err;
1350}
1351
e0a1ad73
TG
1352int ip6_del_rt(struct rt6_info *rt)
1353{
4d1169c1 1354 struct nl_info info = {
c346dca1 1355 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1356 };
528c4ceb 1357 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1358}
1359
86872cb5 1360static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1361{
c71099ac 1362 struct fib6_table *table;
1da177e4
LT
1363 struct fib6_node *fn;
1364 struct rt6_info *rt;
1365 int err = -ESRCH;
1366
5578689a 1367 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1368 if (table == NULL)
1369 return err;
1370
1371 read_lock_bh(&table->tb6_lock);
1da177e4 1372
c71099ac 1373 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1374 &cfg->fc_dst, cfg->fc_dst_len,
1375 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1376
1da177e4 1377 if (fn) {
d8d1f30b 1378 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1379 if (cfg->fc_ifindex &&
1da177e4 1380 (rt->rt6i_dev == NULL ||
86872cb5 1381 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1382 continue;
86872cb5
TG
1383 if (cfg->fc_flags & RTF_GATEWAY &&
1384 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1385 continue;
86872cb5 1386 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1387 continue;
d8d1f30b 1388 dst_hold(&rt->dst);
c71099ac 1389 read_unlock_bh(&table->tb6_lock);
1da177e4 1390
86872cb5 1391 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1392 }
1393 }
c71099ac 1394 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1395
1396 return err;
1397}
1398
1399/*
1400 * Handle redirects
1401 */
a6279458
YH
1402struct ip6rd_flowi {
1403 struct flowi fl;
1404 struct in6_addr gateway;
1405};
1406
8ed67789
DL
1407static struct rt6_info *__ip6_route_redirect(struct net *net,
1408 struct fib6_table *table,
a6279458
YH
1409 struct flowi *fl,
1410 int flags)
1da177e4 1411{
a6279458
YH
1412 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1413 struct rt6_info *rt;
e843b9e1 1414 struct fib6_node *fn;
c71099ac 1415
1da177e4 1416 /*
e843b9e1
YH
1417 * Get the "current" route for this destination and
1418 * check if the redirect has come from approriate router.
1419 *
1420 * RFC 2461 specifies that redirects should only be
1421 * accepted if they come from the nexthop to the target.
1422 * Due to the way the routes are chosen, this notion
1423 * is a bit fuzzy and one might need to check all possible
1424 * routes.
1da177e4 1425 */
1da177e4 1426
c71099ac 1427 read_lock_bh(&table->tb6_lock);
a6279458 1428 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1429restart:
d8d1f30b 1430 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1431 /*
1432 * Current route is on-link; redirect is always invalid.
1433 *
1434 * Seems, previous statement is not true. It could
1435 * be node, which looks for us as on-link (f.e. proxy ndisc)
1436 * But then router serving it might decide, that we should
1437 * know truth 8)8) --ANK (980726).
1438 */
1439 if (rt6_check_expired(rt))
1440 continue;
1441 if (!(rt->rt6i_flags & RTF_GATEWAY))
1442 continue;
a6279458 1443 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1444 continue;
a6279458 1445 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1446 continue;
1447 break;
1448 }
a6279458 1449
cb15d9c2 1450 if (!rt)
8ed67789
DL
1451 rt = net->ipv6.ip6_null_entry;
1452 BACKTRACK(net, &fl->fl6_src);
cb15d9c2 1453out:
d8d1f30b 1454 dst_hold(&rt->dst);
a6279458 1455
c71099ac 1456 read_unlock_bh(&table->tb6_lock);
e843b9e1 1457
a6279458
YH
1458 return rt;
1459};
1460
1461static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1462 struct in6_addr *src,
1463 struct in6_addr *gateway,
1464 struct net_device *dev)
1465{
adaa70bb 1466 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1467 struct net *net = dev_net(dev);
a6279458
YH
1468 struct ip6rd_flowi rdfl = {
1469 .fl = {
1470 .oif = dev->ifindex,
5811662b
CG
1471 .fl6_dst = *dest,
1472 .fl6_src = *src,
a6279458 1473 },
a6279458 1474 };
adaa70bb 1475
86c36ce4
BH
1476 ipv6_addr_copy(&rdfl.gateway, gateway);
1477
adaa70bb
TG
1478 if (rt6_need_strict(dest))
1479 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1480
5578689a 1481 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
58f09b78 1482 flags, __ip6_route_redirect);
a6279458
YH
1483}
1484
1485void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1486 struct in6_addr *saddr,
1487 struct neighbour *neigh, u8 *lladdr, int on_link)
1488{
1489 struct rt6_info *rt, *nrt = NULL;
1490 struct netevent_redirect netevent;
c346dca1 1491 struct net *net = dev_net(neigh->dev);
a6279458
YH
1492
1493 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1494
8ed67789 1495 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1496 if (net_ratelimit())
1497 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1498 "for redirect target\n");
a6279458 1499 goto out;
1da177e4
LT
1500 }
1501
1da177e4
LT
1502 /*
1503 * We have finally decided to accept it.
1504 */
1505
1ab1457c 1506 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1507 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1508 NEIGH_UPDATE_F_OVERRIDE|
1509 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1510 NEIGH_UPDATE_F_ISROUTER))
1511 );
1512
1513 /*
1514 * Redirect received -> path was valid.
1515 * Look, redirects are sent only in response to data packets,
1516 * so that this nexthop apparently is reachable. --ANK
1517 */
d8d1f30b 1518 dst_confirm(&rt->dst);
1da177e4
LT
1519
1520 /* Duplicate redirect: silently ignore. */
d8d1f30b 1521 if (neigh == rt->dst.neighbour)
1da177e4
LT
1522 goto out;
1523
1524 nrt = ip6_rt_copy(rt);
1525 if (nrt == NULL)
1526 goto out;
1527
1528 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1529 if (on_link)
1530 nrt->rt6i_flags &= ~RTF_GATEWAY;
1531
1532 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1533 nrt->rt6i_dst.plen = 128;
d8d1f30b 1534 nrt->dst.flags |= DST_HOST;
1da177e4
LT
1535
1536 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1537 nrt->rt6i_nexthop = neigh_clone(neigh);
1da177e4 1538
40e22e8f 1539 if (ip6_ins_rt(nrt))
1da177e4
LT
1540 goto out;
1541
d8d1f30b
CG
1542 netevent.old = &rt->dst;
1543 netevent.new = &nrt->dst;
8d71740c
TT
1544 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1545
1da177e4 1546 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1547 ip6_del_rt(rt);
1da177e4
LT
1548 return;
1549 }
1550
1551out:
d8d1f30b 1552 dst_release(&rt->dst);
1da177e4
LT
1553}
1554
1555/*
1556 * Handle ICMP "packet too big" messages
1557 * i.e. Path MTU discovery
1558 */
1559
ae878ae2
1560static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1561 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1562{
1563 struct rt6_info *rt, *nrt;
1564 int allfrag = 0;
d3052b55 1565again:
ae878ae2 1566 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1567 if (rt == NULL)
1568 return;
1569
d3052b55
AV
1570 if (rt6_check_expired(rt)) {
1571 ip6_del_rt(rt);
1572 goto again;
1573 }
1574
d8d1f30b 1575 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1576 goto out;
1577
1578 if (pmtu < IPV6_MIN_MTU) {
1579 /*
1ab1457c 1580 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1581 * MTU (1280) and a fragment header should always be included
1582 * after a node receiving Too Big message reporting PMTU is
1583 * less than the IPv6 Minimum Link MTU.
1584 */
1585 pmtu = IPV6_MIN_MTU;
1586 allfrag = 1;
1587 }
1588
1589 /* New mtu received -> path was valid.
1590 They are sent only in response to data packets,
1591 so that this nexthop apparently is reachable. --ANK
1592 */
d8d1f30b 1593 dst_confirm(&rt->dst);
1da177e4
LT
1594
1595 /* Host route. If it is static, it would be better
1596 not to override it, but add new one, so that
1597 when cache entry will expire old pmtu
1598 would return automatically.
1599 */
1600 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1601 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1602 if (allfrag) {
1603 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1604 features |= RTAX_FEATURE_ALLFRAG;
1605 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1606 }
d8d1f30b 1607 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1608 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1609 goto out;
1610 }
1611
1612 /* Network route.
1613 Two cases are possible:
1614 1. It is connected route. Action: COW
1615 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1616 */
d5315b50 1617 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1618 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1619 else
1620 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1621
d5315b50 1622 if (nrt) {
defb3519
DM
1623 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1624 if (allfrag) {
1625 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1626 features |= RTAX_FEATURE_ALLFRAG;
1627 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1628 }
a1e78363
YH
1629
1630 /* According to RFC 1981, detecting PMTU increase shouldn't be
1631 * happened within 5 mins, the recommended timer is 10 mins.
1632 * Here this route expiration time is set to ip6_rt_mtu_expires
1633 * which is 10 mins. After 10 mins the decreased pmtu is expired
1634 * and detecting PMTU increase will be automatically happened.
1635 */
d8d1f30b 1636 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1637 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1638
40e22e8f 1639 ip6_ins_rt(nrt);
1da177e4 1640 }
1da177e4 1641out:
d8d1f30b 1642 dst_release(&rt->dst);
1da177e4
LT
1643}
1644
ae878ae2
1645void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1646 struct net_device *dev, u32 pmtu)
1647{
1648 struct net *net = dev_net(dev);
1649
1650 /*
1651 * RFC 1981 states that a node "MUST reduce the size of the packets it
1652 * is sending along the path" that caused the Packet Too Big message.
1653 * Since it's not possible in the general case to determine which
1654 * interface was used to send the original packet, we update the MTU
1655 * on the interface that will be used to send future packets. We also
1656 * update the MTU on the interface that received the Packet Too Big in
1657 * case the original packet was forced out that interface with
1658 * SO_BINDTODEVICE or similar. This is the next best thing to the
1659 * correct behaviour, which would be to update the MTU on all
1660 * interfaces.
1661 */
1662 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1663 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1664}
1665
1da177e4
LT
1666/*
1667 * Misc support functions
1668 */
1669
1670static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1671{
c346dca1 1672 struct net *net = dev_net(ort->rt6i_dev);
86393e52 1673 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1674
1675 if (rt) {
d8d1f30b
CG
1676 rt->dst.input = ort->dst.input;
1677 rt->dst.output = ort->dst.output;
1678
defb3519 1679 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b
CG
1680 rt->dst.error = ort->dst.error;
1681 rt->dst.dev = ort->dst.dev;
1682 if (rt->dst.dev)
1683 dev_hold(rt->dst.dev);
1da177e4
LT
1684 rt->rt6i_idev = ort->rt6i_idev;
1685 if (rt->rt6i_idev)
1686 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1687 rt->dst.lastuse = jiffies;
1da177e4
LT
1688 rt->rt6i_expires = 0;
1689
1690 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1691 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1692 rt->rt6i_metric = 0;
1693
1694 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1695#ifdef CONFIG_IPV6_SUBTREES
1696 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1697#endif
c71099ac 1698 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1699 }
1700 return rt;
1701}
1702
70ceb4f5 1703#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1704static struct rt6_info *rt6_get_route_info(struct net *net,
1705 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1706 struct in6_addr *gwaddr, int ifindex)
1707{
1708 struct fib6_node *fn;
1709 struct rt6_info *rt = NULL;
c71099ac
TG
1710 struct fib6_table *table;
1711
efa2cea0 1712 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1713 if (table == NULL)
1714 return NULL;
70ceb4f5 1715
c71099ac
TG
1716 write_lock_bh(&table->tb6_lock);
1717 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1718 if (!fn)
1719 goto out;
1720
d8d1f30b 1721 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1722 if (rt->rt6i_dev->ifindex != ifindex)
1723 continue;
1724 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1725 continue;
1726 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1727 continue;
d8d1f30b 1728 dst_hold(&rt->dst);
70ceb4f5
YH
1729 break;
1730 }
1731out:
c71099ac 1732 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1733 return rt;
1734}
1735
efa2cea0
DL
1736static struct rt6_info *rt6_add_route_info(struct net *net,
1737 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1738 struct in6_addr *gwaddr, int ifindex,
1739 unsigned pref)
1740{
86872cb5
TG
1741 struct fib6_config cfg = {
1742 .fc_table = RT6_TABLE_INFO,
238fc7ea 1743 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1744 .fc_ifindex = ifindex,
1745 .fc_dst_len = prefixlen,
1746 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1747 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1748 .fc_nlinfo.pid = 0,
1749 .fc_nlinfo.nlh = NULL,
1750 .fc_nlinfo.nl_net = net,
86872cb5
TG
1751 };
1752
1753 ipv6_addr_copy(&cfg.fc_dst, prefix);
1754 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1755
e317da96
YH
1756 /* We should treat it as a default route if prefix length is 0. */
1757 if (!prefixlen)
86872cb5 1758 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1759
86872cb5 1760 ip6_route_add(&cfg);
70ceb4f5 1761
efa2cea0 1762 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1763}
1764#endif
1765
1da177e4 1766struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1767{
1da177e4 1768 struct rt6_info *rt;
c71099ac 1769 struct fib6_table *table;
1da177e4 1770
c346dca1 1771 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1772 if (table == NULL)
1773 return NULL;
1da177e4 1774
c71099ac 1775 write_lock_bh(&table->tb6_lock);
d8d1f30b 1776 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1777 if (dev == rt->rt6i_dev &&
045927ff 1778 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1779 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1780 break;
1781 }
1782 if (rt)
d8d1f30b 1783 dst_hold(&rt->dst);
c71099ac 1784 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1785 return rt;
1786}
1787
1788struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1789 struct net_device *dev,
1790 unsigned int pref)
1da177e4 1791{
86872cb5
TG
1792 struct fib6_config cfg = {
1793 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1794 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1795 .fc_ifindex = dev->ifindex,
1796 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1797 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1798 .fc_nlinfo.pid = 0,
1799 .fc_nlinfo.nlh = NULL,
c346dca1 1800 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1801 };
1da177e4 1802
86872cb5 1803 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1804
86872cb5 1805 ip6_route_add(&cfg);
1da177e4 1806
1da177e4
LT
1807 return rt6_get_dflt_router(gwaddr, dev);
1808}
1809
7b4da532 1810void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1811{
1812 struct rt6_info *rt;
c71099ac
TG
1813 struct fib6_table *table;
1814
1815 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1816 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1817 if (table == NULL)
1818 return;
1da177e4
LT
1819
1820restart:
c71099ac 1821 read_lock_bh(&table->tb6_lock);
d8d1f30b 1822 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1823 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1824 dst_hold(&rt->dst);
c71099ac 1825 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1826 ip6_del_rt(rt);
1da177e4
LT
1827 goto restart;
1828 }
1829 }
c71099ac 1830 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1831}
1832
5578689a
DL
1833static void rtmsg_to_fib6_config(struct net *net,
1834 struct in6_rtmsg *rtmsg,
86872cb5
TG
1835 struct fib6_config *cfg)
1836{
1837 memset(cfg, 0, sizeof(*cfg));
1838
1839 cfg->fc_table = RT6_TABLE_MAIN;
1840 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1841 cfg->fc_metric = rtmsg->rtmsg_metric;
1842 cfg->fc_expires = rtmsg->rtmsg_info;
1843 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1844 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1845 cfg->fc_flags = rtmsg->rtmsg_flags;
1846
5578689a 1847 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1848
86872cb5
TG
1849 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1850 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1851 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1852}
1853
5578689a 1854int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1855{
86872cb5 1856 struct fib6_config cfg;
1da177e4
LT
1857 struct in6_rtmsg rtmsg;
1858 int err;
1859
1860 switch(cmd) {
1861 case SIOCADDRT: /* Add a route */
1862 case SIOCDELRT: /* Delete a route */
1863 if (!capable(CAP_NET_ADMIN))
1864 return -EPERM;
1865 err = copy_from_user(&rtmsg, arg,
1866 sizeof(struct in6_rtmsg));
1867 if (err)
1868 return -EFAULT;
86872cb5 1869
5578689a 1870 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1871
1da177e4
LT
1872 rtnl_lock();
1873 switch (cmd) {
1874 case SIOCADDRT:
86872cb5 1875 err = ip6_route_add(&cfg);
1da177e4
LT
1876 break;
1877 case SIOCDELRT:
86872cb5 1878 err = ip6_route_del(&cfg);
1da177e4
LT
1879 break;
1880 default:
1881 err = -EINVAL;
1882 }
1883 rtnl_unlock();
1884
1885 return err;
3ff50b79 1886 }
1da177e4
LT
1887
1888 return -EINVAL;
1889}
1890
1891/*
1892 * Drop the packet on the floor
1893 */
1894
d5fdd6ba 1895static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1896{
612f09e8 1897 int type;
adf30907 1898 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1899 switch (ipstats_mib_noroutes) {
1900 case IPSTATS_MIB_INNOROUTES:
0660e03f 1901 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1902 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1903 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1904 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1905 break;
1906 }
1907 /* FALLTHROUGH */
1908 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1909 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1910 ipstats_mib_noroutes);
612f09e8
YH
1911 break;
1912 }
3ffe533c 1913 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1914 kfree_skb(skb);
1915 return 0;
1916}
1917
9ce8ade0
TG
1918static int ip6_pkt_discard(struct sk_buff *skb)
1919{
612f09e8 1920 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1921}
1922
20380731 1923static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 1924{
adf30907 1925 skb->dev = skb_dst(skb)->dev;
612f09e8 1926 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1927}
1928
6723ab54
DM
1929#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1930
9ce8ade0
TG
1931static int ip6_pkt_prohibit(struct sk_buff *skb)
1932{
612f09e8 1933 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1934}
1935
1936static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1937{
adf30907 1938 skb->dev = skb_dst(skb)->dev;
612f09e8 1939 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1940}
1941
6723ab54
DM
1942#endif
1943
1da177e4
LT
1944/*
1945 * Allocate a dst for local (unicast / anycast) address.
1946 */
1947
1948struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1949 const struct in6_addr *addr,
1950 int anycast)
1951{
c346dca1 1952 struct net *net = dev_net(idev->dev);
86393e52 1953 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
14deae41 1954 struct neighbour *neigh;
1da177e4 1955
40385653
BG
1956 if (rt == NULL) {
1957 if (net_ratelimit())
1958 pr_warning("IPv6: Maximum number of routes reached,"
1959 " consider increasing route/max_size.\n");
1da177e4 1960 return ERR_PTR(-ENOMEM);
40385653 1961 }
1da177e4 1962
5578689a 1963 dev_hold(net->loopback_dev);
1da177e4
LT
1964 in6_dev_hold(idev);
1965
d8d1f30b
CG
1966 rt->dst.flags = DST_HOST;
1967 rt->dst.input = ip6_input;
1968 rt->dst.output = ip6_output;
5578689a 1969 rt->rt6i_dev = net->loopback_dev;
1da177e4 1970 rt->rt6i_idev = idev;
defb3519 1971 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
d8d1f30b 1972 rt->dst.obsolete = -1;
1da177e4
LT
1973
1974 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1975 if (anycast)
1976 rt->rt6i_flags |= RTF_ANYCAST;
1977 else
1da177e4 1978 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
1979 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1980 if (IS_ERR(neigh)) {
d8d1f30b 1981 dst_free(&rt->dst);
14deae41
DM
1982
1983 /* We are casting this because that is the return
1984 * value type. But an errno encoded pointer is the
1985 * same regardless of the underlying pointer type,
1986 * and that's what we are returning. So this is OK.
1987 */
1988 return (struct rt6_info *) neigh;
1da177e4 1989 }
14deae41 1990 rt->rt6i_nexthop = neigh;
1da177e4
LT
1991
1992 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1993 rt->rt6i_dst.plen = 128;
5578689a 1994 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 1995
d8d1f30b 1996 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
1997
1998 return rt;
1999}
2000
8ed67789
DL
/*
 * Argument bundle for fib6_ifdown(): the device going down (NULL matches
 * every device) and the namespace whose null entry must be preserved.
 */
struct arg_dev_net {
	struct net_device	*dev;
	struct net		*net;
};
2005
1da177e4
LT
2006static int fib6_ifdown(struct rt6_info *rt, void *arg)
2007{
bc3ef660 2008 const struct arg_dev_net *adn = arg;
2009 const struct net_device *dev = adn->dev;
8ed67789 2010
bc3ef660 2011 if ((rt->rt6i_dev == dev || dev == NULL) &&
2012 rt != adn->net->ipv6.ip6_null_entry) {
1da177e4
LT
2013 RT6_TRACE("deleted by ifdown %p\n", rt);
2014 return -1;
2015 }
2016 return 0;
2017}
2018
f3db4851 2019void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2020{
8ed67789
DL
2021 struct arg_dev_net adn = {
2022 .dev = dev,
2023 .net = net,
2024 };
2025
2026 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2027 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2028}
2029
/*
 * Argument bundle for rt6_mtu_change_route(): the device whose MTU
 * changed and its new MTU.
 */
struct rt6_mtu_change_arg {
	struct net_device	*dev;
	unsigned		mtu;
};
2035
2036static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2037{
2038 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2039 struct inet6_dev *idev;
2040
2041 /* In IPv6 pmtu discovery is not optional,
2042 so that RTAX_MTU lock cannot disable it.
2043 We still use this lock to block changes
2044 caused by addrconf/ndisc.
2045 */
2046
2047 idev = __in6_dev_get(arg->dev);
2048 if (idev == NULL)
2049 return 0;
2050
2051 /* For administrative MTU increase, there is no way to discover
2052 IPv6 PMTU increase, so PMTU increase should be updated here.
2053 Since RFC 1981 doesn't include administrative MTU increase
2054 update PMTU increase is a MUST. (i.e. jumbo frame)
2055 */
2056 /*
2057 If new MTU is less than route PMTU, this new MTU will be the
2058 lowest MTU in the path, update the route PMTU to reflect PMTU
2059 decreases; if new MTU is greater than route PMTU, and the
2060 old MTU is the lowest MTU in the path, update the route PMTU
2061 to reflect the increase. In this case if the other nodes' MTU
2062 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2063 PMTU discouvery.
2064 */
2065 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2066 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2067 (dst_mtu(&rt->dst) >= arg->mtu ||
2068 (dst_mtu(&rt->dst) < arg->mtu &&
2069 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2070 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2071 }
1da177e4
LT
2072 return 0;
2073}
2074
2075void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2076{
c71099ac
TG
2077 struct rt6_mtu_change_arg arg = {
2078 .dev = dev,
2079 .mtu = mtu,
2080 };
1da177e4 2081
c346dca1 2082 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2083}
2084
ef7c79ed 2085static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2086 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2087 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2088 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2089 [RTA_PRIORITY] = { .type = NLA_U32 },
2090 [RTA_METRICS] = { .type = NLA_NESTED },
2091};
2092
2093static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2094 struct fib6_config *cfg)
1da177e4 2095{
86872cb5
TG
2096 struct rtmsg *rtm;
2097 struct nlattr *tb[RTA_MAX+1];
2098 int err;
1da177e4 2099
86872cb5
TG
2100 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2101 if (err < 0)
2102 goto errout;
1da177e4 2103
86872cb5
TG
2104 err = -EINVAL;
2105 rtm = nlmsg_data(nlh);
2106 memset(cfg, 0, sizeof(*cfg));
2107
2108 cfg->fc_table = rtm->rtm_table;
2109 cfg->fc_dst_len = rtm->rtm_dst_len;
2110 cfg->fc_src_len = rtm->rtm_src_len;
2111 cfg->fc_flags = RTF_UP;
2112 cfg->fc_protocol = rtm->rtm_protocol;
2113
2114 if (rtm->rtm_type == RTN_UNREACHABLE)
2115 cfg->fc_flags |= RTF_REJECT;
2116
ab79ad14
2117 if (rtm->rtm_type == RTN_LOCAL)
2118 cfg->fc_flags |= RTF_LOCAL;
2119
86872cb5
TG
2120 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2121 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2122 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2123
2124 if (tb[RTA_GATEWAY]) {
2125 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2126 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2127 }
86872cb5
TG
2128
2129 if (tb[RTA_DST]) {
2130 int plen = (rtm->rtm_dst_len + 7) >> 3;
2131
2132 if (nla_len(tb[RTA_DST]) < plen)
2133 goto errout;
2134
2135 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2136 }
86872cb5
TG
2137
2138 if (tb[RTA_SRC]) {
2139 int plen = (rtm->rtm_src_len + 7) >> 3;
2140
2141 if (nla_len(tb[RTA_SRC]) < plen)
2142 goto errout;
2143
2144 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2145 }
86872cb5
TG
2146
2147 if (tb[RTA_OIF])
2148 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2149
2150 if (tb[RTA_PRIORITY])
2151 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2152
2153 if (tb[RTA_METRICS]) {
2154 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2155 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2156 }
86872cb5
TG
2157
2158 if (tb[RTA_TABLE])
2159 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2160
2161 err = 0;
2162errout:
2163 return err;
1da177e4
LT
2164}
2165
c127ea2c 2166static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2167{
86872cb5
TG
2168 struct fib6_config cfg;
2169 int err;
1da177e4 2170
86872cb5
TG
2171 err = rtm_to_fib6_config(skb, nlh, &cfg);
2172 if (err < 0)
2173 return err;
2174
2175 return ip6_route_del(&cfg);
1da177e4
LT
2176}
2177
c127ea2c 2178static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2179{
86872cb5
TG
2180 struct fib6_config cfg;
2181 int err;
1da177e4 2182
86872cb5
TG
2183 err = rtm_to_fib6_config(skb, nlh, &cfg);
2184 if (err < 0)
2185 return err;
2186
2187 return ip6_route_add(&cfg);
1da177e4
LT
2188}
2189
339bf98f
TG
2190static inline size_t rt6_nlmsg_size(void)
2191{
2192 return NLMSG_ALIGN(sizeof(struct rtmsg))
2193 + nla_total_size(16) /* RTA_SRC */
2194 + nla_total_size(16) /* RTA_DST */
2195 + nla_total_size(16) /* RTA_GATEWAY */
2196 + nla_total_size(16) /* RTA_PREFSRC */
2197 + nla_total_size(4) /* RTA_TABLE */
2198 + nla_total_size(4) /* RTA_IIF */
2199 + nla_total_size(4) /* RTA_OIF */
2200 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2201 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2202 + nla_total_size(sizeof(struct rta_cacheinfo));
2203}
2204
191cd582
BH
2205static int rt6_fill_node(struct net *net,
2206 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2207 struct in6_addr *dst, struct in6_addr *src,
2208 int iif, int type, u32 pid, u32 seq,
7bc570c8 2209 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2210{
2211 struct rtmsg *rtm;
2d7202bf 2212 struct nlmsghdr *nlh;
e3703b3d 2213 long expires;
9e762a4a 2214 u32 table;
1da177e4
LT
2215
2216 if (prefix) { /* user wants prefix routes only */
2217 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2218 /* success since this is not a prefix route */
2219 return 1;
2220 }
2221 }
2222
2d7202bf
TG
2223 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2224 if (nlh == NULL)
26932566 2225 return -EMSGSIZE;
2d7202bf
TG
2226
2227 rtm = nlmsg_data(nlh);
1da177e4
LT
2228 rtm->rtm_family = AF_INET6;
2229 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2230 rtm->rtm_src_len = rt->rt6i_src.plen;
2231 rtm->rtm_tos = 0;
c71099ac 2232 if (rt->rt6i_table)
9e762a4a 2233 table = rt->rt6i_table->tb6_id;
c71099ac 2234 else
9e762a4a
PM
2235 table = RT6_TABLE_UNSPEC;
2236 rtm->rtm_table = table;
2d7202bf 2237 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2238 if (rt->rt6i_flags&RTF_REJECT)
2239 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2240 else if (rt->rt6i_flags&RTF_LOCAL)
2241 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2242 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2243 rtm->rtm_type = RTN_LOCAL;
2244 else
2245 rtm->rtm_type = RTN_UNICAST;
2246 rtm->rtm_flags = 0;
2247 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2248 rtm->rtm_protocol = rt->rt6i_protocol;
2249 if (rt->rt6i_flags&RTF_DYNAMIC)
2250 rtm->rtm_protocol = RTPROT_REDIRECT;
2251 else if (rt->rt6i_flags & RTF_ADDRCONF)
2252 rtm->rtm_protocol = RTPROT_KERNEL;
2253 else if (rt->rt6i_flags&RTF_DEFAULT)
2254 rtm->rtm_protocol = RTPROT_RA;
2255
2256 if (rt->rt6i_flags&RTF_CACHE)
2257 rtm->rtm_flags |= RTM_F_CLONED;
2258
2259 if (dst) {
2d7202bf 2260 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2261 rtm->rtm_dst_len = 128;
1da177e4 2262 } else if (rtm->rtm_dst_len)
2d7202bf 2263 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2264#ifdef CONFIG_IPV6_SUBTREES
2265 if (src) {
2d7202bf 2266 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2267 rtm->rtm_src_len = 128;
1da177e4 2268 } else if (rtm->rtm_src_len)
2d7202bf 2269 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2270#endif
7bc570c8
YH
2271 if (iif) {
2272#ifdef CONFIG_IPV6_MROUTE
2273 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2274 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2275 if (err <= 0) {
2276 if (!nowait) {
2277 if (err == 0)
2278 return 0;
2279 goto nla_put_failure;
2280 } else {
2281 if (err == -EMSGSIZE)
2282 goto nla_put_failure;
2283 }
2284 }
2285 } else
2286#endif
2287 NLA_PUT_U32(skb, RTA_IIF, iif);
2288 } else if (dst) {
d8d1f30b 2289 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
1da177e4 2290 struct in6_addr saddr_buf;
191cd582 2291 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
7cbca67c 2292 dst, 0, &saddr_buf) == 0)
2d7202bf 2293 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2294 }
2d7202bf 2295
defb3519 2296 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2297 goto nla_put_failure;
2298
d8d1f30b
CG
2299 if (rt->dst.neighbour)
2300 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2d7202bf 2301
d8d1f30b 2302 if (rt->dst.dev)
2d7202bf
TG
2303 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2304
2305 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2306
36e3deae
YH
2307 if (!(rt->rt6i_flags & RTF_EXPIRES))
2308 expires = 0;
2309 else if (rt->rt6i_expires - jiffies < INT_MAX)
2310 expires = rt->rt6i_expires - jiffies;
2311 else
2312 expires = INT_MAX;
69cdf8f9 2313
d8d1f30b
CG
2314 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2315 expires, rt->dst.error) < 0)
e3703b3d 2316 goto nla_put_failure;
2d7202bf
TG
2317
2318 return nlmsg_end(skb, nlh);
2319
2320nla_put_failure:
26932566
PM
2321 nlmsg_cancel(skb, nlh);
2322 return -EMSGSIZE;
1da177e4
LT
2323}
2324
1b43af54 2325int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2326{
2327 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2328 int prefix;
2329
2d7202bf
TG
2330 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2331 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2332 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2333 } else
2334 prefix = 0;
2335
191cd582
BH
2336 return rt6_fill_node(arg->net,
2337 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2338 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2339 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2340}
2341
c127ea2c 2342static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2343{
3b1e0a65 2344 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2345 struct nlattr *tb[RTA_MAX+1];
2346 struct rt6_info *rt;
1da177e4 2347 struct sk_buff *skb;
ab364a6f 2348 struct rtmsg *rtm;
1da177e4 2349 struct flowi fl;
ab364a6f 2350 int err, iif = 0;
1da177e4 2351
ab364a6f
TG
2352 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2353 if (err < 0)
2354 goto errout;
1da177e4 2355
ab364a6f 2356 err = -EINVAL;
1da177e4 2357 memset(&fl, 0, sizeof(fl));
1da177e4 2358
ab364a6f
TG
2359 if (tb[RTA_SRC]) {
2360 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2361 goto errout;
2362
2363 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2364 }
2365
2366 if (tb[RTA_DST]) {
2367 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2368 goto errout;
2369
2370 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2371 }
2372
2373 if (tb[RTA_IIF])
2374 iif = nla_get_u32(tb[RTA_IIF]);
2375
2376 if (tb[RTA_OIF])
2377 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2378
2379 if (iif) {
2380 struct net_device *dev;
5578689a 2381 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2382 if (!dev) {
2383 err = -ENODEV;
ab364a6f 2384 goto errout;
1da177e4
LT
2385 }
2386 }
2387
ab364a6f
TG
2388 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2389 if (skb == NULL) {
2390 err = -ENOBUFS;
2391 goto errout;
2392 }
1da177e4 2393
ab364a6f
TG
2394 /* Reserve room for dummy headers, this skb can pass
2395 through good chunk of routing engine.
2396 */
459a98ed 2397 skb_reset_mac_header(skb);
ab364a6f 2398 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2399
8a3edd80 2400 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
d8d1f30b 2401 skb_dst_set(skb, &rt->dst);
1da177e4 2402
191cd582 2403 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2404 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2405 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2406 if (err < 0) {
ab364a6f
TG
2407 kfree_skb(skb);
2408 goto errout;
1da177e4
LT
2409 }
2410
5578689a 2411 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2412errout:
1da177e4 2413 return err;
1da177e4
LT
2414}
2415
86872cb5 2416void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2417{
2418 struct sk_buff *skb;
5578689a 2419 struct net *net = info->nl_net;
528c4ceb
DL
2420 u32 seq;
2421 int err;
2422
2423 err = -ENOBUFS;
2424 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2425
339bf98f 2426 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2427 if (skb == NULL)
2428 goto errout;
2429
191cd582 2430 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2431 event, info->pid, seq, 0, 0, 0);
26932566
PM
2432 if (err < 0) {
2433 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2434 WARN_ON(err == -EMSGSIZE);
2435 kfree_skb(skb);
2436 goto errout;
2437 }
1ce85fe4
PNA
2438 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2439 info->nlh, gfp_any());
2440 return;
21713ebc
TG
2441errout:
2442 if (err < 0)
5578689a 2443 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2444}
2445
8ed67789
DL
2446static int ip6_route_dev_notify(struct notifier_block *this,
2447 unsigned long event, void *data)
2448{
2449 struct net_device *dev = (struct net_device *)data;
c346dca1 2450 struct net *net = dev_net(dev);
8ed67789
DL
2451
2452 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2453 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2454 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2455#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2456 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2457 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2458 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2459 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2460#endif
2461 }
2462
2463 return NOTIFY_OK;
2464}
2465
1da177e4
LT
2466/*
2467 * /proc
2468 */
2469
2470#ifdef CONFIG_PROC_FS
2471
1da177e4
LT
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): no code in this part of the file references this
 * structure (the /proc handlers below use seq_file) -- confirm whether
 * it is still needed.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2480
2481static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2482{
33120b30 2483 struct seq_file *m = p_arg;
1da177e4 2484
4b7a4274 2485 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2486
2487#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2488 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2489#else
33120b30 2490 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2491#endif
2492
2493 if (rt->rt6i_nexthop) {
4b7a4274 2494 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
1da177e4 2495 } else {
33120b30 2496 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2497 }
33120b30 2498 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2499 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2500 rt->dst.__use, rt->rt6i_flags,
33120b30 2501 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2502 return 0;
2503}
2504
33120b30 2505static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2506{
f3db4851
DL
2507 struct net *net = (struct net *)m->private;
2508 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2509 return 0;
2510}
1da177e4 2511
33120b30
AD
2512static int ipv6_route_open(struct inode *inode, struct file *file)
2513{
de05c557 2514 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2515}
2516
33120b30
AD
2517static const struct file_operations ipv6_route_proc_fops = {
2518 .owner = THIS_MODULE,
2519 .open = ipv6_route_open,
2520 .read = seq_read,
2521 .llseek = seq_lseek,
b6fcbdb4 2522 .release = single_release_net,
33120b30
AD
2523};
2524
1da177e4
LT
2525static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2526{
69ddb805 2527 struct net *net = (struct net *)seq->private;
1da177e4 2528 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2529 net->ipv6.rt6_stats->fib_nodes,
2530 net->ipv6.rt6_stats->fib_route_nodes,
2531 net->ipv6.rt6_stats->fib_rt_alloc,
2532 net->ipv6.rt6_stats->fib_rt_entries,
2533 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2534 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2535 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2536
2537 return 0;
2538}
2539
2540static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2541{
de05c557 2542 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2543}
2544
9a32144e 2545static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2546 .owner = THIS_MODULE,
2547 .open = rt6_stats_seq_open,
2548 .read = seq_read,
2549 .llseek = seq_lseek,
b6fcbdb4 2550 .release = single_release_net,
1da177e4
LT
2551};
2552#endif /* CONFIG_PROC_FS */
2553
2554#ifdef CONFIG_SYSCTL
2555
1da177e4 2556static
8d65af78 2557int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2558 void __user *buffer, size_t *lenp, loff_t *ppos)
2559{
c486da34
LAG
2560 struct net *net;
2561 int delay;
2562 if (!write)
1da177e4 2563 return -EINVAL;
c486da34
LAG
2564
2565 net = (struct net *)ctl->extra1;
2566 delay = net->ipv6.sysctl.flush_delay;
2567 proc_dointvec(ctl, write, buffer, lenp, ppos);
2568 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2569 return 0;
1da177e4
LT
2570}
2571
760f2d01 2572ctl_table ipv6_route_table_template[] = {
1ab1457c 2573 {
1da177e4 2574 .procname = "flush",
4990509f 2575 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2576 .maxlen = sizeof(int),
89c8b3a1 2577 .mode = 0200,
6d9f239a 2578 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2579 },
2580 {
1da177e4 2581 .procname = "gc_thresh",
9a7ec3a9 2582 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2583 .maxlen = sizeof(int),
2584 .mode = 0644,
6d9f239a 2585 .proc_handler = proc_dointvec,
1da177e4
LT
2586 },
2587 {
1da177e4 2588 .procname = "max_size",
4990509f 2589 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2590 .maxlen = sizeof(int),
2591 .mode = 0644,
6d9f239a 2592 .proc_handler = proc_dointvec,
1da177e4
LT
2593 },
2594 {
1da177e4 2595 .procname = "gc_min_interval",
4990509f 2596 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2597 .maxlen = sizeof(int),
2598 .mode = 0644,
6d9f239a 2599 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2600 },
2601 {
1da177e4 2602 .procname = "gc_timeout",
4990509f 2603 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2604 .maxlen = sizeof(int),
2605 .mode = 0644,
6d9f239a 2606 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2607 },
2608 {
1da177e4 2609 .procname = "gc_interval",
4990509f 2610 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2611 .maxlen = sizeof(int),
2612 .mode = 0644,
6d9f239a 2613 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2614 },
2615 {
1da177e4 2616 .procname = "gc_elasticity",
4990509f 2617 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2618 .maxlen = sizeof(int),
2619 .mode = 0644,
f3d3f616 2620 .proc_handler = proc_dointvec,
1da177e4
LT
2621 },
2622 {
1da177e4 2623 .procname = "mtu_expires",
4990509f 2624 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2625 .maxlen = sizeof(int),
2626 .mode = 0644,
6d9f239a 2627 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2628 },
2629 {
1da177e4 2630 .procname = "min_adv_mss",
4990509f 2631 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2632 .maxlen = sizeof(int),
2633 .mode = 0644,
f3d3f616 2634 .proc_handler = proc_dointvec,
1da177e4
LT
2635 },
2636 {
1da177e4 2637 .procname = "gc_min_interval_ms",
4990509f 2638 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2639 .maxlen = sizeof(int),
2640 .mode = 0644,
6d9f239a 2641 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2642 },
f8572d8f 2643 { }
1da177e4
LT
2644};
2645
2c8c1e72 2646struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2647{
2648 struct ctl_table *table;
2649
2650 table = kmemdup(ipv6_route_table_template,
2651 sizeof(ipv6_route_table_template),
2652 GFP_KERNEL);
5ee09105
YH
2653
2654 if (table) {
2655 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2656 table[0].extra1 = net;
86393e52 2657 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2658 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2659 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2660 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2661 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2662 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2663 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2664 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2665 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2666 }
2667
760f2d01
DL
2668 return table;
2669}
1da177e4
LT
2670#endif
2671
2c8c1e72 2672static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2673{
633d424b 2674 int ret = -ENOMEM;
8ed67789 2675
86393e52
AD
2676 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2677 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2678
fc66f95c
ED
2679 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2680 goto out_ip6_dst_ops;
2681
8ed67789
DL
2682 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2683 sizeof(*net->ipv6.ip6_null_entry),
2684 GFP_KERNEL);
2685 if (!net->ipv6.ip6_null_entry)
fc66f95c 2686 goto out_ip6_dst_entries;
d8d1f30b 2687 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2688 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2689 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
defb3519 2690 dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255);
8ed67789
DL
2691
2692#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2693 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2694 sizeof(*net->ipv6.ip6_prohibit_entry),
2695 GFP_KERNEL);
68fffc67
PZ
2696 if (!net->ipv6.ip6_prohibit_entry)
2697 goto out_ip6_null_entry;
d8d1f30b 2698 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2699 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2700 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
defb3519 2701 dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255);
8ed67789
DL
2702
2703 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2704 sizeof(*net->ipv6.ip6_blk_hole_entry),
2705 GFP_KERNEL);
68fffc67
PZ
2706 if (!net->ipv6.ip6_blk_hole_entry)
2707 goto out_ip6_prohibit_entry;
d8d1f30b 2708 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2709 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2710 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
defb3519 2711 dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255);
8ed67789
DL
2712#endif
2713
b339a47c
PZ
2714 net->ipv6.sysctl.flush_delay = 0;
2715 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2716 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2717 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2718 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2719 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2720 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2721 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2722
cdb18761
DL
2723#ifdef CONFIG_PROC_FS
2724 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2725 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2726#endif
6891a346
BT
2727 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2728
8ed67789
DL
2729 ret = 0;
2730out:
2731 return ret;
f2fc6a54 2732
68fffc67
PZ
2733#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2734out_ip6_prohibit_entry:
2735 kfree(net->ipv6.ip6_prohibit_entry);
2736out_ip6_null_entry:
2737 kfree(net->ipv6.ip6_null_entry);
2738#endif
fc66f95c
ED
2739out_ip6_dst_entries:
2740 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2741out_ip6_dst_ops:
f2fc6a54 2742 goto out;
cdb18761
DL
2743}
2744
2c8c1e72 2745static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2746{
2747#ifdef CONFIG_PROC_FS
2748 proc_net_remove(net, "ipv6_route");
2749 proc_net_remove(net, "rt6_stats");
2750#endif
8ed67789
DL
2751 kfree(net->ipv6.ip6_null_entry);
2752#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2753 kfree(net->ipv6.ip6_prohibit_entry);
2754 kfree(net->ipv6.ip6_blk_hole_entry);
2755#endif
41bb78b4 2756 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2757}
2758
2759static struct pernet_operations ip6_route_net_ops = {
2760 .init = ip6_route_net_init,
2761 .exit = ip6_route_net_exit,
2762};
2763
8ed67789
DL
2764static struct notifier_block ip6_route_dev_notifier = {
2765 .notifier_call = ip6_route_dev_notify,
2766 .priority = 0,
2767};
2768
433d49c3 2769int __init ip6_route_init(void)
1da177e4 2770{
433d49c3
DL
2771 int ret;
2772
9a7ec3a9
DL
2773 ret = -ENOMEM;
2774 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2775 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2776 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2777 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2778 goto out;
14e50e57 2779
fc66f95c 2780 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2781 if (ret)
bdb3289f 2782 goto out_kmem_cache;
bdb3289f 2783
fc66f95c
ED
2784 ret = register_pernet_subsys(&ip6_route_net_ops);
2785 if (ret)
2786 goto out_dst_entries;
2787
5dc121e9
AE
2788 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2789
8ed67789
DL
2790 /* Registering of the loopback is done before this portion of code,
2791 * the loopback reference in rt6_info will not be taken, do it
2792 * manually for init_net */
d8d1f30b 2793 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2794 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2795 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2796 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2797 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2798 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2799 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2800 #endif
433d49c3
DL
2801 ret = fib6_init();
2802 if (ret)
8ed67789 2803 goto out_register_subsys;
433d49c3 2804
433d49c3
DL
2805 ret = xfrm6_init();
2806 if (ret)
cdb18761 2807 goto out_fib6_init;
c35b7e72 2808
433d49c3
DL
2809 ret = fib6_rules_init();
2810 if (ret)
2811 goto xfrm6_init;
7e5449c2 2812
433d49c3
DL
2813 ret = -ENOBUFS;
2814 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2815 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2816 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2817 goto fib6_rules_init;
c127ea2c 2818
8ed67789 2819 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2820 if (ret)
2821 goto fib6_rules_init;
8ed67789 2822
433d49c3
DL
2823out:
2824 return ret;
2825
2826fib6_rules_init:
433d49c3
DL
2827 fib6_rules_cleanup();
2828xfrm6_init:
433d49c3 2829 xfrm6_fini();
433d49c3 2830out_fib6_init:
433d49c3 2831 fib6_gc_cleanup();
8ed67789
DL
2832out_register_subsys:
2833 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
2834out_dst_entries:
2835 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 2836out_kmem_cache:
f2fc6a54 2837 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2838 goto out;
1da177e4
LT
2839}
2840
2841void ip6_route_cleanup(void)
2842{
8ed67789 2843 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2844 fib6_rules_cleanup();
1da177e4 2845 xfrm6_fini();
1da177e4 2846 fib6_gc_cleanup();
8ed67789 2847 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 2848 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 2849 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2850}