cdc_ncm: fix short packet issue on some devices
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
5a0e3ad6 43#include <linux/slab.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG()
 * and RT6_TRACE() expand to printk() calls; below that they expand to
 * nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
1da177e4
LT
75static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 77static unsigned int ip6_default_advmss(const struct dst_entry *dst);
d33e4553 78static unsigned int ip6_default_mtu(const struct dst_entry *dst);
1da177e4
LT
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
70ceb4f5 90#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
91static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
efa2cea0
DL
95static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
97 struct in6_addr *gwaddr, int ifindex);
98#endif
99
06582540
DM
100static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
101{
102 struct rt6_info *rt = (struct rt6_info *) dst;
103 struct inet_peer *peer;
104 u32 *p = NULL;
105
106 if (!rt->rt6i_peer)
107 rt6_bind_peer(rt, 1);
108
109 peer = rt->rt6i_peer;
110 if (peer) {
111 u32 *old_p = __DST_METRICS_PTR(old);
112 unsigned long prev, new;
113
114 p = peer->metrics;
115 if (inet_metrics_new(peer))
116 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
117
118 new = (unsigned long) p;
119 prev = cmpxchg(&dst->_metrics, old, new);
120
121 if (prev != old) {
122 p = __DST_METRICS_PTR(prev);
123 if (prev & DST_METRICS_READ_ONLY)
124 p = NULL;
125 }
126 }
127 return p;
128}
129
9a7ec3a9 130static struct dst_ops ip6_dst_ops_template = {
1da177e4 131 .family = AF_INET6,
09640e63 132 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
133 .gc = ip6_dst_gc,
134 .gc_thresh = 1024,
135 .check = ip6_dst_check,
0dbaee3b 136 .default_advmss = ip6_default_advmss,
d33e4553 137 .default_mtu = ip6_default_mtu,
06582540 138 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
139 .destroy = ip6_dst_destroy,
140 .ifdown = ip6_dst_ifdown,
141 .negative_advice = ip6_negative_advice,
142 .link_failure = ip6_link_failure,
143 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 144 .local_out = __ip6_local_out,
1da177e4
LT
145};
146
ec831ea7
RD
/* default_mtu callback for blackhole dsts: always reports 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	const unsigned int mtu = 0;

	return mtu;
}
151
14e50e57
DM
152static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
153{
154}
155
156static struct dst_ops ip6_dst_blackhole_ops = {
157 .family = AF_INET6,
09640e63 158 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
159 .destroy = ip6_dst_destroy,
160 .check = ip6_dst_check,
ec831ea7 161 .default_mtu = ip6_blackhole_default_mtu,
214f45c9 162 .default_advmss = ip6_default_advmss,
14e50e57 163 .update_pmtu = ip6_rt_blackhole_update_pmtu,
14e50e57
DM
164};
165
62fa8a84
DM
166static const u32 ip6_template_metrics[RTAX_MAX] = {
167 [RTAX_HOPLIMIT - 1] = 255,
168};
169
bdb3289f 170static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
171 .dst = {
172 .__refcnt = ATOMIC_INIT(1),
173 .__use = 1,
174 .obsolete = -1,
175 .error = -ENETUNREACH,
d8d1f30b
CG
176 .input = ip6_pkt_discard,
177 .output = ip6_pkt_discard_out,
1da177e4
LT
178 },
179 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 180 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
181 .rt6i_metric = ~(u32) 0,
182 .rt6i_ref = ATOMIC_INIT(1),
183};
184
101367c2
TG
185#ifdef CONFIG_IPV6_MULTIPLE_TABLES
186
6723ab54
DM
/* Handlers referenced by ip6_prohibit_entry_template; defined later. */
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 189
280a34c8 190static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
191 .dst = {
192 .__refcnt = ATOMIC_INIT(1),
193 .__use = 1,
194 .obsolete = -1,
195 .error = -EACCES,
d8d1f30b
CG
196 .input = ip6_pkt_prohibit,
197 .output = ip6_pkt_prohibit_out,
101367c2
TG
198 },
199 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 200 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
201 .rt6i_metric = ~(u32) 0,
202 .rt6i_ref = ATOMIC_INIT(1),
203};
204
bdb3289f 205static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
206 .dst = {
207 .__refcnt = ATOMIC_INIT(1),
208 .__use = 1,
209 .obsolete = -1,
210 .error = -EINVAL,
d8d1f30b
CG
211 .input = dst_discard,
212 .output = dst_discard,
101367c2
TG
213 },
214 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 215 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
216 .rt6i_metric = ~(u32) 0,
217 .rt6i_ref = ATOMIC_INIT(1),
218};
219
220#endif
221
/*
 * Allocate a dst from @ops (initial reference count 0) and hand it back
 * typed as an rt6_info.  Returns whatever dst_alloc() returned.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *rt = (struct rt6_info *)dst_alloc(ops, 0);

	return rt;
}
227
228static void ip6_dst_destroy(struct dst_entry *dst)
229{
230 struct rt6_info *rt = (struct rt6_info *)dst;
231 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 232 struct inet_peer *peer = rt->rt6i_peer;
1da177e4
LT
233
234 if (idev != NULL) {
235 rt->rt6i_idev = NULL;
236 in6_dev_put(idev);
1ab1457c 237 }
b3419363 238 if (peer) {
b3419363
DM
239 rt->rt6i_peer = NULL;
240 inet_putpeer(peer);
241 }
242}
243
6431cbc2
DM
244static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
245
246static u32 rt6_peer_genid(void)
247{
248 return atomic_read(&__rt6_peer_genid);
249}
250
b3419363
DM
251void rt6_bind_peer(struct rt6_info *rt, int create)
252{
253 struct inet_peer *peer;
254
b3419363
DM
255 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
256 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
257 inet_putpeer(peer);
6431cbc2
DM
258 else
259 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
260}
261
262static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
263 int how)
264{
265 struct rt6_info *rt = (struct rt6_info *)dst;
266 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 267 struct net_device *loopback_dev =
c346dca1 268 dev_net(dev)->loopback_dev;
1da177e4 269
5a3e55d6
DL
270 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
271 struct inet6_dev *loopback_idev =
272 in6_dev_get(loopback_dev);
1da177e4
LT
273 if (loopback_idev != NULL) {
274 rt->rt6i_idev = loopback_idev;
275 in6_dev_put(idev);
276 }
277 }
278}
279
280static __inline__ int rt6_check_expired(const struct rt6_info *rt)
281{
a02cec21
ED
282 return (rt->rt6i_flags & RTF_EXPIRES) &&
283 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
284}
285
c71099ac
TG
286static inline int rt6_need_strict(struct in6_addr *daddr)
287{
a02cec21
ED
288 return ipv6_addr_type(daddr) &
289 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
290}
291
1da177e4 292/*
c71099ac 293 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
294 */
295
8ed67789
DL
296static inline struct rt6_info *rt6_device_match(struct net *net,
297 struct rt6_info *rt,
dd3abc4e 298 struct in6_addr *saddr,
1da177e4 299 int oif,
d420895e 300 int flags)
1da177e4
LT
301{
302 struct rt6_info *local = NULL;
303 struct rt6_info *sprt;
304
dd3abc4e
YH
305 if (!oif && ipv6_addr_any(saddr))
306 goto out;
307
d8d1f30b 308 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
309 struct net_device *dev = sprt->rt6i_dev;
310
311 if (oif) {
1da177e4
LT
312 if (dev->ifindex == oif)
313 return sprt;
314 if (dev->flags & IFF_LOOPBACK) {
315 if (sprt->rt6i_idev == NULL ||
316 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 317 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 318 continue;
1ab1457c 319 if (local && (!oif ||
1da177e4
LT
320 local->rt6i_idev->dev->ifindex == oif))
321 continue;
322 }
323 local = sprt;
324 }
dd3abc4e
YH
325 } else {
326 if (ipv6_chk_addr(net, saddr, dev,
327 flags & RT6_LOOKUP_F_IFACE))
328 return sprt;
1da177e4 329 }
dd3abc4e 330 }
1da177e4 331
dd3abc4e 332 if (oif) {
1da177e4
LT
333 if (local)
334 return local;
335
d420895e 336 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 337 return net->ipv6.ip6_null_entry;
1da177e4 338 }
dd3abc4e 339out:
1da177e4
LT
340 return rt;
341}
342
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the next-hop neighbour of @rt is not in a NUD_VALID state and the
 * last update of the neighbour entry is older than the interface's
 * rtr_probe_interval, send a neighbour solicitation to the solicited-node
 * multicast address of the neighbour.  @rt may be NULL, in which case
 * nothing happens.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute; the rate limit is implemented by stamping neigh->updated.
 *
 * The stamp is a write to the neighbour entry, so neigh->lock is taken
 * for writing here; holding only the read side would let concurrent
 * callers race on neigh->updated.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int probe;

	/* Cheap unlocked pre-check; re-evaluated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	probe = !(neigh->nud_state & NUD_VALID) &&
		time_after(jiffies,
			   neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (probe)
		neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	if (!probe)
		return;

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
377
1da177e4 378/*
554cfb7e 379 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 380 */
b6f99a21 381static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
382{
383 struct net_device *dev = rt->rt6i_dev;
161980f4 384 if (!oif || dev->ifindex == oif)
554cfb7e 385 return 2;
161980f4
DM
386 if ((dev->flags & IFF_LOOPBACK) &&
387 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
388 return 1;
389 return 0;
554cfb7e 390}
1da177e4 391
b6f99a21 392static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 393{
554cfb7e 394 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 395 int m;
4d0c5911
YH
396 if (rt->rt6i_flags & RTF_NONEXTHOP ||
397 !(rt->rt6i_flags & RTF_GATEWAY))
398 m = 1;
399 else if (neigh) {
554cfb7e
YH
400 read_lock_bh(&neigh->lock);
401 if (neigh->nud_state & NUD_VALID)
4d0c5911 402 m = 2;
398bcbeb
YH
403#ifdef CONFIG_IPV6_ROUTER_PREF
404 else if (neigh->nud_state & NUD_FAILED)
405 m = 0;
406#endif
407 else
ea73ee23 408 m = 1;
554cfb7e 409 read_unlock_bh(&neigh->lock);
398bcbeb
YH
410 } else
411 m = 0;
554cfb7e 412 return m;
1da177e4
LT
413}
414
554cfb7e
YH
415static int rt6_score_route(struct rt6_info *rt, int oif,
416 int strict)
1da177e4 417{
4d0c5911 418 int m, n;
1ab1457c 419
4d0c5911 420 m = rt6_check_dev(rt, oif);
77d16f45 421 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 422 return -1;
ebacaaa0
YH
423#ifdef CONFIG_IPV6_ROUTER_PREF
424 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
425#endif
4d0c5911 426 n = rt6_check_neigh(rt);
557e92ef 427 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
428 return -1;
429 return m;
430}
431
f11e6659
DM
432static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
433 int *mpri, struct rt6_info *match)
554cfb7e 434{
f11e6659
DM
435 int m;
436
437 if (rt6_check_expired(rt))
438 goto out;
439
440 m = rt6_score_route(rt, oif, strict);
441 if (m < 0)
442 goto out;
443
444 if (m > *mpri) {
445 if (strict & RT6_LOOKUP_F_REACHABLE)
446 rt6_probe(match);
447 *mpri = m;
448 match = rt;
449 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
450 rt6_probe(rt);
451 }
452
453out:
454 return match;
455}
456
457static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
458 struct rt6_info *rr_head,
459 u32 metric, int oif, int strict)
460{
461 struct rt6_info *rt, *match;
554cfb7e 462 int mpri = -1;
1da177e4 463
f11e6659
DM
464 match = NULL;
465 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 466 rt = rt->dst.rt6_next)
f11e6659
DM
467 match = find_match(rt, oif, strict, &mpri, match);
468 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 469 rt = rt->dst.rt6_next)
f11e6659 470 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 471
f11e6659
DM
472 return match;
473}
1da177e4 474
f11e6659
DM
475static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
476{
477 struct rt6_info *match, *rt0;
8ed67789 478 struct net *net;
1da177e4 479
f11e6659 480 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 481 __func__, fn->leaf, oif);
554cfb7e 482
f11e6659
DM
483 rt0 = fn->rr_ptr;
484 if (!rt0)
485 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 486
f11e6659 487 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 488
554cfb7e 489 if (!match &&
f11e6659 490 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 491 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 492
554cfb7e 493 /* no entries matched; do round-robin */
f11e6659
DM
494 if (!next || next->rt6i_metric != rt0->rt6i_metric)
495 next = fn->leaf;
496
497 if (next != rt0)
498 fn->rr_ptr = next;
1da177e4 499 }
1da177e4 500
f11e6659 501 RT6_TRACE("%s() => %p\n",
0dc47877 502 __func__, match);
1da177e4 503
c346dca1 504 net = dev_net(rt0->rt6i_dev);
a02cec21 505 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
506}
507
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Handle a received struct route_info found at @opt (@len bytes) that
 * was announced by gateway @gwaddr on device @dev.
 *
 * After validating the length and prefix-length fields and the route
 * preference, the matching route-info route is looked up and then:
 *   - deleted when the advertised lifetime is 0;
 *   - created when it does not exist and the lifetime is non-zero;
 *   - otherwise has its preference bits refreshed.
 * A surviving route gets RTF_EXPIRES and rt6i_expires set for a finite
 * lifetime, or RTF_EXPIRES cleared for an infinite one.
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
581
/*
 * BACKTRACK - climb the fib6 tree after a failed match.
 *
 * Must be expanded inside a function that has local variables `rt` and
 * `fn` and labels `out` and `restart`.  When `rt` is the null entry of
 * @__net, walk from `fn` towards the root: at each step move to the
 * parent, or into the parent's subtree (looked up by @saddr) when the
 * parent has one that is not the node we came from.  Jump to `restart`
 * at the first node carrying RTN_RTINFO, or to `out` when the top-level
 * root is reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 599
8ed67789
DL
600static struct rt6_info *ip6_pol_route_lookup(struct net *net,
601 struct fib6_table *table,
4c9483b2 602 struct flowi6 *fl6, int flags)
1da177e4
LT
603{
604 struct fib6_node *fn;
605 struct rt6_info *rt;
606
c71099ac 607 read_lock_bh(&table->tb6_lock);
4c9483b2 608 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
609restart:
610 rt = fn->leaf;
4c9483b2
DM
611 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
612 BACKTRACK(net, &fl6->saddr);
c71099ac 613out:
d8d1f30b 614 dst_use(&rt->dst, jiffies);
c71099ac 615 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
616 return rt;
617
618}
619
9acd9f3a
YH
620struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
621 const struct in6_addr *saddr, int oif, int strict)
c71099ac 622{
4c9483b2
DM
623 struct flowi6 fl6 = {
624 .flowi6_oif = oif,
625 .daddr = *daddr,
c71099ac
TG
626 };
627 struct dst_entry *dst;
77d16f45 628 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 629
adaa70bb 630 if (saddr) {
4c9483b2 631 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
632 flags |= RT6_LOOKUP_F_HAS_SADDR;
633 }
634
4c9483b2 635 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
636 if (dst->error == 0)
637 return (struct rt6_info *) dst;
638
639 dst_release(dst);
640
1da177e4
LT
641 return NULL;
642}
643
7159039a
YH
644EXPORT_SYMBOL(rt6_lookup);
645
c71099ac 646/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
647 It takes new route entry, the addition fails by any reason the
648 route is freed. In any case, if caller does not hold it, it may
649 be destroyed.
650 */
651
86872cb5 652static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
653{
654 int err;
c71099ac 655 struct fib6_table *table;
1da177e4 656
c71099ac
TG
657 table = rt->rt6i_table;
658 write_lock_bh(&table->tb6_lock);
86872cb5 659 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 660 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
661
662 return err;
663}
664
40e22e8f
TG
665int ip6_ins_rt(struct rt6_info *rt)
666{
4d1169c1 667 struct nl_info info = {
c346dca1 668 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 669 };
528c4ceb 670 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
671}
672
95a9a5ba
YH
673static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
674 struct in6_addr *saddr)
1da177e4 675{
1da177e4
LT
676 struct rt6_info *rt;
677
678 /*
679 * Clone the route.
680 */
681
682 rt = ip6_rt_copy(ort);
683
684 if (rt) {
14deae41
DM
685 struct neighbour *neigh;
686 int attempts = !in_softirq();
687
58c4fb86
YH
688 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
689 if (rt->rt6i_dst.plen != 128 &&
690 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
691 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 692 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 693 }
1da177e4 694
58c4fb86 695 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
696 rt->rt6i_dst.plen = 128;
697 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 698 rt->dst.flags |= DST_HOST;
1da177e4
LT
699
700#ifdef CONFIG_IPV6_SUBTREES
701 if (rt->rt6i_src.plen && saddr) {
702 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
703 rt->rt6i_src.plen = 128;
704 }
705#endif
706
14deae41
DM
707 retry:
708 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
709 if (IS_ERR(neigh)) {
710 struct net *net = dev_net(rt->rt6i_dev);
711 int saved_rt_min_interval =
712 net->ipv6.sysctl.ip6_rt_gc_min_interval;
713 int saved_rt_elasticity =
714 net->ipv6.sysctl.ip6_rt_gc_elasticity;
715
716 if (attempts-- > 0) {
717 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
718 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
719
86393e52 720 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
721
722 net->ipv6.sysctl.ip6_rt_gc_elasticity =
723 saved_rt_elasticity;
724 net->ipv6.sysctl.ip6_rt_gc_min_interval =
725 saved_rt_min_interval;
726 goto retry;
727 }
728
729 if (net_ratelimit())
730 printk(KERN_WARNING
7e1b33e5 731 "ipv6: Neighbour table overflow.\n");
d8d1f30b 732 dst_free(&rt->dst);
14deae41
DM
733 return NULL;
734 }
735 rt->rt6i_nexthop = neigh;
1da177e4 736
95a9a5ba 737 }
1da177e4 738
95a9a5ba
YH
739 return rt;
740}
1da177e4 741
299d9939
YH
742static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
743{
744 struct rt6_info *rt = ip6_rt_copy(ort);
745 if (rt) {
746 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
747 rt->rt6i_dst.plen = 128;
748 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 749 rt->dst.flags |= DST_HOST;
299d9939
YH
750 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
751 }
752 return rt;
753}
754
8ed67789 755static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 756 struct flowi6 *fl6, int flags)
1da177e4
LT
757{
758 struct fib6_node *fn;
519fbd87 759 struct rt6_info *rt, *nrt;
c71099ac 760 int strict = 0;
1da177e4 761 int attempts = 3;
519fbd87 762 int err;
53b7997f 763 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 764
77d16f45 765 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
766
767relookup:
c71099ac 768 read_lock_bh(&table->tb6_lock);
1da177e4 769
8238dd06 770restart_2:
4c9483b2 771 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
772
773restart:
4acad72d 774 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 775
4c9483b2 776 BACKTRACK(net, &fl6->saddr);
8ed67789 777 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 778 rt->rt6i_flags & RTF_CACHE)
1ddef044 779 goto out;
1da177e4 780
d8d1f30b 781 dst_hold(&rt->dst);
c71099ac 782 read_unlock_bh(&table->tb6_lock);
fb9de91e 783
519fbd87 784 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 785 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 786 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 787 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
788 else
789 goto out2;
e40cf353 790
d8d1f30b 791 dst_release(&rt->dst);
8ed67789 792 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 793
d8d1f30b 794 dst_hold(&rt->dst);
519fbd87 795 if (nrt) {
40e22e8f 796 err = ip6_ins_rt(nrt);
519fbd87 797 if (!err)
1da177e4 798 goto out2;
1da177e4 799 }
1da177e4 800
519fbd87
YH
801 if (--attempts <= 0)
802 goto out2;
803
804 /*
c71099ac 805 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
806 * released someone could insert this route. Relookup.
807 */
d8d1f30b 808 dst_release(&rt->dst);
519fbd87
YH
809 goto relookup;
810
811out:
8238dd06
YH
812 if (reachable) {
813 reachable = 0;
814 goto restart_2;
815 }
d8d1f30b 816 dst_hold(&rt->dst);
c71099ac 817 read_unlock_bh(&table->tb6_lock);
1da177e4 818out2:
d8d1f30b
CG
819 rt->dst.lastuse = jiffies;
820 rt->dst.__use++;
c71099ac
TG
821
822 return rt;
1da177e4
LT
823}
824
8ed67789 825static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 826 struct flowi6 *fl6, int flags)
4acad72d 827{
4c9483b2 828 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
829}
830
c71099ac
TG
831void ip6_route_input(struct sk_buff *skb)
832{
0660e03f 833 struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 834 struct net *net = dev_net(skb->dev);
adaa70bb 835 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
836 struct flowi6 fl6 = {
837 .flowi6_iif = skb->dev->ifindex,
838 .daddr = iph->daddr,
839 .saddr = iph->saddr,
840 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
841 .flowi6_mark = skb->mark,
842 .flowi6_proto = iph->nexthdr,
c71099ac 843 };
adaa70bb 844
1d6e55f1 845 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 846 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 847
4c9483b2 848 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
c71099ac
TG
849}
850
8ed67789 851static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 852 struct flowi6 *fl6, int flags)
1da177e4 853{
4c9483b2 854 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
855}
856
9c7a4f9c 857struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 858 struct flowi6 *fl6)
c71099ac
TG
859{
860 int flags = 0;
861
4c9483b2 862 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 863 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 864
4c9483b2 865 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 866 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
867 else if (sk)
868 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 869
4c9483b2 870 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
871}
872
7159039a 873EXPORT_SYMBOL(ip6_route_output);
1da177e4 874
2774c131 875struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 876{
69ead7af
DM
877 struct rt6_info *rt = dst_alloc(&ip6_dst_blackhole_ops, 1);
878 struct rt6_info *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
879 struct dst_entry *new = NULL;
880
881 if (rt) {
d8d1f30b 882 new = &rt->dst;
14e50e57 883
14e50e57 884 new->__use = 1;
352e512c
HX
885 new->input = dst_discard;
886 new->output = dst_discard;
14e50e57 887
defb3519 888 dst_copy_metrics(new, &ort->dst);
d8d1f30b 889 new->dev = ort->dst.dev;
14e50e57
DM
890 if (new->dev)
891 dev_hold(new->dev);
892 rt->rt6i_idev = ort->rt6i_idev;
893 if (rt->rt6i_idev)
894 in6_dev_hold(rt->rt6i_idev);
895 rt->rt6i_expires = 0;
896
897 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
898 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
899 rt->rt6i_metric = 0;
900
901 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
902#ifdef CONFIG_IPV6_SUBTREES
903 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
904#endif
905
906 dst_free(new);
907 }
908
69ead7af
DM
909 dst_release(dst_orig);
910 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 911}
14e50e57 912
1da177e4
LT
913/*
914 * Destination cache support functions
915 */
916
917static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
918{
919 struct rt6_info *rt;
920
921 rt = (struct rt6_info *) dst;
922
6431cbc2
DM
923 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
924 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
925 if (!rt->rt6i_peer)
926 rt6_bind_peer(rt, 0);
927 rt->rt6i_peer_genid = rt6_peer_genid();
928 }
1da177e4 929 return dst;
6431cbc2 930 }
1da177e4
LT
931 return NULL;
932}
933
934static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
935{
936 struct rt6_info *rt = (struct rt6_info *) dst;
937
938 if (rt) {
54c1a859
YH
939 if (rt->rt6i_flags & RTF_CACHE) {
940 if (rt6_check_expired(rt)) {
941 ip6_del_rt(rt);
942 dst = NULL;
943 }
944 } else {
1da177e4 945 dst_release(dst);
54c1a859
YH
946 dst = NULL;
947 }
1da177e4 948 }
54c1a859 949 return dst;
1da177e4
LT
950}
951
952static void ip6_link_failure(struct sk_buff *skb)
953{
954 struct rt6_info *rt;
955
3ffe533c 956 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 957
adf30907 958 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
959 if (rt) {
960 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 961 dst_set_expires(&rt->dst, 0);
1da177e4
LT
962 rt->rt6i_flags |= RTF_EXPIRES;
963 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
964 rt->rt6i_node->fn_sernum = -1;
965 }
966}
967
968static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
969{
970 struct rt6_info *rt6 = (struct rt6_info*)dst;
971
972 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
973 rt6->rt6i_flags |= RTF_MODIFIED;
974 if (mtu < IPV6_MIN_MTU) {
defb3519 975 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 976 mtu = IPV6_MIN_MTU;
defb3519
DM
977 features |= RTAX_FEATURE_ALLFRAG;
978 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 979 }
defb3519 980 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
981 }
982}
983
0dbaee3b 984static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 985{
0dbaee3b
DM
986 struct net_device *dev = dst->dev;
987 unsigned int mtu = dst_mtu(dst);
988 struct net *net = dev_net(dev);
989
1da177e4
LT
990 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
991
5578689a
DL
992 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
993 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
994
995 /*
1ab1457c
YH
996 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
997 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
998 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
999 * rely only on pmtu discovery"
1000 */
1001 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1002 mtu = IPV6_MAXPLEN;
1003 return mtu;
1004}
1005
d33e4553
DM
1006static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1007{
1008 unsigned int mtu = IPV6_MIN_MTU;
1009 struct inet6_dev *idev;
1010
1011 rcu_read_lock();
1012 idev = __in6_dev_get(dst->dev);
1013 if (idev)
1014 mtu = idev->cnf.mtu6;
1015 rcu_read_unlock();
1016
1017 return mtu;
1018}
1019
3b00944c
YH
/*
 * Singly linked list (via dst->next) of the dsts handed out by
 * icmp6_dst_alloc(), and the spinlock protecting it.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1022
3b00944c 1023struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1024 struct neighbour *neigh,
9acd9f3a 1025 const struct in6_addr *addr)
1da177e4
LT
1026{
1027 struct rt6_info *rt;
1028 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1029 struct net *net = dev_net(dev);
1da177e4
LT
1030
1031 if (unlikely(idev == NULL))
1032 return NULL;
1033
86393e52 1034 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1035 if (unlikely(rt == NULL)) {
1036 in6_dev_put(idev);
1037 goto out;
1038 }
1039
1040 dev_hold(dev);
1041 if (neigh)
1042 neigh_hold(neigh);
14deae41 1043 else {
1da177e4 1044 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
1045 if (IS_ERR(neigh))
1046 neigh = NULL;
1047 }
1da177e4
LT
1048
1049 rt->rt6i_dev = dev;
1050 rt->rt6i_idev = idev;
1051 rt->rt6i_nexthop = neigh;
d8d1f30b 1052 atomic_set(&rt->dst.__refcnt, 1);
defb3519 1053 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
d8d1f30b 1054 rt->dst.output = ip6_output;
1da177e4
LT
1055
1056#if 0 /* there's no chance to use these for ndisc */
d8d1f30b 1057 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1ab1457c 1058 ? DST_HOST
1da177e4
LT
1059 : 0;
1060 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1061 rt->rt6i_dst.plen = 128;
1062#endif
1063
3b00944c 1064 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1065 rt->dst.next = icmp6_dst_gc_list;
1066 icmp6_dst_gc_list = &rt->dst;
3b00944c 1067 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1068
5578689a 1069 fib6_force_start_gc(net);
1da177e4
LT
1070
1071out:
d8d1f30b 1072 return &rt->dst;
1da177e4
LT
1073}
1074
3d0f24a7 1075int icmp6_dst_gc(void)
1da177e4 1076{
e9476e95 1077 struct dst_entry *dst, **pprev;
3d0f24a7 1078 int more = 0;
1da177e4 1079
3b00944c
YH
1080 spin_lock_bh(&icmp6_dst_lock);
1081 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1082
1da177e4
LT
1083 while ((dst = *pprev) != NULL) {
1084 if (!atomic_read(&dst->__refcnt)) {
1085 *pprev = dst->next;
1086 dst_free(dst);
1da177e4
LT
1087 } else {
1088 pprev = &dst->next;
3d0f24a7 1089 ++more;
1da177e4
LT
1090 }
1091 }
1092
3b00944c 1093 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1094
3d0f24a7 1095 return more;
1da177e4
LT
1096}
1097
1e493d19
DM
1098static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1099 void *arg)
1100{
1101 struct dst_entry *dst, **pprev;
1102
1103 spin_lock_bh(&icmp6_dst_lock);
1104 pprev = &icmp6_dst_gc_list;
1105 while ((dst = *pprev) != NULL) {
1106 struct rt6_info *rt = (struct rt6_info *) dst;
1107 if (func(rt, arg)) {
1108 *pprev = dst->next;
1109 dst_free(dst);
1110 } else {
1111 pprev = &dst->next;
1112 }
1113 }
1114 spin_unlock_bh(&icmp6_dst_lock);
1115}
1116
569d3645 1117static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1118{
1da177e4 1119 unsigned long now = jiffies;
86393e52 1120 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1121 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1122 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1123 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1124 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1125 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1126 int entries;
7019b78e 1127
fc66f95c 1128 entries = dst_entries_get_fast(ops);
7019b78e 1129 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1130 entries <= rt_max_size)
1da177e4
LT
1131 goto out;
1132
6891a346
BT
1133 net->ipv6.ip6_rt_gc_expire++;
1134 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1135 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1136 entries = dst_entries_get_slow(ops);
1137 if (entries < ops->gc_thresh)
7019b78e 1138 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1139out:
7019b78e 1140 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1141 return entries > rt_max_size;
1da177e4
LT
1142}
1143
1144/* Clean host part of a prefix. Not necessary in radix tree,
1145 but results in cleaner routing tables.
1146
1147 Remove it only when all the things will work!
1148 */
1149
6b75d090 1150int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1151{
5170ae82 1152 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1153 if (hoplimit == 0) {
6b75d090 1154 struct net_device *dev = dst->dev;
c68f24cc
ED
1155 struct inet6_dev *idev;
1156
1157 rcu_read_lock();
1158 idev = __in6_dev_get(dev);
1159 if (idev)
6b75d090 1160 hoplimit = idev->cnf.hop_limit;
c68f24cc 1161 else
53b7997f 1162 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1163 rcu_read_unlock();
1da177e4
LT
1164 }
1165 return hoplimit;
1166}
abbf46ae 1167EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1168
1169/*
1170 *
1171 */
1172
86872cb5 1173int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1174{
1175 int err;
5578689a 1176 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1177 struct rt6_info *rt = NULL;
1178 struct net_device *dev = NULL;
1179 struct inet6_dev *idev = NULL;
c71099ac 1180 struct fib6_table *table;
1da177e4
LT
1181 int addr_type;
1182
86872cb5 1183 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1184 return -EINVAL;
1185#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1186 if (cfg->fc_src_len)
1da177e4
LT
1187 return -EINVAL;
1188#endif
86872cb5 1189 if (cfg->fc_ifindex) {
1da177e4 1190 err = -ENODEV;
5578689a 1191 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1192 if (!dev)
1193 goto out;
1194 idev = in6_dev_get(dev);
1195 if (!idev)
1196 goto out;
1197 }
1198
86872cb5
TG
1199 if (cfg->fc_metric == 0)
1200 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1201
5578689a 1202 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1203 if (table == NULL) {
1204 err = -ENOBUFS;
1205 goto out;
1206 }
1207
86393e52 1208 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1209
1210 if (rt == NULL) {
1211 err = -ENOMEM;
1212 goto out;
1213 }
1214
d8d1f30b 1215 rt->dst.obsolete = -1;
6f704992
YH
1216 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1217 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1218 0;
1da177e4 1219
86872cb5
TG
1220 if (cfg->fc_protocol == RTPROT_UNSPEC)
1221 cfg->fc_protocol = RTPROT_BOOT;
1222 rt->rt6i_protocol = cfg->fc_protocol;
1223
1224 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1225
1226 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1227 rt->dst.input = ip6_mc_input;
ab79ad14
1228 else if (cfg->fc_flags & RTF_LOCAL)
1229 rt->dst.input = ip6_input;
1da177e4 1230 else
d8d1f30b 1231 rt->dst.input = ip6_forward;
1da177e4 1232
d8d1f30b 1233 rt->dst.output = ip6_output;
1da177e4 1234
86872cb5
TG
1235 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1236 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1237 if (rt->rt6i_dst.plen == 128)
d8d1f30b 1238 rt->dst.flags = DST_HOST;
1da177e4
LT
1239
1240#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1241 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1242 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1243#endif
1244
86872cb5 1245 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1246
1247 /* We cannot add true routes via loopback here,
1248 they would result in kernel looping; promote them to reject routes
1249 */
86872cb5 1250 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1251 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1252 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1253 /* hold loopback dev/idev if we haven't done so. */
5578689a 1254 if (dev != net->loopback_dev) {
1da177e4
LT
1255 if (dev) {
1256 dev_put(dev);
1257 in6_dev_put(idev);
1258 }
5578689a 1259 dev = net->loopback_dev;
1da177e4
LT
1260 dev_hold(dev);
1261 idev = in6_dev_get(dev);
1262 if (!idev) {
1263 err = -ENODEV;
1264 goto out;
1265 }
1266 }
d8d1f30b
CG
1267 rt->dst.output = ip6_pkt_discard_out;
1268 rt->dst.input = ip6_pkt_discard;
1269 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1270 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1271 goto install_route;
1272 }
1273
86872cb5 1274 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1275 struct in6_addr *gw_addr;
1276 int gwa_type;
1277
86872cb5
TG
1278 gw_addr = &cfg->fc_gateway;
1279 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1280 gwa_type = ipv6_addr_type(gw_addr);
1281
1282 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1283 struct rt6_info *grt;
1284
1285 /* IPv6 strictly inhibits using not link-local
1286 addresses as nexthop address.
1287 Otherwise, router will not able to send redirects.
1288 It is very good, but in some (rare!) circumstances
1289 (SIT, PtP, NBMA NOARP links) it is handy to allow
1290 some exceptions. --ANK
1291 */
1292 err = -EINVAL;
1293 if (!(gwa_type&IPV6_ADDR_UNICAST))
1294 goto out;
1295
5578689a 1296 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1297
1298 err = -EHOSTUNREACH;
1299 if (grt == NULL)
1300 goto out;
1301 if (dev) {
1302 if (dev != grt->rt6i_dev) {
d8d1f30b 1303 dst_release(&grt->dst);
1da177e4
LT
1304 goto out;
1305 }
1306 } else {
1307 dev = grt->rt6i_dev;
1308 idev = grt->rt6i_idev;
1309 dev_hold(dev);
1310 in6_dev_hold(grt->rt6i_idev);
1311 }
1312 if (!(grt->rt6i_flags&RTF_GATEWAY))
1313 err = 0;
d8d1f30b 1314 dst_release(&grt->dst);
1da177e4
LT
1315
1316 if (err)
1317 goto out;
1318 }
1319 err = -EINVAL;
1320 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1321 goto out;
1322 }
1323
1324 err = -ENODEV;
1325 if (dev == NULL)
1326 goto out;
1327
86872cb5 1328 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1329 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1330 if (IS_ERR(rt->rt6i_nexthop)) {
1331 err = PTR_ERR(rt->rt6i_nexthop);
1332 rt->rt6i_nexthop = NULL;
1333 goto out;
1334 }
1335 }
1336
86872cb5 1337 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1338
1339install_route:
86872cb5
TG
1340 if (cfg->fc_mx) {
1341 struct nlattr *nla;
1342 int remaining;
1343
1344 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1345 int type = nla_type(nla);
86872cb5
TG
1346
1347 if (type) {
1348 if (type > RTAX_MAX) {
1da177e4
LT
1349 err = -EINVAL;
1350 goto out;
1351 }
86872cb5 1352
defb3519 1353 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1354 }
1da177e4
LT
1355 }
1356 }
1357
d8d1f30b 1358 rt->dst.dev = dev;
1da177e4 1359 rt->rt6i_idev = idev;
c71099ac 1360 rt->rt6i_table = table;
63152fc0 1361
c346dca1 1362 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1363
86872cb5 1364 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1365
1366out:
1367 if (dev)
1368 dev_put(dev);
1369 if (idev)
1370 in6_dev_put(idev);
1371 if (rt)
d8d1f30b 1372 dst_free(&rt->dst);
1da177e4
LT
1373 return err;
1374}
1375
86872cb5 1376static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1377{
1378 int err;
c71099ac 1379 struct fib6_table *table;
c346dca1 1380 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1381
8ed67789 1382 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1383 return -ENOENT;
1384
c71099ac
TG
1385 table = rt->rt6i_table;
1386 write_lock_bh(&table->tb6_lock);
1da177e4 1387
86872cb5 1388 err = fib6_del(rt, info);
d8d1f30b 1389 dst_release(&rt->dst);
1da177e4 1390
c71099ac 1391 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1392
1393 return err;
1394}
1395
e0a1ad73
TG
1396int ip6_del_rt(struct rt6_info *rt)
1397{
4d1169c1 1398 struct nl_info info = {
c346dca1 1399 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1400 };
528c4ceb 1401 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1402}
1403
86872cb5 1404static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1405{
c71099ac 1406 struct fib6_table *table;
1da177e4
LT
1407 struct fib6_node *fn;
1408 struct rt6_info *rt;
1409 int err = -ESRCH;
1410
5578689a 1411 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1412 if (table == NULL)
1413 return err;
1414
1415 read_lock_bh(&table->tb6_lock);
1da177e4 1416
c71099ac 1417 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1418 &cfg->fc_dst, cfg->fc_dst_len,
1419 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1420
1da177e4 1421 if (fn) {
d8d1f30b 1422 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1423 if (cfg->fc_ifindex &&
1da177e4 1424 (rt->rt6i_dev == NULL ||
86872cb5 1425 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1426 continue;
86872cb5
TG
1427 if (cfg->fc_flags & RTF_GATEWAY &&
1428 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1429 continue;
86872cb5 1430 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1431 continue;
d8d1f30b 1432 dst_hold(&rt->dst);
c71099ac 1433 read_unlock_bh(&table->tb6_lock);
1da177e4 1434
86872cb5 1435 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1436 }
1437 }
c71099ac 1438 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1439
1440 return err;
1441}
1442
1443/*
1444 * Handle redirects
1445 */
/*
 * Lookup key used while validating a redirect: the flow itself plus the
 * gateway address the current route must point at.  fl6 has to stay the first
 * member because __ip6_route_redirect() casts its flowi6 pointer back to this
 * structure.
 */
a6279458 1446struct ip6rd_flowi {
4c9483b2 1447 struct flowi6 fl6;
a6279458
YH
1448 struct in6_addr gateway;
1449};
1450
8ed67789
DL
1451static struct rt6_info *__ip6_route_redirect(struct net *net,
1452 struct fib6_table *table,
4c9483b2 1453 struct flowi6 *fl6,
a6279458 1454 int flags)
1da177e4 1455{
4c9483b2 1456 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1457 struct rt6_info *rt;
e843b9e1 1458 struct fib6_node *fn;
c71099ac 1459
1da177e4 1460 /*
e843b9e1
YH
1461 * Get the "current" route for this destination and
1462 * check if the redirect has come from approriate router.
1463 *
1464 * RFC 2461 specifies that redirects should only be
1465 * accepted if they come from the nexthop to the target.
1466 * Due to the way the routes are chosen, this notion
1467 * is a bit fuzzy and one might need to check all possible
1468 * routes.
1da177e4 1469 */
1da177e4 1470
c71099ac 1471 read_lock_bh(&table->tb6_lock);
4c9483b2 1472 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1473restart:
d8d1f30b 1474 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1475 /*
1476 * Current route is on-link; redirect is always invalid.
1477 *
1478 * Seems, previous statement is not true. It could
1479 * be node, which looks for us as on-link (f.e. proxy ndisc)
1480 * But then router serving it might decide, that we should
1481 * know truth 8)8) --ANK (980726).
1482 */
1483 if (rt6_check_expired(rt))
1484 continue;
1485 if (!(rt->rt6i_flags & RTF_GATEWAY))
1486 continue;
4c9483b2 1487 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
e843b9e1 1488 continue;
a6279458 1489 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1490 continue;
1491 break;
1492 }
a6279458 1493
cb15d9c2 1494 if (!rt)
8ed67789 1495 rt = net->ipv6.ip6_null_entry;
4c9483b2 1496 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1497out:
d8d1f30b 1498 dst_hold(&rt->dst);
a6279458 1499
c71099ac 1500 read_unlock_bh(&table->tb6_lock);
e843b9e1 1501
a6279458
YH
1502 return rt;
1503};
1504
1505static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1506 struct in6_addr *src,
1507 struct in6_addr *gateway,
1508 struct net_device *dev)
1509{
adaa70bb 1510 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1511 struct net *net = dev_net(dev);
a6279458 1512 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1513 .fl6 = {
1514 .flowi6_oif = dev->ifindex,
1515 .daddr = *dest,
1516 .saddr = *src,
a6279458 1517 },
a6279458 1518 };
adaa70bb 1519
86c36ce4
BH
1520 ipv6_addr_copy(&rdfl.gateway, gateway);
1521
adaa70bb
TG
1522 if (rt6_need_strict(dest))
1523 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1524
4c9483b2 1525 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1526 flags, __ip6_route_redirect);
a6279458
YH
1527}
1528
1529void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1530 struct in6_addr *saddr,
1531 struct neighbour *neigh, u8 *lladdr, int on_link)
1532{
1533 struct rt6_info *rt, *nrt = NULL;
1534 struct netevent_redirect netevent;
c346dca1 1535 struct net *net = dev_net(neigh->dev);
a6279458
YH
1536
1537 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1538
8ed67789 1539 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1540 if (net_ratelimit())
1541 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1542 "for redirect target\n");
a6279458 1543 goto out;
1da177e4
LT
1544 }
1545
1da177e4
LT
1546 /*
1547 * We have finally decided to accept it.
1548 */
1549
1ab1457c 1550 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1551 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1552 NEIGH_UPDATE_F_OVERRIDE|
1553 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1554 NEIGH_UPDATE_F_ISROUTER))
1555 );
1556
1557 /*
1558 * Redirect received -> path was valid.
1559 * Look, redirects are sent only in response to data packets,
1560 * so that this nexthop apparently is reachable. --ANK
1561 */
d8d1f30b 1562 dst_confirm(&rt->dst);
1da177e4
LT
1563
1564 /* Duplicate redirect: silently ignore. */
d8d1f30b 1565 if (neigh == rt->dst.neighbour)
1da177e4
LT
1566 goto out;
1567
1568 nrt = ip6_rt_copy(rt);
1569 if (nrt == NULL)
1570 goto out;
1571
1572 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1573 if (on_link)
1574 nrt->rt6i_flags &= ~RTF_GATEWAY;
1575
1576 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1577 nrt->rt6i_dst.plen = 128;
d8d1f30b 1578 nrt->dst.flags |= DST_HOST;
1da177e4
LT
1579
1580 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1581 nrt->rt6i_nexthop = neigh_clone(neigh);
1da177e4 1582
40e22e8f 1583 if (ip6_ins_rt(nrt))
1da177e4
LT
1584 goto out;
1585
d8d1f30b
CG
1586 netevent.old = &rt->dst;
1587 netevent.new = &nrt->dst;
8d71740c
TT
1588 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1589
1da177e4 1590 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1591 ip6_del_rt(rt);
1da177e4
LT
1592 return;
1593 }
1594
1595out:
d8d1f30b 1596 dst_release(&rt->dst);
1da177e4
LT
1597}
1598
1599/*
1600 * Handle ICMP "packet too big" messages
1601 * i.e. Path MTU discovery
1602 */
1603
ae878ae2
1604static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1605 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1606{
1607 struct rt6_info *rt, *nrt;
1608 int allfrag = 0;
d3052b55 1609again:
ae878ae2 1610 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1611 if (rt == NULL)
1612 return;
1613
d3052b55
AV
1614 if (rt6_check_expired(rt)) {
1615 ip6_del_rt(rt);
1616 goto again;
1617 }
1618
d8d1f30b 1619 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1620 goto out;
1621
1622 if (pmtu < IPV6_MIN_MTU) {
1623 /*
1ab1457c 1624 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1625 * MTU (1280) and a fragment header should always be included
1626 * after a node receiving Too Big message reporting PMTU is
1627 * less than the IPv6 Minimum Link MTU.
1628 */
1629 pmtu = IPV6_MIN_MTU;
1630 allfrag = 1;
1631 }
1632
1633 /* New mtu received -> path was valid.
1634 They are sent only in response to data packets,
1635 so that this nexthop apparently is reachable. --ANK
1636 */
d8d1f30b 1637 dst_confirm(&rt->dst);
1da177e4
LT
1638
1639 /* Host route. If it is static, it would be better
1640 not to override it, but add new one, so that
1641 when cache entry will expire old pmtu
1642 would return automatically.
1643 */
1644 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1645 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1646 if (allfrag) {
1647 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1648 features |= RTAX_FEATURE_ALLFRAG;
1649 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1650 }
d8d1f30b 1651 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1652 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1653 goto out;
1654 }
1655
1656 /* Network route.
1657 Two cases are possible:
1658 1. It is connected route. Action: COW
1659 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1660 */
d5315b50 1661 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1662 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1663 else
1664 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1665
d5315b50 1666 if (nrt) {
defb3519
DM
1667 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1668 if (allfrag) {
1669 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1670 features |= RTAX_FEATURE_ALLFRAG;
1671 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1672 }
a1e78363
YH
1673
1674 /* According to RFC 1981, detecting PMTU increase shouldn't be
1675 * happened within 5 mins, the recommended timer is 10 mins.
1676 * Here this route expiration time is set to ip6_rt_mtu_expires
1677 * which is 10 mins. After 10 mins the decreased pmtu is expired
1678 * and detecting PMTU increase will be automatically happened.
1679 */
d8d1f30b 1680 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1681 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1682
40e22e8f 1683 ip6_ins_rt(nrt);
1da177e4 1684 }
1da177e4 1685out:
d8d1f30b 1686 dst_release(&rt->dst);
1da177e4
LT
1687}
1688
ae878ae2
1689void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1690 struct net_device *dev, u32 pmtu)
1691{
1692 struct net *net = dev_net(dev);
1693
1694 /*
1695 * RFC 1981 states that a node "MUST reduce the size of the packets it
1696 * is sending along the path" that caused the Packet Too Big message.
1697 * Since it's not possible in the general case to determine which
1698 * interface was used to send the original packet, we update the MTU
1699 * on the interface that will be used to send future packets. We also
1700 * update the MTU on the interface that received the Packet Too Big in
1701 * case the original packet was forced out that interface with
1702 * SO_BINDTODEVICE or similar. This is the next best thing to the
1703 * correct behaviour, which would be to update the MTU on all
1704 * interfaces.
1705 */
1706 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1707 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1708}
1709
1da177e4
LT
1710/*
1711 * Misc support functions
1712 */
1713
1714static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1715{
c346dca1 1716 struct net *net = dev_net(ort->rt6i_dev);
86393e52 1717 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1718
1719 if (rt) {
d8d1f30b
CG
1720 rt->dst.input = ort->dst.input;
1721 rt->dst.output = ort->dst.output;
1722
defb3519 1723 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b
CG
1724 rt->dst.error = ort->dst.error;
1725 rt->dst.dev = ort->dst.dev;
1726 if (rt->dst.dev)
1727 dev_hold(rt->dst.dev);
1da177e4
LT
1728 rt->rt6i_idev = ort->rt6i_idev;
1729 if (rt->rt6i_idev)
1730 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1731 rt->dst.lastuse = jiffies;
1da177e4
LT
1732 rt->rt6i_expires = 0;
1733
1734 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1735 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1736 rt->rt6i_metric = 0;
1737
1738 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1739#ifdef CONFIG_IPV6_SUBTREES
1740 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1741#endif
c71099ac 1742 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1743 }
1744 return rt;
1745}
1746
70ceb4f5 1747#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1748static struct rt6_info *rt6_get_route_info(struct net *net,
1749 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1750 struct in6_addr *gwaddr, int ifindex)
1751{
1752 struct fib6_node *fn;
1753 struct rt6_info *rt = NULL;
c71099ac
TG
1754 struct fib6_table *table;
1755
efa2cea0 1756 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1757 if (table == NULL)
1758 return NULL;
70ceb4f5 1759
c71099ac
TG
1760 write_lock_bh(&table->tb6_lock);
1761 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1762 if (!fn)
1763 goto out;
1764
d8d1f30b 1765 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1766 if (rt->rt6i_dev->ifindex != ifindex)
1767 continue;
1768 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1769 continue;
1770 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1771 continue;
d8d1f30b 1772 dst_hold(&rt->dst);
70ceb4f5
YH
1773 break;
1774 }
1775out:
c71099ac 1776 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1777 return rt;
1778}
1779
efa2cea0
DL
1780static struct rt6_info *rt6_add_route_info(struct net *net,
1781 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1782 struct in6_addr *gwaddr, int ifindex,
1783 unsigned pref)
1784{
86872cb5
TG
1785 struct fib6_config cfg = {
1786 .fc_table = RT6_TABLE_INFO,
238fc7ea 1787 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1788 .fc_ifindex = ifindex,
1789 .fc_dst_len = prefixlen,
1790 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1791 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1792 .fc_nlinfo.pid = 0,
1793 .fc_nlinfo.nlh = NULL,
1794 .fc_nlinfo.nl_net = net,
86872cb5
TG
1795 };
1796
1797 ipv6_addr_copy(&cfg.fc_dst, prefix);
1798 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1799
e317da96
YH
1800 /* We should treat it as a default route if prefix length is 0. */
1801 if (!prefixlen)
86872cb5 1802 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1803
86872cb5 1804 ip6_route_add(&cfg);
70ceb4f5 1805
efa2cea0 1806 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1807}
1808#endif
1809
1da177e4 1810struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1811{
1da177e4 1812 struct rt6_info *rt;
c71099ac 1813 struct fib6_table *table;
1da177e4 1814
c346dca1 1815 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1816 if (table == NULL)
1817 return NULL;
1da177e4 1818
c71099ac 1819 write_lock_bh(&table->tb6_lock);
d8d1f30b 1820 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1821 if (dev == rt->rt6i_dev &&
045927ff 1822 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1823 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1824 break;
1825 }
1826 if (rt)
d8d1f30b 1827 dst_hold(&rt->dst);
c71099ac 1828 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1829 return rt;
1830}
1831
1832struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1833 struct net_device *dev,
1834 unsigned int pref)
1da177e4 1835{
86872cb5
TG
1836 struct fib6_config cfg = {
1837 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1838 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1839 .fc_ifindex = dev->ifindex,
1840 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1841 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1842 .fc_nlinfo.pid = 0,
1843 .fc_nlinfo.nlh = NULL,
c346dca1 1844 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1845 };
1da177e4 1846
86872cb5 1847 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1848
86872cb5 1849 ip6_route_add(&cfg);
1da177e4 1850
1da177e4
LT
1851 return rt6_get_dflt_router(gwaddr, dev);
1852}
1853
7b4da532 1854void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1855{
1856 struct rt6_info *rt;
c71099ac
TG
1857 struct fib6_table *table;
1858
1859 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1860 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1861 if (table == NULL)
1862 return;
1da177e4
LT
1863
1864restart:
c71099ac 1865 read_lock_bh(&table->tb6_lock);
d8d1f30b 1866 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1867 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1868 dst_hold(&rt->dst);
c71099ac 1869 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1870 ip6_del_rt(rt);
1da177e4
LT
1871 goto restart;
1872 }
1873 }
c71099ac 1874 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1875}
1876
5578689a
DL
1877static void rtmsg_to_fib6_config(struct net *net,
1878 struct in6_rtmsg *rtmsg,
86872cb5
TG
1879 struct fib6_config *cfg)
1880{
1881 memset(cfg, 0, sizeof(*cfg));
1882
1883 cfg->fc_table = RT6_TABLE_MAIN;
1884 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1885 cfg->fc_metric = rtmsg->rtmsg_metric;
1886 cfg->fc_expires = rtmsg->rtmsg_info;
1887 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1888 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1889 cfg->fc_flags = rtmsg->rtmsg_flags;
1890
5578689a 1891 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1892
86872cb5
TG
1893 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1894 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1895 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1896}
1897
5578689a 1898int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1899{
86872cb5 1900 struct fib6_config cfg;
1da177e4
LT
1901 struct in6_rtmsg rtmsg;
1902 int err;
1903
1904 switch(cmd) {
1905 case SIOCADDRT: /* Add a route */
1906 case SIOCDELRT: /* Delete a route */
1907 if (!capable(CAP_NET_ADMIN))
1908 return -EPERM;
1909 err = copy_from_user(&rtmsg, arg,
1910 sizeof(struct in6_rtmsg));
1911 if (err)
1912 return -EFAULT;
86872cb5 1913
5578689a 1914 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1915
1da177e4
LT
1916 rtnl_lock();
1917 switch (cmd) {
1918 case SIOCADDRT:
86872cb5 1919 err = ip6_route_add(&cfg);
1da177e4
LT
1920 break;
1921 case SIOCDELRT:
86872cb5 1922 err = ip6_route_del(&cfg);
1da177e4
LT
1923 break;
1924 default:
1925 err = -EINVAL;
1926 }
1927 rtnl_unlock();
1928
1929 return err;
3ff50b79 1930 }
1da177e4
LT
1931
1932 return -EINVAL;
1933}
1934
1935/*
1936 * Drop the packet on the floor
1937 */
1938
d5fdd6ba 1939static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1940{
612f09e8 1941 int type;
adf30907 1942 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1943 switch (ipstats_mib_noroutes) {
1944 case IPSTATS_MIB_INNOROUTES:
0660e03f 1945 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1946 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1947 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1948 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1949 break;
1950 }
1951 /* FALLTHROUGH */
1952 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1953 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1954 ipstats_mib_noroutes);
612f09e8
YH
1955 break;
1956 }
3ffe533c 1957 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1958 kfree_skb(skb);
1959 return 0;
1960}
1961
9ce8ade0
TG
1962static int ip6_pkt_discard(struct sk_buff *skb)
1963{
612f09e8 1964 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1965}
1966
20380731 1967static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 1968{
adf30907 1969 skb->dev = skb_dst(skb)->dev;
612f09e8 1970 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1971}
1972
6723ab54
DM
1973#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1974
9ce8ade0
TG
1975static int ip6_pkt_prohibit(struct sk_buff *skb)
1976{
612f09e8 1977 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1978}
1979
1980static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1981{
adf30907 1982 skb->dev = skb_dst(skb)->dev;
612f09e8 1983 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1984}
1985
6723ab54
DM
1986#endif
1987
1da177e4
LT
1988/*
1989 * Allocate a dst for local (unicast / anycast) address.
1990 */
1991
1992struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1993 const struct in6_addr *addr,
1994 int anycast)
1995{
c346dca1 1996 struct net *net = dev_net(idev->dev);
86393e52 1997 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
14deae41 1998 struct neighbour *neigh;
1da177e4 1999
40385653
BG
2000 if (rt == NULL) {
2001 if (net_ratelimit())
2002 pr_warning("IPv6: Maximum number of routes reached,"
2003 " consider increasing route/max_size.\n");
1da177e4 2004 return ERR_PTR(-ENOMEM);
40385653 2005 }
1da177e4 2006
5578689a 2007 dev_hold(net->loopback_dev);
1da177e4
LT
2008 in6_dev_hold(idev);
2009
d8d1f30b
CG
2010 rt->dst.flags = DST_HOST;
2011 rt->dst.input = ip6_input;
2012 rt->dst.output = ip6_output;
5578689a 2013 rt->rt6i_dev = net->loopback_dev;
1da177e4 2014 rt->rt6i_idev = idev;
d8d1f30b 2015 rt->dst.obsolete = -1;
1da177e4
LT
2016
2017 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2018 if (anycast)
2019 rt->rt6i_flags |= RTF_ANYCAST;
2020 else
1da177e4 2021 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
2022 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2023 if (IS_ERR(neigh)) {
d8d1f30b 2024 dst_free(&rt->dst);
14deae41 2025
29546a64 2026 return ERR_CAST(neigh);
1da177e4 2027 }
14deae41 2028 rt->rt6i_nexthop = neigh;
1da177e4
LT
2029
2030 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2031 rt->rt6i_dst.plen = 128;
5578689a 2032 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2033
d8d1f30b 2034 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2035
2036 return rt;
2037}
2038
8ed67789
DL
/*
 * Argument bundle for fib6_ifdown(): the device going down (NULL matches
 * routes on any device) and the namespace whose ip6_null_entry must be left
 * alone.
 */
2039struct arg_dev_net {
2040 struct net_device *dev;
2041 struct net *net;
2042};
2043
1da177e4
LT
2044static int fib6_ifdown(struct rt6_info *rt, void *arg)
2045{
bc3ef660 2046 const struct arg_dev_net *adn = arg;
2047 const struct net_device *dev = adn->dev;
8ed67789 2048
bc3ef660 2049 if ((rt->rt6i_dev == dev || dev == NULL) &&
2050 rt != adn->net->ipv6.ip6_null_entry) {
1da177e4
LT
2051 RT6_TRACE("deleted by ifdown %p\n", rt);
2052 return -1;
2053 }
2054 return 0;
2055}
2056
f3db4851 2057void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2058{
8ed67789
DL
2059 struct arg_dev_net adn = {
2060 .dev = dev,
2061 .net = net,
2062 };
2063
2064 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2065 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2066}
2067
/* Context passed (as the opaque argument) to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
2073
2074static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2075{
2076 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2077 struct inet6_dev *idev;
2078
2079 /* In IPv6 pmtu discovery is not optional,
2080 so that RTAX_MTU lock cannot disable it.
2081 We still use this lock to block changes
2082 caused by addrconf/ndisc.
2083 */
2084
2085 idev = __in6_dev_get(arg->dev);
2086 if (idev == NULL)
2087 return 0;
2088
2089 /* For administrative MTU increase, there is no way to discover
2090 IPv6 PMTU increase, so PMTU increase should be updated here.
2091 Since RFC 1981 doesn't include administrative MTU increase
2092 update PMTU increase is a MUST. (i.e. jumbo frame)
2093 */
2094 /*
2095 If new MTU is less than route PMTU, this new MTU will be the
2096 lowest MTU in the path, update the route PMTU to reflect PMTU
2097 decreases; if new MTU is greater than route PMTU, and the
2098 old MTU is the lowest MTU in the path, update the route PMTU
2099 to reflect the increase. In this case if the other nodes' MTU
2100 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2101 PMTU discouvery.
2102 */
2103 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2104 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2105 (dst_mtu(&rt->dst) >= arg->mtu ||
2106 (dst_mtu(&rt->dst) < arg->mtu &&
2107 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2108 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2109 }
1da177e4
LT
2110 return 0;
2111}
2112
2113void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2114{
c71099ac
TG
2115 struct rt6_mtu_change_arg arg = {
2116 .dev = dev,
2117 .mtu = mtu,
2118 };
1da177e4 2119
c346dca1 2120 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2121}
2122
ef7c79ed 2123static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2124 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2125 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2126 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2127 [RTA_PRIORITY] = { .type = NLA_U32 },
2128 [RTA_METRICS] = { .type = NLA_NESTED },
2129};
2130
2131static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2132 struct fib6_config *cfg)
1da177e4 2133{
86872cb5
TG
2134 struct rtmsg *rtm;
2135 struct nlattr *tb[RTA_MAX+1];
2136 int err;
1da177e4 2137
86872cb5
TG
2138 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2139 if (err < 0)
2140 goto errout;
1da177e4 2141
86872cb5
TG
2142 err = -EINVAL;
2143 rtm = nlmsg_data(nlh);
2144 memset(cfg, 0, sizeof(*cfg));
2145
2146 cfg->fc_table = rtm->rtm_table;
2147 cfg->fc_dst_len = rtm->rtm_dst_len;
2148 cfg->fc_src_len = rtm->rtm_src_len;
2149 cfg->fc_flags = RTF_UP;
2150 cfg->fc_protocol = rtm->rtm_protocol;
2151
2152 if (rtm->rtm_type == RTN_UNREACHABLE)
2153 cfg->fc_flags |= RTF_REJECT;
2154
ab79ad14
2155 if (rtm->rtm_type == RTN_LOCAL)
2156 cfg->fc_flags |= RTF_LOCAL;
2157
86872cb5
TG
2158 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2159 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2160 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2161
2162 if (tb[RTA_GATEWAY]) {
2163 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2164 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2165 }
86872cb5
TG
2166
2167 if (tb[RTA_DST]) {
2168 int plen = (rtm->rtm_dst_len + 7) >> 3;
2169
2170 if (nla_len(tb[RTA_DST]) < plen)
2171 goto errout;
2172
2173 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2174 }
86872cb5
TG
2175
2176 if (tb[RTA_SRC]) {
2177 int plen = (rtm->rtm_src_len + 7) >> 3;
2178
2179 if (nla_len(tb[RTA_SRC]) < plen)
2180 goto errout;
2181
2182 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2183 }
86872cb5
TG
2184
2185 if (tb[RTA_OIF])
2186 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2187
2188 if (tb[RTA_PRIORITY])
2189 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2190
2191 if (tb[RTA_METRICS]) {
2192 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2193 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2194 }
86872cb5
TG
2195
2196 if (tb[RTA_TABLE])
2197 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2198
2199 err = 0;
2200errout:
2201 return err;
1da177e4
LT
2202}
2203
c127ea2c 2204static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2205{
86872cb5
TG
2206 struct fib6_config cfg;
2207 int err;
1da177e4 2208
86872cb5
TG
2209 err = rtm_to_fib6_config(skb, nlh, &cfg);
2210 if (err < 0)
2211 return err;
2212
2213 return ip6_route_del(&cfg);
1da177e4
LT
2214}
2215
c127ea2c 2216static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2217{
86872cb5
TG
2218 struct fib6_config cfg;
2219 int err;
1da177e4 2220
86872cb5
TG
2221 err = rtm_to_fib6_config(skb, nlh, &cfg);
2222 if (err < 0)
2223 return err;
2224
2225 return ip6_route_add(&cfg);
1da177e4
LT
2226}
2227
339bf98f
TG
2228static inline size_t rt6_nlmsg_size(void)
2229{
2230 return NLMSG_ALIGN(sizeof(struct rtmsg))
2231 + nla_total_size(16) /* RTA_SRC */
2232 + nla_total_size(16) /* RTA_DST */
2233 + nla_total_size(16) /* RTA_GATEWAY */
2234 + nla_total_size(16) /* RTA_PREFSRC */
2235 + nla_total_size(4) /* RTA_TABLE */
2236 + nla_total_size(4) /* RTA_IIF */
2237 + nla_total_size(4) /* RTA_OIF */
2238 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2239 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2240 + nla_total_size(sizeof(struct rta_cacheinfo));
2241}
2242
191cd582
BH
2243static int rt6_fill_node(struct net *net,
2244 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2245 struct in6_addr *dst, struct in6_addr *src,
2246 int iif, int type, u32 pid, u32 seq,
7bc570c8 2247 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2248{
2249 struct rtmsg *rtm;
2d7202bf 2250 struct nlmsghdr *nlh;
e3703b3d 2251 long expires;
9e762a4a 2252 u32 table;
1da177e4
LT
2253
2254 if (prefix) { /* user wants prefix routes only */
2255 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2256 /* success since this is not a prefix route */
2257 return 1;
2258 }
2259 }
2260
2d7202bf
TG
2261 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2262 if (nlh == NULL)
26932566 2263 return -EMSGSIZE;
2d7202bf
TG
2264
2265 rtm = nlmsg_data(nlh);
1da177e4
LT
2266 rtm->rtm_family = AF_INET6;
2267 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2268 rtm->rtm_src_len = rt->rt6i_src.plen;
2269 rtm->rtm_tos = 0;
c71099ac 2270 if (rt->rt6i_table)
9e762a4a 2271 table = rt->rt6i_table->tb6_id;
c71099ac 2272 else
9e762a4a
PM
2273 table = RT6_TABLE_UNSPEC;
2274 rtm->rtm_table = table;
2d7202bf 2275 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2276 if (rt->rt6i_flags&RTF_REJECT)
2277 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2278 else if (rt->rt6i_flags&RTF_LOCAL)
2279 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2280 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2281 rtm->rtm_type = RTN_LOCAL;
2282 else
2283 rtm->rtm_type = RTN_UNICAST;
2284 rtm->rtm_flags = 0;
2285 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2286 rtm->rtm_protocol = rt->rt6i_protocol;
2287 if (rt->rt6i_flags&RTF_DYNAMIC)
2288 rtm->rtm_protocol = RTPROT_REDIRECT;
2289 else if (rt->rt6i_flags & RTF_ADDRCONF)
2290 rtm->rtm_protocol = RTPROT_KERNEL;
2291 else if (rt->rt6i_flags&RTF_DEFAULT)
2292 rtm->rtm_protocol = RTPROT_RA;
2293
2294 if (rt->rt6i_flags&RTF_CACHE)
2295 rtm->rtm_flags |= RTM_F_CLONED;
2296
2297 if (dst) {
2d7202bf 2298 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2299 rtm->rtm_dst_len = 128;
1da177e4 2300 } else if (rtm->rtm_dst_len)
2d7202bf 2301 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2302#ifdef CONFIG_IPV6_SUBTREES
2303 if (src) {
2d7202bf 2304 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2305 rtm->rtm_src_len = 128;
1da177e4 2306 } else if (rtm->rtm_src_len)
2d7202bf 2307 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2308#endif
7bc570c8
YH
2309 if (iif) {
2310#ifdef CONFIG_IPV6_MROUTE
2311 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2312 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2313 if (err <= 0) {
2314 if (!nowait) {
2315 if (err == 0)
2316 return 0;
2317 goto nla_put_failure;
2318 } else {
2319 if (err == -EMSGSIZE)
2320 goto nla_put_failure;
2321 }
2322 }
2323 } else
2324#endif
2325 NLA_PUT_U32(skb, RTA_IIF, iif);
2326 } else if (dst) {
d8d1f30b 2327 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
1da177e4 2328 struct in6_addr saddr_buf;
191cd582 2329 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
7cbca67c 2330 dst, 0, &saddr_buf) == 0)
2d7202bf 2331 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2332 }
2d7202bf 2333
defb3519 2334 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2335 goto nla_put_failure;
2336
d8d1f30b
CG
2337 if (rt->dst.neighbour)
2338 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2d7202bf 2339
d8d1f30b 2340 if (rt->dst.dev)
2d7202bf
TG
2341 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2342
2343 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2344
36e3deae
YH
2345 if (!(rt->rt6i_flags & RTF_EXPIRES))
2346 expires = 0;
2347 else if (rt->rt6i_expires - jiffies < INT_MAX)
2348 expires = rt->rt6i_expires - jiffies;
2349 else
2350 expires = INT_MAX;
69cdf8f9 2351
d8d1f30b
CG
2352 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2353 expires, rt->dst.error) < 0)
e3703b3d 2354 goto nla_put_failure;
2d7202bf
TG
2355
2356 return nlmsg_end(skb, nlh);
2357
2358nla_put_failure:
26932566
PM
2359 nlmsg_cancel(skb, nlh);
2360 return -EMSGSIZE;
1da177e4
LT
2361}
2362
1b43af54 2363int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2364{
2365 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2366 int prefix;
2367
2d7202bf
TG
2368 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2369 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2370 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2371 } else
2372 prefix = 0;
2373
191cd582
BH
2374 return rt6_fill_node(arg->net,
2375 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2376 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2377 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2378}
2379
c127ea2c 2380static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2381{
3b1e0a65 2382 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2383 struct nlattr *tb[RTA_MAX+1];
2384 struct rt6_info *rt;
1da177e4 2385 struct sk_buff *skb;
ab364a6f 2386 struct rtmsg *rtm;
4c9483b2 2387 struct flowi6 fl6;
ab364a6f 2388 int err, iif = 0;
1da177e4 2389
ab364a6f
TG
2390 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2391 if (err < 0)
2392 goto errout;
1da177e4 2393
ab364a6f 2394 err = -EINVAL;
4c9483b2 2395 memset(&fl6, 0, sizeof(fl6));
1da177e4 2396
ab364a6f
TG
2397 if (tb[RTA_SRC]) {
2398 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2399 goto errout;
2400
4c9483b2 2401 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
ab364a6f
TG
2402 }
2403
2404 if (tb[RTA_DST]) {
2405 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2406 goto errout;
2407
4c9483b2 2408 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
ab364a6f
TG
2409 }
2410
2411 if (tb[RTA_IIF])
2412 iif = nla_get_u32(tb[RTA_IIF]);
2413
2414 if (tb[RTA_OIF])
4c9483b2 2415 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2416
2417 if (iif) {
2418 struct net_device *dev;
5578689a 2419 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2420 if (!dev) {
2421 err = -ENODEV;
ab364a6f 2422 goto errout;
1da177e4
LT
2423 }
2424 }
2425
ab364a6f
TG
2426 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2427 if (skb == NULL) {
2428 err = -ENOBUFS;
2429 goto errout;
2430 }
1da177e4 2431
ab364a6f
TG
2432 /* Reserve room for dummy headers, this skb can pass
2433 through good chunk of routing engine.
2434 */
459a98ed 2435 skb_reset_mac_header(skb);
ab364a6f 2436 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2437
4c9483b2 2438 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
d8d1f30b 2439 skb_dst_set(skb, &rt->dst);
1da177e4 2440
4c9483b2 2441 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2442 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2443 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2444 if (err < 0) {
ab364a6f
TG
2445 kfree_skb(skb);
2446 goto errout;
1da177e4
LT
2447 }
2448
5578689a 2449 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2450errout:
1da177e4 2451 return err;
1da177e4
LT
2452}
2453
86872cb5 2454void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2455{
2456 struct sk_buff *skb;
5578689a 2457 struct net *net = info->nl_net;
528c4ceb
DL
2458 u32 seq;
2459 int err;
2460
2461 err = -ENOBUFS;
2462 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2463
339bf98f 2464 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2465 if (skb == NULL)
2466 goto errout;
2467
191cd582 2468 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2469 event, info->pid, seq, 0, 0, 0);
26932566
PM
2470 if (err < 0) {
2471 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2472 WARN_ON(err == -EMSGSIZE);
2473 kfree_skb(skb);
2474 goto errout;
2475 }
1ce85fe4
PNA
2476 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2477 info->nlh, gfp_any());
2478 return;
21713ebc
TG
2479errout:
2480 if (err < 0)
5578689a 2481 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2482}
2483
8ed67789
DL
2484static int ip6_route_dev_notify(struct notifier_block *this,
2485 unsigned long event, void *data)
2486{
2487 struct net_device *dev = (struct net_device *)data;
c346dca1 2488 struct net *net = dev_net(dev);
8ed67789
DL
2489
2490 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2491 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2492 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2493#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2494 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2495 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2496 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2497 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2498#endif
2499 }
2500
2501 return NOTIFY_OK;
2502}
2503
1da177e4
LT
2504/*
2505 * /proc
2506 */
2507
2508#ifdef CONFIG_PROC_FS
2509
1da177e4
LT
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): the seq_file based handlers in this section do not
 * reference this structure; it may be a leftover - confirm before removing.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2518
2519static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2520{
33120b30 2521 struct seq_file *m = p_arg;
1da177e4 2522
4b7a4274 2523 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2524
2525#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2526 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2527#else
33120b30 2528 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2529#endif
2530
2531 if (rt->rt6i_nexthop) {
4b7a4274 2532 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
1da177e4 2533 } else {
33120b30 2534 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2535 }
33120b30 2536 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2537 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2538 rt->dst.__use, rt->rt6i_flags,
33120b30 2539 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2540 return 0;
2541}
2542
33120b30 2543static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2544{
f3db4851
DL
2545 struct net *net = (struct net *)m->private;
2546 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2547 return 0;
2548}
1da177e4 2549
33120b30
AD
2550static int ipv6_route_open(struct inode *inode, struct file *file)
2551{
de05c557 2552 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2553}
2554
33120b30
AD
2555static const struct file_operations ipv6_route_proc_fops = {
2556 .owner = THIS_MODULE,
2557 .open = ipv6_route_open,
2558 .read = seq_read,
2559 .llseek = seq_lseek,
b6fcbdb4 2560 .release = single_release_net,
33120b30
AD
2561};
2562
1da177e4
LT
2563static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2564{
69ddb805 2565 struct net *net = (struct net *)seq->private;
1da177e4 2566 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2567 net->ipv6.rt6_stats->fib_nodes,
2568 net->ipv6.rt6_stats->fib_route_nodes,
2569 net->ipv6.rt6_stats->fib_rt_alloc,
2570 net->ipv6.rt6_stats->fib_rt_entries,
2571 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2572 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2573 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2574
2575 return 0;
2576}
2577
2578static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2579{
de05c557 2580 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2581}
2582
9a32144e 2583static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2584 .owner = THIS_MODULE,
2585 .open = rt6_stats_seq_open,
2586 .read = seq_read,
2587 .llseek = seq_lseek,
b6fcbdb4 2588 .release = single_release_net,
1da177e4
LT
2589};
2590#endif /* CONFIG_PROC_FS */
2591
2592#ifdef CONFIG_SYSCTL
2593
1da177e4 2594static
8d65af78 2595int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2596 void __user *buffer, size_t *lenp, loff_t *ppos)
2597{
c486da34
LAG
2598 struct net *net;
2599 int delay;
2600 if (!write)
1da177e4 2601 return -EINVAL;
c486da34
LAG
2602
2603 net = (struct net *)ctl->extra1;
2604 delay = net->ipv6.sysctl.flush_delay;
2605 proc_dointvec(ctl, write, buffer, lenp, ppos);
2606 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2607 return 0;
1da177e4
LT
2608}
2609
760f2d01 2610ctl_table ipv6_route_table_template[] = {
1ab1457c 2611 {
1da177e4 2612 .procname = "flush",
4990509f 2613 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2614 .maxlen = sizeof(int),
89c8b3a1 2615 .mode = 0200,
6d9f239a 2616 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2617 },
2618 {
1da177e4 2619 .procname = "gc_thresh",
9a7ec3a9 2620 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2621 .maxlen = sizeof(int),
2622 .mode = 0644,
6d9f239a 2623 .proc_handler = proc_dointvec,
1da177e4
LT
2624 },
2625 {
1da177e4 2626 .procname = "max_size",
4990509f 2627 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2628 .maxlen = sizeof(int),
2629 .mode = 0644,
6d9f239a 2630 .proc_handler = proc_dointvec,
1da177e4
LT
2631 },
2632 {
1da177e4 2633 .procname = "gc_min_interval",
4990509f 2634 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2635 .maxlen = sizeof(int),
2636 .mode = 0644,
6d9f239a 2637 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2638 },
2639 {
1da177e4 2640 .procname = "gc_timeout",
4990509f 2641 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2642 .maxlen = sizeof(int),
2643 .mode = 0644,
6d9f239a 2644 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2645 },
2646 {
1da177e4 2647 .procname = "gc_interval",
4990509f 2648 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2649 .maxlen = sizeof(int),
2650 .mode = 0644,
6d9f239a 2651 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2652 },
2653 {
1da177e4 2654 .procname = "gc_elasticity",
4990509f 2655 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2656 .maxlen = sizeof(int),
2657 .mode = 0644,
f3d3f616 2658 .proc_handler = proc_dointvec,
1da177e4
LT
2659 },
2660 {
1da177e4 2661 .procname = "mtu_expires",
4990509f 2662 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2663 .maxlen = sizeof(int),
2664 .mode = 0644,
6d9f239a 2665 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2666 },
2667 {
1da177e4 2668 .procname = "min_adv_mss",
4990509f 2669 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2670 .maxlen = sizeof(int),
2671 .mode = 0644,
f3d3f616 2672 .proc_handler = proc_dointvec,
1da177e4
LT
2673 },
2674 {
1da177e4 2675 .procname = "gc_min_interval_ms",
4990509f 2676 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2677 .maxlen = sizeof(int),
2678 .mode = 0644,
6d9f239a 2679 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2680 },
f8572d8f 2681 { }
1da177e4
LT
2682};
2683
2c8c1e72 2684struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2685{
2686 struct ctl_table *table;
2687
2688 table = kmemdup(ipv6_route_table_template,
2689 sizeof(ipv6_route_table_template),
2690 GFP_KERNEL);
5ee09105
YH
2691
2692 if (table) {
2693 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2694 table[0].extra1 = net;
86393e52 2695 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2696 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2697 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2698 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2699 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2700 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2701 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2702 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2703 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2704 }
2705
760f2d01
DL
2706 return table;
2707}
1da177e4
LT
2708#endif
2709
2c8c1e72 2710static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2711{
633d424b 2712 int ret = -ENOMEM;
8ed67789 2713
86393e52
AD
2714 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2715 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2716
fc66f95c
ED
2717 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2718 goto out_ip6_dst_ops;
2719
8ed67789
DL
2720 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2721 sizeof(*net->ipv6.ip6_null_entry),
2722 GFP_KERNEL);
2723 if (!net->ipv6.ip6_null_entry)
fc66f95c 2724 goto out_ip6_dst_entries;
d8d1f30b 2725 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2726 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2727 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2728 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2729 ip6_template_metrics, true);
8ed67789
DL
2730
2731#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2732 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2733 sizeof(*net->ipv6.ip6_prohibit_entry),
2734 GFP_KERNEL);
68fffc67
PZ
2735 if (!net->ipv6.ip6_prohibit_entry)
2736 goto out_ip6_null_entry;
d8d1f30b 2737 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2738 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2739 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2740 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2741 ip6_template_metrics, true);
8ed67789
DL
2742
2743 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2744 sizeof(*net->ipv6.ip6_blk_hole_entry),
2745 GFP_KERNEL);
68fffc67
PZ
2746 if (!net->ipv6.ip6_blk_hole_entry)
2747 goto out_ip6_prohibit_entry;
d8d1f30b 2748 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2749 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2750 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2751 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2752 ip6_template_metrics, true);
8ed67789
DL
2753#endif
2754
b339a47c
PZ
2755 net->ipv6.sysctl.flush_delay = 0;
2756 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2757 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2758 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2759 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2760 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2761 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2762 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2763
cdb18761
DL
2764#ifdef CONFIG_PROC_FS
2765 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2766 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2767#endif
6891a346
BT
2768 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2769
8ed67789
DL
2770 ret = 0;
2771out:
2772 return ret;
f2fc6a54 2773
68fffc67
PZ
2774#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2775out_ip6_prohibit_entry:
2776 kfree(net->ipv6.ip6_prohibit_entry);
2777out_ip6_null_entry:
2778 kfree(net->ipv6.ip6_null_entry);
2779#endif
fc66f95c
ED
2780out_ip6_dst_entries:
2781 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2782out_ip6_dst_ops:
f2fc6a54 2783 goto out;
cdb18761
DL
2784}
2785
2c8c1e72 2786static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2787{
2788#ifdef CONFIG_PROC_FS
2789 proc_net_remove(net, "ipv6_route");
2790 proc_net_remove(net, "rt6_stats");
2791#endif
8ed67789
DL
2792 kfree(net->ipv6.ip6_null_entry);
2793#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2794 kfree(net->ipv6.ip6_prohibit_entry);
2795 kfree(net->ipv6.ip6_blk_hole_entry);
2796#endif
41bb78b4 2797 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2798}
2799
2800static struct pernet_operations ip6_route_net_ops = {
2801 .init = ip6_route_net_init,
2802 .exit = ip6_route_net_exit,
2803};
2804
8ed67789
DL
2805static struct notifier_block ip6_route_dev_notifier = {
2806 .notifier_call = ip6_route_dev_notify,
2807 .priority = 0,
2808};
2809
433d49c3 2810int __init ip6_route_init(void)
1da177e4 2811{
433d49c3
DL
2812 int ret;
2813
9a7ec3a9
DL
2814 ret = -ENOMEM;
2815 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2816 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2817 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2818 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2819 goto out;
14e50e57 2820
fc66f95c 2821 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2822 if (ret)
bdb3289f 2823 goto out_kmem_cache;
bdb3289f 2824
fc66f95c
ED
2825 ret = register_pernet_subsys(&ip6_route_net_ops);
2826 if (ret)
2827 goto out_dst_entries;
2828
5dc121e9
AE
2829 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2830
8ed67789
DL
2831 /* Registering of the loopback is done before this portion of code,
2832 * the loopback reference in rt6_info will not be taken, do it
2833 * manually for init_net */
d8d1f30b 2834 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2835 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2836 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2837 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2838 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2839 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2840 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2841 #endif
433d49c3
DL
2842 ret = fib6_init();
2843 if (ret)
8ed67789 2844 goto out_register_subsys;
433d49c3 2845
433d49c3
DL
2846 ret = xfrm6_init();
2847 if (ret)
cdb18761 2848 goto out_fib6_init;
c35b7e72 2849
433d49c3
DL
2850 ret = fib6_rules_init();
2851 if (ret)
2852 goto xfrm6_init;
7e5449c2 2853
433d49c3
DL
2854 ret = -ENOBUFS;
2855 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2856 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2857 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2858 goto fib6_rules_init;
c127ea2c 2859
8ed67789 2860 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2861 if (ret)
2862 goto fib6_rules_init;
8ed67789 2863
433d49c3
DL
2864out:
2865 return ret;
2866
2867fib6_rules_init:
433d49c3
DL
2868 fib6_rules_cleanup();
2869xfrm6_init:
433d49c3 2870 xfrm6_fini();
433d49c3 2871out_fib6_init:
433d49c3 2872 fib6_gc_cleanup();
8ed67789
DL
2873out_register_subsys:
2874 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
2875out_dst_entries:
2876 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 2877out_kmem_cache:
f2fc6a54 2878 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2879 goto out;
1da177e4
LT
2880}
2881
2882void ip6_route_cleanup(void)
2883{
8ed67789 2884 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2885 fib6_rules_cleanup();
1da177e4 2886 xfrm6_fini();
1da177e4 2887 fib6_gc_cleanup();
8ed67789 2888 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 2889 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 2890 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2891}