Merge branch 'iommu/fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/joro...
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4 28#include <linux/errno.h>
bc3b2d7f 29#include <linux/export.h>
1da177e4
LT
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
7bc570c8 38#include <linux/mroute6.h>
1da177e4 39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
5b7c931d 43#include <linux/nsproxy.h>
5a0e3ad6 44#include <linux/slab.h>
457c4cbc 45#include <net/net_namespace.h>
1da177e4
LT
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
8d71740c 56#include <net/netevent.h>
21713ebc 57#include <net/netlink.h>
1da177e4
LT
58
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG() and
 * RT6_TRACE() expand to printk() calls; below that they expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
/* RDBG takes a fully parenthesised printk argument list: RDBG(("fmt", a)). */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif
75
21efcfa0
ED
76static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
77 const struct in6_addr *dest);
1da177e4 78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 79static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 80static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
81static struct dst_entry *ip6_negative_advice(struct dst_entry *);
82static void ip6_dst_destroy(struct dst_entry *);
83static void ip6_dst_ifdown(struct dst_entry *,
84 struct net_device *dev, int how);
569d3645 85static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
86
87static int ip6_pkt_discard(struct sk_buff *skb);
88static int ip6_pkt_discard_out(struct sk_buff *skb);
89static void ip6_link_failure(struct sk_buff *skb);
90static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
91
70ceb4f5 92#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 93static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
94 const struct in6_addr *prefix, int prefixlen,
95 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5 96 unsigned pref);
efa2cea0 97static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
98 const struct in6_addr *prefix, int prefixlen,
99 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
100#endif
101
06582540
DM
102static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
103{
104 struct rt6_info *rt = (struct rt6_info *) dst;
105 struct inet_peer *peer;
106 u32 *p = NULL;
107
8e2ec639
YZ
108 if (!(rt->dst.flags & DST_HOST))
109 return NULL;
110
06582540
DM
111 if (!rt->rt6i_peer)
112 rt6_bind_peer(rt, 1);
113
114 peer = rt->rt6i_peer;
115 if (peer) {
116 u32 *old_p = __DST_METRICS_PTR(old);
117 unsigned long prev, new;
118
119 p = peer->metrics;
120 if (inet_metrics_new(peer))
121 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
122
123 new = (unsigned long) p;
124 prev = cmpxchg(&dst->_metrics, old, new);
125
126 if (prev != old) {
127 p = __DST_METRICS_PTR(prev);
128 if (prev & DST_METRICS_READ_ONLY)
129 p = NULL;
130 }
131 }
132 return p;
133}
134
d3aaeb38
DM
135static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
136{
137 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
138}
139
9a7ec3a9 140static struct dst_ops ip6_dst_ops_template = {
1da177e4 141 .family = AF_INET6,
09640e63 142 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
143 .gc = ip6_dst_gc,
144 .gc_thresh = 1024,
145 .check = ip6_dst_check,
0dbaee3b 146 .default_advmss = ip6_default_advmss,
ebb762f2 147 .mtu = ip6_mtu,
06582540 148 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
149 .destroy = ip6_dst_destroy,
150 .ifdown = ip6_dst_ifdown,
151 .negative_advice = ip6_negative_advice,
152 .link_failure = ip6_link_failure,
153 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 154 .local_out = __ip6_local_out,
d3aaeb38 155 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
156};
157
ebb762f2 158static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 159{
618f9bc7
SK
160 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
161
162 return mtu ? : dst->dev->mtu;
ec831ea7
RD
163}
164
14e50e57
DM
165static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
166{
167}
168
0972ddb2
HB
169static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
170 unsigned long old)
171{
172 return NULL;
173}
174
14e50e57
DM
175static struct dst_ops ip6_dst_blackhole_ops = {
176 .family = AF_INET6,
09640e63 177 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
178 .destroy = ip6_dst_destroy,
179 .check = ip6_dst_check,
ebb762f2 180 .mtu = ip6_blackhole_mtu,
214f45c9 181 .default_advmss = ip6_default_advmss,
14e50e57 182 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 183 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 184 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
185};
186
62fa8a84
DM
187static const u32 ip6_template_metrics[RTAX_MAX] = {
188 [RTAX_HOPLIMIT - 1] = 255,
189};
190
bdb3289f 191static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
192 .dst = {
193 .__refcnt = ATOMIC_INIT(1),
194 .__use = 1,
195 .obsolete = -1,
196 .error = -ENETUNREACH,
d8d1f30b
CG
197 .input = ip6_pkt_discard,
198 .output = ip6_pkt_discard_out,
1da177e4
LT
199 },
200 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 201 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
202 .rt6i_metric = ~(u32) 0,
203 .rt6i_ref = ATOMIC_INIT(1),
204};
205
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Packet handlers for the "prohibit" route, defined later in the file. */
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" reject route: same shape as the null entry,
 * but the dst error is -EACCES and packets go to the ip6_pkt_prohibit*()
 * handlers.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" reject route: the dst error is -EINVAL and
 * both input and output go straight to dst_discard().
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
242
1da177e4 243/* allocate dst with ip6_dst_ops */
5c1e6aa3 244static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
957c665f
DM
245 struct net_device *dev,
246 int flags)
1da177e4 247{
957c665f 248 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
cf911662 249
fbe58186
MB
250 if (rt != NULL)
251 memset(&rt->rt6i_table, 0,
252 sizeof(*rt) - sizeof(struct dst_entry));
cf911662
DM
253
254 return rt;
1da177e4
LT
255}
256
257static void ip6_dst_destroy(struct dst_entry *dst)
258{
259 struct rt6_info *rt = (struct rt6_info *)dst;
260 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 261 struct inet_peer *peer = rt->rt6i_peer;
1da177e4 262
8e2ec639
YZ
263 if (!(rt->dst.flags & DST_HOST))
264 dst_destroy_metrics_generic(dst);
265
1da177e4
LT
266 if (idev != NULL) {
267 rt->rt6i_idev = NULL;
268 in6_dev_put(idev);
1ab1457c 269 }
b3419363 270 if (peer) {
b3419363
DM
271 rt->rt6i_peer = NULL;
272 inet_putpeer(peer);
273 }
274}
275
6431cbc2
DM
276static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
277
278static u32 rt6_peer_genid(void)
279{
280 return atomic_read(&__rt6_peer_genid);
281}
282
b3419363
DM
283void rt6_bind_peer(struct rt6_info *rt, int create)
284{
285 struct inet_peer *peer;
286
b3419363
DM
287 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
288 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
289 inet_putpeer(peer);
6431cbc2
DM
290 else
291 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
292}
293
294static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
295 int how)
296{
297 struct rt6_info *rt = (struct rt6_info *)dst;
298 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 299 struct net_device *loopback_dev =
c346dca1 300 dev_net(dev)->loopback_dev;
1da177e4 301
5a3e55d6
DL
302 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
303 struct inet6_dev *loopback_idev =
304 in6_dev_get(loopback_dev);
1da177e4
LT
305 if (loopback_idev != NULL) {
306 rt->rt6i_idev = loopback_idev;
307 in6_dev_put(idev);
308 }
309 }
310}
311
312static __inline__ int rt6_check_expired(const struct rt6_info *rt)
313{
a02cec21
ED
314 return (rt->rt6i_flags & RTF_EXPIRES) &&
315 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
316}
317
b71d1d42 318static inline int rt6_need_strict(const struct in6_addr *daddr)
c71099ac 319{
a02cec21
ED
320 return ipv6_addr_type(daddr) &
321 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
322}
323
1da177e4 324/*
c71099ac 325 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
326 */
327
8ed67789
DL
328static inline struct rt6_info *rt6_device_match(struct net *net,
329 struct rt6_info *rt,
b71d1d42 330 const struct in6_addr *saddr,
1da177e4 331 int oif,
d420895e 332 int flags)
1da177e4
LT
333{
334 struct rt6_info *local = NULL;
335 struct rt6_info *sprt;
336
dd3abc4e
YH
337 if (!oif && ipv6_addr_any(saddr))
338 goto out;
339
d8d1f30b 340 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
341 struct net_device *dev = sprt->rt6i_dev;
342
343 if (oif) {
1da177e4
LT
344 if (dev->ifindex == oif)
345 return sprt;
346 if (dev->flags & IFF_LOOPBACK) {
347 if (sprt->rt6i_idev == NULL ||
348 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 349 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 350 continue;
1ab1457c 351 if (local && (!oif ||
1da177e4
LT
352 local->rt6i_idev->dev->ifindex == oif))
353 continue;
354 }
355 local = sprt;
356 }
dd3abc4e
YH
357 } else {
358 if (ipv6_chk_addr(net, saddr, dev,
359 flags & RT6_LOOKUP_F_IFACE))
360 return sprt;
1da177e4 361 }
dd3abc4e 362 }
1da177e4 363
dd3abc4e 364 if (oif) {
1da177e4
LT
365 if (local)
366 return local;
367
d420895e 368 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 369 return net->ipv6.ip6_null_entry;
1da177e4 370 }
dd3abc4e 371out:
1da177e4
LT
372 return rt;
373}
374
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the neighbour attached to @rt is not in a
 * NUD_VALID state and was last updated more than rtr_probe_interval ago,
 * send a Neighbour Solicitation for it to the solicited-node multicast
 * address.  Stamping neigh->updated on each probe is what rate-limits the
 * probing.
 *
 * @rt may be NULL, in which case nothing happens.
 *
 * NOTE(review): neigh->updated is written while only the read side of
 * neigh->lock is held - kept exactly as in the original.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	int probe_due;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	/* Re-check the state under the lock and decide whether to probe. */
	read_lock_bh(&neigh->lock);
	probe_due = !(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (probe_due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (probe_due) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	}
out:
	rcu_read_unlock();
}
#else
/* Router reachability probing is compiled out: nothing to do. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
414
1da177e4 415/*
554cfb7e 416 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 417 */
b6f99a21 418static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
419{
420 struct net_device *dev = rt->rt6i_dev;
161980f4 421 if (!oif || dev->ifindex == oif)
554cfb7e 422 return 2;
161980f4
DM
423 if ((dev->flags & IFF_LOOPBACK) &&
424 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
425 return 1;
426 return 0;
554cfb7e 427}
1da177e4 428
b6f99a21 429static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 430{
f2c31e32 431 struct neighbour *neigh;
398bcbeb 432 int m;
f2c31e32
ED
433
434 rcu_read_lock();
435 neigh = dst_get_neighbour(&rt->dst);
4d0c5911
YH
436 if (rt->rt6i_flags & RTF_NONEXTHOP ||
437 !(rt->rt6i_flags & RTF_GATEWAY))
438 m = 1;
439 else if (neigh) {
554cfb7e
YH
440 read_lock_bh(&neigh->lock);
441 if (neigh->nud_state & NUD_VALID)
4d0c5911 442 m = 2;
398bcbeb
YH
443#ifdef CONFIG_IPV6_ROUTER_PREF
444 else if (neigh->nud_state & NUD_FAILED)
445 m = 0;
446#endif
447 else
ea73ee23 448 m = 1;
554cfb7e 449 read_unlock_bh(&neigh->lock);
398bcbeb
YH
450 } else
451 m = 0;
f2c31e32 452 rcu_read_unlock();
554cfb7e 453 return m;
1da177e4
LT
454}
455
554cfb7e
YH
456static int rt6_score_route(struct rt6_info *rt, int oif,
457 int strict)
1da177e4 458{
4d0c5911 459 int m, n;
1ab1457c 460
4d0c5911 461 m = rt6_check_dev(rt, oif);
77d16f45 462 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 463 return -1;
ebacaaa0
YH
464#ifdef CONFIG_IPV6_ROUTER_PREF
465 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
466#endif
4d0c5911 467 n = rt6_check_neigh(rt);
557e92ef 468 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
469 return -1;
470 return m;
471}
472
f11e6659
DM
473static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
474 int *mpri, struct rt6_info *match)
554cfb7e 475{
f11e6659
DM
476 int m;
477
478 if (rt6_check_expired(rt))
479 goto out;
480
481 m = rt6_score_route(rt, oif, strict);
482 if (m < 0)
483 goto out;
484
485 if (m > *mpri) {
486 if (strict & RT6_LOOKUP_F_REACHABLE)
487 rt6_probe(match);
488 *mpri = m;
489 match = rt;
490 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
491 rt6_probe(rt);
492 }
493
494out:
495 return match;
496}
497
498static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
499 struct rt6_info *rr_head,
500 u32 metric, int oif, int strict)
501{
502 struct rt6_info *rt, *match;
554cfb7e 503 int mpri = -1;
1da177e4 504
f11e6659
DM
505 match = NULL;
506 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 507 rt = rt->dst.rt6_next)
f11e6659
DM
508 match = find_match(rt, oif, strict, &mpri, match);
509 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 510 rt = rt->dst.rt6_next)
f11e6659 511 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 512
f11e6659
DM
513 return match;
514}
1da177e4 515
f11e6659
DM
516static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
517{
518 struct rt6_info *match, *rt0;
8ed67789 519 struct net *net;
1da177e4 520
f11e6659 521 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 522 __func__, fn->leaf, oif);
554cfb7e 523
f11e6659
DM
524 rt0 = fn->rr_ptr;
525 if (!rt0)
526 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 527
f11e6659 528 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 529
554cfb7e 530 if (!match &&
f11e6659 531 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 532 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 533
554cfb7e 534 /* no entries matched; do round-robin */
f11e6659
DM
535 if (!next || next->rt6i_metric != rt0->rt6i_metric)
536 next = fn->leaf;
537
538 if (next != rt0)
539 fn->rr_ptr = next;
1da177e4 540 }
1da177e4 541
f11e6659 542 RT6_TRACE("%s() => %p\n",
0dc47877 543 __func__, match);
1da177e4 544
c346dca1 545 net = dev_net(rt0->rt6i_dev);
a02cec21 546 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
547}
548
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received from router @gwaddr on @dev.
 *
 * @opt: start of the option, interpreted as struct route_info
 * @len: number of bytes available at @opt
 *
 * The option is rejected with -EINVAL when it is shorter than struct
 * route_info, when its length field exceeds 3, when the prefix length
 * exceeds 128 or does not fit in the option length, or when the preference
 * is ICMPV6_ROUTER_PREF_INVALID.
 *
 * Otherwise the matching route is looked up: a zero lifetime deletes it, a
 * non-zero lifetime creates it if missing or refreshes its preference bits,
 * and the expiry is set (or cleared for a non-finite lifetime).  Returns 0.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* A full 16-byte prefix is present; use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		/* Existing route: refresh the preference bits. */
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
622
/*
 * BACKTRACK(__net, saddr) - retry a failed lookup higher up the FIB tree.
 *
 * Must be expanded inside a function that has locals 'rt' (current result)
 * and 'fn' (current node) and labels 'out' and 'restart'.
 *
 * If rt is the namespace's null entry, walk upwards from fn: jump to 'out'
 * when fn is flagged RTN_TL_ROOT; otherwise move to the parent, or, when the
 * parent has a subtree that is not fn itself, to the result of looking
 * saddr up in that subtree.  As soon as the new fn is flagged RTN_RTINFO,
 * jump to 'restart'.  If rt is anything else the macro does nothing.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 640
8ed67789
DL
641static struct rt6_info *ip6_pol_route_lookup(struct net *net,
642 struct fib6_table *table,
4c9483b2 643 struct flowi6 *fl6, int flags)
1da177e4
LT
644{
645 struct fib6_node *fn;
646 struct rt6_info *rt;
647
c71099ac 648 read_lock_bh(&table->tb6_lock);
4c9483b2 649 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
650restart:
651 rt = fn->leaf;
4c9483b2
DM
652 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
653 BACKTRACK(net, &fl6->saddr);
c71099ac 654out:
d8d1f30b 655 dst_use(&rt->dst, jiffies);
c71099ac 656 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
657 return rt;
658
659}
660
9acd9f3a
YH
661struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
662 const struct in6_addr *saddr, int oif, int strict)
c71099ac 663{
4c9483b2
DM
664 struct flowi6 fl6 = {
665 .flowi6_oif = oif,
666 .daddr = *daddr,
c71099ac
TG
667 };
668 struct dst_entry *dst;
77d16f45 669 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 670
adaa70bb 671 if (saddr) {
4c9483b2 672 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
673 flags |= RT6_LOOKUP_F_HAS_SADDR;
674 }
675
4c9483b2 676 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
677 if (dst->error == 0)
678 return (struct rt6_info *) dst;
679
680 dst_release(dst);
681
1da177e4
LT
682 return NULL;
683}
684
7159039a
YH
685EXPORT_SYMBOL(rt6_lookup);
686
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to it, the route may be destroyed.
 */
692
86872cb5 693static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
694{
695 int err;
c71099ac 696 struct fib6_table *table;
1da177e4 697
c71099ac
TG
698 table = rt->rt6i_table;
699 write_lock_bh(&table->tb6_lock);
86872cb5 700 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 701 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
702
703 return err;
704}
705
40e22e8f
TG
706int ip6_ins_rt(struct rt6_info *rt)
707{
4d1169c1 708 struct nl_info info = {
c346dca1 709 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 710 };
528c4ceb 711 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
712}
713
21efcfa0
ED
714static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
715 const struct in6_addr *daddr,
b71d1d42 716 const struct in6_addr *saddr)
1da177e4 717{
1da177e4
LT
718 struct rt6_info *rt;
719
720 /*
721 * Clone the route.
722 */
723
21efcfa0 724 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
725
726 if (rt) {
14deae41
DM
727 struct neighbour *neigh;
728 int attempts = !in_softirq();
729
58c4fb86
YH
730 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
731 if (rt->rt6i_dst.plen != 128 &&
21efcfa0 732 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 733 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 734 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 735 }
1da177e4 736
1da177e4 737 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
738
739#ifdef CONFIG_IPV6_SUBTREES
740 if (rt->rt6i_src.plen && saddr) {
741 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
742 rt->rt6i_src.plen = 128;
743 }
744#endif
745
14deae41
DM
746 retry:
747 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
748 if (IS_ERR(neigh)) {
749 struct net *net = dev_net(rt->rt6i_dev);
750 int saved_rt_min_interval =
751 net->ipv6.sysctl.ip6_rt_gc_min_interval;
752 int saved_rt_elasticity =
753 net->ipv6.sysctl.ip6_rt_gc_elasticity;
754
755 if (attempts-- > 0) {
756 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
757 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
758
86393e52 759 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
760
761 net->ipv6.sysctl.ip6_rt_gc_elasticity =
762 saved_rt_elasticity;
763 net->ipv6.sysctl.ip6_rt_gc_min_interval =
764 saved_rt_min_interval;
765 goto retry;
766 }
767
768 if (net_ratelimit())
769 printk(KERN_WARNING
7e1b33e5 770 "ipv6: Neighbour table overflow.\n");
d8d1f30b 771 dst_free(&rt->dst);
14deae41
DM
772 return NULL;
773 }
69cce1d1 774 dst_set_neighbour(&rt->dst, neigh);
1da177e4 775
95a9a5ba 776 }
1da177e4 777
95a9a5ba
YH
778 return rt;
779}
1da177e4 780
21efcfa0
ED
781static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
782 const struct in6_addr *daddr)
299d9939 783{
21efcfa0
ED
784 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
785
299d9939 786 if (rt) {
299d9939 787 rt->rt6i_flags |= RTF_CACHE;
f2c31e32 788 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
299d9939
YH
789 }
790 return rt;
791}
792
8ed67789 793static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 794 struct flowi6 *fl6, int flags)
1da177e4
LT
795{
796 struct fib6_node *fn;
519fbd87 797 struct rt6_info *rt, *nrt;
c71099ac 798 int strict = 0;
1da177e4 799 int attempts = 3;
519fbd87 800 int err;
53b7997f 801 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 802
77d16f45 803 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
804
805relookup:
c71099ac 806 read_lock_bh(&table->tb6_lock);
1da177e4 807
8238dd06 808restart_2:
4c9483b2 809 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
810
811restart:
4acad72d 812 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 813
4c9483b2 814 BACKTRACK(net, &fl6->saddr);
8ed67789 815 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 816 rt->rt6i_flags & RTF_CACHE)
1ddef044 817 goto out;
1da177e4 818
d8d1f30b 819 dst_hold(&rt->dst);
c71099ac 820 read_unlock_bh(&table->tb6_lock);
fb9de91e 821
f2c31e32 822 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 823 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 824 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 825 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
826 else
827 goto out2;
e40cf353 828
d8d1f30b 829 dst_release(&rt->dst);
8ed67789 830 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 831
d8d1f30b 832 dst_hold(&rt->dst);
519fbd87 833 if (nrt) {
40e22e8f 834 err = ip6_ins_rt(nrt);
519fbd87 835 if (!err)
1da177e4 836 goto out2;
1da177e4 837 }
1da177e4 838
519fbd87
YH
839 if (--attempts <= 0)
840 goto out2;
841
842 /*
c71099ac 843 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
844 * released someone could insert this route. Relookup.
845 */
d8d1f30b 846 dst_release(&rt->dst);
519fbd87
YH
847 goto relookup;
848
849out:
8238dd06
YH
850 if (reachable) {
851 reachable = 0;
852 goto restart_2;
853 }
d8d1f30b 854 dst_hold(&rt->dst);
c71099ac 855 read_unlock_bh(&table->tb6_lock);
1da177e4 856out2:
d8d1f30b
CG
857 rt->dst.lastuse = jiffies;
858 rt->dst.__use++;
c71099ac
TG
859
860 return rt;
1da177e4
LT
861}
862
8ed67789 863static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 864 struct flowi6 *fl6, int flags)
4acad72d 865{
4c9483b2 866 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
867}
868
c71099ac
TG
869void ip6_route_input(struct sk_buff *skb)
870{
b71d1d42 871 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 872 struct net *net = dev_net(skb->dev);
adaa70bb 873 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
874 struct flowi6 fl6 = {
875 .flowi6_iif = skb->dev->ifindex,
876 .daddr = iph->daddr,
877 .saddr = iph->saddr,
878 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
879 .flowi6_mark = skb->mark,
880 .flowi6_proto = iph->nexthdr,
c71099ac 881 };
adaa70bb 882
1d6e55f1 883 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 884 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 885
4c9483b2 886 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
c71099ac
TG
887}
888
8ed67789 889static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 890 struct flowi6 *fl6, int flags)
1da177e4 891{
4c9483b2 892 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
893}
894
9c7a4f9c 895struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 896 struct flowi6 *fl6)
c71099ac
TG
897{
898 int flags = 0;
899
4c9483b2 900 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 901 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 902
4c9483b2 903 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 904 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
905 else if (sk)
906 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 907
4c9483b2 908 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
909}
910
7159039a 911EXPORT_SYMBOL(ip6_route_output);
1da177e4 912
2774c131 913struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 914{
5c1e6aa3 915 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
916 struct dst_entry *new = NULL;
917
5c1e6aa3 918 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 919 if (rt) {
cf911662
DM
920 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
921
d8d1f30b 922 new = &rt->dst;
14e50e57 923
14e50e57 924 new->__use = 1;
352e512c
HX
925 new->input = dst_discard;
926 new->output = dst_discard;
14e50e57 927
21efcfa0
ED
928 if (dst_metrics_read_only(&ort->dst))
929 new->_metrics = ort->dst._metrics;
930 else
931 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
932 rt->rt6i_idev = ort->rt6i_idev;
933 if (rt->rt6i_idev)
934 in6_dev_hold(rt->rt6i_idev);
935 rt->rt6i_expires = 0;
936
937 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
938 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
939 rt->rt6i_metric = 0;
940
941 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
942#ifdef CONFIG_IPV6_SUBTREES
943 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
944#endif
945
946 dst_free(new);
947 }
948
69ead7af
DM
949 dst_release(dst_orig);
950 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 951}
14e50e57 952
1da177e4
LT
953/*
954 * Destination cache support functions
955 */
956
957static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
958{
959 struct rt6_info *rt;
960
961 rt = (struct rt6_info *) dst;
962
6431cbc2
DM
963 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
964 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
965 if (!rt->rt6i_peer)
966 rt6_bind_peer(rt, 0);
967 rt->rt6i_peer_genid = rt6_peer_genid();
968 }
1da177e4 969 return dst;
6431cbc2 970 }
1da177e4
LT
971 return NULL;
972}
973
974static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
975{
976 struct rt6_info *rt = (struct rt6_info *) dst;
977
978 if (rt) {
54c1a859
YH
979 if (rt->rt6i_flags & RTF_CACHE) {
980 if (rt6_check_expired(rt)) {
981 ip6_del_rt(rt);
982 dst = NULL;
983 }
984 } else {
1da177e4 985 dst_release(dst);
54c1a859
YH
986 dst = NULL;
987 }
1da177e4 988 }
54c1a859 989 return dst;
1da177e4
LT
990}
991
992static void ip6_link_failure(struct sk_buff *skb)
993{
994 struct rt6_info *rt;
995
3ffe533c 996 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 997
adf30907 998 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
999 if (rt) {
1000 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 1001 dst_set_expires(&rt->dst, 0);
1da177e4
LT
1002 rt->rt6i_flags |= RTF_EXPIRES;
1003 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1004 rt->rt6i_node->fn_sernum = -1;
1005 }
1006}
1007
1008static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1009{
1010 struct rt6_info *rt6 = (struct rt6_info*)dst;
1011
1012 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1013 rt6->rt6i_flags |= RTF_MODIFIED;
1014 if (mtu < IPV6_MIN_MTU) {
defb3519 1015 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1016 mtu = IPV6_MIN_MTU;
defb3519
DM
1017 features |= RTAX_FEATURE_ALLFRAG;
1018 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1019 }
defb3519 1020 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
1021 }
1022}
1023
0dbaee3b 1024static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1025{
0dbaee3b
DM
1026 struct net_device *dev = dst->dev;
1027 unsigned int mtu = dst_mtu(dst);
1028 struct net *net = dev_net(dev);
1029
1da177e4
LT
1030 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1031
5578689a
DL
1032 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1033 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1034
1035 /*
1ab1457c
YH
1036 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1037 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1038 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1039 * rely only on pmtu discovery"
1040 */
1041 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1042 mtu = IPV6_MAXPLEN;
1043 return mtu;
1044}
1045
ebb762f2 1046static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1047{
d33e4553 1048 struct inet6_dev *idev;
618f9bc7
SK
1049 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1050
1051 if (mtu)
1052 return mtu;
1053
1054 mtu = IPV6_MIN_MTU;
d33e4553
DM
1055
1056 rcu_read_lock();
1057 idev = __in6_dev_get(dst->dev);
1058 if (idev)
1059 mtu = idev->cnf.mtu6;
1060 rcu_read_unlock();
1061
1062 return mtu;
1063}
1064
3b00944c
YH
1065static struct dst_entry *icmp6_dst_gc_list;
1066static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1067
3b00944c 1068struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1069 struct neighbour *neigh,
9acd9f3a 1070 const struct in6_addr *addr)
1da177e4
LT
1071{
1072 struct rt6_info *rt;
1073 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1074 struct net *net = dev_net(dev);
1da177e4
LT
1075
1076 if (unlikely(idev == NULL))
1077 return NULL;
1078
957c665f 1079 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1da177e4
LT
1080 if (unlikely(rt == NULL)) {
1081 in6_dev_put(idev);
1082 goto out;
1083 }
1084
1da177e4
LT
1085 if (neigh)
1086 neigh_hold(neigh);
14deae41 1087 else {
1da177e4 1088 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
1089 if (IS_ERR(neigh))
1090 neigh = NULL;
1091 }
1da177e4 1092
8e2ec639
YZ
1093 rt->dst.flags |= DST_HOST;
1094 rt->dst.output = ip6_output;
69cce1d1 1095 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1096 atomic_set(&rt->dst.__refcnt, 1);
8e2ec639
YZ
1097 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1098 rt->rt6i_dst.plen = 128;
1099 rt->rt6i_idev = idev;
7011687f 1100 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1101
3b00944c 1102 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1103 rt->dst.next = icmp6_dst_gc_list;
1104 icmp6_dst_gc_list = &rt->dst;
3b00944c 1105 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1106
5578689a 1107 fib6_force_start_gc(net);
1da177e4
LT
1108
1109out:
d8d1f30b 1110 return &rt->dst;
1da177e4
LT
1111}
1112
3d0f24a7 1113int icmp6_dst_gc(void)
1da177e4 1114{
e9476e95 1115 struct dst_entry *dst, **pprev;
3d0f24a7 1116 int more = 0;
1da177e4 1117
3b00944c
YH
1118 spin_lock_bh(&icmp6_dst_lock);
1119 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1120
1da177e4
LT
1121 while ((dst = *pprev) != NULL) {
1122 if (!atomic_read(&dst->__refcnt)) {
1123 *pprev = dst->next;
1124 dst_free(dst);
1da177e4
LT
1125 } else {
1126 pprev = &dst->next;
3d0f24a7 1127 ++more;
1da177e4
LT
1128 }
1129 }
1130
3b00944c 1131 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1132
3d0f24a7 1133 return more;
1da177e4
LT
1134}
1135
1e493d19
DM
1136static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1137 void *arg)
1138{
1139 struct dst_entry *dst, **pprev;
1140
1141 spin_lock_bh(&icmp6_dst_lock);
1142 pprev = &icmp6_dst_gc_list;
1143 while ((dst = *pprev) != NULL) {
1144 struct rt6_info *rt = (struct rt6_info *) dst;
1145 if (func(rt, arg)) {
1146 *pprev = dst->next;
1147 dst_free(dst);
1148 } else {
1149 pprev = &dst->next;
1150 }
1151 }
1152 spin_unlock_bh(&icmp6_dst_lock);
1153}
1154
569d3645 1155static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1156{
1da177e4 1157 unsigned long now = jiffies;
86393e52 1158 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1159 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1160 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1161 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1162 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1163 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1164 int entries;
7019b78e 1165
fc66f95c 1166 entries = dst_entries_get_fast(ops);
7019b78e 1167 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1168 entries <= rt_max_size)
1da177e4
LT
1169 goto out;
1170
6891a346
BT
1171 net->ipv6.ip6_rt_gc_expire++;
1172 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1173 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1174 entries = dst_entries_get_slow(ops);
1175 if (entries < ops->gc_thresh)
7019b78e 1176 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1177out:
7019b78e 1178 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1179 return entries > rt_max_size;
1da177e4
LT
1180}
1181
1182/* Clean host part of a prefix. Not necessary in radix tree,
1183 but results in cleaner routing tables.
1184
1185 Remove it only when all the things will work!
1186 */
1187
6b75d090 1188int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1189{
5170ae82 1190 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1191 if (hoplimit == 0) {
6b75d090 1192 struct net_device *dev = dst->dev;
c68f24cc
ED
1193 struct inet6_dev *idev;
1194
1195 rcu_read_lock();
1196 idev = __in6_dev_get(dev);
1197 if (idev)
6b75d090 1198 hoplimit = idev->cnf.hop_limit;
c68f24cc 1199 else
53b7997f 1200 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1201 rcu_read_unlock();
1da177e4
LT
1202 }
1203 return hoplimit;
1204}
abbf46ae 1205EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1206
1207/*
1208 *
1209 */
1210
86872cb5 1211int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1212{
1213 int err;
5578689a 1214 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1215 struct rt6_info *rt = NULL;
1216 struct net_device *dev = NULL;
1217 struct inet6_dev *idev = NULL;
c71099ac 1218 struct fib6_table *table;
1da177e4
LT
1219 int addr_type;
1220
86872cb5 1221 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1222 return -EINVAL;
1223#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1224 if (cfg->fc_src_len)
1da177e4
LT
1225 return -EINVAL;
1226#endif
86872cb5 1227 if (cfg->fc_ifindex) {
1da177e4 1228 err = -ENODEV;
5578689a 1229 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1230 if (!dev)
1231 goto out;
1232 idev = in6_dev_get(dev);
1233 if (!idev)
1234 goto out;
1235 }
1236
86872cb5
TG
1237 if (cfg->fc_metric == 0)
1238 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1239
5578689a 1240 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1241 if (table == NULL) {
1242 err = -ENOBUFS;
1243 goto out;
1244 }
1245
957c665f 1246 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1da177e4
LT
1247
1248 if (rt == NULL) {
1249 err = -ENOMEM;
1250 goto out;
1251 }
1252
d8d1f30b 1253 rt->dst.obsolete = -1;
6f704992
YH
1254 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1255 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1256 0;
1da177e4 1257
86872cb5
TG
1258 if (cfg->fc_protocol == RTPROT_UNSPEC)
1259 cfg->fc_protocol = RTPROT_BOOT;
1260 rt->rt6i_protocol = cfg->fc_protocol;
1261
1262 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1263
1264 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1265 rt->dst.input = ip6_mc_input;
ab79ad14
1266 else if (cfg->fc_flags & RTF_LOCAL)
1267 rt->dst.input = ip6_input;
1da177e4 1268 else
d8d1f30b 1269 rt->dst.input = ip6_forward;
1da177e4 1270
d8d1f30b 1271 rt->dst.output = ip6_output;
1da177e4 1272
86872cb5
TG
1273 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1274 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1275 if (rt->rt6i_dst.plen == 128)
11d53b49 1276 rt->dst.flags |= DST_HOST;
1da177e4 1277
8e2ec639
YZ
1278 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1279 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1280 if (!metrics) {
1281 err = -ENOMEM;
1282 goto out;
1283 }
1284 dst_init_metrics(&rt->dst, metrics, 0);
1285 }
1da177e4 1286#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1287 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1288 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1289#endif
1290
86872cb5 1291 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1292
1293 /* We cannot add true routes via loopback here,
1294 they would result in kernel looping; promote them to reject routes
1295 */
86872cb5 1296 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1297 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1298 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1299 /* hold loopback dev/idev if we haven't done so. */
5578689a 1300 if (dev != net->loopback_dev) {
1da177e4
LT
1301 if (dev) {
1302 dev_put(dev);
1303 in6_dev_put(idev);
1304 }
5578689a 1305 dev = net->loopback_dev;
1da177e4
LT
1306 dev_hold(dev);
1307 idev = in6_dev_get(dev);
1308 if (!idev) {
1309 err = -ENODEV;
1310 goto out;
1311 }
1312 }
d8d1f30b
CG
1313 rt->dst.output = ip6_pkt_discard_out;
1314 rt->dst.input = ip6_pkt_discard;
1315 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1316 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1317 goto install_route;
1318 }
1319
86872cb5 1320 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1321 const struct in6_addr *gw_addr;
1da177e4
LT
1322 int gwa_type;
1323
86872cb5
TG
1324 gw_addr = &cfg->fc_gateway;
1325 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1326 gwa_type = ipv6_addr_type(gw_addr);
1327
1328 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1329 struct rt6_info *grt;
1330
1331 /* IPv6 strictly inhibits using not link-local
1332 addresses as nexthop address.
1333 Otherwise, router will not able to send redirects.
1334 It is very good, but in some (rare!) circumstances
1335 (SIT, PtP, NBMA NOARP links) it is handy to allow
1336 some exceptions. --ANK
1337 */
1338 err = -EINVAL;
1339 if (!(gwa_type&IPV6_ADDR_UNICAST))
1340 goto out;
1341
5578689a 1342 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1343
1344 err = -EHOSTUNREACH;
1345 if (grt == NULL)
1346 goto out;
1347 if (dev) {
1348 if (dev != grt->rt6i_dev) {
d8d1f30b 1349 dst_release(&grt->dst);
1da177e4
LT
1350 goto out;
1351 }
1352 } else {
1353 dev = grt->rt6i_dev;
1354 idev = grt->rt6i_idev;
1355 dev_hold(dev);
1356 in6_dev_hold(grt->rt6i_idev);
1357 }
1358 if (!(grt->rt6i_flags&RTF_GATEWAY))
1359 err = 0;
d8d1f30b 1360 dst_release(&grt->dst);
1da177e4
LT
1361
1362 if (err)
1363 goto out;
1364 }
1365 err = -EINVAL;
1366 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1367 goto out;
1368 }
1369
1370 err = -ENODEV;
1371 if (dev == NULL)
1372 goto out;
1373
c3968a85
DW
1374 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1375 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1376 err = -EINVAL;
1377 goto out;
1378 }
1379 ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1380 rt->rt6i_prefsrc.plen = 128;
1381 } else
1382 rt->rt6i_prefsrc.plen = 0;
1383
86872cb5 1384 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
69cce1d1
DM
1385 struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1386 if (IS_ERR(n)) {
1387 err = PTR_ERR(n);
1da177e4
LT
1388 goto out;
1389 }
69cce1d1 1390 dst_set_neighbour(&rt->dst, n);
1da177e4
LT
1391 }
1392
86872cb5 1393 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1394
1395install_route:
86872cb5
TG
1396 if (cfg->fc_mx) {
1397 struct nlattr *nla;
1398 int remaining;
1399
1400 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1401 int type = nla_type(nla);
86872cb5
TG
1402
1403 if (type) {
1404 if (type > RTAX_MAX) {
1da177e4
LT
1405 err = -EINVAL;
1406 goto out;
1407 }
86872cb5 1408
defb3519 1409 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1410 }
1da177e4
LT
1411 }
1412 }
1413
d8d1f30b 1414 rt->dst.dev = dev;
1da177e4 1415 rt->rt6i_idev = idev;
c71099ac 1416 rt->rt6i_table = table;
63152fc0 1417
c346dca1 1418 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1419
86872cb5 1420 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1421
1422out:
1423 if (dev)
1424 dev_put(dev);
1425 if (idev)
1426 in6_dev_put(idev);
1427 if (rt)
d8d1f30b 1428 dst_free(&rt->dst);
1da177e4
LT
1429 return err;
1430}
1431
86872cb5 1432static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1433{
1434 int err;
c71099ac 1435 struct fib6_table *table;
c346dca1 1436 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1437
8ed67789 1438 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1439 return -ENOENT;
1440
c71099ac
TG
1441 table = rt->rt6i_table;
1442 write_lock_bh(&table->tb6_lock);
1da177e4 1443
86872cb5 1444 err = fib6_del(rt, info);
d8d1f30b 1445 dst_release(&rt->dst);
1da177e4 1446
c71099ac 1447 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1448
1449 return err;
1450}
1451
e0a1ad73
TG
1452int ip6_del_rt(struct rt6_info *rt)
1453{
4d1169c1 1454 struct nl_info info = {
c346dca1 1455 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1456 };
528c4ceb 1457 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1458}
1459
86872cb5 1460static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1461{
c71099ac 1462 struct fib6_table *table;
1da177e4
LT
1463 struct fib6_node *fn;
1464 struct rt6_info *rt;
1465 int err = -ESRCH;
1466
5578689a 1467 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1468 if (table == NULL)
1469 return err;
1470
1471 read_lock_bh(&table->tb6_lock);
1da177e4 1472
c71099ac 1473 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1474 &cfg->fc_dst, cfg->fc_dst_len,
1475 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1476
1da177e4 1477 if (fn) {
d8d1f30b 1478 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1479 if (cfg->fc_ifindex &&
1da177e4 1480 (rt->rt6i_dev == NULL ||
86872cb5 1481 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1482 continue;
86872cb5
TG
1483 if (cfg->fc_flags & RTF_GATEWAY &&
1484 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1485 continue;
86872cb5 1486 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1487 continue;
d8d1f30b 1488 dst_hold(&rt->dst);
c71099ac 1489 read_unlock_bh(&table->tb6_lock);
1da177e4 1490
86872cb5 1491 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1492 }
1493 }
c71099ac 1494 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1495
1496 return err;
1497}
1498
1499/*
1500 * Handle redirects
1501 */
a6279458 1502struct ip6rd_flowi {
4c9483b2 1503 struct flowi6 fl6;
a6279458
YH
1504 struct in6_addr gateway;
1505};
1506
8ed67789
DL
1507static struct rt6_info *__ip6_route_redirect(struct net *net,
1508 struct fib6_table *table,
4c9483b2 1509 struct flowi6 *fl6,
a6279458 1510 int flags)
1da177e4 1511{
4c9483b2 1512 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1513 struct rt6_info *rt;
e843b9e1 1514 struct fib6_node *fn;
c71099ac 1515
1da177e4 1516 /*
e843b9e1
YH
1517 * Get the "current" route for this destination and
1518 * check if the redirect has come from approriate router.
1519 *
1520 * RFC 2461 specifies that redirects should only be
1521 * accepted if they come from the nexthop to the target.
1522 * Due to the way the routes are chosen, this notion
1523 * is a bit fuzzy and one might need to check all possible
1524 * routes.
1da177e4 1525 */
1da177e4 1526
c71099ac 1527 read_lock_bh(&table->tb6_lock);
4c9483b2 1528 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1529restart:
d8d1f30b 1530 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1531 /*
1532 * Current route is on-link; redirect is always invalid.
1533 *
1534 * Seems, previous statement is not true. It could
1535 * be node, which looks for us as on-link (f.e. proxy ndisc)
1536 * But then router serving it might decide, that we should
1537 * know truth 8)8) --ANK (980726).
1538 */
1539 if (rt6_check_expired(rt))
1540 continue;
1541 if (!(rt->rt6i_flags & RTF_GATEWAY))
1542 continue;
4c9483b2 1543 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
e843b9e1 1544 continue;
a6279458 1545 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1546 continue;
1547 break;
1548 }
a6279458 1549
cb15d9c2 1550 if (!rt)
8ed67789 1551 rt = net->ipv6.ip6_null_entry;
4c9483b2 1552 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1553out:
d8d1f30b 1554 dst_hold(&rt->dst);
a6279458 1555
c71099ac 1556 read_unlock_bh(&table->tb6_lock);
e843b9e1 1557
a6279458
YH
1558 return rt;
1559};
1560
b71d1d42
ED
1561static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1562 const struct in6_addr *src,
1563 const struct in6_addr *gateway,
a6279458
YH
1564 struct net_device *dev)
1565{
adaa70bb 1566 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1567 struct net *net = dev_net(dev);
a6279458 1568 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1569 .fl6 = {
1570 .flowi6_oif = dev->ifindex,
1571 .daddr = *dest,
1572 .saddr = *src,
a6279458 1573 },
a6279458 1574 };
adaa70bb 1575
86c36ce4
BH
1576 ipv6_addr_copy(&rdfl.gateway, gateway);
1577
adaa70bb
TG
1578 if (rt6_need_strict(dest))
1579 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1580
4c9483b2 1581 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1582 flags, __ip6_route_redirect);
a6279458
YH
1583}
1584
b71d1d42
ED
1585void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1586 const struct in6_addr *saddr,
a6279458
YH
1587 struct neighbour *neigh, u8 *lladdr, int on_link)
1588{
1589 struct rt6_info *rt, *nrt = NULL;
1590 struct netevent_redirect netevent;
c346dca1 1591 struct net *net = dev_net(neigh->dev);
a6279458
YH
1592
1593 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1594
8ed67789 1595 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1596 if (net_ratelimit())
1597 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1598 "for redirect target\n");
a6279458 1599 goto out;
1da177e4
LT
1600 }
1601
1da177e4
LT
1602 /*
1603 * We have finally decided to accept it.
1604 */
1605
1ab1457c 1606 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1607 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1608 NEIGH_UPDATE_F_OVERRIDE|
1609 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1610 NEIGH_UPDATE_F_ISROUTER))
1611 );
1612
1613 /*
1614 * Redirect received -> path was valid.
1615 * Look, redirects are sent only in response to data packets,
1616 * so that this nexthop apparently is reachable. --ANK
1617 */
d8d1f30b 1618 dst_confirm(&rt->dst);
1da177e4
LT
1619
1620 /* Duplicate redirect: silently ignore. */
f2c31e32 1621 if (neigh == dst_get_neighbour_raw(&rt->dst))
1da177e4
LT
1622 goto out;
1623
21efcfa0 1624 nrt = ip6_rt_copy(rt, dest);
1da177e4
LT
1625 if (nrt == NULL)
1626 goto out;
1627
1628 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1629 if (on_link)
1630 nrt->rt6i_flags &= ~RTF_GATEWAY;
1631
1da177e4 1632 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
69cce1d1 1633 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1634
40e22e8f 1635 if (ip6_ins_rt(nrt))
1da177e4
LT
1636 goto out;
1637
d8d1f30b
CG
1638 netevent.old = &rt->dst;
1639 netevent.new = &nrt->dst;
8d71740c
TT
1640 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1641
1da177e4 1642 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1643 ip6_del_rt(rt);
1da177e4
LT
1644 return;
1645 }
1646
1647out:
d8d1f30b 1648 dst_release(&rt->dst);
1da177e4
LT
1649}
1650
1651/*
1652 * Handle ICMP "packet too big" messages
1653 * i.e. Path MTU discovery
1654 */
1655
b71d1d42 1656static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1657 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1658{
1659 struct rt6_info *rt, *nrt;
1660 int allfrag = 0;
d3052b55 1661again:
ae878ae2 1662 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1663 if (rt == NULL)
1664 return;
1665
d3052b55
AV
1666 if (rt6_check_expired(rt)) {
1667 ip6_del_rt(rt);
1668 goto again;
1669 }
1670
d8d1f30b 1671 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1672 goto out;
1673
1674 if (pmtu < IPV6_MIN_MTU) {
1675 /*
1ab1457c 1676 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1677 * MTU (1280) and a fragment header should always be included
1678 * after a node receiving Too Big message reporting PMTU is
1679 * less than the IPv6 Minimum Link MTU.
1680 */
1681 pmtu = IPV6_MIN_MTU;
1682 allfrag = 1;
1683 }
1684
1685 /* New mtu received -> path was valid.
1686 They are sent only in response to data packets,
1687 so that this nexthop apparently is reachable. --ANK
1688 */
d8d1f30b 1689 dst_confirm(&rt->dst);
1da177e4
LT
1690
1691 /* Host route. If it is static, it would be better
1692 not to override it, but add new one, so that
1693 when cache entry will expire old pmtu
1694 would return automatically.
1695 */
1696 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1697 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1698 if (allfrag) {
1699 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1700 features |= RTAX_FEATURE_ALLFRAG;
1701 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1702 }
d8d1f30b 1703 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1704 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1705 goto out;
1706 }
1707
1708 /* Network route.
1709 Two cases are possible:
1710 1. It is connected route. Action: COW
1711 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1712 */
f2c31e32 1713 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1714 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1715 else
1716 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1717
d5315b50 1718 if (nrt) {
defb3519
DM
1719 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1720 if (allfrag) {
1721 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1722 features |= RTAX_FEATURE_ALLFRAG;
1723 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1724 }
a1e78363
YH
1725
1726 /* According to RFC 1981, detecting PMTU increase shouldn't be
1727 * happened within 5 mins, the recommended timer is 10 mins.
1728 * Here this route expiration time is set to ip6_rt_mtu_expires
1729 * which is 10 mins. After 10 mins the decreased pmtu is expired
1730 * and detecting PMTU increase will be automatically happened.
1731 */
d8d1f30b 1732 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1733 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1734
40e22e8f 1735 ip6_ins_rt(nrt);
1da177e4 1736 }
1da177e4 1737out:
d8d1f30b 1738 dst_release(&rt->dst);
1da177e4
LT
1739}
1740
b71d1d42 1741void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1742 struct net_device *dev, u32 pmtu)
1743{
1744 struct net *net = dev_net(dev);
1745
1746 /*
1747 * RFC 1981 states that a node "MUST reduce the size of the packets it
1748 * is sending along the path" that caused the Packet Too Big message.
1749 * Since it's not possible in the general case to determine which
1750 * interface was used to send the original packet, we update the MTU
1751 * on the interface that will be used to send future packets. We also
1752 * update the MTU on the interface that received the Packet Too Big in
1753 * case the original packet was forced out that interface with
1754 * SO_BINDTODEVICE or similar. This is the next best thing to the
1755 * correct behaviour, which would be to update the MTU on all
1756 * interfaces.
1757 */
1758 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1759 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1760}
1761
1da177e4
LT
1762/*
1763 * Misc support functions
1764 */
1765
21efcfa0
ED
1766static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1767 const struct in6_addr *dest)
1da177e4 1768{
c346dca1 1769 struct net *net = dev_net(ort->rt6i_dev);
5c1e6aa3 1770 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 1771 ort->dst.dev, 0);
1da177e4
LT
1772
1773 if (rt) {
d8d1f30b
CG
1774 rt->dst.input = ort->dst.input;
1775 rt->dst.output = ort->dst.output;
8e2ec639 1776 rt->dst.flags |= DST_HOST;
d8d1f30b 1777
21efcfa0 1778 ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
8e2ec639 1779 rt->rt6i_dst.plen = 128;
defb3519 1780 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1781 rt->dst.error = ort->dst.error;
1da177e4
LT
1782 rt->rt6i_idev = ort->rt6i_idev;
1783 if (rt->rt6i_idev)
1784 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1785 rt->dst.lastuse = jiffies;
1da177e4
LT
1786 rt->rt6i_expires = 0;
1787
1788 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1789 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1790 rt->rt6i_metric = 0;
1791
1da177e4
LT
1792#ifdef CONFIG_IPV6_SUBTREES
1793 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1794#endif
0f6c6392 1795 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1796 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1797 }
1798 return rt;
1799}
1800
70ceb4f5 1801#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1802static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1803 const struct in6_addr *prefix, int prefixlen,
1804 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1805{
1806 struct fib6_node *fn;
1807 struct rt6_info *rt = NULL;
c71099ac
TG
1808 struct fib6_table *table;
1809
efa2cea0 1810 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1811 if (table == NULL)
1812 return NULL;
70ceb4f5 1813
c71099ac
TG
1814 write_lock_bh(&table->tb6_lock);
1815 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1816 if (!fn)
1817 goto out;
1818
d8d1f30b 1819 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1820 if (rt->rt6i_dev->ifindex != ifindex)
1821 continue;
1822 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1823 continue;
1824 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1825 continue;
d8d1f30b 1826 dst_hold(&rt->dst);
70ceb4f5
YH
1827 break;
1828 }
1829out:
c71099ac 1830 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1831 return rt;
1832}
1833
efa2cea0 1834static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1835 const struct in6_addr *prefix, int prefixlen,
1836 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5
YH
1837 unsigned pref)
1838{
86872cb5
TG
1839 struct fib6_config cfg = {
1840 .fc_table = RT6_TABLE_INFO,
238fc7ea 1841 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1842 .fc_ifindex = ifindex,
1843 .fc_dst_len = prefixlen,
1844 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1845 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1846 .fc_nlinfo.pid = 0,
1847 .fc_nlinfo.nlh = NULL,
1848 .fc_nlinfo.nl_net = net,
86872cb5
TG
1849 };
1850
1851 ipv6_addr_copy(&cfg.fc_dst, prefix);
1852 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1853
e317da96
YH
1854 /* We should treat it as a default route if prefix length is 0. */
1855 if (!prefixlen)
86872cb5 1856 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1857
86872cb5 1858 ip6_route_add(&cfg);
70ceb4f5 1859
efa2cea0 1860 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1861}
1862#endif
1863
b71d1d42 1864struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1865{
1da177e4 1866 struct rt6_info *rt;
c71099ac 1867 struct fib6_table *table;
1da177e4 1868
c346dca1 1869 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1870 if (table == NULL)
1871 return NULL;
1da177e4 1872
c71099ac 1873 write_lock_bh(&table->tb6_lock);
d8d1f30b 1874 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1875 if (dev == rt->rt6i_dev &&
045927ff 1876 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1877 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1878 break;
1879 }
1880 if (rt)
d8d1f30b 1881 dst_hold(&rt->dst);
c71099ac 1882 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1883 return rt;
1884}
1885
b71d1d42 1886struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1887 struct net_device *dev,
1888 unsigned int pref)
1da177e4 1889{
86872cb5
TG
1890 struct fib6_config cfg = {
1891 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1892 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1893 .fc_ifindex = dev->ifindex,
1894 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1895 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1896 .fc_nlinfo.pid = 0,
1897 .fc_nlinfo.nlh = NULL,
c346dca1 1898 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1899 };
1da177e4 1900
86872cb5 1901 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1902
86872cb5 1903 ip6_route_add(&cfg);
1da177e4 1904
1da177e4
LT
1905 return rt6_get_dflt_router(gwaddr, dev);
1906}
1907
7b4da532 1908void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1909{
1910 struct rt6_info *rt;
c71099ac
TG
1911 struct fib6_table *table;
1912
1913 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1914 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1915 if (table == NULL)
1916 return;
1da177e4
LT
1917
1918restart:
c71099ac 1919 read_lock_bh(&table->tb6_lock);
d8d1f30b 1920 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1921 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1922 dst_hold(&rt->dst);
c71099ac 1923 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1924 ip6_del_rt(rt);
1da177e4
LT
1925 goto restart;
1926 }
1927 }
c71099ac 1928 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1929}
1930
5578689a
DL
1931static void rtmsg_to_fib6_config(struct net *net,
1932 struct in6_rtmsg *rtmsg,
86872cb5
TG
1933 struct fib6_config *cfg)
1934{
1935 memset(cfg, 0, sizeof(*cfg));
1936
1937 cfg->fc_table = RT6_TABLE_MAIN;
1938 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1939 cfg->fc_metric = rtmsg->rtmsg_metric;
1940 cfg->fc_expires = rtmsg->rtmsg_info;
1941 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1942 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1943 cfg->fc_flags = rtmsg->rtmsg_flags;
1944
5578689a 1945 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1946
86872cb5
TG
1947 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1948 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1949 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1950}
1951
5578689a 1952int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1953{
86872cb5 1954 struct fib6_config cfg;
1da177e4
LT
1955 struct in6_rtmsg rtmsg;
1956 int err;
1957
1958 switch(cmd) {
1959 case SIOCADDRT: /* Add a route */
1960 case SIOCDELRT: /* Delete a route */
1961 if (!capable(CAP_NET_ADMIN))
1962 return -EPERM;
1963 err = copy_from_user(&rtmsg, arg,
1964 sizeof(struct in6_rtmsg));
1965 if (err)
1966 return -EFAULT;
86872cb5 1967
5578689a 1968 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1969
1da177e4
LT
1970 rtnl_lock();
1971 switch (cmd) {
1972 case SIOCADDRT:
86872cb5 1973 err = ip6_route_add(&cfg);
1da177e4
LT
1974 break;
1975 case SIOCDELRT:
86872cb5 1976 err = ip6_route_del(&cfg);
1da177e4
LT
1977 break;
1978 default:
1979 err = -EINVAL;
1980 }
1981 rtnl_unlock();
1982
1983 return err;
3ff50b79 1984 }
1da177e4
LT
1985
1986 return -EINVAL;
1987}
1988
1989/*
1990 * Drop the packet on the floor
1991 */
1992
d5fdd6ba 1993static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1994{
612f09e8 1995 int type;
adf30907 1996 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1997 switch (ipstats_mib_noroutes) {
1998 case IPSTATS_MIB_INNOROUTES:
0660e03f 1999 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2000 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2001 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2002 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2003 break;
2004 }
2005 /* FALLTHROUGH */
2006 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2007 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2008 ipstats_mib_noroutes);
612f09e8
YH
2009 break;
2010 }
3ffe533c 2011 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2012 kfree_skb(skb);
2013 return 0;
2014}
2015
9ce8ade0
TG
2016static int ip6_pkt_discard(struct sk_buff *skb)
2017{
612f09e8 2018 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2019}
2020
20380731 2021static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2022{
adf30907 2023 skb->dev = skb_dst(skb)->dev;
612f09e8 2024 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2025}
2026
6723ab54
DM
2027#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2028
9ce8ade0
TG
2029static int ip6_pkt_prohibit(struct sk_buff *skb)
2030{
612f09e8 2031 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2032}
2033
2034static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2035{
adf30907 2036 skb->dev = skb_dst(skb)->dev;
612f09e8 2037 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2038}
2039
6723ab54
DM
2040#endif
2041
1da177e4
LT
2042/*
2043 * Allocate a dst for local (unicast / anycast) address.
2044 */
2045
2046struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2047 const struct in6_addr *addr,
2048 int anycast)
2049{
c346dca1 2050 struct net *net = dev_net(idev->dev);
5c1e6aa3 2051 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 2052 net->loopback_dev, 0);
14deae41 2053 struct neighbour *neigh;
1da177e4 2054
40385653
BG
2055 if (rt == NULL) {
2056 if (net_ratelimit())
2057 pr_warning("IPv6: Maximum number of routes reached,"
2058 " consider increasing route/max_size.\n");
1da177e4 2059 return ERR_PTR(-ENOMEM);
40385653 2060 }
1da177e4 2061
1da177e4
LT
2062 in6_dev_hold(idev);
2063
11d53b49 2064 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2065 rt->dst.input = ip6_input;
2066 rt->dst.output = ip6_output;
1da177e4 2067 rt->rt6i_idev = idev;
d8d1f30b 2068 rt->dst.obsolete = -1;
1da177e4
LT
2069
2070 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2071 if (anycast)
2072 rt->rt6i_flags |= RTF_ANYCAST;
2073 else
1da177e4 2074 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
2075 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2076 if (IS_ERR(neigh)) {
d8d1f30b 2077 dst_free(&rt->dst);
14deae41 2078
29546a64 2079 return ERR_CAST(neigh);
1da177e4 2080 }
69cce1d1 2081 dst_set_neighbour(&rt->dst, neigh);
1da177e4
LT
2082
2083 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2084 rt->rt6i_dst.plen = 128;
5578689a 2085 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2086
d8d1f30b 2087 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2088
2089 return rt;
2090}
2091
c3968a85
DW
2092int ip6_route_get_saddr(struct net *net,
2093 struct rt6_info *rt,
b71d1d42 2094 const struct in6_addr *daddr,
c3968a85
DW
2095 unsigned int prefs,
2096 struct in6_addr *saddr)
2097{
2098 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2099 int err = 0;
2100 if (rt->rt6i_prefsrc.plen)
2101 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2102 else
2103 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2104 daddr, prefs, saddr);
2105 return err;
2106}
2107
2108/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device	*dev;	/* device to match, NULL matches any */
	struct net		*net;	/* namespace whose null entry is skipped */
	struct in6_addr		*addr;	/* address compared against prefsrc */
};
2114
2115static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2116{
2117 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2118 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2119 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2120
2121 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2122 rt != net->ipv6.ip6_null_entry &&
2123 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2124 /* remove prefsrc entry */
2125 rt->rt6i_prefsrc.plen = 0;
2126 }
2127 return 0;
2128}
2129
2130void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2131{
2132 struct net *net = dev_net(ifp->idev->dev);
2133 struct arg_dev_net_ip adni = {
2134 .dev = ifp->idev->dev,
2135 .net = net,
2136 .addr = &ifp->addr,
2137 };
2138 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2139}
2140
8ed67789
DL
struct arg_dev_net {
	struct net_device	*dev;	/* device going down, NULL matches any */
	struct net		*net;	/* namespace whose null entry is kept */
};
2145
1da177e4
LT
2146static int fib6_ifdown(struct rt6_info *rt, void *arg)
2147{
bc3ef660 2148 const struct arg_dev_net *adn = arg;
2149 const struct net_device *dev = adn->dev;
8ed67789 2150
bc3ef660 2151 if ((rt->rt6i_dev == dev || dev == NULL) &&
2152 rt != adn->net->ipv6.ip6_null_entry) {
1da177e4
LT
2153 RT6_TRACE("deleted by ifdown %p\n", rt);
2154 return -1;
2155 }
2156 return 0;
2157}
2158
f3db4851 2159void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2160{
8ed67789
DL
2161 struct arg_dev_net adn = {
2162 .dev = dev,
2163 .net = net,
2164 };
2165
2166 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2167 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2168}
2169
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* the new MTU */
};
2175
2176static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2177{
2178 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2179 struct inet6_dev *idev;
2180
2181 /* In IPv6 pmtu discovery is not optional,
2182 so that RTAX_MTU lock cannot disable it.
2183 We still use this lock to block changes
2184 caused by addrconf/ndisc.
2185 */
2186
2187 idev = __in6_dev_get(arg->dev);
2188 if (idev == NULL)
2189 return 0;
2190
2191 /* For administrative MTU increase, there is no way to discover
2192 IPv6 PMTU increase, so PMTU increase should be updated here.
2193 Since RFC 1981 doesn't include administrative MTU increase
2194 update PMTU increase is a MUST. (i.e. jumbo frame)
2195 */
2196 /*
2197 If new MTU is less than route PMTU, this new MTU will be the
2198 lowest MTU in the path, update the route PMTU to reflect PMTU
2199 decreases; if new MTU is greater than route PMTU, and the
2200 old MTU is the lowest MTU in the path, update the route PMTU
2201 to reflect the increase. In this case if the other nodes' MTU
2202 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2203 PMTU discouvery.
2204 */
2205 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2206 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2207 (dst_mtu(&rt->dst) >= arg->mtu ||
2208 (dst_mtu(&rt->dst) < arg->mtu &&
2209 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2210 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2211 }
1da177e4
LT
2212 return 0;
2213}
2214
2215void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2216{
c71099ac
TG
2217 struct rt6_mtu_change_arg arg = {
2218 .dev = dev,
2219 .mtu = mtu,
2220 };
1da177e4 2221
c346dca1 2222 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2223}
2224
ef7c79ed 2225static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2226 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2227 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2228 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2229 [RTA_PRIORITY] = { .type = NLA_U32 },
2230 [RTA_METRICS] = { .type = NLA_NESTED },
2231};
2232
2233static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2234 struct fib6_config *cfg)
1da177e4 2235{
86872cb5
TG
2236 struct rtmsg *rtm;
2237 struct nlattr *tb[RTA_MAX+1];
2238 int err;
1da177e4 2239
86872cb5
TG
2240 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2241 if (err < 0)
2242 goto errout;
1da177e4 2243
86872cb5
TG
2244 err = -EINVAL;
2245 rtm = nlmsg_data(nlh);
2246 memset(cfg, 0, sizeof(*cfg));
2247
2248 cfg->fc_table = rtm->rtm_table;
2249 cfg->fc_dst_len = rtm->rtm_dst_len;
2250 cfg->fc_src_len = rtm->rtm_src_len;
2251 cfg->fc_flags = RTF_UP;
2252 cfg->fc_protocol = rtm->rtm_protocol;
2253
2254 if (rtm->rtm_type == RTN_UNREACHABLE)
2255 cfg->fc_flags |= RTF_REJECT;
2256
ab79ad14
2257 if (rtm->rtm_type == RTN_LOCAL)
2258 cfg->fc_flags |= RTF_LOCAL;
2259
86872cb5
TG
2260 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2261 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2262 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2263
2264 if (tb[RTA_GATEWAY]) {
2265 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2266 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2267 }
86872cb5
TG
2268
2269 if (tb[RTA_DST]) {
2270 int plen = (rtm->rtm_dst_len + 7) >> 3;
2271
2272 if (nla_len(tb[RTA_DST]) < plen)
2273 goto errout;
2274
2275 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2276 }
86872cb5
TG
2277
2278 if (tb[RTA_SRC]) {
2279 int plen = (rtm->rtm_src_len + 7) >> 3;
2280
2281 if (nla_len(tb[RTA_SRC]) < plen)
2282 goto errout;
2283
2284 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2285 }
86872cb5 2286
c3968a85
DW
2287 if (tb[RTA_PREFSRC])
2288 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2289
86872cb5
TG
2290 if (tb[RTA_OIF])
2291 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2292
2293 if (tb[RTA_PRIORITY])
2294 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2295
2296 if (tb[RTA_METRICS]) {
2297 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2298 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2299 }
86872cb5
TG
2300
2301 if (tb[RTA_TABLE])
2302 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2303
2304 err = 0;
2305errout:
2306 return err;
1da177e4
LT
2307}
2308
c127ea2c 2309static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2310{
86872cb5
TG
2311 struct fib6_config cfg;
2312 int err;
1da177e4 2313
86872cb5
TG
2314 err = rtm_to_fib6_config(skb, nlh, &cfg);
2315 if (err < 0)
2316 return err;
2317
2318 return ip6_route_del(&cfg);
1da177e4
LT
2319}
2320
c127ea2c 2321static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2322{
86872cb5
TG
2323 struct fib6_config cfg;
2324 int err;
1da177e4 2325
86872cb5
TG
2326 err = rtm_to_fib6_config(skb, nlh, &cfg);
2327 if (err < 0)
2328 return err;
2329
2330 return ip6_route_add(&cfg);
1da177e4
LT
2331}
2332
339bf98f
TG
2333static inline size_t rt6_nlmsg_size(void)
2334{
2335 return NLMSG_ALIGN(sizeof(struct rtmsg))
2336 + nla_total_size(16) /* RTA_SRC */
2337 + nla_total_size(16) /* RTA_DST */
2338 + nla_total_size(16) /* RTA_GATEWAY */
2339 + nla_total_size(16) /* RTA_PREFSRC */
2340 + nla_total_size(4) /* RTA_TABLE */
2341 + nla_total_size(4) /* RTA_IIF */
2342 + nla_total_size(4) /* RTA_OIF */
2343 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2344 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2345 + nla_total_size(sizeof(struct rta_cacheinfo));
2346}
2347
191cd582
BH
2348static int rt6_fill_node(struct net *net,
2349 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2350 struct in6_addr *dst, struct in6_addr *src,
2351 int iif, int type, u32 pid, u32 seq,
7bc570c8 2352 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2353{
2354 struct rtmsg *rtm;
2d7202bf 2355 struct nlmsghdr *nlh;
e3703b3d 2356 long expires;
9e762a4a 2357 u32 table;
f2c31e32 2358 struct neighbour *n;
1da177e4
LT
2359
2360 if (prefix) { /* user wants prefix routes only */
2361 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2362 /* success since this is not a prefix route */
2363 return 1;
2364 }
2365 }
2366
2d7202bf
TG
2367 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2368 if (nlh == NULL)
26932566 2369 return -EMSGSIZE;
2d7202bf
TG
2370
2371 rtm = nlmsg_data(nlh);
1da177e4
LT
2372 rtm->rtm_family = AF_INET6;
2373 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2374 rtm->rtm_src_len = rt->rt6i_src.plen;
2375 rtm->rtm_tos = 0;
c71099ac 2376 if (rt->rt6i_table)
9e762a4a 2377 table = rt->rt6i_table->tb6_id;
c71099ac 2378 else
9e762a4a
PM
2379 table = RT6_TABLE_UNSPEC;
2380 rtm->rtm_table = table;
2d7202bf 2381 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2382 if (rt->rt6i_flags&RTF_REJECT)
2383 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2384 else if (rt->rt6i_flags&RTF_LOCAL)
2385 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2386 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2387 rtm->rtm_type = RTN_LOCAL;
2388 else
2389 rtm->rtm_type = RTN_UNICAST;
2390 rtm->rtm_flags = 0;
2391 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2392 rtm->rtm_protocol = rt->rt6i_protocol;
2393 if (rt->rt6i_flags&RTF_DYNAMIC)
2394 rtm->rtm_protocol = RTPROT_REDIRECT;
2395 else if (rt->rt6i_flags & RTF_ADDRCONF)
2396 rtm->rtm_protocol = RTPROT_KERNEL;
2397 else if (rt->rt6i_flags&RTF_DEFAULT)
2398 rtm->rtm_protocol = RTPROT_RA;
2399
2400 if (rt->rt6i_flags&RTF_CACHE)
2401 rtm->rtm_flags |= RTM_F_CLONED;
2402
2403 if (dst) {
2d7202bf 2404 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2405 rtm->rtm_dst_len = 128;
1da177e4 2406 } else if (rtm->rtm_dst_len)
2d7202bf 2407 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2408#ifdef CONFIG_IPV6_SUBTREES
2409 if (src) {
2d7202bf 2410 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2411 rtm->rtm_src_len = 128;
1da177e4 2412 } else if (rtm->rtm_src_len)
2d7202bf 2413 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2414#endif
7bc570c8
YH
2415 if (iif) {
2416#ifdef CONFIG_IPV6_MROUTE
2417 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2418 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2419 if (err <= 0) {
2420 if (!nowait) {
2421 if (err == 0)
2422 return 0;
2423 goto nla_put_failure;
2424 } else {
2425 if (err == -EMSGSIZE)
2426 goto nla_put_failure;
2427 }
2428 }
2429 } else
2430#endif
2431 NLA_PUT_U32(skb, RTA_IIF, iif);
2432 } else if (dst) {
1da177e4 2433 struct in6_addr saddr_buf;
c3968a85 2434 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2d7202bf 2435 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2436 }
2d7202bf 2437
c3968a85
DW
2438 if (rt->rt6i_prefsrc.plen) {
2439 struct in6_addr saddr_buf;
2440 ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2441 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2442 }
2443
defb3519 2444 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2445 goto nla_put_failure;
2446
f2c31e32
ED
2447 rcu_read_lock();
2448 n = dst_get_neighbour(&rt->dst);
2449 if (n)
2450 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2451 rcu_read_unlock();
2d7202bf 2452
d8d1f30b 2453 if (rt->dst.dev)
2d7202bf
TG
2454 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2455
2456 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2457
36e3deae
YH
2458 if (!(rt->rt6i_flags & RTF_EXPIRES))
2459 expires = 0;
2460 else if (rt->rt6i_expires - jiffies < INT_MAX)
2461 expires = rt->rt6i_expires - jiffies;
2462 else
2463 expires = INT_MAX;
69cdf8f9 2464
d8d1f30b
CG
2465 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2466 expires, rt->dst.error) < 0)
e3703b3d 2467 goto nla_put_failure;
2d7202bf
TG
2468
2469 return nlmsg_end(skb, nlh);
2470
2471nla_put_failure:
26932566
PM
2472 nlmsg_cancel(skb, nlh);
2473 return -EMSGSIZE;
1da177e4
LT
2474}
2475
1b43af54 2476int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2477{
2478 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2479 int prefix;
2480
2d7202bf
TG
2481 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2482 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2483 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2484 } else
2485 prefix = 0;
2486
191cd582
BH
2487 return rt6_fill_node(arg->net,
2488 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2489 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2490 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2491}
2492
c127ea2c 2493static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2494{
3b1e0a65 2495 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2496 struct nlattr *tb[RTA_MAX+1];
2497 struct rt6_info *rt;
1da177e4 2498 struct sk_buff *skb;
ab364a6f 2499 struct rtmsg *rtm;
4c9483b2 2500 struct flowi6 fl6;
ab364a6f 2501 int err, iif = 0;
1da177e4 2502
ab364a6f
TG
2503 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2504 if (err < 0)
2505 goto errout;
1da177e4 2506
ab364a6f 2507 err = -EINVAL;
4c9483b2 2508 memset(&fl6, 0, sizeof(fl6));
1da177e4 2509
ab364a6f
TG
2510 if (tb[RTA_SRC]) {
2511 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2512 goto errout;
2513
4c9483b2 2514 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
ab364a6f
TG
2515 }
2516
2517 if (tb[RTA_DST]) {
2518 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2519 goto errout;
2520
4c9483b2 2521 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
ab364a6f
TG
2522 }
2523
2524 if (tb[RTA_IIF])
2525 iif = nla_get_u32(tb[RTA_IIF]);
2526
2527 if (tb[RTA_OIF])
4c9483b2 2528 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2529
2530 if (iif) {
2531 struct net_device *dev;
5578689a 2532 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2533 if (!dev) {
2534 err = -ENODEV;
ab364a6f 2535 goto errout;
1da177e4
LT
2536 }
2537 }
2538
ab364a6f
TG
2539 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2540 if (skb == NULL) {
2541 err = -ENOBUFS;
2542 goto errout;
2543 }
1da177e4 2544
ab364a6f
TG
2545 /* Reserve room for dummy headers, this skb can pass
2546 through good chunk of routing engine.
2547 */
459a98ed 2548 skb_reset_mac_header(skb);
ab364a6f 2549 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2550
4c9483b2 2551 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
d8d1f30b 2552 skb_dst_set(skb, &rt->dst);
1da177e4 2553
4c9483b2 2554 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2555 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2556 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2557 if (err < 0) {
ab364a6f
TG
2558 kfree_skb(skb);
2559 goto errout;
1da177e4
LT
2560 }
2561
5578689a 2562 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2563errout:
1da177e4 2564 return err;
1da177e4
LT
2565}
2566
86872cb5 2567void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2568{
2569 struct sk_buff *skb;
5578689a 2570 struct net *net = info->nl_net;
528c4ceb
DL
2571 u32 seq;
2572 int err;
2573
2574 err = -ENOBUFS;
2575 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2576
339bf98f 2577 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2578 if (skb == NULL)
2579 goto errout;
2580
191cd582 2581 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2582 event, info->pid, seq, 0, 0, 0);
26932566
PM
2583 if (err < 0) {
2584 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2585 WARN_ON(err == -EMSGSIZE);
2586 kfree_skb(skb);
2587 goto errout;
2588 }
1ce85fe4
PNA
2589 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2590 info->nlh, gfp_any());
2591 return;
21713ebc
TG
2592errout:
2593 if (err < 0)
5578689a 2594 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2595}
2596
8ed67789
DL
2597static int ip6_route_dev_notify(struct notifier_block *this,
2598 unsigned long event, void *data)
2599{
2600 struct net_device *dev = (struct net_device *)data;
c346dca1 2601 struct net *net = dev_net(dev);
8ed67789
DL
2602
2603 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2604 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2605 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2606#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2607 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2608 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2609 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2610 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2611#endif
2612 }
2613
2614 return NOTIFY_OK;
2615}
2616
1da177e4
LT
2617/*
2618 * /proc
2619 */
2620
2621#ifdef CONFIG_PROC_FS
2622
1da177e4
LT
/* NOTE(review): no user of this struct is visible in this part of the file. */
struct rt6_proc_arg {
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2631
2632static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2633{
33120b30 2634 struct seq_file *m = p_arg;
69cce1d1 2635 struct neighbour *n;
1da177e4 2636
4b7a4274 2637 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2638
2639#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2640 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2641#else
33120b30 2642 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2643#endif
f2c31e32 2644 rcu_read_lock();
69cce1d1
DM
2645 n = dst_get_neighbour(&rt->dst);
2646 if (n) {
2647 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2648 } else {
33120b30 2649 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2650 }
f2c31e32 2651 rcu_read_unlock();
33120b30 2652 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2653 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2654 rt->dst.__use, rt->rt6i_flags,
33120b30 2655 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2656 return 0;
2657}
2658
33120b30 2659static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2660{
f3db4851
DL
2661 struct net *net = (struct net *)m->private;
2662 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2663 return 0;
2664}
1da177e4 2665
33120b30
AD
2666static int ipv6_route_open(struct inode *inode, struct file *file)
2667{
de05c557 2668 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2669}
2670
33120b30
AD
2671static const struct file_operations ipv6_route_proc_fops = {
2672 .owner = THIS_MODULE,
2673 .open = ipv6_route_open,
2674 .read = seq_read,
2675 .llseek = seq_lseek,
b6fcbdb4 2676 .release = single_release_net,
33120b30
AD
2677};
2678
1da177e4
LT
2679static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2680{
69ddb805 2681 struct net *net = (struct net *)seq->private;
1da177e4 2682 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2683 net->ipv6.rt6_stats->fib_nodes,
2684 net->ipv6.rt6_stats->fib_route_nodes,
2685 net->ipv6.rt6_stats->fib_rt_alloc,
2686 net->ipv6.rt6_stats->fib_rt_entries,
2687 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2688 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2689 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2690
2691 return 0;
2692}
2693
2694static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2695{
de05c557 2696 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2697}
2698
9a32144e 2699static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2700 .owner = THIS_MODULE,
2701 .open = rt6_stats_seq_open,
2702 .read = seq_read,
2703 .llseek = seq_lseek,
b6fcbdb4 2704 .release = single_release_net,
1da177e4
LT
2705};
2706#endif /* CONFIG_PROC_FS */
2707
2708#ifdef CONFIG_SYSCTL
2709
1da177e4 2710static
8d65af78 2711int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2712 void __user *buffer, size_t *lenp, loff_t *ppos)
2713{
c486da34
LAG
2714 struct net *net;
2715 int delay;
2716 if (!write)
1da177e4 2717 return -EINVAL;
c486da34
LAG
2718
2719 net = (struct net *)ctl->extra1;
2720 delay = net->ipv6.sysctl.flush_delay;
2721 proc_dointvec(ctl, write, buffer, lenp, ppos);
2722 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2723 return 0;
1da177e4
LT
2724}
2725
760f2d01 2726ctl_table ipv6_route_table_template[] = {
1ab1457c 2727 {
1da177e4 2728 .procname = "flush",
4990509f 2729 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2730 .maxlen = sizeof(int),
89c8b3a1 2731 .mode = 0200,
6d9f239a 2732 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2733 },
2734 {
1da177e4 2735 .procname = "gc_thresh",
9a7ec3a9 2736 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2737 .maxlen = sizeof(int),
2738 .mode = 0644,
6d9f239a 2739 .proc_handler = proc_dointvec,
1da177e4
LT
2740 },
2741 {
1da177e4 2742 .procname = "max_size",
4990509f 2743 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2744 .maxlen = sizeof(int),
2745 .mode = 0644,
6d9f239a 2746 .proc_handler = proc_dointvec,
1da177e4
LT
2747 },
2748 {
1da177e4 2749 .procname = "gc_min_interval",
4990509f 2750 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2751 .maxlen = sizeof(int),
2752 .mode = 0644,
6d9f239a 2753 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2754 },
2755 {
1da177e4 2756 .procname = "gc_timeout",
4990509f 2757 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2758 .maxlen = sizeof(int),
2759 .mode = 0644,
6d9f239a 2760 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2761 },
2762 {
1da177e4 2763 .procname = "gc_interval",
4990509f 2764 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2765 .maxlen = sizeof(int),
2766 .mode = 0644,
6d9f239a 2767 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2768 },
2769 {
1da177e4 2770 .procname = "gc_elasticity",
4990509f 2771 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2772 .maxlen = sizeof(int),
2773 .mode = 0644,
f3d3f616 2774 .proc_handler = proc_dointvec,
1da177e4
LT
2775 },
2776 {
1da177e4 2777 .procname = "mtu_expires",
4990509f 2778 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2779 .maxlen = sizeof(int),
2780 .mode = 0644,
6d9f239a 2781 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2782 },
2783 {
1da177e4 2784 .procname = "min_adv_mss",
4990509f 2785 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2786 .maxlen = sizeof(int),
2787 .mode = 0644,
f3d3f616 2788 .proc_handler = proc_dointvec,
1da177e4
LT
2789 },
2790 {
1da177e4 2791 .procname = "gc_min_interval_ms",
4990509f 2792 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2793 .maxlen = sizeof(int),
2794 .mode = 0644,
6d9f239a 2795 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2796 },
f8572d8f 2797 { }
1da177e4
LT
2798};
2799
2c8c1e72 2800struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2801{
2802 struct ctl_table *table;
2803
2804 table = kmemdup(ipv6_route_table_template,
2805 sizeof(ipv6_route_table_template),
2806 GFP_KERNEL);
5ee09105
YH
2807
2808 if (table) {
2809 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2810 table[0].extra1 = net;
86393e52 2811 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2812 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2813 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2814 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2815 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2816 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2817 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2818 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2819 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2820 }
2821
760f2d01
DL
2822 return table;
2823}
1da177e4
LT
2824#endif
2825
2c8c1e72 2826static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2827{
633d424b 2828 int ret = -ENOMEM;
8ed67789 2829
86393e52
AD
2830 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2831 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2832
fc66f95c
ED
2833 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2834 goto out_ip6_dst_ops;
2835
8ed67789
DL
2836 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2837 sizeof(*net->ipv6.ip6_null_entry),
2838 GFP_KERNEL);
2839 if (!net->ipv6.ip6_null_entry)
fc66f95c 2840 goto out_ip6_dst_entries;
d8d1f30b 2841 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2842 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2843 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2844 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2845 ip6_template_metrics, true);
8ed67789
DL
2846
2847#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2848 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2849 sizeof(*net->ipv6.ip6_prohibit_entry),
2850 GFP_KERNEL);
68fffc67
PZ
2851 if (!net->ipv6.ip6_prohibit_entry)
2852 goto out_ip6_null_entry;
d8d1f30b 2853 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2854 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2855 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2856 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2857 ip6_template_metrics, true);
8ed67789
DL
2858
2859 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2860 sizeof(*net->ipv6.ip6_blk_hole_entry),
2861 GFP_KERNEL);
68fffc67
PZ
2862 if (!net->ipv6.ip6_blk_hole_entry)
2863 goto out_ip6_prohibit_entry;
d8d1f30b 2864 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2865 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2866 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2867 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2868 ip6_template_metrics, true);
8ed67789
DL
2869#endif
2870
b339a47c
PZ
2871 net->ipv6.sysctl.flush_delay = 0;
2872 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2873 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2874 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2875 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2876 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2877 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2878 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2879
cdb18761
DL
2880#ifdef CONFIG_PROC_FS
2881 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2882 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2883#endif
6891a346
BT
2884 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2885
8ed67789
DL
2886 ret = 0;
2887out:
2888 return ret;
f2fc6a54 2889
68fffc67
PZ
2890#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2891out_ip6_prohibit_entry:
2892 kfree(net->ipv6.ip6_prohibit_entry);
2893out_ip6_null_entry:
2894 kfree(net->ipv6.ip6_null_entry);
2895#endif
fc66f95c
ED
2896out_ip6_dst_entries:
2897 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2898out_ip6_dst_ops:
f2fc6a54 2899 goto out;
cdb18761
DL
2900}
2901
2c8c1e72 2902static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2903{
2904#ifdef CONFIG_PROC_FS
2905 proc_net_remove(net, "ipv6_route");
2906 proc_net_remove(net, "rt6_stats");
2907#endif
8ed67789
DL
2908 kfree(net->ipv6.ip6_null_entry);
2909#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2910 kfree(net->ipv6.ip6_prohibit_entry);
2911 kfree(net->ipv6.ip6_blk_hole_entry);
2912#endif
41bb78b4 2913 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2914}
2915
2916static struct pernet_operations ip6_route_net_ops = {
2917 .init = ip6_route_net_init,
2918 .exit = ip6_route_net_exit,
2919};
2920
8ed67789
DL
2921static struct notifier_block ip6_route_dev_notifier = {
2922 .notifier_call = ip6_route_dev_notify,
2923 .priority = 0,
2924};
2925
433d49c3 2926int __init ip6_route_init(void)
1da177e4 2927{
433d49c3
DL
2928 int ret;
2929
9a7ec3a9
DL
2930 ret = -ENOMEM;
2931 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2932 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2933 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2934 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2935 goto out;
14e50e57 2936
fc66f95c 2937 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2938 if (ret)
bdb3289f 2939 goto out_kmem_cache;
bdb3289f 2940
fc66f95c
ED
2941 ret = register_pernet_subsys(&ip6_route_net_ops);
2942 if (ret)
2943 goto out_dst_entries;
2944
5dc121e9
AE
2945 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2946
8ed67789
DL
2947 /* Registering of the loopback is done before this portion of code,
2948 * the loopback reference in rt6_info will not be taken, do it
2949 * manually for init_net */
d8d1f30b 2950 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2951 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2952 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2953 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2954 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2955 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2956 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2957 #endif
433d49c3
DL
2958 ret = fib6_init();
2959 if (ret)
8ed67789 2960 goto out_register_subsys;
433d49c3 2961
433d49c3
DL
2962 ret = xfrm6_init();
2963 if (ret)
cdb18761 2964 goto out_fib6_init;
c35b7e72 2965
433d49c3
DL
2966 ret = fib6_rules_init();
2967 if (ret)
2968 goto xfrm6_init;
7e5449c2 2969
433d49c3 2970 ret = -ENOBUFS;
c7ac8679
GR
2971 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2972 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2973 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 2974 goto fib6_rules_init;
c127ea2c 2975
8ed67789 2976 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2977 if (ret)
2978 goto fib6_rules_init;
8ed67789 2979
433d49c3
DL
2980out:
2981 return ret;
2982
2983fib6_rules_init:
433d49c3
DL
2984 fib6_rules_cleanup();
2985xfrm6_init:
433d49c3 2986 xfrm6_fini();
433d49c3 2987out_fib6_init:
433d49c3 2988 fib6_gc_cleanup();
8ed67789
DL
2989out_register_subsys:
2990 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
2991out_dst_entries:
2992 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 2993out_kmem_cache:
f2fc6a54 2994 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2995 goto out;
1da177e4
LT
2996}
2997
2998void ip6_route_cleanup(void)
2999{
8ed67789 3000 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 3001 fib6_rules_cleanup();
1da177e4 3002 xfrm6_fini();
1da177e4 3003 fib6_gc_cleanup();
8ed67789 3004 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3005 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3006 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3007}