mlx4: Add missing include of linux/slab.h
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4 28#include <linux/errno.h>
bc3b2d7f 29#include <linux/export.h>
1da177e4
LT
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
7bc570c8 38#include <linux/mroute6.h>
1da177e4 39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
5b7c931d 43#include <linux/nsproxy.h>
5a0e3ad6 44#include <linux/slab.h>
457c4cbc 45#include <net/net_namespace.h>
1da177e4
LT
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
8d71740c 56#include <net/netevent.h>
21713ebc 57#include <net/netlink.h>
1da177e4
LT
58
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
65/* Set to 3 to get tracing. */
66#define RT6_DEBUG 2
67
68#if RT6_DEBUG >= 3
69#define RDBG(x) printk x
70#define RT6_TRACE(x...) printk(KERN_DEBUG x)
71#else
72#define RDBG(x)
73#define RT6_TRACE(x...) do { ; } while (0)
74#endif
75
21efcfa0
ED
76static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
77 const struct in6_addr *dest);
1da177e4 78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 79static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 80static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
81static struct dst_entry *ip6_negative_advice(struct dst_entry *);
82static void ip6_dst_destroy(struct dst_entry *);
83static void ip6_dst_ifdown(struct dst_entry *,
84 struct net_device *dev, int how);
569d3645 85static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
86
87static int ip6_pkt_discard(struct sk_buff *skb);
88static int ip6_pkt_discard_out(struct sk_buff *skb);
89static void ip6_link_failure(struct sk_buff *skb);
90static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
91
70ceb4f5 92#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 93static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
94 const struct in6_addr *prefix, int prefixlen,
95 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5 96 unsigned pref);
efa2cea0 97static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
98 const struct in6_addr *prefix, int prefixlen,
99 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
100#endif
101
06582540
DM
102static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
103{
104 struct rt6_info *rt = (struct rt6_info *) dst;
105 struct inet_peer *peer;
106 u32 *p = NULL;
107
8e2ec639
YZ
108 if (!(rt->dst.flags & DST_HOST))
109 return NULL;
110
06582540
DM
111 if (!rt->rt6i_peer)
112 rt6_bind_peer(rt, 1);
113
114 peer = rt->rt6i_peer;
115 if (peer) {
116 u32 *old_p = __DST_METRICS_PTR(old);
117 unsigned long prev, new;
118
119 p = peer->metrics;
120 if (inet_metrics_new(peer))
121 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
122
123 new = (unsigned long) p;
124 prev = cmpxchg(&dst->_metrics, old, new);
125
126 if (prev != old) {
127 p = __DST_METRICS_PTR(prev);
128 if (prev & DST_METRICS_READ_ONLY)
129 p = NULL;
130 }
131 }
132 return p;
133}
134
d3aaeb38
DM
135static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
136{
137 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
138}
139
9a7ec3a9 140static struct dst_ops ip6_dst_ops_template = {
1da177e4 141 .family = AF_INET6,
09640e63 142 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
143 .gc = ip6_dst_gc,
144 .gc_thresh = 1024,
145 .check = ip6_dst_check,
0dbaee3b 146 .default_advmss = ip6_default_advmss,
ebb762f2 147 .mtu = ip6_mtu,
06582540 148 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
149 .destroy = ip6_dst_destroy,
150 .ifdown = ip6_dst_ifdown,
151 .negative_advice = ip6_negative_advice,
152 .link_failure = ip6_link_failure,
153 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 154 .local_out = __ip6_local_out,
d3aaeb38 155 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
156};
157
ebb762f2 158static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 159{
618f9bc7
SK
160 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
161
162 return mtu ? : dst->dev->mtu;
ec831ea7
RD
163}
164
14e50e57
DM
165static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
166{
167}
168
0972ddb2
HB
169static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
170 unsigned long old)
171{
172 return NULL;
173}
174
14e50e57
DM
175static struct dst_ops ip6_dst_blackhole_ops = {
176 .family = AF_INET6,
09640e63 177 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
178 .destroy = ip6_dst_destroy,
179 .check = ip6_dst_check,
ebb762f2 180 .mtu = ip6_blackhole_mtu,
214f45c9 181 .default_advmss = ip6_default_advmss,
14e50e57 182 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 183 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 184 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
185};
186
62fa8a84
DM
187static const u32 ip6_template_metrics[RTAX_MAX] = {
188 [RTAX_HOPLIMIT - 1] = 255,
189};
190
bdb3289f 191static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
192 .dst = {
193 .__refcnt = ATOMIC_INIT(1),
194 .__use = 1,
195 .obsolete = -1,
196 .error = -ENETUNREACH,
d8d1f30b
CG
197 .input = ip6_pkt_discard,
198 .output = ip6_pkt_discard_out,
1da177e4
LT
199 },
200 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 201 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
202 .rt6i_metric = ~(u32) 0,
203 .rt6i_ref = ATOMIC_INIT(1),
204};
205
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* Template for the "prohibit" reject route; its dst error is -EACCES. */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" reject route; both handlers are
 * dst_discard and its dst error is -EINVAL.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
242
1da177e4 243/* allocate dst with ip6_dst_ops */
5c1e6aa3 244static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
957c665f
DM
245 struct net_device *dev,
246 int flags)
1da177e4 247{
957c665f 248 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
cf911662 249
38308473 250 if (rt)
fbe58186 251 memset(&rt->rt6i_table, 0,
38308473 252 sizeof(*rt) - sizeof(struct dst_entry));
cf911662
DM
253
254 return rt;
1da177e4
LT
255}
256
257static void ip6_dst_destroy(struct dst_entry *dst)
258{
259 struct rt6_info *rt = (struct rt6_info *)dst;
260 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 261 struct inet_peer *peer = rt->rt6i_peer;
1da177e4 262
8e2ec639
YZ
263 if (!(rt->dst.flags & DST_HOST))
264 dst_destroy_metrics_generic(dst);
265
38308473 266 if (idev) {
1da177e4
LT
267 rt->rt6i_idev = NULL;
268 in6_dev_put(idev);
1ab1457c 269 }
b3419363 270 if (peer) {
b3419363
DM
271 rt->rt6i_peer = NULL;
272 inet_putpeer(peer);
273 }
274}
275
6431cbc2
DM
276static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
277
278static u32 rt6_peer_genid(void)
279{
280 return atomic_read(&__rt6_peer_genid);
281}
282
b3419363
DM
283void rt6_bind_peer(struct rt6_info *rt, int create)
284{
285 struct inet_peer *peer;
286
b3419363
DM
287 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
288 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
289 inet_putpeer(peer);
6431cbc2
DM
290 else
291 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
292}
293
294static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
295 int how)
296{
297 struct rt6_info *rt = (struct rt6_info *)dst;
298 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 299 struct net_device *loopback_dev =
c346dca1 300 dev_net(dev)->loopback_dev;
1da177e4 301
38308473 302 if (dev != loopback_dev && idev && idev->dev == dev) {
5a3e55d6
DL
303 struct inet6_dev *loopback_idev =
304 in6_dev_get(loopback_dev);
38308473 305 if (loopback_idev) {
1da177e4
LT
306 rt->rt6i_idev = loopback_idev;
307 in6_dev_put(idev);
308 }
309 }
310}
311
312static __inline__ int rt6_check_expired(const struct rt6_info *rt)
313{
a02cec21
ED
314 return (rt->rt6i_flags & RTF_EXPIRES) &&
315 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
316}
317
b71d1d42 318static inline int rt6_need_strict(const struct in6_addr *daddr)
c71099ac 319{
a02cec21
ED
320 return ipv6_addr_type(daddr) &
321 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
322}
323
1da177e4 324/*
c71099ac 325 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
326 */
327
8ed67789
DL
328static inline struct rt6_info *rt6_device_match(struct net *net,
329 struct rt6_info *rt,
b71d1d42 330 const struct in6_addr *saddr,
1da177e4 331 int oif,
d420895e 332 int flags)
1da177e4
LT
333{
334 struct rt6_info *local = NULL;
335 struct rt6_info *sprt;
336
dd3abc4e
YH
337 if (!oif && ipv6_addr_any(saddr))
338 goto out;
339
d8d1f30b 340 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
341 struct net_device *dev = sprt->rt6i_dev;
342
343 if (oif) {
1da177e4
LT
344 if (dev->ifindex == oif)
345 return sprt;
346 if (dev->flags & IFF_LOOPBACK) {
38308473 347 if (!sprt->rt6i_idev ||
1da177e4 348 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 349 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 350 continue;
1ab1457c 351 if (local && (!oif ||
1da177e4
LT
352 local->rt6i_idev->dev->ifindex == oif))
353 continue;
354 }
355 local = sprt;
356 }
dd3abc4e
YH
357 } else {
358 if (ipv6_chk_addr(net, saddr, dev,
359 flags & RT6_LOOKUP_F_IFACE))
360 return sprt;
1da177e4 361 }
dd3abc4e 362 }
1da177e4 363
dd3abc4e 364 if (oif) {
1da177e4
LT
365 if (local)
366 return local;
367
d420895e 368 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 369 return net->ipv6.ip6_null_entry;
1da177e4 370 }
dd3abc4e 371out:
1da177e4
LT
372 return rt;
373}
374
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If @rt has a neighbour that is not in a NUD_VALID state and the
 * per-interface rtr_probe_interval has elapsed since neigh->updated,
 * stamp the neighbour and send a neighbour solicitation to its
 * solicited-node multicast address.  Probing MUST be rate-limited to
 * no more than one per minute.
 *
 * A NULL @rt is accepted and does nothing.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;

	if (neigh && !(neigh->nud_state & NUD_VALID)) {
		int probe_due;

		/* Re-check the state and the rate limit under the lock. */
		read_lock_bh(&neigh->lock);
		probe_due = !(neigh->nud_state & NUD_VALID) &&
			    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
		if (probe_due)
			neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		if (probe_due) {
			struct in6_addr mcaddr;
			struct in6_addr *target;

			target = (struct in6_addr *)&neigh->primary_key;
			addrconf_addr_solict_mult(target, &mcaddr);
			ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
		}
	}

	rcu_read_unlock();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
414
1da177e4 415/*
554cfb7e 416 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 417 */
b6f99a21 418static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
419{
420 struct net_device *dev = rt->rt6i_dev;
161980f4 421 if (!oif || dev->ifindex == oif)
554cfb7e 422 return 2;
161980f4
DM
423 if ((dev->flags & IFF_LOOPBACK) &&
424 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
425 return 1;
426 return 0;
554cfb7e 427}
1da177e4 428
b6f99a21 429static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 430{
f2c31e32 431 struct neighbour *neigh;
398bcbeb 432 int m;
f2c31e32
ED
433
434 rcu_read_lock();
27217455 435 neigh = dst_get_neighbour_noref(&rt->dst);
4d0c5911
YH
436 if (rt->rt6i_flags & RTF_NONEXTHOP ||
437 !(rt->rt6i_flags & RTF_GATEWAY))
438 m = 1;
439 else if (neigh) {
554cfb7e
YH
440 read_lock_bh(&neigh->lock);
441 if (neigh->nud_state & NUD_VALID)
4d0c5911 442 m = 2;
398bcbeb
YH
443#ifdef CONFIG_IPV6_ROUTER_PREF
444 else if (neigh->nud_state & NUD_FAILED)
445 m = 0;
446#endif
447 else
ea73ee23 448 m = 1;
554cfb7e 449 read_unlock_bh(&neigh->lock);
398bcbeb
YH
450 } else
451 m = 0;
f2c31e32 452 rcu_read_unlock();
554cfb7e 453 return m;
1da177e4
LT
454}
455
554cfb7e
YH
456static int rt6_score_route(struct rt6_info *rt, int oif,
457 int strict)
1da177e4 458{
4d0c5911 459 int m, n;
1ab1457c 460
4d0c5911 461 m = rt6_check_dev(rt, oif);
77d16f45 462 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 463 return -1;
ebacaaa0
YH
464#ifdef CONFIG_IPV6_ROUTER_PREF
465 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
466#endif
4d0c5911 467 n = rt6_check_neigh(rt);
557e92ef 468 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
469 return -1;
470 return m;
471}
472
f11e6659
DM
473static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
474 int *mpri, struct rt6_info *match)
554cfb7e 475{
f11e6659
DM
476 int m;
477
478 if (rt6_check_expired(rt))
479 goto out;
480
481 m = rt6_score_route(rt, oif, strict);
482 if (m < 0)
483 goto out;
484
485 if (m > *mpri) {
486 if (strict & RT6_LOOKUP_F_REACHABLE)
487 rt6_probe(match);
488 *mpri = m;
489 match = rt;
490 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
491 rt6_probe(rt);
492 }
493
494out:
495 return match;
496}
497
498static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
499 struct rt6_info *rr_head,
500 u32 metric, int oif, int strict)
501{
502 struct rt6_info *rt, *match;
554cfb7e 503 int mpri = -1;
1da177e4 504
f11e6659
DM
505 match = NULL;
506 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 507 rt = rt->dst.rt6_next)
f11e6659
DM
508 match = find_match(rt, oif, strict, &mpri, match);
509 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 510 rt = rt->dst.rt6_next)
f11e6659 511 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 512
f11e6659
DM
513 return match;
514}
1da177e4 515
f11e6659
DM
516static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
517{
518 struct rt6_info *match, *rt0;
8ed67789 519 struct net *net;
1da177e4 520
f11e6659 521 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 522 __func__, fn->leaf, oif);
554cfb7e 523
f11e6659
DM
524 rt0 = fn->rr_ptr;
525 if (!rt0)
526 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 527
f11e6659 528 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 529
554cfb7e 530 if (!match &&
f11e6659 531 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 532 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 533
554cfb7e 534 /* no entries matched; do round-robin */
f11e6659
DM
535 if (!next || next->rt6i_metric != rt0->rt6i_metric)
536 next = fn->leaf;
537
538 if (next != rt0)
539 fn->rr_ptr = next;
1da177e4 540 }
1da177e4 541
f11e6659 542 RT6_TRACE("%s() => %p\n",
0dc47877 543 __func__, match);
1da177e4 544
c346dca1 545 net = dev_net(rt0->rt6i_dev);
a02cec21 546 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
547}
548
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option (@opt, @len bytes) received on
 * @dev from gateway @gwaddr.
 *
 * Returns -EINVAL when the option is too short, its length/prefix_len
 * fields are inconsistent, or the preference is
 * ICMPV6_ROUTER_PREF_INVALID.  Otherwise an existing matching route is
 * deleted (zero lifetime) or has its preference updated, or a new one
 * is added, its expiry is set from the lifetime, and 0 is returned.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr masked_prefix;
	struct in6_addr *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&masked_prefix,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &masked_prefix;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->dst);
	}

	return 0;
}
#endif
622
/*
 * Backtracking step for the lookup functions.
 *
 * Expects the enclosing function to provide the variables 'rt' and 'fn'
 * and the labels 'out' and 'restart'.  When 'rt' is the null entry, walk
 * up from 'fn' (looking into a parent's subtree with 'saddr' when there
 * is one that is not 'fn' itself) until a node with RTN_RTINFO is found
 * (goto restart) or the top-level root is reached (goto out).
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 640
8ed67789
DL
641static struct rt6_info *ip6_pol_route_lookup(struct net *net,
642 struct fib6_table *table,
4c9483b2 643 struct flowi6 *fl6, int flags)
1da177e4
LT
644{
645 struct fib6_node *fn;
646 struct rt6_info *rt;
647
c71099ac 648 read_lock_bh(&table->tb6_lock);
4c9483b2 649 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
650restart:
651 rt = fn->leaf;
4c9483b2
DM
652 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
653 BACKTRACK(net, &fl6->saddr);
c71099ac 654out:
d8d1f30b 655 dst_use(&rt->dst, jiffies);
c71099ac 656 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
657 return rt;
658
659}
660
ea6e574e
FW
661struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
662 int flags)
663{
664 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
665}
666EXPORT_SYMBOL_GPL(ip6_route_lookup);
667
9acd9f3a
YH
668struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
669 const struct in6_addr *saddr, int oif, int strict)
c71099ac 670{
4c9483b2
DM
671 struct flowi6 fl6 = {
672 .flowi6_oif = oif,
673 .daddr = *daddr,
c71099ac
TG
674 };
675 struct dst_entry *dst;
77d16f45 676 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 677
adaa70bb 678 if (saddr) {
4c9483b2 679 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
680 flags |= RT6_LOOKUP_F_HAS_SADDR;
681 }
682
4c9483b2 683 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
684 if (dst->error == 0)
685 return (struct rt6_info *) dst;
686
687 dst_release(dst);
688
1da177e4
LT
689 return NULL;
690}
691
7159039a
YH
692EXPORT_SYMBOL(rt6_lookup);
693
/*
 * ip6_ins_rt() is called with table->tb6_lock NOT held.
 * It takes a new route entry; if the addition fails for any reason,
 * the route is freed.  In any case, if the caller does not hold a
 * reference to the route, it may be destroyed.
 */
699
86872cb5 700static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
701{
702 int err;
c71099ac 703 struct fib6_table *table;
1da177e4 704
c71099ac
TG
705 table = rt->rt6i_table;
706 write_lock_bh(&table->tb6_lock);
86872cb5 707 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 708 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
709
710 return err;
711}
712
40e22e8f
TG
713int ip6_ins_rt(struct rt6_info *rt)
714{
4d1169c1 715 struct nl_info info = {
c346dca1 716 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 717 };
528c4ceb 718 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
719}
720
21efcfa0
ED
721static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
722 const struct in6_addr *daddr,
b71d1d42 723 const struct in6_addr *saddr)
1da177e4 724{
1da177e4
LT
725 struct rt6_info *rt;
726
727 /*
728 * Clone the route.
729 */
730
21efcfa0 731 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
732
733 if (rt) {
14deae41
DM
734 struct neighbour *neigh;
735 int attempts = !in_softirq();
736
38308473 737 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 738 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 739 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 740 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 741 rt->rt6i_gateway = *daddr;
58c4fb86 742 }
1da177e4 743
1da177e4 744 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
745
746#ifdef CONFIG_IPV6_SUBTREES
747 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 748 rt->rt6i_src.addr = *saddr;
1da177e4
LT
749 rt->rt6i_src.plen = 128;
750 }
751#endif
752
14deae41 753 retry:
04a6f441
DM
754 neigh = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway,
755 rt->rt6i_dev);
14deae41
DM
756 if (IS_ERR(neigh)) {
757 struct net *net = dev_net(rt->rt6i_dev);
758 int saved_rt_min_interval =
759 net->ipv6.sysctl.ip6_rt_gc_min_interval;
760 int saved_rt_elasticity =
761 net->ipv6.sysctl.ip6_rt_gc_elasticity;
762
763 if (attempts-- > 0) {
764 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
765 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
766
86393e52 767 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
768
769 net->ipv6.sysctl.ip6_rt_gc_elasticity =
770 saved_rt_elasticity;
771 net->ipv6.sysctl.ip6_rt_gc_min_interval =
772 saved_rt_min_interval;
773 goto retry;
774 }
775
776 if (net_ratelimit())
777 printk(KERN_WARNING
7e1b33e5 778 "ipv6: Neighbour table overflow.\n");
d8d1f30b 779 dst_free(&rt->dst);
14deae41
DM
780 return NULL;
781 }
69cce1d1 782 dst_set_neighbour(&rt->dst, neigh);
1da177e4 783
95a9a5ba 784 }
1da177e4 785
95a9a5ba
YH
786 return rt;
787}
1da177e4 788
21efcfa0
ED
789static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
790 const struct in6_addr *daddr)
299d9939 791{
21efcfa0
ED
792 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
793
299d9939 794 if (rt) {
299d9939 795 rt->rt6i_flags |= RTF_CACHE;
27217455 796 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
299d9939
YH
797 }
798 return rt;
799}
800
8ed67789 801static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 802 struct flowi6 *fl6, int flags)
1da177e4
LT
803{
804 struct fib6_node *fn;
519fbd87 805 struct rt6_info *rt, *nrt;
c71099ac 806 int strict = 0;
1da177e4 807 int attempts = 3;
519fbd87 808 int err;
53b7997f 809 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 810
77d16f45 811 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
812
813relookup:
c71099ac 814 read_lock_bh(&table->tb6_lock);
1da177e4 815
8238dd06 816restart_2:
4c9483b2 817 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
818
819restart:
4acad72d 820 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 821
4c9483b2 822 BACKTRACK(net, &fl6->saddr);
8ed67789 823 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 824 rt->rt6i_flags & RTF_CACHE)
1ddef044 825 goto out;
1da177e4 826
d8d1f30b 827 dst_hold(&rt->dst);
c71099ac 828 read_unlock_bh(&table->tb6_lock);
fb9de91e 829
27217455 830 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 831 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 832 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 833 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
834 else
835 goto out2;
e40cf353 836
d8d1f30b 837 dst_release(&rt->dst);
8ed67789 838 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 839
d8d1f30b 840 dst_hold(&rt->dst);
519fbd87 841 if (nrt) {
40e22e8f 842 err = ip6_ins_rt(nrt);
519fbd87 843 if (!err)
1da177e4 844 goto out2;
1da177e4 845 }
1da177e4 846
519fbd87
YH
847 if (--attempts <= 0)
848 goto out2;
849
850 /*
c71099ac 851 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
852 * released someone could insert this route. Relookup.
853 */
d8d1f30b 854 dst_release(&rt->dst);
519fbd87
YH
855 goto relookup;
856
857out:
8238dd06
YH
858 if (reachable) {
859 reachable = 0;
860 goto restart_2;
861 }
d8d1f30b 862 dst_hold(&rt->dst);
c71099ac 863 read_unlock_bh(&table->tb6_lock);
1da177e4 864out2:
d8d1f30b
CG
865 rt->dst.lastuse = jiffies;
866 rt->dst.__use++;
c71099ac
TG
867
868 return rt;
1da177e4
LT
869}
870
8ed67789 871static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 872 struct flowi6 *fl6, int flags)
4acad72d 873{
4c9483b2 874 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
875}
876
c71099ac
TG
877void ip6_route_input(struct sk_buff *skb)
878{
b71d1d42 879 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 880 struct net *net = dev_net(skb->dev);
adaa70bb 881 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
882 struct flowi6 fl6 = {
883 .flowi6_iif = skb->dev->ifindex,
884 .daddr = iph->daddr,
885 .saddr = iph->saddr,
38308473 886 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
887 .flowi6_mark = skb->mark,
888 .flowi6_proto = iph->nexthdr,
c71099ac 889 };
adaa70bb 890
1d6e55f1 891 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 892 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 893
4c9483b2 894 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
c71099ac
TG
895}
896
8ed67789 897static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 898 struct flowi6 *fl6, int flags)
1da177e4 899{
4c9483b2 900 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
901}
902
9c7a4f9c 903struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 904 struct flowi6 *fl6)
c71099ac
TG
905{
906 int flags = 0;
907
4c9483b2 908 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 909 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 910
4c9483b2 911 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 912 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
913 else if (sk)
914 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 915
4c9483b2 916 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
917}
918
7159039a 919EXPORT_SYMBOL(ip6_route_output);
1da177e4 920
2774c131 921struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 922{
5c1e6aa3 923 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
924 struct dst_entry *new = NULL;
925
5c1e6aa3 926 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 927 if (rt) {
cf911662
DM
928 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
929
d8d1f30b 930 new = &rt->dst;
14e50e57 931
14e50e57 932 new->__use = 1;
352e512c
HX
933 new->input = dst_discard;
934 new->output = dst_discard;
14e50e57 935
21efcfa0
ED
936 if (dst_metrics_read_only(&ort->dst))
937 new->_metrics = ort->dst._metrics;
938 else
939 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
940 rt->rt6i_idev = ort->rt6i_idev;
941 if (rt->rt6i_idev)
942 in6_dev_hold(rt->rt6i_idev);
943 rt->rt6i_expires = 0;
944
4e3fd7a0 945 rt->rt6i_gateway = ort->rt6i_gateway;
14e50e57
DM
946 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
947 rt->rt6i_metric = 0;
948
949 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
950#ifdef CONFIG_IPV6_SUBTREES
951 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
952#endif
953
954 dst_free(new);
955 }
956
69ead7af
DM
957 dst_release(dst_orig);
958 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 959}
14e50e57 960
1da177e4
LT
961/*
962 * Destination cache support functions
963 */
964
965static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
966{
967 struct rt6_info *rt;
968
969 rt = (struct rt6_info *) dst;
970
6431cbc2
DM
971 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
972 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
973 if (!rt->rt6i_peer)
974 rt6_bind_peer(rt, 0);
975 rt->rt6i_peer_genid = rt6_peer_genid();
976 }
1da177e4 977 return dst;
6431cbc2 978 }
1da177e4
LT
979 return NULL;
980}
981
982static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
983{
984 struct rt6_info *rt = (struct rt6_info *) dst;
985
986 if (rt) {
54c1a859
YH
987 if (rt->rt6i_flags & RTF_CACHE) {
988 if (rt6_check_expired(rt)) {
989 ip6_del_rt(rt);
990 dst = NULL;
991 }
992 } else {
1da177e4 993 dst_release(dst);
54c1a859
YH
994 dst = NULL;
995 }
1da177e4 996 }
54c1a859 997 return dst;
1da177e4
LT
998}
999
1000static void ip6_link_failure(struct sk_buff *skb)
1001{
1002 struct rt6_info *rt;
1003
3ffe533c 1004 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1005
adf30907 1006 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1007 if (rt) {
38308473 1008 if (rt->rt6i_flags & RTF_CACHE) {
d8d1f30b 1009 dst_set_expires(&rt->dst, 0);
1da177e4
LT
1010 rt->rt6i_flags |= RTF_EXPIRES;
1011 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1012 rt->rt6i_node->fn_sernum = -1;
1013 }
1014}
1015
1016static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1017{
1018 struct rt6_info *rt6 = (struct rt6_info*)dst;
1019
1020 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1021 rt6->rt6i_flags |= RTF_MODIFIED;
1022 if (mtu < IPV6_MIN_MTU) {
defb3519 1023 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1024 mtu = IPV6_MIN_MTU;
defb3519
DM
1025 features |= RTAX_FEATURE_ALLFRAG;
1026 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1027 }
defb3519 1028 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
1029 }
1030}
1031
0dbaee3b 1032static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1033{
0dbaee3b
DM
1034 struct net_device *dev = dst->dev;
1035 unsigned int mtu = dst_mtu(dst);
1036 struct net *net = dev_net(dev);
1037
1da177e4
LT
1038 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1039
5578689a
DL
1040 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1041 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1042
1043 /*
1ab1457c
YH
1044 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1045 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1046 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1047 * rely only on pmtu discovery"
1048 */
1049 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1050 mtu = IPV6_MAXPLEN;
1051 return mtu;
1052}
1053
ebb762f2 1054static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1055{
d33e4553 1056 struct inet6_dev *idev;
618f9bc7
SK
1057 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1058
1059 if (mtu)
1060 return mtu;
1061
1062 mtu = IPV6_MIN_MTU;
d33e4553
DM
1063
1064 rcu_read_lock();
1065 idev = __in6_dev_get(dst->dev);
1066 if (idev)
1067 mtu = idev->cnf.mtu6;
1068 rcu_read_unlock();
1069
1070 return mtu;
1071}
1072
3b00944c
YH
/*
 * Head of the list of dst entries created by icmp6_dst_alloc().  Entries
 * are chained through dst->next and unlinked/freed by icmp6_dst_gc() and
 * icmp6_clean_all().
 */
1073static struct dst_entry *icmp6_dst_gc_list;
/* Guards icmp6_dst_gc_list; every user in this file takes it with the _bh variants. */
1074static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1075
3b00944c 1076struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1077 struct neighbour *neigh,
87a11578 1078 struct flowi6 *fl6)
1da177e4 1079{
87a11578 1080 struct dst_entry *dst;
1da177e4
LT
1081 struct rt6_info *rt;
1082 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1083 struct net *net = dev_net(dev);
1da177e4 1084
38308473 1085 if (unlikely(!idev))
1da177e4
LT
1086 return NULL;
1087
957c665f 1088 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
38308473 1089 if (unlikely(!rt)) {
1da177e4 1090 in6_dev_put(idev);
87a11578 1091 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1092 goto out;
1093 }
1094
1da177e4
LT
1095 if (neigh)
1096 neigh_hold(neigh);
14deae41 1097 else {
87a11578 1098 neigh = __neigh_lookup_errno(&nd_tbl, &fl6->daddr, dev);
b43faac6
DM
1099 if (IS_ERR(neigh)) {
1100 dst_free(&rt->dst);
1101 return ERR_CAST(neigh);
1102 }
14deae41 1103 }
1da177e4 1104
8e2ec639
YZ
1105 rt->dst.flags |= DST_HOST;
1106 rt->dst.output = ip6_output;
69cce1d1 1107 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1108 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1109 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1110 rt->rt6i_dst.plen = 128;
1111 rt->rt6i_idev = idev;
7011687f 1112 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1113
3b00944c 1114 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1115 rt->dst.next = icmp6_dst_gc_list;
1116 icmp6_dst_gc_list = &rt->dst;
3b00944c 1117 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1118
5578689a 1119 fib6_force_start_gc(net);
1da177e4 1120
87a11578
DM
1121 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1122
1da177e4 1123out:
87a11578 1124 return dst;
1da177e4
LT
1125}
1126
3d0f24a7 1127int icmp6_dst_gc(void)
1da177e4 1128{
e9476e95 1129 struct dst_entry *dst, **pprev;
3d0f24a7 1130 int more = 0;
1da177e4 1131
3b00944c
YH
1132 spin_lock_bh(&icmp6_dst_lock);
1133 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1134
1da177e4
LT
1135 while ((dst = *pprev) != NULL) {
1136 if (!atomic_read(&dst->__refcnt)) {
1137 *pprev = dst->next;
1138 dst_free(dst);
1da177e4
LT
1139 } else {
1140 pprev = &dst->next;
3d0f24a7 1141 ++more;
1da177e4
LT
1142 }
1143 }
1144
3b00944c 1145 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1146
3d0f24a7 1147 return more;
1da177e4
LT
1148}
1149
1e493d19
DM
1150static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1151 void *arg)
1152{
1153 struct dst_entry *dst, **pprev;
1154
1155 spin_lock_bh(&icmp6_dst_lock);
1156 pprev = &icmp6_dst_gc_list;
1157 while ((dst = *pprev) != NULL) {
1158 struct rt6_info *rt = (struct rt6_info *) dst;
1159 if (func(rt, arg)) {
1160 *pprev = dst->next;
1161 dst_free(dst);
1162 } else {
1163 pprev = &dst->next;
1164 }
1165 }
1166 spin_unlock_bh(&icmp6_dst_lock);
1167}
1168
569d3645 1169static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1170{
1da177e4 1171 unsigned long now = jiffies;
86393e52 1172 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1173 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1174 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1175 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1176 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1177 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1178 int entries;
7019b78e 1179
fc66f95c 1180 entries = dst_entries_get_fast(ops);
7019b78e 1181 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1182 entries <= rt_max_size)
1da177e4
LT
1183 goto out;
1184
6891a346
BT
1185 net->ipv6.ip6_rt_gc_expire++;
1186 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1187 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1188 entries = dst_entries_get_slow(ops);
1189 if (entries < ops->gc_thresh)
7019b78e 1190 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1191out:
7019b78e 1192 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1193 return entries > rt_max_size;
1da177e4
LT
1194}
1195
1196/* Clean host part of a prefix. Not necessary in radix tree,
1197 but results in cleaner routing tables.
1198
1199 Remove it only when all the things will work!
1200 */
1201
6b75d090 1202int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1203{
5170ae82 1204 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1205 if (hoplimit == 0) {
6b75d090 1206 struct net_device *dev = dst->dev;
c68f24cc
ED
1207 struct inet6_dev *idev;
1208
1209 rcu_read_lock();
1210 idev = __in6_dev_get(dev);
1211 if (idev)
6b75d090 1212 hoplimit = idev->cnf.hop_limit;
c68f24cc 1213 else
53b7997f 1214 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1215 rcu_read_unlock();
1da177e4
LT
1216 }
1217 return hoplimit;
1218}
abbf46ae 1219EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1220
1221/*
1222 *
1223 */
1224
/*
 * Build a route from @cfg and insert it into the FIB.
 *
 * Validation and defaults applied here:
 *  - fc_dst_len / fc_src_len above 128 are rejected with -EINVAL; a
 *    non-zero fc_src_len is also -EINVAL without CONFIG_IPV6_SUBTREES.
 *  - fc_ifindex, when set, must name a device that has an inet6_dev
 *    (-ENODEV otherwise).
 *  - fc_metric 0 becomes IP6_RT_PRIO_USER and RTPROT_UNSPEC becomes
 *    RTPROT_BOOT; both are written back into @cfg.
 *  - -ENOBUFS when no table can be obtained, -ENOMEM when the route or
 *    its metrics array cannot be allocated.
 *
 * RTF_REJECT routes, and non-local routes through a loopback device to a
 * non-loopback destination, are turned into reject routes bound to the
 * namespace's loopback device.
 *
 * On success the dev/idev references taken here are stored in the route
 * and the result of __ip6_ins_rt() is returned.  On failure (label out)
 * the references are dropped, the route is freed and a negative errno is
 * returned.
 */
86872cb5 1225int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1226{
1227 int err;
5578689a 1228 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1229 struct rt6_info *rt = NULL;
1230 struct net_device *dev = NULL;
1231 struct inet6_dev *idev = NULL;
c71099ac 1232 struct fib6_table *table;
1da177e4
LT
1233 int addr_type;
1234
86872cb5 1235 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1236 return -EINVAL;
1237#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1238 if (cfg->fc_src_len)
1da177e4
LT
1239 return -EINVAL;
1240#endif
/* Resolve the requested interface; dev and idev references are held from here on. */
86872cb5 1241 if (cfg->fc_ifindex) {
1da177e4 1242 err = -ENODEV;
5578689a 1243 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1244 if (!dev)
1245 goto out;
1246 idev = in6_dev_get(dev);
1247 if (!idev)
1248 goto out;
1249 }
1250
86872cb5
TG
1251 if (cfg->fc_metric == 0)
1252 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1253
/*
 * Table selection: a netlink request without NLM_F_CREATE first tries an
 * existing table; if there is none, a warning is logged and the table is
 * created anyway.  All other callers create the table on demand.
 */
d71314b4 1254 err = -ENOBUFS;
38308473
DM
1255 if (cfg->fc_nlinfo.nlh &&
1256 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1257 table = fib6_get_table(net, cfg->fc_table);
38308473 1258 if (!table) {
d71314b4
MV
1259 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1260 table = fib6_new_table(net, cfg->fc_table);
1261 }
1262 } else {
1263 table = fib6_new_table(net, cfg->fc_table);
1264 }
38308473
DM
1265
1266 if (!table)
c71099ac 1267 goto out;
c71099ac 1268
957c665f 1269 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1da177e4 1270
38308473 1271 if (!rt) {
1da177e4
LT
1272 err = -ENOMEM;
1273 goto out;
1274 }
1275
d8d1f30b 1276 rt->dst.obsolete = -1;
/* fc_expires is in clock_t units and only honoured when RTF_EXPIRES is set. */
6f704992
YH
1277 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1278 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1279 0;
1da177e4 1280
86872cb5
TG
1281 if (cfg->fc_protocol == RTPROT_UNSPEC)
1282 cfg->fc_protocol = RTPROT_BOOT;
1283 rt->rt6i_protocol = cfg->fc_protocol;
1284
1285 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1286
/* Input handler depends on destination type: multicast, local delivery, or forwarding. */
1287 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1288 rt->dst.input = ip6_mc_input;
ab79ad14
1289 else if (cfg->fc_flags & RTF_LOCAL)
1290 rt->dst.input = ip6_input;
1da177e4 1291 else
d8d1f30b 1292 rt->dst.input = ip6_forward;
1da177e4 1293
d8d1f30b 1294 rt->dst.output = ip6_output;
1da177e4 1295
86872cb5
TG
1296 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1297 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1298 if (rt->rt6i_dst.plen == 128)
11d53b49 1299 rt->dst.flags |= DST_HOST;
1da177e4 1300
/* Non-host routes carrying user metrics get their own zeroed metrics array. */
8e2ec639
YZ
1301 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1302 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1303 if (!metrics) {
1304 err = -ENOMEM;
1305 goto out;
1306 }
1307 dst_init_metrics(&rt->dst, metrics, 0);
1308 }
1da177e4 1309#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1310 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1311 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1312#endif
1313
86872cb5 1314 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1315
1316 /* We cannot add true routes via loopback here,
1317 they would result in kernel looping; promote them to reject routes
1318 */
86872cb5 1319 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1320 (dev && (dev->flags & IFF_LOOPBACK) &&
1321 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1322 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1323 /* hold loopback dev/idev if we haven't done so. */
5578689a 1324 if (dev != net->loopback_dev) {
1da177e4
LT
1325 if (dev) {
1326 dev_put(dev);
1327 in6_dev_put(idev);
1328 }
5578689a 1329 dev = net->loopback_dev;
1da177e4
LT
1330 dev_hold(dev);
1331 idev = in6_dev_get(dev);
1332 if (!idev) {
1333 err = -ENODEV;
1334 goto out;
1335 }
1336 }
d8d1f30b
CG
1337 rt->dst.output = ip6_pkt_discard_out;
1338 rt->dst.input = ip6_pkt_discard;
1339 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1340 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1341 goto install_route;
1342 }
1343
86872cb5 1344 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1345 const struct in6_addr *gw_addr;
1da177e4
LT
1346 int gwa_type;
1347
86872cb5 1348 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1349 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1350 gwa_type = ipv6_addr_type(gw_addr);
1351
1352 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1353 struct rt6_info *grt;
1354
1355 /* IPv6 strictly inhibits using not link-local
1356 addresses as nexthop address.
1357 Otherwise, router will not able to send redirects.
1358 It is very good, but in some (rare!) circumstances
1359 (SIT, PtP, NBMA NOARP links) it is handy to allow
1360 some exceptions. --ANK
1361 */
1362 err = -EINVAL;
38308473 1363 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1364 goto out;
1365
5578689a 1366 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1367
1368 err = -EHOSTUNREACH;
38308473 1369 if (!grt)
1da177e4
LT
1370 goto out;
/* The gateway must be reachable via the requested device; with no device given, adopt the gateway route's device. */
1371 if (dev) {
1372 if (dev != grt->rt6i_dev) {
d8d1f30b 1373 dst_release(&grt->dst);
1da177e4
LT
1374 goto out;
1375 }
1376 } else {
1377 dev = grt->rt6i_dev;
1378 idev = grt->rt6i_idev;
1379 dev_hold(dev);
1380 in6_dev_hold(grt->rt6i_idev);
1381 }
/* Accepted only if the route to the gateway is not itself a gateway route; otherwise err stays -EHOSTUNREACH. */
38308473 1382 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1383 err = 0;
d8d1f30b 1384 dst_release(&grt->dst);
1da177e4
LT
1385
1386 if (err)
1387 goto out;
1388 }
1389 err = -EINVAL;
38308473 1390 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1391 goto out;
1392 }
1393
1394 err = -ENODEV;
38308473 1395 if (!dev)
1da177e4
LT
1396 goto out;
1397
/* A preferred source address, when given, must pass ipv6_chk_addr() for this device. */
c3968a85
DW
1398 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1399 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1400 err = -EINVAL;
1401 goto out;
1402 }
4e3fd7a0 1403 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1404 rt->rt6i_prefsrc.plen = 128;
1405 } else
1406 rt->rt6i_prefsrc.plen = 0;
1407
86872cb5 1408 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
69cce1d1
DM
1409 struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1410 if (IS_ERR(n)) {
1411 err = PTR_ERR(n);
1da177e4
LT
1412 goto out;
1413 }
69cce1d1 1414 dst_set_neighbour(&rt->dst, n);
1da177e4
LT
1415 }
1416
86872cb5 1417 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1418
1419install_route:
/* Apply user-supplied metrics: attribute type 0 is skipped, types above RTAX_MAX are rejected. */
86872cb5
TG
1420 if (cfg->fc_mx) {
1421 struct nlattr *nla;
1422 int remaining;
1423
1424 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1425 int type = nla_type(nla);
86872cb5
TG
1426
1427 if (type) {
1428 if (type > RTAX_MAX) {
1da177e4
LT
1429 err = -EINVAL;
1430 goto out;
1431 }
86872cb5 1432
defb3519 1433 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1434 }
1da177e4
LT
1435 }
1436 }
1437
/* Hand the dev/idev references over to the route before inserting it. */
d8d1f30b 1438 rt->dst.dev = dev;
1da177e4 1439 rt->rt6i_idev = idev;
c71099ac 1440 rt->rt6i_table = table;
63152fc0 1441
c346dca1 1442 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1443
86872cb5 1444 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1445
/* Error path: drop whatever references are still held locally and free the route. */
1446out:
1447 if (dev)
1448 dev_put(dev);
1449 if (idev)
1450 in6_dev_put(idev);
1451 if (rt)
d8d1f30b 1452 dst_free(&rt->dst);
1da177e4
LT
1453 return err;
1454}
1455
86872cb5 1456static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1457{
1458 int err;
c71099ac 1459 struct fib6_table *table;
c346dca1 1460 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1461
8ed67789 1462 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1463 return -ENOENT;
1464
c71099ac
TG
1465 table = rt->rt6i_table;
1466 write_lock_bh(&table->tb6_lock);
1da177e4 1467
86872cb5 1468 err = fib6_del(rt, info);
d8d1f30b 1469 dst_release(&rt->dst);
1da177e4 1470
c71099ac 1471 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1472
1473 return err;
1474}
1475
e0a1ad73
TG
1476int ip6_del_rt(struct rt6_info *rt)
1477{
4d1169c1 1478 struct nl_info info = {
c346dca1 1479 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1480 };
528c4ceb 1481 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1482}
1483
86872cb5 1484static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1485{
c71099ac 1486 struct fib6_table *table;
1da177e4
LT
1487 struct fib6_node *fn;
1488 struct rt6_info *rt;
1489 int err = -ESRCH;
1490
5578689a 1491 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1492 if (!table)
c71099ac
TG
1493 return err;
1494
1495 read_lock_bh(&table->tb6_lock);
1da177e4 1496
c71099ac 1497 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1498 &cfg->fc_dst, cfg->fc_dst_len,
1499 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1500
1da177e4 1501 if (fn) {
d8d1f30b 1502 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1503 if (cfg->fc_ifindex &&
38308473 1504 (!rt->rt6i_dev ||
86872cb5 1505 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1506 continue;
86872cb5
TG
1507 if (cfg->fc_flags & RTF_GATEWAY &&
1508 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1509 continue;
86872cb5 1510 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1511 continue;
d8d1f30b 1512 dst_hold(&rt->dst);
c71099ac 1513 read_unlock_bh(&table->tb6_lock);
1da177e4 1514
86872cb5 1515 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1516 }
1517 }
c71099ac 1518 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1519
1520 return err;
1521}
1522
1523/*
1524 * Handle redirects
1525 */
/*
 * Lookup key used when validating a redirect: the ordinary flow plus the
 * address of the router the redirect came from.  fl6 is the first member
 * because __ip6_route_redirect() receives a struct flowi6 pointer and
 * casts it back to this structure.
 */
a6279458 1526struct ip6rd_flowi {
4c9483b2 1527 struct flowi6 fl6;
a6279458
YH
/* compared against each candidate route's rt6i_gateway */
1528 struct in6_addr gateway;
1529};
1530
8ed67789
DL
1531static struct rt6_info *__ip6_route_redirect(struct net *net,
1532 struct fib6_table *table,
4c9483b2 1533 struct flowi6 *fl6,
a6279458 1534 int flags)
1da177e4 1535{
4c9483b2 1536 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1537 struct rt6_info *rt;
e843b9e1 1538 struct fib6_node *fn;
c71099ac 1539
1da177e4 1540 /*
e843b9e1
YH
1541 * Get the "current" route for this destination and
1542 * check if the redirect has come from approriate router.
1543 *
1544 * RFC 2461 specifies that redirects should only be
1545 * accepted if they come from the nexthop to the target.
1546 * Due to the way the routes are chosen, this notion
1547 * is a bit fuzzy and one might need to check all possible
1548 * routes.
1da177e4 1549 */
1da177e4 1550
c71099ac 1551 read_lock_bh(&table->tb6_lock);
4c9483b2 1552 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1553restart:
d8d1f30b 1554 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1555 /*
1556 * Current route is on-link; redirect is always invalid.
1557 *
1558 * Seems, previous statement is not true. It could
1559 * be node, which looks for us as on-link (f.e. proxy ndisc)
1560 * But then router serving it might decide, that we should
1561 * know truth 8)8) --ANK (980726).
1562 */
1563 if (rt6_check_expired(rt))
1564 continue;
1565 if (!(rt->rt6i_flags & RTF_GATEWAY))
1566 continue;
4c9483b2 1567 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
e843b9e1 1568 continue;
a6279458 1569 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1570 continue;
1571 break;
1572 }
a6279458 1573
cb15d9c2 1574 if (!rt)
8ed67789 1575 rt = net->ipv6.ip6_null_entry;
4c9483b2 1576 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1577out:
d8d1f30b 1578 dst_hold(&rt->dst);
a6279458 1579
c71099ac 1580 read_unlock_bh(&table->tb6_lock);
e843b9e1 1581
a6279458
YH
1582 return rt;
1583};
1584
b71d1d42
ED
1585static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1586 const struct in6_addr *src,
1587 const struct in6_addr *gateway,
a6279458
YH
1588 struct net_device *dev)
1589{
adaa70bb 1590 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1591 struct net *net = dev_net(dev);
a6279458 1592 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1593 .fl6 = {
1594 .flowi6_oif = dev->ifindex,
1595 .daddr = *dest,
1596 .saddr = *src,
a6279458 1597 },
a6279458 1598 };
adaa70bb 1599
4e3fd7a0 1600 rdfl.gateway = *gateway;
86c36ce4 1601
adaa70bb
TG
1602 if (rt6_need_strict(dest))
1603 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1604
4c9483b2 1605 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1606 flags, __ip6_route_redirect);
a6279458
YH
1607}
1608
b71d1d42
ED
1609void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1610 const struct in6_addr *saddr,
a6279458
YH
1611 struct neighbour *neigh, u8 *lladdr, int on_link)
1612{
1613 struct rt6_info *rt, *nrt = NULL;
1614 struct netevent_redirect netevent;
c346dca1 1615 struct net *net = dev_net(neigh->dev);
a6279458
YH
1616
1617 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1618
8ed67789 1619 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1620 if (net_ratelimit())
1621 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1622 "for redirect target\n");
a6279458 1623 goto out;
1da177e4
LT
1624 }
1625
1da177e4
LT
1626 /*
1627 * We have finally decided to accept it.
1628 */
1629
1ab1457c 1630 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1631 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1632 NEIGH_UPDATE_F_OVERRIDE|
1633 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1634 NEIGH_UPDATE_F_ISROUTER))
1635 );
1636
1637 /*
1638 * Redirect received -> path was valid.
1639 * Look, redirects are sent only in response to data packets,
1640 * so that this nexthop apparently is reachable. --ANK
1641 */
d8d1f30b 1642 dst_confirm(&rt->dst);
1da177e4
LT
1643
1644 /* Duplicate redirect: silently ignore. */
27217455 1645 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1da177e4
LT
1646 goto out;
1647
21efcfa0 1648 nrt = ip6_rt_copy(rt, dest);
38308473 1649 if (!nrt)
1da177e4
LT
1650 goto out;
1651
1652 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1653 if (on_link)
1654 nrt->rt6i_flags &= ~RTF_GATEWAY;
1655
4e3fd7a0 1656 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
69cce1d1 1657 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1658
40e22e8f 1659 if (ip6_ins_rt(nrt))
1da177e4
LT
1660 goto out;
1661
d8d1f30b
CG
1662 netevent.old = &rt->dst;
1663 netevent.new = &nrt->dst;
8d71740c
TT
1664 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1665
38308473 1666 if (rt->rt6i_flags & RTF_CACHE) {
e0a1ad73 1667 ip6_del_rt(rt);
1da177e4
LT
1668 return;
1669 }
1670
1671out:
d8d1f30b 1672 dst_release(&rt->dst);
1da177e4
LT
1673}
1674
1675/*
1676 * Handle ICMP "packet too big" messages
1677 * i.e. Path MTU discovery
1678 */
1679
b71d1d42 1680static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1681 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1682{
1683 struct rt6_info *rt, *nrt;
1684 int allfrag = 0;
d3052b55 1685again:
ae878ae2 1686 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
38308473 1687 if (!rt)
1da177e4
LT
1688 return;
1689
d3052b55
AV
1690 if (rt6_check_expired(rt)) {
1691 ip6_del_rt(rt);
1692 goto again;
1693 }
1694
d8d1f30b 1695 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1696 goto out;
1697
1698 if (pmtu < IPV6_MIN_MTU) {
1699 /*
1ab1457c 1700 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1701 * MTU (1280) and a fragment header should always be included
1702 * after a node receiving Too Big message reporting PMTU is
1703 * less than the IPv6 Minimum Link MTU.
1704 */
1705 pmtu = IPV6_MIN_MTU;
1706 allfrag = 1;
1707 }
1708
1709 /* New mtu received -> path was valid.
1710 They are sent only in response to data packets,
1711 so that this nexthop apparently is reachable. --ANK
1712 */
d8d1f30b 1713 dst_confirm(&rt->dst);
1da177e4
LT
1714
1715 /* Host route. If it is static, it would be better
1716 not to override it, but add new one, so that
1717 when cache entry will expire old pmtu
1718 would return automatically.
1719 */
1720 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1721 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1722 if (allfrag) {
1723 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1724 features |= RTAX_FEATURE_ALLFRAG;
1725 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1726 }
d8d1f30b 1727 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1728 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1729 goto out;
1730 }
1731
1732 /* Network route.
1733 Two cases are possible:
1734 1. It is connected route. Action: COW
1735 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1736 */
27217455 1737 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1738 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1739 else
1740 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1741
d5315b50 1742 if (nrt) {
defb3519
DM
1743 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1744 if (allfrag) {
1745 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1746 features |= RTAX_FEATURE_ALLFRAG;
1747 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1748 }
a1e78363
YH
1749
1750 /* According to RFC 1981, detecting PMTU increase shouldn't be
1751 * happened within 5 mins, the recommended timer is 10 mins.
1752 * Here this route expiration time is set to ip6_rt_mtu_expires
1753 * which is 10 mins. After 10 mins the decreased pmtu is expired
1754 * and detecting PMTU increase will be automatically happened.
1755 */
d8d1f30b 1756 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1757 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1758
40e22e8f 1759 ip6_ins_rt(nrt);
1da177e4 1760 }
1da177e4 1761out:
d8d1f30b 1762 dst_release(&rt->dst);
1da177e4
LT
1763}
1764
b71d1d42 1765void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1766 struct net_device *dev, u32 pmtu)
1767{
1768 struct net *net = dev_net(dev);
1769
1770 /*
1771 * RFC 1981 states that a node "MUST reduce the size of the packets it
1772 * is sending along the path" that caused the Packet Too Big message.
1773 * Since it's not possible in the general case to determine which
1774 * interface was used to send the original packet, we update the MTU
1775 * on the interface that will be used to send future packets. We also
1776 * update the MTU on the interface that received the Packet Too Big in
1777 * case the original packet was forced out that interface with
1778 * SO_BINDTODEVICE or similar. This is the next best thing to the
1779 * correct behaviour, which would be to update the MTU on all
1780 * interfaces.
1781 */
1782 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1783 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1784}
1785
1da177e4
LT
1786/*
1787 * Misc support functions
1788 */
1789
21efcfa0
ED
1790static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1791 const struct in6_addr *dest)
1da177e4 1792{
c346dca1 1793 struct net *net = dev_net(ort->rt6i_dev);
5c1e6aa3 1794 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 1795 ort->dst.dev, 0);
1da177e4
LT
1796
1797 if (rt) {
d8d1f30b
CG
1798 rt->dst.input = ort->dst.input;
1799 rt->dst.output = ort->dst.output;
8e2ec639 1800 rt->dst.flags |= DST_HOST;
d8d1f30b 1801
4e3fd7a0 1802 rt->rt6i_dst.addr = *dest;
8e2ec639 1803 rt->rt6i_dst.plen = 128;
defb3519 1804 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1805 rt->dst.error = ort->dst.error;
1da177e4
LT
1806 rt->rt6i_idev = ort->rt6i_idev;
1807 if (rt->rt6i_idev)
1808 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1809 rt->dst.lastuse = jiffies;
1da177e4
LT
1810 rt->rt6i_expires = 0;
1811
4e3fd7a0 1812 rt->rt6i_gateway = ort->rt6i_gateway;
1da177e4
LT
1813 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1814 rt->rt6i_metric = 0;
1815
1da177e4
LT
1816#ifdef CONFIG_IPV6_SUBTREES
1817 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1818#endif
0f6c6392 1819 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1820 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1821 }
1822 return rt;
1823}
1824
70ceb4f5 1825#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1826static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1827 const struct in6_addr *prefix, int prefixlen,
1828 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1829{
1830 struct fib6_node *fn;
1831 struct rt6_info *rt = NULL;
c71099ac
TG
1832 struct fib6_table *table;
1833
efa2cea0 1834 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1835 if (!table)
c71099ac 1836 return NULL;
70ceb4f5 1837
c71099ac
TG
1838 write_lock_bh(&table->tb6_lock);
1839 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1840 if (!fn)
1841 goto out;
1842
d8d1f30b 1843 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1844 if (rt->rt6i_dev->ifindex != ifindex)
1845 continue;
1846 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1847 continue;
1848 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1849 continue;
d8d1f30b 1850 dst_hold(&rt->dst);
70ceb4f5
YH
1851 break;
1852 }
1853out:
c71099ac 1854 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1855 return rt;
1856}
1857
efa2cea0 1858static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1859 const struct in6_addr *prefix, int prefixlen,
1860 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5
YH
1861 unsigned pref)
1862{
86872cb5
TG
1863 struct fib6_config cfg = {
1864 .fc_table = RT6_TABLE_INFO,
238fc7ea 1865 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1866 .fc_ifindex = ifindex,
1867 .fc_dst_len = prefixlen,
1868 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1869 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1870 .fc_nlinfo.pid = 0,
1871 .fc_nlinfo.nlh = NULL,
1872 .fc_nlinfo.nl_net = net,
86872cb5
TG
1873 };
1874
4e3fd7a0
AD
1875 cfg.fc_dst = *prefix;
1876 cfg.fc_gateway = *gwaddr;
70ceb4f5 1877
e317da96
YH
1878 /* We should treat it as a default route if prefix length is 0. */
1879 if (!prefixlen)
86872cb5 1880 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1881
86872cb5 1882 ip6_route_add(&cfg);
70ceb4f5 1883
efa2cea0 1884 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1885}
1886#endif
1887
b71d1d42 1888struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1889{
1da177e4 1890 struct rt6_info *rt;
c71099ac 1891 struct fib6_table *table;
1da177e4 1892
c346dca1 1893 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1894 if (!table)
c71099ac 1895 return NULL;
1da177e4 1896
c71099ac 1897 write_lock_bh(&table->tb6_lock);
d8d1f30b 1898 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1899 if (dev == rt->rt6i_dev &&
045927ff 1900 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1901 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1902 break;
1903 }
1904 if (rt)
d8d1f30b 1905 dst_hold(&rt->dst);
c71099ac 1906 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1907 return rt;
1908}
1909
b71d1d42 1910struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1911 struct net_device *dev,
1912 unsigned int pref)
1da177e4 1913{
86872cb5
TG
1914 struct fib6_config cfg = {
1915 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1916 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1917 .fc_ifindex = dev->ifindex,
1918 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1919 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1920 .fc_nlinfo.pid = 0,
1921 .fc_nlinfo.nlh = NULL,
c346dca1 1922 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1923 };
1da177e4 1924
4e3fd7a0 1925 cfg.fc_gateway = *gwaddr;
1da177e4 1926
86872cb5 1927 ip6_route_add(&cfg);
1da177e4 1928
1da177e4
LT
1929 return rt6_get_dflt_router(gwaddr, dev);
1930}
1931
7b4da532 1932void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1933{
1934 struct rt6_info *rt;
c71099ac
TG
1935 struct fib6_table *table;
1936
1937 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1938 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1939 if (!table)
c71099ac 1940 return;
1da177e4
LT
1941
1942restart:
c71099ac 1943 read_lock_bh(&table->tb6_lock);
d8d1f30b 1944 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1945 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1946 dst_hold(&rt->dst);
c71099ac 1947 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1948 ip6_del_rt(rt);
1da177e4
LT
1949 goto restart;
1950 }
1951 }
c71099ac 1952 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1953}
1954
5578689a
DL
1955static void rtmsg_to_fib6_config(struct net *net,
1956 struct in6_rtmsg *rtmsg,
86872cb5
TG
1957 struct fib6_config *cfg)
1958{
1959 memset(cfg, 0, sizeof(*cfg));
1960
1961 cfg->fc_table = RT6_TABLE_MAIN;
1962 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1963 cfg->fc_metric = rtmsg->rtmsg_metric;
1964 cfg->fc_expires = rtmsg->rtmsg_info;
1965 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1966 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1967 cfg->fc_flags = rtmsg->rtmsg_flags;
1968
5578689a 1969 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1970
4e3fd7a0
AD
1971 cfg->fc_dst = rtmsg->rtmsg_dst;
1972 cfg->fc_src = rtmsg->rtmsg_src;
1973 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1974}
1975
5578689a 1976int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1977{
86872cb5 1978 struct fib6_config cfg;
1da177e4
LT
1979 struct in6_rtmsg rtmsg;
1980 int err;
1981
1982 switch(cmd) {
1983 case SIOCADDRT: /* Add a route */
1984 case SIOCDELRT: /* Delete a route */
1985 if (!capable(CAP_NET_ADMIN))
1986 return -EPERM;
1987 err = copy_from_user(&rtmsg, arg,
1988 sizeof(struct in6_rtmsg));
1989 if (err)
1990 return -EFAULT;
86872cb5 1991
5578689a 1992 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1993
1da177e4
LT
1994 rtnl_lock();
1995 switch (cmd) {
1996 case SIOCADDRT:
86872cb5 1997 err = ip6_route_add(&cfg);
1da177e4
LT
1998 break;
1999 case SIOCDELRT:
86872cb5 2000 err = ip6_route_del(&cfg);
1da177e4
LT
2001 break;
2002 default:
2003 err = -EINVAL;
2004 }
2005 rtnl_unlock();
2006
2007 return err;
3ff50b79 2008 }
1da177e4
LT
2009
2010 return -EINVAL;
2011}
2012
2013/*
2014 * Drop the packet on the floor
2015 */
2016
d5fdd6ba 2017static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2018{
612f09e8 2019 int type;
adf30907 2020 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2021 switch (ipstats_mib_noroutes) {
2022 case IPSTATS_MIB_INNOROUTES:
0660e03f 2023 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2024 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2025 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2026 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2027 break;
2028 }
2029 /* FALLTHROUGH */
2030 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2031 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2032 ipstats_mib_noroutes);
612f09e8
YH
2033 break;
2034 }
3ffe533c 2035 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2036 kfree_skb(skb);
2037 return 0;
2038}
2039
9ce8ade0
TG
2040static int ip6_pkt_discard(struct sk_buff *skb)
2041{
612f09e8 2042 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2043}
2044
20380731 2045static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2046{
adf30907 2047 skb->dev = skb_dst(skb)->dev;
612f09e8 2048 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2049}
2050
6723ab54
DM
2051#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2052
9ce8ade0
TG
2053static int ip6_pkt_prohibit(struct sk_buff *skb)
2054{
612f09e8 2055 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2056}
2057
2058static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2059{
adf30907 2060 skb->dev = skb_dst(skb)->dev;
612f09e8 2061 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2062}
2063
6723ab54
DM
2064#endif
2065
1da177e4
LT
2066/*
2067 * Allocate a dst for local (unicast / anycast) address.
2068 */
2069
2070struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2071 const struct in6_addr *addr,
8f031519 2072 bool anycast)
1da177e4 2073{
c346dca1 2074 struct net *net = dev_net(idev->dev);
5c1e6aa3 2075 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 2076 net->loopback_dev, 0);
14deae41 2077 struct neighbour *neigh;
1da177e4 2078
38308473 2079 if (!rt) {
40385653
BG
2080 if (net_ratelimit())
2081 pr_warning("IPv6: Maximum number of routes reached,"
2082 " consider increasing route/max_size.\n");
1da177e4 2083 return ERR_PTR(-ENOMEM);
40385653 2084 }
1da177e4 2085
1da177e4
LT
2086 in6_dev_hold(idev);
2087
11d53b49 2088 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2089 rt->dst.input = ip6_input;
2090 rt->dst.output = ip6_output;
1da177e4 2091 rt->rt6i_idev = idev;
d8d1f30b 2092 rt->dst.obsolete = -1;
1da177e4
LT
2093
2094 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2095 if (anycast)
2096 rt->rt6i_flags |= RTF_ANYCAST;
2097 else
1da177e4 2098 rt->rt6i_flags |= RTF_LOCAL;
04a6f441 2099 neigh = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, rt->rt6i_dev);
14deae41 2100 if (IS_ERR(neigh)) {
d8d1f30b 2101 dst_free(&rt->dst);
14deae41 2102
29546a64 2103 return ERR_CAST(neigh);
1da177e4 2104 }
69cce1d1 2105 dst_set_neighbour(&rt->dst, neigh);
1da177e4 2106
4e3fd7a0 2107 rt->rt6i_dst.addr = *addr;
1da177e4 2108 rt->rt6i_dst.plen = 128;
5578689a 2109 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2110
d8d1f30b 2111 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2112
2113 return rt;
2114}
2115
c3968a85
DW
2116int ip6_route_get_saddr(struct net *net,
2117 struct rt6_info *rt,
b71d1d42 2118 const struct in6_addr *daddr,
c3968a85
DW
2119 unsigned int prefs,
2120 struct in6_addr *saddr)
2121{
2122 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2123 int err = 0;
2124 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2125 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2126 else
2127 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2128 daddr, prefs, saddr);
2129 return err;
2130}
2131
2132/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* restrict to this device; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address to compare against rt6i_prefsrc */
};
2138
2139static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2140{
2141 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2142 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2143 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2144
38308473 2145 if (((void *)rt->rt6i_dev == dev || !dev) &&
c3968a85
DW
2146 rt != net->ipv6.ip6_null_entry &&
2147 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2148 /* remove prefsrc entry */
2149 rt->rt6i_prefsrc.plen = 0;
2150 }
2151 return 0;
2152}
2153
2154void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2155{
2156 struct net *net = dev_net(ifp->idev->dev);
2157 struct arg_dev_net_ip adni = {
2158 .dev = ifp->idev->dev,
2159 .net = net,
2160 .addr = &ifp->addr,
2161 };
2162 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2163}
2164
8ed67789
DL
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace whose null entry is kept */
};
2169
1da177e4
LT
2170static int fib6_ifdown(struct rt6_info *rt, void *arg)
2171{
bc3ef660 2172 const struct arg_dev_net *adn = arg;
2173 const struct net_device *dev = adn->dev;
8ed67789 2174
38308473 2175 if ((rt->rt6i_dev == dev || !dev) &&
bc3ef660 2176 rt != adn->net->ipv6.ip6_null_entry) {
1da177e4
LT
2177 RT6_TRACE("deleted by ifdown %p\n", rt);
2178 return -1;
2179 }
2180 return 0;
2181}
2182
f3db4851 2183void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2184{
8ed67789
DL
2185 struct arg_dev_net adn = {
2186 .dev = dev,
2187 .net = net,
2188 };
2189
2190 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2191 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2192}
2193
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2199
2200static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2201{
2202 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2203 struct inet6_dev *idev;
2204
2205 /* In IPv6 pmtu discovery is not optional,
2206 so that RTAX_MTU lock cannot disable it.
2207 We still use this lock to block changes
2208 caused by addrconf/ndisc.
2209 */
2210
2211 idev = __in6_dev_get(arg->dev);
38308473 2212 if (!idev)
1da177e4
LT
2213 return 0;
2214
2215 /* For administrative MTU increase, there is no way to discover
2216 IPv6 PMTU increase, so PMTU increase should be updated here.
2217 Since RFC 1981 doesn't include administrative MTU increase
2218 update PMTU increase is a MUST. (i.e. jumbo frame)
2219 */
2220 /*
2221 If new MTU is less than route PMTU, this new MTU will be the
2222 lowest MTU in the path, update the route PMTU to reflect PMTU
2223 decreases; if new MTU is greater than route PMTU, and the
2224 old MTU is the lowest MTU in the path, update the route PMTU
2225 to reflect the increase. In this case if the other nodes' MTU
2226 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2227 PMTU discouvery.
2228 */
2229 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2230 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2231 (dst_mtu(&rt->dst) >= arg->mtu ||
2232 (dst_mtu(&rt->dst) < arg->mtu &&
2233 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2234 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2235 }
1da177e4
LT
2236 return 0;
2237}
2238
2239void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2240{
c71099ac
TG
2241 struct rt6_mtu_change_arg arg = {
2242 .dev = dev,
2243 .mtu = mtu,
2244 };
1da177e4 2245
c346dca1 2246 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2247}
2248
ef7c79ed 2249static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2250 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2251 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2252 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2253 [RTA_PRIORITY] = { .type = NLA_U32 },
2254 [RTA_METRICS] = { .type = NLA_NESTED },
2255};
2256
2257static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2258 struct fib6_config *cfg)
1da177e4 2259{
86872cb5
TG
2260 struct rtmsg *rtm;
2261 struct nlattr *tb[RTA_MAX+1];
2262 int err;
1da177e4 2263
86872cb5
TG
2264 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2265 if (err < 0)
2266 goto errout;
1da177e4 2267
86872cb5
TG
2268 err = -EINVAL;
2269 rtm = nlmsg_data(nlh);
2270 memset(cfg, 0, sizeof(*cfg));
2271
2272 cfg->fc_table = rtm->rtm_table;
2273 cfg->fc_dst_len = rtm->rtm_dst_len;
2274 cfg->fc_src_len = rtm->rtm_src_len;
2275 cfg->fc_flags = RTF_UP;
2276 cfg->fc_protocol = rtm->rtm_protocol;
2277
2278 if (rtm->rtm_type == RTN_UNREACHABLE)
2279 cfg->fc_flags |= RTF_REJECT;
2280
ab79ad14
2281 if (rtm->rtm_type == RTN_LOCAL)
2282 cfg->fc_flags |= RTF_LOCAL;
2283
86872cb5
TG
2284 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2285 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2286 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2287
2288 if (tb[RTA_GATEWAY]) {
2289 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2290 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2291 }
86872cb5
TG
2292
2293 if (tb[RTA_DST]) {
2294 int plen = (rtm->rtm_dst_len + 7) >> 3;
2295
2296 if (nla_len(tb[RTA_DST]) < plen)
2297 goto errout;
2298
2299 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2300 }
86872cb5
TG
2301
2302 if (tb[RTA_SRC]) {
2303 int plen = (rtm->rtm_src_len + 7) >> 3;
2304
2305 if (nla_len(tb[RTA_SRC]) < plen)
2306 goto errout;
2307
2308 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2309 }
86872cb5 2310
c3968a85
DW
2311 if (tb[RTA_PREFSRC])
2312 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2313
86872cb5
TG
2314 if (tb[RTA_OIF])
2315 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2316
2317 if (tb[RTA_PRIORITY])
2318 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2319
2320 if (tb[RTA_METRICS]) {
2321 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2322 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2323 }
86872cb5
TG
2324
2325 if (tb[RTA_TABLE])
2326 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2327
2328 err = 0;
2329errout:
2330 return err;
1da177e4
LT
2331}
2332
c127ea2c 2333static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2334{
86872cb5
TG
2335 struct fib6_config cfg;
2336 int err;
1da177e4 2337
86872cb5
TG
2338 err = rtm_to_fib6_config(skb, nlh, &cfg);
2339 if (err < 0)
2340 return err;
2341
2342 return ip6_route_del(&cfg);
1da177e4
LT
2343}
2344
c127ea2c 2345static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2346{
86872cb5
TG
2347 struct fib6_config cfg;
2348 int err;
1da177e4 2349
86872cb5
TG
2350 err = rtm_to_fib6_config(skb, nlh, &cfg);
2351 if (err < 0)
2352 return err;
2353
2354 return ip6_route_add(&cfg);
1da177e4
LT
2355}
2356
339bf98f
TG
2357static inline size_t rt6_nlmsg_size(void)
2358{
2359 return NLMSG_ALIGN(sizeof(struct rtmsg))
2360 + nla_total_size(16) /* RTA_SRC */
2361 + nla_total_size(16) /* RTA_DST */
2362 + nla_total_size(16) /* RTA_GATEWAY */
2363 + nla_total_size(16) /* RTA_PREFSRC */
2364 + nla_total_size(4) /* RTA_TABLE */
2365 + nla_total_size(4) /* RTA_IIF */
2366 + nla_total_size(4) /* RTA_OIF */
2367 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2368 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2369 + nla_total_size(sizeof(struct rta_cacheinfo));
2370}
2371
191cd582
BH
2372static int rt6_fill_node(struct net *net,
2373 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2374 struct in6_addr *dst, struct in6_addr *src,
2375 int iif, int type, u32 pid, u32 seq,
7bc570c8 2376 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2377{
2378 struct rtmsg *rtm;
2d7202bf 2379 struct nlmsghdr *nlh;
e3703b3d 2380 long expires;
9e762a4a 2381 u32 table;
f2c31e32 2382 struct neighbour *n;
1da177e4
LT
2383
2384 if (prefix) { /* user wants prefix routes only */
2385 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2386 /* success since this is not a prefix route */
2387 return 1;
2388 }
2389 }
2390
2d7202bf 2391 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2392 if (!nlh)
26932566 2393 return -EMSGSIZE;
2d7202bf
TG
2394
2395 rtm = nlmsg_data(nlh);
1da177e4
LT
2396 rtm->rtm_family = AF_INET6;
2397 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2398 rtm->rtm_src_len = rt->rt6i_src.plen;
2399 rtm->rtm_tos = 0;
c71099ac 2400 if (rt->rt6i_table)
9e762a4a 2401 table = rt->rt6i_table->tb6_id;
c71099ac 2402 else
9e762a4a
PM
2403 table = RT6_TABLE_UNSPEC;
2404 rtm->rtm_table = table;
2d7202bf 2405 NLA_PUT_U32(skb, RTA_TABLE, table);
38308473 2406 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2407 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2408 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2409 rtm->rtm_type = RTN_LOCAL;
38308473 2410 else if (rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
1da177e4
LT
2411 rtm->rtm_type = RTN_LOCAL;
2412 else
2413 rtm->rtm_type = RTN_UNICAST;
2414 rtm->rtm_flags = 0;
2415 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2416 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2417 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4
LT
2418 rtm->rtm_protocol = RTPROT_REDIRECT;
2419 else if (rt->rt6i_flags & RTF_ADDRCONF)
2420 rtm->rtm_protocol = RTPROT_KERNEL;
38308473 2421 else if (rt->rt6i_flags & RTF_DEFAULT)
1da177e4
LT
2422 rtm->rtm_protocol = RTPROT_RA;
2423
38308473 2424 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2425 rtm->rtm_flags |= RTM_F_CLONED;
2426
2427 if (dst) {
2d7202bf 2428 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2429 rtm->rtm_dst_len = 128;
1da177e4 2430 } else if (rtm->rtm_dst_len)
2d7202bf 2431 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2432#ifdef CONFIG_IPV6_SUBTREES
2433 if (src) {
2d7202bf 2434 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2435 rtm->rtm_src_len = 128;
1da177e4 2436 } else if (rtm->rtm_src_len)
2d7202bf 2437 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2438#endif
7bc570c8
YH
2439 if (iif) {
2440#ifdef CONFIG_IPV6_MROUTE
2441 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2442 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2443 if (err <= 0) {
2444 if (!nowait) {
2445 if (err == 0)
2446 return 0;
2447 goto nla_put_failure;
2448 } else {
2449 if (err == -EMSGSIZE)
2450 goto nla_put_failure;
2451 }
2452 }
2453 } else
2454#endif
2455 NLA_PUT_U32(skb, RTA_IIF, iif);
2456 } else if (dst) {
1da177e4 2457 struct in6_addr saddr_buf;
c3968a85 2458 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2d7202bf 2459 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2460 }
2d7202bf 2461
c3968a85
DW
2462 if (rt->rt6i_prefsrc.plen) {
2463 struct in6_addr saddr_buf;
4e3fd7a0 2464 saddr_buf = rt->rt6i_prefsrc.addr;
c3968a85
DW
2465 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2466 }
2467
defb3519 2468 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2469 goto nla_put_failure;
2470
f2c31e32 2471 rcu_read_lock();
27217455 2472 n = dst_get_neighbour_noref(&rt->dst);
f2c31e32
ED
2473 if (n)
2474 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2475 rcu_read_unlock();
2d7202bf 2476
d8d1f30b 2477 if (rt->dst.dev)
2d7202bf
TG
2478 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2479
2480 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2481
36e3deae
YH
2482 if (!(rt->rt6i_flags & RTF_EXPIRES))
2483 expires = 0;
2484 else if (rt->rt6i_expires - jiffies < INT_MAX)
2485 expires = rt->rt6i_expires - jiffies;
2486 else
2487 expires = INT_MAX;
69cdf8f9 2488
d8d1f30b
CG
2489 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2490 expires, rt->dst.error) < 0)
e3703b3d 2491 goto nla_put_failure;
2d7202bf
TG
2492
2493 return nlmsg_end(skb, nlh);
2494
2495nla_put_failure:
26932566
PM
2496 nlmsg_cancel(skb, nlh);
2497 return -EMSGSIZE;
1da177e4
LT
2498}
2499
1b43af54 2500int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2501{
2502 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2503 int prefix;
2504
2d7202bf
TG
2505 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2506 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2507 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2508 } else
2509 prefix = 0;
2510
191cd582
BH
2511 return rt6_fill_node(arg->net,
2512 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2513 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2514 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2515}
2516
c127ea2c 2517static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2518{
3b1e0a65 2519 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2520 struct nlattr *tb[RTA_MAX+1];
2521 struct rt6_info *rt;
1da177e4 2522 struct sk_buff *skb;
ab364a6f 2523 struct rtmsg *rtm;
4c9483b2 2524 struct flowi6 fl6;
ab364a6f 2525 int err, iif = 0;
1da177e4 2526
ab364a6f
TG
2527 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2528 if (err < 0)
2529 goto errout;
1da177e4 2530
ab364a6f 2531 err = -EINVAL;
4c9483b2 2532 memset(&fl6, 0, sizeof(fl6));
1da177e4 2533
ab364a6f
TG
2534 if (tb[RTA_SRC]) {
2535 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2536 goto errout;
2537
4e3fd7a0 2538 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2539 }
2540
2541 if (tb[RTA_DST]) {
2542 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2543 goto errout;
2544
4e3fd7a0 2545 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2546 }
2547
2548 if (tb[RTA_IIF])
2549 iif = nla_get_u32(tb[RTA_IIF]);
2550
2551 if (tb[RTA_OIF])
4c9483b2 2552 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2553
2554 if (iif) {
2555 struct net_device *dev;
5578689a 2556 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2557 if (!dev) {
2558 err = -ENODEV;
ab364a6f 2559 goto errout;
1da177e4
LT
2560 }
2561 }
2562
ab364a6f 2563 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2564 if (!skb) {
ab364a6f
TG
2565 err = -ENOBUFS;
2566 goto errout;
2567 }
1da177e4 2568
ab364a6f
TG
2569 /* Reserve room for dummy headers, this skb can pass
2570 through good chunk of routing engine.
2571 */
459a98ed 2572 skb_reset_mac_header(skb);
ab364a6f 2573 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2574
4c9483b2 2575 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
d8d1f30b 2576 skb_dst_set(skb, &rt->dst);
1da177e4 2577
4c9483b2 2578 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2579 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2580 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2581 if (err < 0) {
ab364a6f
TG
2582 kfree_skb(skb);
2583 goto errout;
1da177e4
LT
2584 }
2585
5578689a 2586 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2587errout:
1da177e4 2588 return err;
1da177e4
LT
2589}
2590
86872cb5 2591void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2592{
2593 struct sk_buff *skb;
5578689a 2594 struct net *net = info->nl_net;
528c4ceb
DL
2595 u32 seq;
2596 int err;
2597
2598 err = -ENOBUFS;
38308473 2599 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2600
339bf98f 2601 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2602 if (!skb)
21713ebc
TG
2603 goto errout;
2604
191cd582 2605 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2606 event, info->pid, seq, 0, 0, 0);
26932566
PM
2607 if (err < 0) {
2608 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2609 WARN_ON(err == -EMSGSIZE);
2610 kfree_skb(skb);
2611 goto errout;
2612 }
1ce85fe4
PNA
2613 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2614 info->nlh, gfp_any());
2615 return;
21713ebc
TG
2616errout:
2617 if (err < 0)
5578689a 2618 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2619}
2620
8ed67789
DL
2621static int ip6_route_dev_notify(struct notifier_block *this,
2622 unsigned long event, void *data)
2623{
2624 struct net_device *dev = (struct net_device *)data;
c346dca1 2625 struct net *net = dev_net(dev);
8ed67789
DL
2626
2627 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2628 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2629 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2630#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2631 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2632 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2633 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2634 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2635#endif
2636 }
2637
2638 return NOTIFY_OK;
2639}
2640
1da177e4
LT
2641/*
2642 * /proc
2643 */
2644
2645#ifdef CONFIG_PROC_FS
2646
1da177e4
LT
/*
 * Buffer-cursor bookkeeping for /proc output.
 * NOTE(review): nothing in the visible part of this file references
 * this structure; the field meanings are not established here - confirm
 * before relying on them.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2655
2656static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2657{
33120b30 2658 struct seq_file *m = p_arg;
69cce1d1 2659 struct neighbour *n;
1da177e4 2660
4b7a4274 2661 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2662
2663#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2664 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2665#else
33120b30 2666 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2667#endif
f2c31e32 2668 rcu_read_lock();
27217455 2669 n = dst_get_neighbour_noref(&rt->dst);
69cce1d1
DM
2670 if (n) {
2671 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2672 } else {
33120b30 2673 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2674 }
f2c31e32 2675 rcu_read_unlock();
33120b30 2676 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2677 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2678 rt->dst.__use, rt->rt6i_flags,
33120b30 2679 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2680 return 0;
2681}
2682
33120b30 2683static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2684{
f3db4851
DL
2685 struct net *net = (struct net *)m->private;
2686 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2687 return 0;
2688}
1da177e4 2689
33120b30
AD
2690static int ipv6_route_open(struct inode *inode, struct file *file)
2691{
de05c557 2692 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2693}
2694
33120b30
AD
2695static const struct file_operations ipv6_route_proc_fops = {
2696 .owner = THIS_MODULE,
2697 .open = ipv6_route_open,
2698 .read = seq_read,
2699 .llseek = seq_lseek,
b6fcbdb4 2700 .release = single_release_net,
33120b30
AD
2701};
2702
1da177e4
LT
2703static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2704{
69ddb805 2705 struct net *net = (struct net *)seq->private;
1da177e4 2706 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2707 net->ipv6.rt6_stats->fib_nodes,
2708 net->ipv6.rt6_stats->fib_route_nodes,
2709 net->ipv6.rt6_stats->fib_rt_alloc,
2710 net->ipv6.rt6_stats->fib_rt_entries,
2711 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2712 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2713 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2714
2715 return 0;
2716}
2717
2718static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2719{
de05c557 2720 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2721}
2722
9a32144e 2723static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2724 .owner = THIS_MODULE,
2725 .open = rt6_stats_seq_open,
2726 .read = seq_read,
2727 .llseek = seq_lseek,
b6fcbdb4 2728 .release = single_release_net,
1da177e4
LT
2729};
2730#endif /* CONFIG_PROC_FS */
2731
2732#ifdef CONFIG_SYSCTL
2733
1da177e4 2734static
8d65af78 2735int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2736 void __user *buffer, size_t *lenp, loff_t *ppos)
2737{
c486da34
LAG
2738 struct net *net;
2739 int delay;
2740 if (!write)
1da177e4 2741 return -EINVAL;
c486da34
LAG
2742
2743 net = (struct net *)ctl->extra1;
2744 delay = net->ipv6.sysctl.flush_delay;
2745 proc_dointvec(ctl, write, buffer, lenp, ppos);
2746 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2747 return 0;
1da177e4
LT
2748}
2749
760f2d01 2750ctl_table ipv6_route_table_template[] = {
1ab1457c 2751 {
1da177e4 2752 .procname = "flush",
4990509f 2753 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2754 .maxlen = sizeof(int),
89c8b3a1 2755 .mode = 0200,
6d9f239a 2756 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2757 },
2758 {
1da177e4 2759 .procname = "gc_thresh",
9a7ec3a9 2760 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2761 .maxlen = sizeof(int),
2762 .mode = 0644,
6d9f239a 2763 .proc_handler = proc_dointvec,
1da177e4
LT
2764 },
2765 {
1da177e4 2766 .procname = "max_size",
4990509f 2767 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2768 .maxlen = sizeof(int),
2769 .mode = 0644,
6d9f239a 2770 .proc_handler = proc_dointvec,
1da177e4
LT
2771 },
2772 {
1da177e4 2773 .procname = "gc_min_interval",
4990509f 2774 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2775 .maxlen = sizeof(int),
2776 .mode = 0644,
6d9f239a 2777 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2778 },
2779 {
1da177e4 2780 .procname = "gc_timeout",
4990509f 2781 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2782 .maxlen = sizeof(int),
2783 .mode = 0644,
6d9f239a 2784 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2785 },
2786 {
1da177e4 2787 .procname = "gc_interval",
4990509f 2788 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2789 .maxlen = sizeof(int),
2790 .mode = 0644,
6d9f239a 2791 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2792 },
2793 {
1da177e4 2794 .procname = "gc_elasticity",
4990509f 2795 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2796 .maxlen = sizeof(int),
2797 .mode = 0644,
f3d3f616 2798 .proc_handler = proc_dointvec,
1da177e4
LT
2799 },
2800 {
1da177e4 2801 .procname = "mtu_expires",
4990509f 2802 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2803 .maxlen = sizeof(int),
2804 .mode = 0644,
6d9f239a 2805 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2806 },
2807 {
1da177e4 2808 .procname = "min_adv_mss",
4990509f 2809 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2810 .maxlen = sizeof(int),
2811 .mode = 0644,
f3d3f616 2812 .proc_handler = proc_dointvec,
1da177e4
LT
2813 },
2814 {
1da177e4 2815 .procname = "gc_min_interval_ms",
4990509f 2816 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2817 .maxlen = sizeof(int),
2818 .mode = 0644,
6d9f239a 2819 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2820 },
f8572d8f 2821 { }
1da177e4
LT
2822};
2823
2c8c1e72 2824struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2825{
2826 struct ctl_table *table;
2827
2828 table = kmemdup(ipv6_route_table_template,
2829 sizeof(ipv6_route_table_template),
2830 GFP_KERNEL);
5ee09105
YH
2831
2832 if (table) {
2833 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2834 table[0].extra1 = net;
86393e52 2835 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2836 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2837 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2838 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2839 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2840 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2841 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2842 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2843 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2844 }
2845
760f2d01
DL
2846 return table;
2847}
1da177e4
LT
2848#endif
2849
2c8c1e72 2850static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2851{
633d424b 2852 int ret = -ENOMEM;
8ed67789 2853
86393e52
AD
2854 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2855 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2856
fc66f95c
ED
2857 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2858 goto out_ip6_dst_ops;
2859
8ed67789
DL
2860 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2861 sizeof(*net->ipv6.ip6_null_entry),
2862 GFP_KERNEL);
2863 if (!net->ipv6.ip6_null_entry)
fc66f95c 2864 goto out_ip6_dst_entries;
d8d1f30b 2865 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2866 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2867 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2868 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2869 ip6_template_metrics, true);
8ed67789
DL
2870
2871#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2872 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2873 sizeof(*net->ipv6.ip6_prohibit_entry),
2874 GFP_KERNEL);
68fffc67
PZ
2875 if (!net->ipv6.ip6_prohibit_entry)
2876 goto out_ip6_null_entry;
d8d1f30b 2877 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2878 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2879 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2880 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2881 ip6_template_metrics, true);
8ed67789
DL
2882
2883 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2884 sizeof(*net->ipv6.ip6_blk_hole_entry),
2885 GFP_KERNEL);
68fffc67
PZ
2886 if (!net->ipv6.ip6_blk_hole_entry)
2887 goto out_ip6_prohibit_entry;
d8d1f30b 2888 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2889 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2890 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2891 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2892 ip6_template_metrics, true);
8ed67789
DL
2893#endif
2894
b339a47c
PZ
2895 net->ipv6.sysctl.flush_delay = 0;
2896 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2897 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2898 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2899 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2900 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2901 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2902 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2903
cdb18761
DL
2904#ifdef CONFIG_PROC_FS
2905 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2906 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2907#endif
6891a346
BT
2908 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2909
8ed67789
DL
2910 ret = 0;
2911out:
2912 return ret;
f2fc6a54 2913
68fffc67
PZ
2914#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2915out_ip6_prohibit_entry:
2916 kfree(net->ipv6.ip6_prohibit_entry);
2917out_ip6_null_entry:
2918 kfree(net->ipv6.ip6_null_entry);
2919#endif
fc66f95c
ED
2920out_ip6_dst_entries:
2921 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2922out_ip6_dst_ops:
f2fc6a54 2923 goto out;
cdb18761
DL
2924}
2925
2c8c1e72 2926static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2927{
2928#ifdef CONFIG_PROC_FS
2929 proc_net_remove(net, "ipv6_route");
2930 proc_net_remove(net, "rt6_stats");
2931#endif
8ed67789
DL
2932 kfree(net->ipv6.ip6_null_entry);
2933#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2934 kfree(net->ipv6.ip6_prohibit_entry);
2935 kfree(net->ipv6.ip6_blk_hole_entry);
2936#endif
41bb78b4 2937 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2938}
2939
2940static struct pernet_operations ip6_route_net_ops = {
2941 .init = ip6_route_net_init,
2942 .exit = ip6_route_net_exit,
2943};
2944
8ed67789
DL
2945static struct notifier_block ip6_route_dev_notifier = {
2946 .notifier_call = ip6_route_dev_notify,
2947 .priority = 0,
2948};
2949
433d49c3 2950int __init ip6_route_init(void)
1da177e4 2951{
433d49c3
DL
2952 int ret;
2953
9a7ec3a9
DL
2954 ret = -ENOMEM;
2955 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2956 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2957 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2958 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2959 goto out;
14e50e57 2960
fc66f95c 2961 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2962 if (ret)
bdb3289f 2963 goto out_kmem_cache;
bdb3289f 2964
fc66f95c
ED
2965 ret = register_pernet_subsys(&ip6_route_net_ops);
2966 if (ret)
2967 goto out_dst_entries;
2968
5dc121e9
AE
2969 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2970
8ed67789
DL
2971 /* Registering of the loopback is done before this portion of code,
2972 * the loopback reference in rt6_info will not be taken, do it
2973 * manually for init_net */
d8d1f30b 2974 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2975 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2976 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2977 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2978 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2979 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2980 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2981 #endif
433d49c3
DL
2982 ret = fib6_init();
2983 if (ret)
8ed67789 2984 goto out_register_subsys;
433d49c3 2985
433d49c3
DL
2986 ret = xfrm6_init();
2987 if (ret)
cdb18761 2988 goto out_fib6_init;
c35b7e72 2989
433d49c3
DL
2990 ret = fib6_rules_init();
2991 if (ret)
2992 goto xfrm6_init;
7e5449c2 2993
433d49c3 2994 ret = -ENOBUFS;
c7ac8679
GR
2995 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2996 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2997 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 2998 goto fib6_rules_init;
c127ea2c 2999
8ed67789 3000 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
3001 if (ret)
3002 goto fib6_rules_init;
8ed67789 3003
433d49c3
DL
3004out:
3005 return ret;
3006
3007fib6_rules_init:
433d49c3
DL
3008 fib6_rules_cleanup();
3009xfrm6_init:
433d49c3 3010 xfrm6_fini();
433d49c3 3011out_fib6_init:
433d49c3 3012 fib6_gc_cleanup();
8ed67789
DL
3013out_register_subsys:
3014 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
3015out_dst_entries:
3016 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3017out_kmem_cache:
f2fc6a54 3018 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3019 goto out;
1da177e4
LT
3020}
3021
3022void ip6_route_cleanup(void)
3023{
8ed67789 3024 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 3025 fib6_rules_cleanup();
1da177e4 3026 xfrm6_fini();
1da177e4 3027 fib6_gc_cleanup();
8ed67789 3028 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3029 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3030 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3031}