tcp: add LINUX_MIB_TCPRETRANSFAIL counter
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4 28#include <linux/errno.h>
bc3b2d7f 29#include <linux/export.h>
1da177e4
LT
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
7bc570c8 38#include <linux/mroute6.h>
1da177e4 39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
5b7c931d 43#include <linux/nsproxy.h>
5a0e3ad6 44#include <linux/slab.h>
457c4cbc 45#include <net/net_namespace.h>
1da177e4
LT
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
8d71740c 56#include <net/netevent.h>
21713ebc 57#include <net/netlink.h>
1da177e4
LT
58
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
21efcfa0
ED
65static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
66 const struct in6_addr *dest);
1da177e4 67static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 68static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 69static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
70static struct dst_entry *ip6_negative_advice(struct dst_entry *);
71static void ip6_dst_destroy(struct dst_entry *);
72static void ip6_dst_ifdown(struct dst_entry *,
73 struct net_device *dev, int how);
569d3645 74static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
75
76static int ip6_pkt_discard(struct sk_buff *skb);
77static int ip6_pkt_discard_out(struct sk_buff *skb);
78static void ip6_link_failure(struct sk_buff *skb);
79static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
80
70ceb4f5 81#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 82static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
83 const struct in6_addr *prefix, int prefixlen,
84 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5 85 unsigned pref);
efa2cea0 86static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
87 const struct in6_addr *prefix, int prefixlen,
88 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
89#endif
90
06582540
DM
91static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
92{
93 struct rt6_info *rt = (struct rt6_info *) dst;
94 struct inet_peer *peer;
95 u32 *p = NULL;
96
8e2ec639
YZ
97 if (!(rt->dst.flags & DST_HOST))
98 return NULL;
99
06582540
DM
100 if (!rt->rt6i_peer)
101 rt6_bind_peer(rt, 1);
102
103 peer = rt->rt6i_peer;
104 if (peer) {
105 u32 *old_p = __DST_METRICS_PTR(old);
106 unsigned long prev, new;
107
108 p = peer->metrics;
109 if (inet_metrics_new(peer))
110 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111
112 new = (unsigned long) p;
113 prev = cmpxchg(&dst->_metrics, old, new);
114
115 if (prev != old) {
116 p = __DST_METRICS_PTR(prev);
117 if (prev & DST_METRICS_READ_ONLY)
118 p = NULL;
119 }
120 }
121 return p;
122}
123
d3aaeb38
DM
124static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
125{
f83c7790
DM
126 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
127 if (n)
128 return n;
129 return neigh_create(&nd_tbl, daddr, dst->dev);
130}
131
8ade06c6 132static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 133{
8ade06c6
DM
134 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
135 if (!n) {
136 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
137 if (IS_ERR(n))
138 return PTR_ERR(n);
139 }
f83c7790
DM
140 dst_set_neighbour(&rt->dst, n);
141
142 return 0;
d3aaeb38
DM
143}
144
9a7ec3a9 145static struct dst_ops ip6_dst_ops_template = {
1da177e4 146 .family = AF_INET6,
09640e63 147 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
148 .gc = ip6_dst_gc,
149 .gc_thresh = 1024,
150 .check = ip6_dst_check,
0dbaee3b 151 .default_advmss = ip6_default_advmss,
ebb762f2 152 .mtu = ip6_mtu,
06582540 153 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
154 .destroy = ip6_dst_destroy,
155 .ifdown = ip6_dst_ifdown,
156 .negative_advice = ip6_negative_advice,
157 .link_failure = ip6_link_failure,
158 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 159 .local_out = __ip6_local_out,
d3aaeb38 160 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
161};
162
ebb762f2 163static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 164{
618f9bc7
SK
165 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
166
167 return mtu ? : dst->dev->mtu;
ec831ea7
RD
168}
169
14e50e57
DM
170static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
171{
172}
173
0972ddb2
HB
174static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
175 unsigned long old)
176{
177 return NULL;
178}
179
14e50e57
DM
180static struct dst_ops ip6_dst_blackhole_ops = {
181 .family = AF_INET6,
09640e63 182 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
183 .destroy = ip6_dst_destroy,
184 .check = ip6_dst_check,
ebb762f2 185 .mtu = ip6_blackhole_mtu,
214f45c9 186 .default_advmss = ip6_default_advmss,
14e50e57 187 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 188 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 189 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
190};
191
62fa8a84
DM
192static const u32 ip6_template_metrics[RTAX_MAX] = {
193 [RTAX_HOPLIMIT - 1] = 255,
194};
195
bdb3289f 196static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
197 .dst = {
198 .__refcnt = ATOMIC_INIT(1),
199 .__use = 1,
200 .obsolete = -1,
201 .error = -ENETUNREACH,
d8d1f30b
CG
202 .input = ip6_pkt_discard,
203 .output = ip6_pkt_discard_out,
1da177e4
LT
204 },
205 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 206 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
207 .rt6i_metric = ~(u32) 0,
208 .rt6i_ref = ATOMIC_INIT(1),
209};
210
101367c2
TG
211#ifdef CONFIG_IPV6_MULTIPLE_TABLES
212
6723ab54
DM
213static int ip6_pkt_prohibit(struct sk_buff *skb);
214static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 215
280a34c8 216static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
217 .dst = {
218 .__refcnt = ATOMIC_INIT(1),
219 .__use = 1,
220 .obsolete = -1,
221 .error = -EACCES,
d8d1f30b
CG
222 .input = ip6_pkt_prohibit,
223 .output = ip6_pkt_prohibit_out,
101367c2
TG
224 },
225 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 226 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
227 .rt6i_metric = ~(u32) 0,
228 .rt6i_ref = ATOMIC_INIT(1),
229};
230
bdb3289f 231static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
232 .dst = {
233 .__refcnt = ATOMIC_INIT(1),
234 .__use = 1,
235 .obsolete = -1,
236 .error = -EINVAL,
d8d1f30b
CG
237 .input = dst_discard,
238 .output = dst_discard,
101367c2
TG
239 },
240 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 241 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
242 .rt6i_metric = ~(u32) 0,
243 .rt6i_ref = ATOMIC_INIT(1),
244};
245
246#endif
247
1da177e4 248/* allocate dst with ip6_dst_ops */
5c1e6aa3 249static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
957c665f
DM
250 struct net_device *dev,
251 int flags)
1da177e4 252{
957c665f 253 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
cf911662 254
38308473 255 if (rt)
fbe58186 256 memset(&rt->rt6i_table, 0,
38308473 257 sizeof(*rt) - sizeof(struct dst_entry));
cf911662
DM
258
259 return rt;
1da177e4
LT
260}
261
262static void ip6_dst_destroy(struct dst_entry *dst)
263{
264 struct rt6_info *rt = (struct rt6_info *)dst;
265 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 266 struct inet_peer *peer = rt->rt6i_peer;
1da177e4 267
8e2ec639
YZ
268 if (!(rt->dst.flags & DST_HOST))
269 dst_destroy_metrics_generic(dst);
270
38308473 271 if (idev) {
1da177e4
LT
272 rt->rt6i_idev = NULL;
273 in6_dev_put(idev);
1ab1457c 274 }
b3419363 275 if (peer) {
b3419363
DM
276 rt->rt6i_peer = NULL;
277 inet_putpeer(peer);
278 }
279}
280
6431cbc2
DM
281static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
282
283static u32 rt6_peer_genid(void)
284{
285 return atomic_read(&__rt6_peer_genid);
286}
287
b3419363
DM
288void rt6_bind_peer(struct rt6_info *rt, int create)
289{
290 struct inet_peer *peer;
291
b3419363
DM
292 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
293 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
294 inet_putpeer(peer);
6431cbc2
DM
295 else
296 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
297}
298
299static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
300 int how)
301{
302 struct rt6_info *rt = (struct rt6_info *)dst;
303 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 304 struct net_device *loopback_dev =
c346dca1 305 dev_net(dev)->loopback_dev;
1da177e4 306
38308473 307 if (dev != loopback_dev && idev && idev->dev == dev) {
5a3e55d6
DL
308 struct inet6_dev *loopback_idev =
309 in6_dev_get(loopback_dev);
38308473 310 if (loopback_idev) {
1da177e4
LT
311 rt->rt6i_idev = loopback_idev;
312 in6_dev_put(idev);
313 }
314 }
315}
316
317static __inline__ int rt6_check_expired(const struct rt6_info *rt)
318{
a02cec21 319 return (rt->rt6i_flags & RTF_EXPIRES) &&
d1918542 320 time_after(jiffies, rt->dst.expires);
1da177e4
LT
321}
322
b71d1d42 323static inline int rt6_need_strict(const struct in6_addr *daddr)
c71099ac 324{
a02cec21
ED
325 return ipv6_addr_type(daddr) &
326 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
327}
328
1da177e4 329/*
c71099ac 330 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
331 */
332
8ed67789
DL
333static inline struct rt6_info *rt6_device_match(struct net *net,
334 struct rt6_info *rt,
b71d1d42 335 const struct in6_addr *saddr,
1da177e4 336 int oif,
d420895e 337 int flags)
1da177e4
LT
338{
339 struct rt6_info *local = NULL;
340 struct rt6_info *sprt;
341
dd3abc4e
YH
342 if (!oif && ipv6_addr_any(saddr))
343 goto out;
344
d8d1f30b 345 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 346 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
347
348 if (oif) {
1da177e4
LT
349 if (dev->ifindex == oif)
350 return sprt;
351 if (dev->flags & IFF_LOOPBACK) {
38308473 352 if (!sprt->rt6i_idev ||
1da177e4 353 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 354 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 355 continue;
1ab1457c 356 if (local && (!oif ||
1da177e4
LT
357 local->rt6i_idev->dev->ifindex == oif))
358 continue;
359 }
360 local = sprt;
361 }
dd3abc4e
YH
362 } else {
363 if (ipv6_chk_addr(net, saddr, dev,
364 flags & RT6_LOOKUP_F_IFACE))
365 return sprt;
1da177e4 366 }
dd3abc4e 367 }
1da177e4 368
dd3abc4e 369 if (oif) {
1da177e4
LT
370 if (local)
371 return local;
372
d420895e 373 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 374 return net->ipv6.ip6_null_entry;
1da177e4 375 }
dd3abc4e 376out:
1da177e4
LT
377 return rt;
378}
379
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation towards the
 * neighbour of @rt when that neighbour is not in a NUD_VALID state and the
 * last update is older than the interface's rtr_probe_interval.
 *
 * @rt may be NULL, in which case nothing happens.
 *
 * neigh->updated is both tested and rewritten here, so the neighbour lock
 * is taken for writing: with only the read side held, two concurrent
 * callers could both pass the rate-limit test and race on the store.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;
	write_lock_bh(&neigh->lock);
	/* Re-check the state now that the lock is held. */
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* Solicit the neighbour via its solicited-node multicast address. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
419
1da177e4 420/*
554cfb7e 421 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 422 */
b6f99a21 423static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 424{
d1918542 425 struct net_device *dev = rt->dst.dev;
161980f4 426 if (!oif || dev->ifindex == oif)
554cfb7e 427 return 2;
161980f4
DM
428 if ((dev->flags & IFF_LOOPBACK) &&
429 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
430 return 1;
431 return 0;
554cfb7e 432}
1da177e4 433
b6f99a21 434static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 435{
f2c31e32 436 struct neighbour *neigh;
398bcbeb 437 int m;
f2c31e32
ED
438
439 rcu_read_lock();
27217455 440 neigh = dst_get_neighbour_noref(&rt->dst);
4d0c5911
YH
441 if (rt->rt6i_flags & RTF_NONEXTHOP ||
442 !(rt->rt6i_flags & RTF_GATEWAY))
443 m = 1;
444 else if (neigh) {
554cfb7e
YH
445 read_lock_bh(&neigh->lock);
446 if (neigh->nud_state & NUD_VALID)
4d0c5911 447 m = 2;
398bcbeb
YH
448#ifdef CONFIG_IPV6_ROUTER_PREF
449 else if (neigh->nud_state & NUD_FAILED)
450 m = 0;
451#endif
452 else
ea73ee23 453 m = 1;
554cfb7e 454 read_unlock_bh(&neigh->lock);
398bcbeb
YH
455 } else
456 m = 0;
f2c31e32 457 rcu_read_unlock();
554cfb7e 458 return m;
1da177e4
LT
459}
460
554cfb7e
YH
461static int rt6_score_route(struct rt6_info *rt, int oif,
462 int strict)
1da177e4 463{
4d0c5911 464 int m, n;
1ab1457c 465
4d0c5911 466 m = rt6_check_dev(rt, oif);
77d16f45 467 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 468 return -1;
ebacaaa0
YH
469#ifdef CONFIG_IPV6_ROUTER_PREF
470 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
471#endif
4d0c5911 472 n = rt6_check_neigh(rt);
557e92ef 473 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
474 return -1;
475 return m;
476}
477
f11e6659
DM
478static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
479 int *mpri, struct rt6_info *match)
554cfb7e 480{
f11e6659
DM
481 int m;
482
483 if (rt6_check_expired(rt))
484 goto out;
485
486 m = rt6_score_route(rt, oif, strict);
487 if (m < 0)
488 goto out;
489
490 if (m > *mpri) {
491 if (strict & RT6_LOOKUP_F_REACHABLE)
492 rt6_probe(match);
493 *mpri = m;
494 match = rt;
495 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
496 rt6_probe(rt);
497 }
498
499out:
500 return match;
501}
502
503static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
504 struct rt6_info *rr_head,
505 u32 metric, int oif, int strict)
506{
507 struct rt6_info *rt, *match;
554cfb7e 508 int mpri = -1;
1da177e4 509
f11e6659
DM
510 match = NULL;
511 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 512 rt = rt->dst.rt6_next)
f11e6659
DM
513 match = find_match(rt, oif, strict, &mpri, match);
514 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 515 rt = rt->dst.rt6_next)
f11e6659 516 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 517
f11e6659
DM
518 return match;
519}
1da177e4 520
f11e6659
DM
521static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
522{
523 struct rt6_info *match, *rt0;
8ed67789 524 struct net *net;
1da177e4 525
f11e6659
DM
526 rt0 = fn->rr_ptr;
527 if (!rt0)
528 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 529
f11e6659 530 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 531
554cfb7e 532 if (!match &&
f11e6659 533 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 534 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 535
554cfb7e 536 /* no entries matched; do round-robin */
f11e6659
DM
537 if (!next || next->rt6i_metric != rt0->rt6i_metric)
538 next = fn->leaf;
539
540 if (next != rt0)
541 fn->rr_ptr = next;
1da177e4 542 }
1da177e4 543
d1918542 544 net = dev_net(rt0->dst.dev);
a02cec21 545 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
546}
547
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received Route Information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Validates the option, then deletes, adds or updates the matching
 * route-info route according to the advertised lifetime and preference.
 * Returns 0 on success and -EINVAL for a malformed option or an invalid
 * preference value.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr masked_prefix, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* The option carries a full prefix field: use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&masked_prefix,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &masked_prefix;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->dst.expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
621
/*
 * BACKTRACK(__net, saddr) - climb the fib6 tree after a failed selection.
 *
 * Expects the expanding function to provide the variables `rt` and `fn`
 * and the labels `out` and `restart`.  Does nothing unless `rt` is the
 * null entry of @__net.  Otherwise it walks towards the root: at each step
 * it either looks @saddr up in the parent's subtree (when the parent has
 * one and we did not just come from it) or moves to the parent itself.
 * It jumps to `restart` at the first node flagged RTN_RTINFO and to `out`
 * when a node flagged RTN_TL_ROOT is reached.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
c71099ac 639
8ed67789
DL
640static struct rt6_info *ip6_pol_route_lookup(struct net *net,
641 struct fib6_table *table,
4c9483b2 642 struct flowi6 *fl6, int flags)
1da177e4
LT
643{
644 struct fib6_node *fn;
645 struct rt6_info *rt;
646
c71099ac 647 read_lock_bh(&table->tb6_lock);
4c9483b2 648 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
649restart:
650 rt = fn->leaf;
4c9483b2
DM
651 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
652 BACKTRACK(net, &fl6->saddr);
c71099ac 653out:
d8d1f30b 654 dst_use(&rt->dst, jiffies);
c71099ac 655 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
656 return rt;
657
658}
659
ea6e574e
FW
660struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
661 int flags)
662{
663 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
664}
665EXPORT_SYMBOL_GPL(ip6_route_lookup);
666
9acd9f3a
YH
667struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
668 const struct in6_addr *saddr, int oif, int strict)
c71099ac 669{
4c9483b2
DM
670 struct flowi6 fl6 = {
671 .flowi6_oif = oif,
672 .daddr = *daddr,
c71099ac
TG
673 };
674 struct dst_entry *dst;
77d16f45 675 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 676
adaa70bb 677 if (saddr) {
4c9483b2 678 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
679 flags |= RT6_LOOKUP_F_HAS_SADDR;
680 }
681
4c9483b2 682 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
683 if (dst->error == 0)
684 return (struct rt6_info *) dst;
685
686 dst_release(dst);
687
1da177e4
LT
688 return NULL;
689}
690
7159039a
YH
691EXPORT_SYMBOL(rt6_lookup);
692
c71099ac 693/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
694 It takes new route entry, the addition fails by any reason the
695 route is freed. In any case, if caller does not hold it, it may
696 be destroyed.
697 */
698
86872cb5 699static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
700{
701 int err;
c71099ac 702 struct fib6_table *table;
1da177e4 703
c71099ac
TG
704 table = rt->rt6i_table;
705 write_lock_bh(&table->tb6_lock);
86872cb5 706 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 707 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
708
709 return err;
710}
711
40e22e8f
TG
712int ip6_ins_rt(struct rt6_info *rt)
713{
4d1169c1 714 struct nl_info info = {
d1918542 715 .nl_net = dev_net(rt->dst.dev),
4d1169c1 716 };
528c4ceb 717 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
718}
719
21efcfa0
ED
720static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
721 const struct in6_addr *daddr,
b71d1d42 722 const struct in6_addr *saddr)
1da177e4 723{
1da177e4
LT
724 struct rt6_info *rt;
725
726 /*
727 * Clone the route.
728 */
729
21efcfa0 730 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
731
732 if (rt) {
14deae41
DM
733 int attempts = !in_softirq();
734
38308473 735 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 736 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 737 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 738 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 739 rt->rt6i_gateway = *daddr;
58c4fb86 740 }
1da177e4 741
1da177e4 742 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
743
744#ifdef CONFIG_IPV6_SUBTREES
745 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 746 rt->rt6i_src.addr = *saddr;
1da177e4
LT
747 rt->rt6i_src.plen = 128;
748 }
749#endif
750
14deae41 751 retry:
8ade06c6 752 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 753 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
754 int saved_rt_min_interval =
755 net->ipv6.sysctl.ip6_rt_gc_min_interval;
756 int saved_rt_elasticity =
757 net->ipv6.sysctl.ip6_rt_gc_elasticity;
758
759 if (attempts-- > 0) {
760 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
761 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
762
86393e52 763 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
764
765 net->ipv6.sysctl.ip6_rt_gc_elasticity =
766 saved_rt_elasticity;
767 net->ipv6.sysctl.ip6_rt_gc_min_interval =
768 saved_rt_min_interval;
769 goto retry;
770 }
771
772 if (net_ratelimit())
773 printk(KERN_WARNING
7e1b33e5 774 "ipv6: Neighbour table overflow.\n");
d8d1f30b 775 dst_free(&rt->dst);
14deae41
DM
776 return NULL;
777 }
95a9a5ba 778 }
1da177e4 779
95a9a5ba
YH
780 return rt;
781}
1da177e4 782
21efcfa0
ED
783static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
784 const struct in6_addr *daddr)
299d9939 785{
21efcfa0
ED
786 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
787
299d9939 788 if (rt) {
299d9939 789 rt->rt6i_flags |= RTF_CACHE;
27217455 790 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
299d9939
YH
791 }
792 return rt;
793}
794
8ed67789 795static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 796 struct flowi6 *fl6, int flags)
1da177e4
LT
797{
798 struct fib6_node *fn;
519fbd87 799 struct rt6_info *rt, *nrt;
c71099ac 800 int strict = 0;
1da177e4 801 int attempts = 3;
519fbd87 802 int err;
53b7997f 803 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 804
77d16f45 805 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
806
807relookup:
c71099ac 808 read_lock_bh(&table->tb6_lock);
1da177e4 809
8238dd06 810restart_2:
4c9483b2 811 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
812
813restart:
4acad72d 814 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 815
4c9483b2 816 BACKTRACK(net, &fl6->saddr);
8ed67789 817 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 818 rt->rt6i_flags & RTF_CACHE)
1ddef044 819 goto out;
1da177e4 820
d8d1f30b 821 dst_hold(&rt->dst);
c71099ac 822 read_unlock_bh(&table->tb6_lock);
fb9de91e 823
27217455 824 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 825 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 826 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 827 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
828 else
829 goto out2;
e40cf353 830
d8d1f30b 831 dst_release(&rt->dst);
8ed67789 832 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 833
d8d1f30b 834 dst_hold(&rt->dst);
519fbd87 835 if (nrt) {
40e22e8f 836 err = ip6_ins_rt(nrt);
519fbd87 837 if (!err)
1da177e4 838 goto out2;
1da177e4 839 }
1da177e4 840
519fbd87
YH
841 if (--attempts <= 0)
842 goto out2;
843
844 /*
c71099ac 845 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
846 * released someone could insert this route. Relookup.
847 */
d8d1f30b 848 dst_release(&rt->dst);
519fbd87
YH
849 goto relookup;
850
851out:
8238dd06
YH
852 if (reachable) {
853 reachable = 0;
854 goto restart_2;
855 }
d8d1f30b 856 dst_hold(&rt->dst);
c71099ac 857 read_unlock_bh(&table->tb6_lock);
1da177e4 858out2:
d8d1f30b
CG
859 rt->dst.lastuse = jiffies;
860 rt->dst.__use++;
c71099ac
TG
861
862 return rt;
1da177e4
LT
863}
864
8ed67789 865static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 866 struct flowi6 *fl6, int flags)
4acad72d 867{
4c9483b2 868 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
869}
870
c71099ac
TG
871void ip6_route_input(struct sk_buff *skb)
872{
b71d1d42 873 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 874 struct net *net = dev_net(skb->dev);
adaa70bb 875 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
876 struct flowi6 fl6 = {
877 .flowi6_iif = skb->dev->ifindex,
878 .daddr = iph->daddr,
879 .saddr = iph->saddr,
38308473 880 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
881 .flowi6_mark = skb->mark,
882 .flowi6_proto = iph->nexthdr,
c71099ac 883 };
adaa70bb 884
1d6e55f1 885 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 886 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 887
4c9483b2 888 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
c71099ac
TG
889}
890
8ed67789 891static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 892 struct flowi6 *fl6, int flags)
1da177e4 893{
4c9483b2 894 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
895}
896
9c7a4f9c 897struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 898 struct flowi6 *fl6)
c71099ac
TG
899{
900 int flags = 0;
901
4c9483b2 902 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 903 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 904
4c9483b2 905 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 906 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
907 else if (sk)
908 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 909
4c9483b2 910 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
911}
912
7159039a 913EXPORT_SYMBOL(ip6_route_output);
1da177e4 914
2774c131 915struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 916{
5c1e6aa3 917 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
918 struct dst_entry *new = NULL;
919
5c1e6aa3 920 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 921 if (rt) {
cf911662
DM
922 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
923
d8d1f30b 924 new = &rt->dst;
14e50e57 925
14e50e57 926 new->__use = 1;
352e512c
HX
927 new->input = dst_discard;
928 new->output = dst_discard;
14e50e57 929
21efcfa0
ED
930 if (dst_metrics_read_only(&ort->dst))
931 new->_metrics = ort->dst._metrics;
932 else
933 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
934 rt->rt6i_idev = ort->rt6i_idev;
935 if (rt->rt6i_idev)
936 in6_dev_hold(rt->rt6i_idev);
d1918542 937 rt->dst.expires = 0;
14e50e57 938
4e3fd7a0 939 rt->rt6i_gateway = ort->rt6i_gateway;
14e50e57
DM
940 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
941 rt->rt6i_metric = 0;
942
943 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
944#ifdef CONFIG_IPV6_SUBTREES
945 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
946#endif
947
948 dst_free(new);
949 }
950
69ead7af
DM
951 dst_release(dst_orig);
952 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 953}
14e50e57 954
1da177e4
LT
955/*
956 * Destination cache support functions
957 */
958
959static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
960{
961 struct rt6_info *rt;
962
963 rt = (struct rt6_info *) dst;
964
6431cbc2
DM
965 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
966 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
967 if (!rt->rt6i_peer)
968 rt6_bind_peer(rt, 0);
969 rt->rt6i_peer_genid = rt6_peer_genid();
970 }
1da177e4 971 return dst;
6431cbc2 972 }
1da177e4
LT
973 return NULL;
974}
975
976static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
977{
978 struct rt6_info *rt = (struct rt6_info *) dst;
979
980 if (rt) {
54c1a859
YH
981 if (rt->rt6i_flags & RTF_CACHE) {
982 if (rt6_check_expired(rt)) {
983 ip6_del_rt(rt);
984 dst = NULL;
985 }
986 } else {
1da177e4 987 dst_release(dst);
54c1a859
YH
988 dst = NULL;
989 }
1da177e4 990 }
54c1a859 991 return dst;
1da177e4
LT
992}
993
994static void ip6_link_failure(struct sk_buff *skb)
995{
996 struct rt6_info *rt;
997
3ffe533c 998 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 999
adf30907 1000 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1001 if (rt) {
38308473 1002 if (rt->rt6i_flags & RTF_CACHE) {
d8d1f30b 1003 dst_set_expires(&rt->dst, 0);
1da177e4
LT
1004 rt->rt6i_flags |= RTF_EXPIRES;
1005 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1006 rt->rt6i_node->fn_sernum = -1;
1007 }
1008}
1009
1010static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1011{
1012 struct rt6_info *rt6 = (struct rt6_info*)dst;
1013
1014 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1015 rt6->rt6i_flags |= RTF_MODIFIED;
1016 if (mtu < IPV6_MIN_MTU) {
defb3519 1017 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1018 mtu = IPV6_MIN_MTU;
defb3519
DM
1019 features |= RTAX_FEATURE_ALLFRAG;
1020 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1021 }
defb3519 1022 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
1023 }
1024}
1025
0dbaee3b 1026static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1027{
0dbaee3b
DM
1028 struct net_device *dev = dst->dev;
1029 unsigned int mtu = dst_mtu(dst);
1030 struct net *net = dev_net(dev);
1031
1da177e4
LT
1032 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1033
5578689a
DL
1034 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1035 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1036
1037 /*
1ab1457c
YH
1038 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1039 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1040 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1041 * rely only on pmtu discovery"
1042 */
1043 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1044 mtu = IPV6_MAXPLEN;
1045 return mtu;
1046}
1047
ebb762f2 1048static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1049{
d33e4553 1050 struct inet6_dev *idev;
618f9bc7
SK
1051 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1052
1053 if (mtu)
1054 return mtu;
1055
1056 mtu = IPV6_MIN_MTU;
d33e4553
DM
1057
1058 rcu_read_lock();
1059 idev = __in6_dev_get(dst->dev);
1060 if (idev)
1061 mtu = idev->cnf.mtu6;
1062 rcu_read_unlock();
1063
1064 return mtu;
1065}
1066
3b00944c
YH
/*
 * Head of a singly linked list (chained through dst->next) of the dst
 * entries created by icmp6_dst_alloc().  icmp6_dst_gc() and
 * icmp6_clean_all() unlink and free entries from it.  Those three functions
 * touch the list only while holding icmp6_dst_lock via spin_lock_bh().
 */
1067static struct dst_entry *icmp6_dst_gc_list;
1068static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1069
3b00944c 1070struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1071 struct neighbour *neigh,
87a11578 1072 struct flowi6 *fl6)
1da177e4 1073{
87a11578 1074 struct dst_entry *dst;
1da177e4
LT
1075 struct rt6_info *rt;
1076 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1077 struct net *net = dev_net(dev);
1da177e4 1078
38308473 1079 if (unlikely(!idev))
1da177e4
LT
1080 return NULL;
1081
957c665f 1082 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
38308473 1083 if (unlikely(!rt)) {
1da177e4 1084 in6_dev_put(idev);
87a11578 1085 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1086 goto out;
1087 }
1088
1da177e4
LT
1089 if (neigh)
1090 neigh_hold(neigh);
14deae41 1091 else {
f83c7790 1092 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
b43faac6 1093 if (IS_ERR(neigh)) {
252c3d84 1094 in6_dev_put(idev);
b43faac6
DM
1095 dst_free(&rt->dst);
1096 return ERR_CAST(neigh);
1097 }
14deae41 1098 }
1da177e4 1099
8e2ec639
YZ
1100 rt->dst.flags |= DST_HOST;
1101 rt->dst.output = ip6_output;
69cce1d1 1102 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1103 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1104 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1105 rt->rt6i_dst.plen = 128;
1106 rt->rt6i_idev = idev;
7011687f 1107 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1108
3b00944c 1109 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1110 rt->dst.next = icmp6_dst_gc_list;
1111 icmp6_dst_gc_list = &rt->dst;
3b00944c 1112 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1113
5578689a 1114 fib6_force_start_gc(net);
1da177e4 1115
87a11578
DM
1116 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1117
1da177e4 1118out:
87a11578 1119 return dst;
1da177e4
LT
1120}
1121
3d0f24a7 1122int icmp6_dst_gc(void)
1da177e4 1123{
e9476e95 1124 struct dst_entry *dst, **pprev;
3d0f24a7 1125 int more = 0;
1da177e4 1126
3b00944c
YH
1127 spin_lock_bh(&icmp6_dst_lock);
1128 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1129
1da177e4
LT
1130 while ((dst = *pprev) != NULL) {
1131 if (!atomic_read(&dst->__refcnt)) {
1132 *pprev = dst->next;
1133 dst_free(dst);
1da177e4
LT
1134 } else {
1135 pprev = &dst->next;
3d0f24a7 1136 ++more;
1da177e4
LT
1137 }
1138 }
1139
3b00944c 1140 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1141
3d0f24a7 1142 return more;
1da177e4
LT
1143}
1144
1e493d19
DM
1145static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1146 void *arg)
1147{
1148 struct dst_entry *dst, **pprev;
1149
1150 spin_lock_bh(&icmp6_dst_lock);
1151 pprev = &icmp6_dst_gc_list;
1152 while ((dst = *pprev) != NULL) {
1153 struct rt6_info *rt = (struct rt6_info *) dst;
1154 if (func(rt, arg)) {
1155 *pprev = dst->next;
1156 dst_free(dst);
1157 } else {
1158 pprev = &dst->next;
1159 }
1160 }
1161 spin_unlock_bh(&icmp6_dst_lock);
1162}
1163
569d3645 1164static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1165{
1da177e4 1166 unsigned long now = jiffies;
86393e52 1167 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1168 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1169 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1170 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1171 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1172 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1173 int entries;
7019b78e 1174
fc66f95c 1175 entries = dst_entries_get_fast(ops);
7019b78e 1176 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1177 entries <= rt_max_size)
1da177e4
LT
1178 goto out;
1179
6891a346
BT
1180 net->ipv6.ip6_rt_gc_expire++;
1181 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1182 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1183 entries = dst_entries_get_slow(ops);
1184 if (entries < ops->gc_thresh)
7019b78e 1185 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1186out:
7019b78e 1187 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1188 return entries > rt_max_size;
1da177e4
LT
1189}
1190
1191/* Clean host part of a prefix. Not necessary in radix tree,
1192 but results in cleaner routing tables.
1193
1194 Remove it only when all the things will work!
1195 */
1196
6b75d090 1197int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1198{
5170ae82 1199 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1200 if (hoplimit == 0) {
6b75d090 1201 struct net_device *dev = dst->dev;
c68f24cc
ED
1202 struct inet6_dev *idev;
1203
1204 rcu_read_lock();
1205 idev = __in6_dev_get(dev);
1206 if (idev)
6b75d090 1207 hoplimit = idev->cnf.hop_limit;
c68f24cc 1208 else
53b7997f 1209 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1210 rcu_read_unlock();
1da177e4
LT
1211 }
1212 return hoplimit;
1213}
abbf46ae 1214EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1215
1216/*
1217 *
1218 */
1219
86872cb5 1220int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1221{
1222 int err;
5578689a 1223 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1224 struct rt6_info *rt = NULL;
1225 struct net_device *dev = NULL;
1226 struct inet6_dev *idev = NULL;
c71099ac 1227 struct fib6_table *table;
1da177e4
LT
1228 int addr_type;
1229
86872cb5 1230 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1231 return -EINVAL;
1232#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1233 if (cfg->fc_src_len)
1da177e4
LT
1234 return -EINVAL;
1235#endif
86872cb5 1236 if (cfg->fc_ifindex) {
1da177e4 1237 err = -ENODEV;
5578689a 1238 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1239 if (!dev)
1240 goto out;
1241 idev = in6_dev_get(dev);
1242 if (!idev)
1243 goto out;
1244 }
1245
86872cb5
TG
1246 if (cfg->fc_metric == 0)
1247 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1248
d71314b4 1249 err = -ENOBUFS;
38308473
DM
1250 if (cfg->fc_nlinfo.nlh &&
1251 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1252 table = fib6_get_table(net, cfg->fc_table);
38308473 1253 if (!table) {
d71314b4
MV
1254 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1255 table = fib6_new_table(net, cfg->fc_table);
1256 }
1257 } else {
1258 table = fib6_new_table(net, cfg->fc_table);
1259 }
38308473
DM
1260
1261 if (!table)
c71099ac 1262 goto out;
c71099ac 1263
957c665f 1264 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1da177e4 1265
38308473 1266 if (!rt) {
1da177e4
LT
1267 err = -ENOMEM;
1268 goto out;
1269 }
1270
d8d1f30b 1271 rt->dst.obsolete = -1;
d1918542 1272 rt->dst.expires = (cfg->fc_flags & RTF_EXPIRES) ?
6f704992
YH
1273 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1274 0;
1da177e4 1275
86872cb5
TG
1276 if (cfg->fc_protocol == RTPROT_UNSPEC)
1277 cfg->fc_protocol = RTPROT_BOOT;
1278 rt->rt6i_protocol = cfg->fc_protocol;
1279
1280 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1281
1282 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1283 rt->dst.input = ip6_mc_input;
ab79ad14
1284 else if (cfg->fc_flags & RTF_LOCAL)
1285 rt->dst.input = ip6_input;
1da177e4 1286 else
d8d1f30b 1287 rt->dst.input = ip6_forward;
1da177e4 1288
d8d1f30b 1289 rt->dst.output = ip6_output;
1da177e4 1290
86872cb5
TG
1291 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1292 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1293 if (rt->rt6i_dst.plen == 128)
11d53b49 1294 rt->dst.flags |= DST_HOST;
1da177e4 1295
8e2ec639
YZ
1296 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1297 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1298 if (!metrics) {
1299 err = -ENOMEM;
1300 goto out;
1301 }
1302 dst_init_metrics(&rt->dst, metrics, 0);
1303 }
1da177e4 1304#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1305 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1306 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1307#endif
1308
86872cb5 1309 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1310
1311 /* We cannot add true routes via loopback here,
1312 they would result in kernel looping; promote them to reject routes
1313 */
86872cb5 1314 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1315 (dev && (dev->flags & IFF_LOOPBACK) &&
1316 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1317 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1318 /* hold loopback dev/idev if we haven't done so. */
5578689a 1319 if (dev != net->loopback_dev) {
1da177e4
LT
1320 if (dev) {
1321 dev_put(dev);
1322 in6_dev_put(idev);
1323 }
5578689a 1324 dev = net->loopback_dev;
1da177e4
LT
1325 dev_hold(dev);
1326 idev = in6_dev_get(dev);
1327 if (!idev) {
1328 err = -ENODEV;
1329 goto out;
1330 }
1331 }
d8d1f30b
CG
1332 rt->dst.output = ip6_pkt_discard_out;
1333 rt->dst.input = ip6_pkt_discard;
1334 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1335 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1336 goto install_route;
1337 }
1338
86872cb5 1339 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1340 const struct in6_addr *gw_addr;
1da177e4
LT
1341 int gwa_type;
1342
86872cb5 1343 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1344 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1345 gwa_type = ipv6_addr_type(gw_addr);
1346
1347 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1348 struct rt6_info *grt;
1349
1350 /* IPv6 strictly inhibits using not link-local
1351 addresses as nexthop address.
1352 Otherwise, router will not able to send redirects.
1353 It is very good, but in some (rare!) circumstances
1354 (SIT, PtP, NBMA NOARP links) it is handy to allow
1355 some exceptions. --ANK
1356 */
1357 err = -EINVAL;
38308473 1358 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1359 goto out;
1360
5578689a 1361 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1362
1363 err = -EHOSTUNREACH;
38308473 1364 if (!grt)
1da177e4
LT
1365 goto out;
1366 if (dev) {
d1918542 1367 if (dev != grt->dst.dev) {
d8d1f30b 1368 dst_release(&grt->dst);
1da177e4
LT
1369 goto out;
1370 }
1371 } else {
d1918542 1372 dev = grt->dst.dev;
1da177e4
LT
1373 idev = grt->rt6i_idev;
1374 dev_hold(dev);
1375 in6_dev_hold(grt->rt6i_idev);
1376 }
38308473 1377 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1378 err = 0;
d8d1f30b 1379 dst_release(&grt->dst);
1da177e4
LT
1380
1381 if (err)
1382 goto out;
1383 }
1384 err = -EINVAL;
38308473 1385 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1386 goto out;
1387 }
1388
1389 err = -ENODEV;
38308473 1390 if (!dev)
1da177e4
LT
1391 goto out;
1392
c3968a85
DW
1393 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1394 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1395 err = -EINVAL;
1396 goto out;
1397 }
4e3fd7a0 1398 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1399 rt->rt6i_prefsrc.plen = 128;
1400 } else
1401 rt->rt6i_prefsrc.plen = 0;
1402
86872cb5 1403 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1404 err = rt6_bind_neighbour(rt, dev);
f83c7790 1405 if (err)
1da177e4 1406 goto out;
1da177e4
LT
1407 }
1408
86872cb5 1409 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1410
1411install_route:
86872cb5
TG
1412 if (cfg->fc_mx) {
1413 struct nlattr *nla;
1414 int remaining;
1415
1416 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1417 int type = nla_type(nla);
86872cb5
TG
1418
1419 if (type) {
1420 if (type > RTAX_MAX) {
1da177e4
LT
1421 err = -EINVAL;
1422 goto out;
1423 }
86872cb5 1424
defb3519 1425 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1426 }
1da177e4
LT
1427 }
1428 }
1429
d8d1f30b 1430 rt->dst.dev = dev;
1da177e4 1431 rt->rt6i_idev = idev;
c71099ac 1432 rt->rt6i_table = table;
63152fc0 1433
c346dca1 1434 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1435
86872cb5 1436 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1437
1438out:
1439 if (dev)
1440 dev_put(dev);
1441 if (idev)
1442 in6_dev_put(idev);
1443 if (rt)
d8d1f30b 1444 dst_free(&rt->dst);
1da177e4
LT
1445 return err;
1446}
1447
86872cb5 1448static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1449{
1450 int err;
c71099ac 1451 struct fib6_table *table;
d1918542 1452 struct net *net = dev_net(rt->dst.dev);
1da177e4 1453
8ed67789 1454 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1455 return -ENOENT;
1456
c71099ac
TG
1457 table = rt->rt6i_table;
1458 write_lock_bh(&table->tb6_lock);
1da177e4 1459
86872cb5 1460 err = fib6_del(rt, info);
d8d1f30b 1461 dst_release(&rt->dst);
1da177e4 1462
c71099ac 1463 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1464
1465 return err;
1466}
1467
e0a1ad73
TG
1468int ip6_del_rt(struct rt6_info *rt)
1469{
4d1169c1 1470 struct nl_info info = {
d1918542 1471 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1472 };
528c4ceb 1473 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1474}
1475
86872cb5 1476static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1477{
c71099ac 1478 struct fib6_table *table;
1da177e4
LT
1479 struct fib6_node *fn;
1480 struct rt6_info *rt;
1481 int err = -ESRCH;
1482
5578689a 1483 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1484 if (!table)
c71099ac
TG
1485 return err;
1486
1487 read_lock_bh(&table->tb6_lock);
1da177e4 1488
c71099ac 1489 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1490 &cfg->fc_dst, cfg->fc_dst_len,
1491 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1492
1da177e4 1493 if (fn) {
d8d1f30b 1494 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1495 if (cfg->fc_ifindex &&
d1918542
DM
1496 (!rt->dst.dev ||
1497 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1498 continue;
86872cb5
TG
1499 if (cfg->fc_flags & RTF_GATEWAY &&
1500 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1501 continue;
86872cb5 1502 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1503 continue;
d8d1f30b 1504 dst_hold(&rt->dst);
c71099ac 1505 read_unlock_bh(&table->tb6_lock);
1da177e4 1506
86872cb5 1507 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1508 }
1509 }
c71099ac 1510 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1511
1512 return err;
1513}
1514
1515/*
1516 * Handle redirects
1517 */
/*
 * Flow key used for redirect lookups: a regular flowi6 extended with the
 * address of the router that sent the redirect.  fl6 has to stay the first
 * member because __ip6_route_redirect() receives a pointer to it and casts
 * that pointer back to struct ip6rd_flowi to reach @gateway.
 */
a6279458 1518struct ip6rd_flowi {
4c9483b2 1519 struct flowi6 fl6;
a6279458
YH
1520 struct in6_addr gateway;
1521};
1522
8ed67789
DL
1523static struct rt6_info *__ip6_route_redirect(struct net *net,
1524 struct fib6_table *table,
4c9483b2 1525 struct flowi6 *fl6,
a6279458 1526 int flags)
1da177e4 1527{
4c9483b2 1528 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1529 struct rt6_info *rt;
e843b9e1 1530 struct fib6_node *fn;
c71099ac 1531
1da177e4 1532 /*
e843b9e1
YH
1533 * Get the "current" route for this destination and
1534 * check if the redirect has come from approriate router.
1535 *
1536 * RFC 2461 specifies that redirects should only be
1537 * accepted if they come from the nexthop to the target.
1538 * Due to the way the routes are chosen, this notion
1539 * is a bit fuzzy and one might need to check all possible
1540 * routes.
1da177e4 1541 */
1da177e4 1542
c71099ac 1543 read_lock_bh(&table->tb6_lock);
4c9483b2 1544 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1545restart:
d8d1f30b 1546 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1547 /*
1548 * Current route is on-link; redirect is always invalid.
1549 *
1550 * Seems, previous statement is not true. It could
1551 * be node, which looks for us as on-link (f.e. proxy ndisc)
1552 * But then router serving it might decide, that we should
1553 * know truth 8)8) --ANK (980726).
1554 */
1555 if (rt6_check_expired(rt))
1556 continue;
1557 if (!(rt->rt6i_flags & RTF_GATEWAY))
1558 continue;
d1918542 1559 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
e843b9e1 1560 continue;
a6279458 1561 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1562 continue;
1563 break;
1564 }
a6279458 1565
cb15d9c2 1566 if (!rt)
8ed67789 1567 rt = net->ipv6.ip6_null_entry;
4c9483b2 1568 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1569out:
d8d1f30b 1570 dst_hold(&rt->dst);
a6279458 1571
c71099ac 1572 read_unlock_bh(&table->tb6_lock);
e843b9e1 1573
a6279458
YH
1574 return rt;
1575};
1576
b71d1d42
ED
1577static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1578 const struct in6_addr *src,
1579 const struct in6_addr *gateway,
a6279458
YH
1580 struct net_device *dev)
1581{
adaa70bb 1582 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1583 struct net *net = dev_net(dev);
a6279458 1584 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1585 .fl6 = {
1586 .flowi6_oif = dev->ifindex,
1587 .daddr = *dest,
1588 .saddr = *src,
a6279458 1589 },
a6279458 1590 };
adaa70bb 1591
4e3fd7a0 1592 rdfl.gateway = *gateway;
86c36ce4 1593
adaa70bb
TG
1594 if (rt6_need_strict(dest))
1595 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1596
4c9483b2 1597 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1598 flags, __ip6_route_redirect);
a6279458
YH
1599}
1600
b71d1d42
ED
1601void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1602 const struct in6_addr *saddr,
a6279458
YH
1603 struct neighbour *neigh, u8 *lladdr, int on_link)
1604{
1605 struct rt6_info *rt, *nrt = NULL;
1606 struct netevent_redirect netevent;
c346dca1 1607 struct net *net = dev_net(neigh->dev);
a6279458
YH
1608
1609 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1610
8ed67789 1611 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1612 if (net_ratelimit())
1613 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1614 "for redirect target\n");
a6279458 1615 goto out;
1da177e4
LT
1616 }
1617
1da177e4
LT
1618 /*
1619 * We have finally decided to accept it.
1620 */
1621
1ab1457c 1622 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1623 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1624 NEIGH_UPDATE_F_OVERRIDE|
1625 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1626 NEIGH_UPDATE_F_ISROUTER))
1627 );
1628
1629 /*
1630 * Redirect received -> path was valid.
1631 * Look, redirects are sent only in response to data packets,
1632 * so that this nexthop apparently is reachable. --ANK
1633 */
d8d1f30b 1634 dst_confirm(&rt->dst);
1da177e4
LT
1635
1636 /* Duplicate redirect: silently ignore. */
27217455 1637 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1da177e4
LT
1638 goto out;
1639
21efcfa0 1640 nrt = ip6_rt_copy(rt, dest);
38308473 1641 if (!nrt)
1da177e4
LT
1642 goto out;
1643
1644 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1645 if (on_link)
1646 nrt->rt6i_flags &= ~RTF_GATEWAY;
1647
4e3fd7a0 1648 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
69cce1d1 1649 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1650
40e22e8f 1651 if (ip6_ins_rt(nrt))
1da177e4
LT
1652 goto out;
1653
d8d1f30b
CG
1654 netevent.old = &rt->dst;
1655 netevent.new = &nrt->dst;
8d71740c
TT
1656 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1657
38308473 1658 if (rt->rt6i_flags & RTF_CACHE) {
e0a1ad73 1659 ip6_del_rt(rt);
1da177e4
LT
1660 return;
1661 }
1662
1663out:
d8d1f30b 1664 dst_release(&rt->dst);
1da177e4
LT
1665}
1666
1667/*
1668 * Handle ICMP "packet too big" messages
1669 * i.e. Path MTU discovery
1670 */
1671
b71d1d42 1672static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1673 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1674{
1675 struct rt6_info *rt, *nrt;
1676 int allfrag = 0;
d3052b55 1677again:
ae878ae2 1678 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
38308473 1679 if (!rt)
1da177e4
LT
1680 return;
1681
d3052b55
AV
1682 if (rt6_check_expired(rt)) {
1683 ip6_del_rt(rt);
1684 goto again;
1685 }
1686
d8d1f30b 1687 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1688 goto out;
1689
1690 if (pmtu < IPV6_MIN_MTU) {
1691 /*
1ab1457c 1692 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1693 * MTU (1280) and a fragment header should always be included
1694 * after a node receiving Too Big message reporting PMTU is
1695 * less than the IPv6 Minimum Link MTU.
1696 */
1697 pmtu = IPV6_MIN_MTU;
1698 allfrag = 1;
1699 }
1700
1701 /* New mtu received -> path was valid.
1702 They are sent only in response to data packets,
1703 so that this nexthop apparently is reachable. --ANK
1704 */
d8d1f30b 1705 dst_confirm(&rt->dst);
1da177e4
LT
1706
1707 /* Host route. If it is static, it would be better
1708 not to override it, but add new one, so that
1709 when cache entry will expire old pmtu
1710 would return automatically.
1711 */
1712 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1713 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1714 if (allfrag) {
1715 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1716 features |= RTAX_FEATURE_ALLFRAG;
1717 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1718 }
d8d1f30b 1719 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1720 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1721 goto out;
1722 }
1723
1724 /* Network route.
1725 Two cases are possible:
1726 1. It is connected route. Action: COW
1727 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1728 */
27217455 1729 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1730 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1731 else
1732 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1733
d5315b50 1734 if (nrt) {
defb3519
DM
1735 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1736 if (allfrag) {
1737 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1738 features |= RTAX_FEATURE_ALLFRAG;
1739 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1740 }
a1e78363
YH
1741
1742 /* According to RFC 1981, detecting PMTU increase shouldn't be
1743 * happened within 5 mins, the recommended timer is 10 mins.
1744 * Here this route expiration time is set to ip6_rt_mtu_expires
1745 * which is 10 mins. After 10 mins the decreased pmtu is expired
1746 * and detecting PMTU increase will be automatically happened.
1747 */
d8d1f30b 1748 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1749 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1750
40e22e8f 1751 ip6_ins_rt(nrt);
1da177e4 1752 }
1da177e4 1753out:
d8d1f30b 1754 dst_release(&rt->dst);
1da177e4
LT
1755}
1756
b71d1d42 1757void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1758 struct net_device *dev, u32 pmtu)
1759{
1760 struct net *net = dev_net(dev);
1761
1762 /*
1763 * RFC 1981 states that a node "MUST reduce the size of the packets it
1764 * is sending along the path" that caused the Packet Too Big message.
1765 * Since it's not possible in the general case to determine which
1766 * interface was used to send the original packet, we update the MTU
1767 * on the interface that will be used to send future packets. We also
1768 * update the MTU on the interface that received the Packet Too Big in
1769 * case the original packet was forced out that interface with
1770 * SO_BINDTODEVICE or similar. This is the next best thing to the
1771 * correct behaviour, which would be to update the MTU on all
1772 * interfaces.
1773 */
1774 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1775 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1776}
1777
1da177e4
LT
1778/*
1779 * Misc support functions
1780 */
1781
21efcfa0
ED
1782static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1783 const struct in6_addr *dest)
1da177e4 1784{
d1918542 1785 struct net *net = dev_net(ort->dst.dev);
5c1e6aa3 1786 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 1787 ort->dst.dev, 0);
1da177e4
LT
1788
1789 if (rt) {
d8d1f30b
CG
1790 rt->dst.input = ort->dst.input;
1791 rt->dst.output = ort->dst.output;
8e2ec639 1792 rt->dst.flags |= DST_HOST;
d8d1f30b 1793
4e3fd7a0 1794 rt->rt6i_dst.addr = *dest;
8e2ec639 1795 rt->rt6i_dst.plen = 128;
defb3519 1796 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1797 rt->dst.error = ort->dst.error;
1da177e4
LT
1798 rt->rt6i_idev = ort->rt6i_idev;
1799 if (rt->rt6i_idev)
1800 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1801 rt->dst.lastuse = jiffies;
d1918542 1802 rt->dst.expires = 0;
1da177e4 1803
4e3fd7a0 1804 rt->rt6i_gateway = ort->rt6i_gateway;
1da177e4
LT
1805 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1806 rt->rt6i_metric = 0;
1807
1da177e4
LT
1808#ifdef CONFIG_IPV6_SUBTREES
1809 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1810#endif
0f6c6392 1811 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1812 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1813 }
1814 return rt;
1815}
1816
70ceb4f5 1817#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1818static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1819 const struct in6_addr *prefix, int prefixlen,
1820 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1821{
1822 struct fib6_node *fn;
1823 struct rt6_info *rt = NULL;
c71099ac
TG
1824 struct fib6_table *table;
1825
efa2cea0 1826 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1827 if (!table)
c71099ac 1828 return NULL;
70ceb4f5 1829
c71099ac
TG
1830 write_lock_bh(&table->tb6_lock);
1831 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1832 if (!fn)
1833 goto out;
1834
d8d1f30b 1835 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1836 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1837 continue;
1838 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1839 continue;
1840 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1841 continue;
d8d1f30b 1842 dst_hold(&rt->dst);
70ceb4f5
YH
1843 break;
1844 }
1845out:
c71099ac 1846 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1847 return rt;
1848}
1849
efa2cea0 1850static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1851 const struct in6_addr *prefix, int prefixlen,
1852 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5
YH
1853 unsigned pref)
1854{
86872cb5
TG
1855 struct fib6_config cfg = {
1856 .fc_table = RT6_TABLE_INFO,
238fc7ea 1857 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1858 .fc_ifindex = ifindex,
1859 .fc_dst_len = prefixlen,
1860 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1861 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1862 .fc_nlinfo.pid = 0,
1863 .fc_nlinfo.nlh = NULL,
1864 .fc_nlinfo.nl_net = net,
86872cb5
TG
1865 };
1866
4e3fd7a0
AD
1867 cfg.fc_dst = *prefix;
1868 cfg.fc_gateway = *gwaddr;
70ceb4f5 1869
e317da96
YH
1870 /* We should treat it as a default route if prefix length is 0. */
1871 if (!prefixlen)
86872cb5 1872 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1873
86872cb5 1874 ip6_route_add(&cfg);
70ceb4f5 1875
efa2cea0 1876 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1877}
1878#endif
1879
b71d1d42 1880struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1881{
1da177e4 1882 struct rt6_info *rt;
c71099ac 1883 struct fib6_table *table;
1da177e4 1884
c346dca1 1885 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1886 if (!table)
c71099ac 1887 return NULL;
1da177e4 1888
c71099ac 1889 write_lock_bh(&table->tb6_lock);
d8d1f30b 1890 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1891 if (dev == rt->dst.dev &&
045927ff 1892 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1893 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1894 break;
1895 }
1896 if (rt)
d8d1f30b 1897 dst_hold(&rt->dst);
c71099ac 1898 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1899 return rt;
1900}
1901
b71d1d42 1902struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1903 struct net_device *dev,
1904 unsigned int pref)
1da177e4 1905{
86872cb5
TG
1906 struct fib6_config cfg = {
1907 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1908 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1909 .fc_ifindex = dev->ifindex,
1910 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1911 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1912 .fc_nlinfo.pid = 0,
1913 .fc_nlinfo.nlh = NULL,
c346dca1 1914 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1915 };
1da177e4 1916
4e3fd7a0 1917 cfg.fc_gateway = *gwaddr;
1da177e4 1918
86872cb5 1919 ip6_route_add(&cfg);
1da177e4 1920
1da177e4
LT
1921 return rt6_get_dflt_router(gwaddr, dev);
1922}
1923
7b4da532 1924void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1925{
1926 struct rt6_info *rt;
c71099ac
TG
1927 struct fib6_table *table;
1928
1929 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1930 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1931 if (!table)
c71099ac 1932 return;
1da177e4
LT
1933
1934restart:
c71099ac 1935 read_lock_bh(&table->tb6_lock);
d8d1f30b 1936 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1937 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1938 dst_hold(&rt->dst);
c71099ac 1939 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1940 ip6_del_rt(rt);
1da177e4
LT
1941 goto restart;
1942 }
1943 }
c71099ac 1944 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1945}
1946
5578689a
DL
1947static void rtmsg_to_fib6_config(struct net *net,
1948 struct in6_rtmsg *rtmsg,
86872cb5
TG
1949 struct fib6_config *cfg)
1950{
1951 memset(cfg, 0, sizeof(*cfg));
1952
1953 cfg->fc_table = RT6_TABLE_MAIN;
1954 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1955 cfg->fc_metric = rtmsg->rtmsg_metric;
1956 cfg->fc_expires = rtmsg->rtmsg_info;
1957 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1958 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1959 cfg->fc_flags = rtmsg->rtmsg_flags;
1960
5578689a 1961 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1962
4e3fd7a0
AD
1963 cfg->fc_dst = rtmsg->rtmsg_dst;
1964 cfg->fc_src = rtmsg->rtmsg_src;
1965 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1966}
1967
5578689a 1968int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1969{
86872cb5 1970 struct fib6_config cfg;
1da177e4
LT
1971 struct in6_rtmsg rtmsg;
1972 int err;
1973
1974 switch(cmd) {
1975 case SIOCADDRT: /* Add a route */
1976 case SIOCDELRT: /* Delete a route */
1977 if (!capable(CAP_NET_ADMIN))
1978 return -EPERM;
1979 err = copy_from_user(&rtmsg, arg,
1980 sizeof(struct in6_rtmsg));
1981 if (err)
1982 return -EFAULT;
86872cb5 1983
5578689a 1984 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1985
1da177e4
LT
1986 rtnl_lock();
1987 switch (cmd) {
1988 case SIOCADDRT:
86872cb5 1989 err = ip6_route_add(&cfg);
1da177e4
LT
1990 break;
1991 case SIOCDELRT:
86872cb5 1992 err = ip6_route_del(&cfg);
1da177e4
LT
1993 break;
1994 default:
1995 err = -EINVAL;
1996 }
1997 rtnl_unlock();
1998
1999 return err;
3ff50b79 2000 }
1da177e4
LT
2001
2002 return -EINVAL;
2003}
2004
2005/*
2006 * Drop the packet on the floor
2007 */
2008
d5fdd6ba 2009static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2010{
612f09e8 2011 int type;
adf30907 2012 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2013 switch (ipstats_mib_noroutes) {
2014 case IPSTATS_MIB_INNOROUTES:
0660e03f 2015 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2016 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2017 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2018 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2019 break;
2020 }
2021 /* FALLTHROUGH */
2022 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2023 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2024 ipstats_mib_noroutes);
612f09e8
YH
2025 break;
2026 }
3ffe533c 2027 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2028 kfree_skb(skb);
2029 return 0;
2030}
2031
9ce8ade0
TG
2032static int ip6_pkt_discard(struct sk_buff *skb)
2033{
612f09e8 2034 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2035}
2036
20380731 2037static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2038{
adf30907 2039 skb->dev = skb_dst(skb)->dev;
612f09e8 2040 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2041}
2042
6723ab54
DM
2043#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2044
9ce8ade0
TG
2045static int ip6_pkt_prohibit(struct sk_buff *skb)
2046{
612f09e8 2047 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2048}
2049
2050static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2051{
adf30907 2052 skb->dev = skb_dst(skb)->dev;
612f09e8 2053 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2054}
2055
6723ab54
DM
2056#endif
2057
1da177e4
LT
2058/*
2059 * Allocate a dst for local (unicast / anycast) address.
2060 */
2061
2062struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2063 const struct in6_addr *addr,
8f031519 2064 bool anycast)
1da177e4 2065{
c346dca1 2066 struct net *net = dev_net(idev->dev);
5c1e6aa3 2067 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 2068 net->loopback_dev, 0);
f83c7790 2069 int err;
1da177e4 2070
38308473 2071 if (!rt) {
40385653
BG
2072 if (net_ratelimit())
2073 pr_warning("IPv6: Maximum number of routes reached,"
2074 " consider increasing route/max_size.\n");
1da177e4 2075 return ERR_PTR(-ENOMEM);
40385653 2076 }
1da177e4 2077
1da177e4
LT
2078 in6_dev_hold(idev);
2079
11d53b49 2080 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2081 rt->dst.input = ip6_input;
2082 rt->dst.output = ip6_output;
1da177e4 2083 rt->rt6i_idev = idev;
d8d1f30b 2084 rt->dst.obsolete = -1;
1da177e4
LT
2085
2086 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2087 if (anycast)
2088 rt->rt6i_flags |= RTF_ANYCAST;
2089 else
1da177e4 2090 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2091 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2092 if (err) {
d8d1f30b 2093 dst_free(&rt->dst);
f83c7790 2094 return ERR_PTR(err);
1da177e4
LT
2095 }
2096
4e3fd7a0 2097 rt->rt6i_dst.addr = *addr;
1da177e4 2098 rt->rt6i_dst.plen = 128;
5578689a 2099 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2100
d8d1f30b 2101 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2102
2103 return rt;
2104}
2105
c3968a85
DW
2106int ip6_route_get_saddr(struct net *net,
2107 struct rt6_info *rt,
b71d1d42 2108 const struct in6_addr *daddr,
c3968a85
DW
2109 unsigned int prefs,
2110 struct in6_addr *saddr)
2111{
2112 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2113 int err = 0;
2114 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2115 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2116 else
2117 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2118 daddr, prefs, saddr);
2119 return err;
2120}
2121
2122/* remove deleted ip from prefsrc entries */
/* Walker context for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* restrict to this device; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address compared against each route's prefsrc */
};
2128
2129static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2130{
2131 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2132 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2133 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2134
d1918542 2135 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2136 rt != net->ipv6.ip6_null_entry &&
2137 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2138 /* remove prefsrc entry */
2139 rt->rt6i_prefsrc.plen = 0;
2140 }
2141 return 0;
2142}
2143
2144void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2145{
2146 struct net *net = dev_net(ifp->idev->dev);
2147 struct arg_dev_net_ip adni = {
2148 .dev = ifp->idev->dev,
2149 .net = net,
2150 .addr = &ifp->addr,
2151 };
2152 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2153}
2154
8ed67789
DL
/* Walker context for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
};
2159
1da177e4
LT
2160static int fib6_ifdown(struct rt6_info *rt, void *arg)
2161{
bc3ef660 2162 const struct arg_dev_net *adn = arg;
2163 const struct net_device *dev = adn->dev;
8ed67789 2164
d1918542 2165 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2166 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2167 return -1;
c159d30c 2168
1da177e4
LT
2169 return 0;
2170}
2171
f3db4851 2172void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2173{
8ed67789
DL
2174 struct arg_dev_net adn = {
2175 .dev = dev,
2176 .net = net,
2177 };
2178
2179 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2180 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2181}
2182
/* Walker context for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2188
2189static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2190{
2191 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2192 struct inet6_dev *idev;
2193
2194 /* In IPv6 pmtu discovery is not optional,
2195 so that RTAX_MTU lock cannot disable it.
2196 We still use this lock to block changes
2197 caused by addrconf/ndisc.
2198 */
2199
2200 idev = __in6_dev_get(arg->dev);
38308473 2201 if (!idev)
1da177e4
LT
2202 return 0;
2203
2204 /* For administrative MTU increase, there is no way to discover
2205 IPv6 PMTU increase, so PMTU increase should be updated here.
2206 Since RFC 1981 doesn't include administrative MTU increase
2207 update PMTU increase is a MUST. (i.e. jumbo frame)
2208 */
2209 /*
2210 If new MTU is less than route PMTU, this new MTU will be the
2211 lowest MTU in the path, update the route PMTU to reflect PMTU
2212 decreases; if new MTU is greater than route PMTU, and the
2213 old MTU is the lowest MTU in the path, update the route PMTU
2214 to reflect the increase. In this case if the other nodes' MTU
2215 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2216 PMTU discouvery.
2217 */
d1918542 2218 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2219 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2220 (dst_mtu(&rt->dst) >= arg->mtu ||
2221 (dst_mtu(&rt->dst) < arg->mtu &&
2222 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2223 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2224 }
1da177e4
LT
2225 return 0;
2226}
2227
2228void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2229{
c71099ac
TG
2230 struct rt6_mtu_change_arg arg = {
2231 .dev = dev,
2232 .mtu = mtu,
2233 };
1da177e4 2234
c346dca1 2235 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2236}
2237
ef7c79ed 2238static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2239 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2240 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2241 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2242 [RTA_PRIORITY] = { .type = NLA_U32 },
2243 [RTA_METRICS] = { .type = NLA_NESTED },
2244};
2245
2246static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2247 struct fib6_config *cfg)
1da177e4 2248{
86872cb5
TG
2249 struct rtmsg *rtm;
2250 struct nlattr *tb[RTA_MAX+1];
2251 int err;
1da177e4 2252
86872cb5
TG
2253 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2254 if (err < 0)
2255 goto errout;
1da177e4 2256
86872cb5
TG
2257 err = -EINVAL;
2258 rtm = nlmsg_data(nlh);
2259 memset(cfg, 0, sizeof(*cfg));
2260
2261 cfg->fc_table = rtm->rtm_table;
2262 cfg->fc_dst_len = rtm->rtm_dst_len;
2263 cfg->fc_src_len = rtm->rtm_src_len;
2264 cfg->fc_flags = RTF_UP;
2265 cfg->fc_protocol = rtm->rtm_protocol;
2266
2267 if (rtm->rtm_type == RTN_UNREACHABLE)
2268 cfg->fc_flags |= RTF_REJECT;
2269
ab79ad14
2270 if (rtm->rtm_type == RTN_LOCAL)
2271 cfg->fc_flags |= RTF_LOCAL;
2272
86872cb5
TG
2273 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2274 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2275 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2276
2277 if (tb[RTA_GATEWAY]) {
2278 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2279 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2280 }
86872cb5
TG
2281
2282 if (tb[RTA_DST]) {
2283 int plen = (rtm->rtm_dst_len + 7) >> 3;
2284
2285 if (nla_len(tb[RTA_DST]) < plen)
2286 goto errout;
2287
2288 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2289 }
86872cb5
TG
2290
2291 if (tb[RTA_SRC]) {
2292 int plen = (rtm->rtm_src_len + 7) >> 3;
2293
2294 if (nla_len(tb[RTA_SRC]) < plen)
2295 goto errout;
2296
2297 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2298 }
86872cb5 2299
c3968a85
DW
2300 if (tb[RTA_PREFSRC])
2301 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2302
86872cb5
TG
2303 if (tb[RTA_OIF])
2304 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2305
2306 if (tb[RTA_PRIORITY])
2307 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2308
2309 if (tb[RTA_METRICS]) {
2310 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2311 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2312 }
86872cb5
TG
2313
2314 if (tb[RTA_TABLE])
2315 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2316
2317 err = 0;
2318errout:
2319 return err;
1da177e4
LT
2320}
2321
c127ea2c 2322static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2323{
86872cb5
TG
2324 struct fib6_config cfg;
2325 int err;
1da177e4 2326
86872cb5
TG
2327 err = rtm_to_fib6_config(skb, nlh, &cfg);
2328 if (err < 0)
2329 return err;
2330
2331 return ip6_route_del(&cfg);
1da177e4
LT
2332}
2333
c127ea2c 2334static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2335{
86872cb5
TG
2336 struct fib6_config cfg;
2337 int err;
1da177e4 2338
86872cb5
TG
2339 err = rtm_to_fib6_config(skb, nlh, &cfg);
2340 if (err < 0)
2341 return err;
2342
2343 return ip6_route_add(&cfg);
1da177e4
LT
2344}
2345
339bf98f
TG
2346static inline size_t rt6_nlmsg_size(void)
2347{
2348 return NLMSG_ALIGN(sizeof(struct rtmsg))
2349 + nla_total_size(16) /* RTA_SRC */
2350 + nla_total_size(16) /* RTA_DST */
2351 + nla_total_size(16) /* RTA_GATEWAY */
2352 + nla_total_size(16) /* RTA_PREFSRC */
2353 + nla_total_size(4) /* RTA_TABLE */
2354 + nla_total_size(4) /* RTA_IIF */
2355 + nla_total_size(4) /* RTA_OIF */
2356 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2357 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2358 + nla_total_size(sizeof(struct rta_cacheinfo));
2359}
2360
191cd582
BH
2361static int rt6_fill_node(struct net *net,
2362 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2363 struct in6_addr *dst, struct in6_addr *src,
2364 int iif, int type, u32 pid, u32 seq,
7bc570c8 2365 int prefix, int nowait, unsigned int flags)
1da177e4 2366{
346f870b 2367 const struct inet_peer *peer;
1da177e4 2368 struct rtmsg *rtm;
2d7202bf 2369 struct nlmsghdr *nlh;
e3703b3d 2370 long expires;
9e762a4a 2371 u32 table;
f2c31e32 2372 struct neighbour *n;
346f870b 2373 u32 ts, tsage;
1da177e4
LT
2374
2375 if (prefix) { /* user wants prefix routes only */
2376 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2377 /* success since this is not a prefix route */
2378 return 1;
2379 }
2380 }
2381
2d7202bf 2382 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2383 if (!nlh)
26932566 2384 return -EMSGSIZE;
2d7202bf
TG
2385
2386 rtm = nlmsg_data(nlh);
1da177e4
LT
2387 rtm->rtm_family = AF_INET6;
2388 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2389 rtm->rtm_src_len = rt->rt6i_src.plen;
2390 rtm->rtm_tos = 0;
c71099ac 2391 if (rt->rt6i_table)
9e762a4a 2392 table = rt->rt6i_table->tb6_id;
c71099ac 2393 else
9e762a4a
PM
2394 table = RT6_TABLE_UNSPEC;
2395 rtm->rtm_table = table;
2d7202bf 2396 NLA_PUT_U32(skb, RTA_TABLE, table);
38308473 2397 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2398 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2399 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2400 rtm->rtm_type = RTN_LOCAL;
d1918542 2401 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2402 rtm->rtm_type = RTN_LOCAL;
2403 else
2404 rtm->rtm_type = RTN_UNICAST;
2405 rtm->rtm_flags = 0;
2406 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2407 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2408 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4
LT
2409 rtm->rtm_protocol = RTPROT_REDIRECT;
2410 else if (rt->rt6i_flags & RTF_ADDRCONF)
2411 rtm->rtm_protocol = RTPROT_KERNEL;
38308473 2412 else if (rt->rt6i_flags & RTF_DEFAULT)
1da177e4
LT
2413 rtm->rtm_protocol = RTPROT_RA;
2414
38308473 2415 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2416 rtm->rtm_flags |= RTM_F_CLONED;
2417
2418 if (dst) {
2d7202bf 2419 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2420 rtm->rtm_dst_len = 128;
1da177e4 2421 } else if (rtm->rtm_dst_len)
2d7202bf 2422 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2423#ifdef CONFIG_IPV6_SUBTREES
2424 if (src) {
2d7202bf 2425 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2426 rtm->rtm_src_len = 128;
1da177e4 2427 } else if (rtm->rtm_src_len)
2d7202bf 2428 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2429#endif
7bc570c8
YH
2430 if (iif) {
2431#ifdef CONFIG_IPV6_MROUTE
2432 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2433 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2434 if (err <= 0) {
2435 if (!nowait) {
2436 if (err == 0)
2437 return 0;
2438 goto nla_put_failure;
2439 } else {
2440 if (err == -EMSGSIZE)
2441 goto nla_put_failure;
2442 }
2443 }
2444 } else
2445#endif
2446 NLA_PUT_U32(skb, RTA_IIF, iif);
2447 } else if (dst) {
1da177e4 2448 struct in6_addr saddr_buf;
c3968a85 2449 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2d7202bf 2450 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2451 }
2d7202bf 2452
c3968a85
DW
2453 if (rt->rt6i_prefsrc.plen) {
2454 struct in6_addr saddr_buf;
4e3fd7a0 2455 saddr_buf = rt->rt6i_prefsrc.addr;
c3968a85
DW
2456 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2457 }
2458
defb3519 2459 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2460 goto nla_put_failure;
2461
f2c31e32 2462 rcu_read_lock();
27217455 2463 n = dst_get_neighbour_noref(&rt->dst);
f2c31e32
ED
2464 if (n)
2465 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2466 rcu_read_unlock();
2d7202bf 2467
d8d1f30b 2468 if (rt->dst.dev)
d1918542 2469 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
2d7202bf
TG
2470
2471 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2472
36e3deae
YH
2473 if (!(rt->rt6i_flags & RTF_EXPIRES))
2474 expires = 0;
d1918542
DM
2475 else if (rt->dst.expires - jiffies < INT_MAX)
2476 expires = rt->dst.expires - jiffies;
36e3deae
YH
2477 else
2478 expires = INT_MAX;
69cdf8f9 2479
346f870b
DM
2480 peer = rt->rt6i_peer;
2481 ts = tsage = 0;
2482 if (peer && peer->tcp_ts_stamp) {
2483 ts = peer->tcp_ts;
2484 tsage = get_seconds() - peer->tcp_ts_stamp;
2485 }
2486
2487 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
d8d1f30b 2488 expires, rt->dst.error) < 0)
e3703b3d 2489 goto nla_put_failure;
2d7202bf
TG
2490
2491 return nlmsg_end(skb, nlh);
2492
2493nla_put_failure:
26932566
PM
2494 nlmsg_cancel(skb, nlh);
2495 return -EMSGSIZE;
1da177e4
LT
2496}
2497
1b43af54 2498int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2499{
2500 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2501 int prefix;
2502
2d7202bf
TG
2503 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2504 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2505 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2506 } else
2507 prefix = 0;
2508
191cd582
BH
2509 return rt6_fill_node(arg->net,
2510 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2511 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2512 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2513}
2514
c127ea2c 2515static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2516{
3b1e0a65 2517 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2518 struct nlattr *tb[RTA_MAX+1];
2519 struct rt6_info *rt;
1da177e4 2520 struct sk_buff *skb;
ab364a6f 2521 struct rtmsg *rtm;
4c9483b2 2522 struct flowi6 fl6;
ab364a6f 2523 int err, iif = 0;
1da177e4 2524
ab364a6f
TG
2525 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2526 if (err < 0)
2527 goto errout;
1da177e4 2528
ab364a6f 2529 err = -EINVAL;
4c9483b2 2530 memset(&fl6, 0, sizeof(fl6));
1da177e4 2531
ab364a6f
TG
2532 if (tb[RTA_SRC]) {
2533 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2534 goto errout;
2535
4e3fd7a0 2536 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2537 }
2538
2539 if (tb[RTA_DST]) {
2540 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2541 goto errout;
2542
4e3fd7a0 2543 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2544 }
2545
2546 if (tb[RTA_IIF])
2547 iif = nla_get_u32(tb[RTA_IIF]);
2548
2549 if (tb[RTA_OIF])
4c9483b2 2550 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2551
2552 if (iif) {
2553 struct net_device *dev;
5578689a 2554 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2555 if (!dev) {
2556 err = -ENODEV;
ab364a6f 2557 goto errout;
1da177e4
LT
2558 }
2559 }
2560
ab364a6f 2561 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2562 if (!skb) {
ab364a6f
TG
2563 err = -ENOBUFS;
2564 goto errout;
2565 }
1da177e4 2566
ab364a6f
TG
2567 /* Reserve room for dummy headers, this skb can pass
2568 through good chunk of routing engine.
2569 */
459a98ed 2570 skb_reset_mac_header(skb);
ab364a6f 2571 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2572
4c9483b2 2573 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
d8d1f30b 2574 skb_dst_set(skb, &rt->dst);
1da177e4 2575
4c9483b2 2576 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2577 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2578 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2579 if (err < 0) {
ab364a6f
TG
2580 kfree_skb(skb);
2581 goto errout;
1da177e4
LT
2582 }
2583
5578689a 2584 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2585errout:
1da177e4 2586 return err;
1da177e4
LT
2587}
2588
86872cb5 2589void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2590{
2591 struct sk_buff *skb;
5578689a 2592 struct net *net = info->nl_net;
528c4ceb
DL
2593 u32 seq;
2594 int err;
2595
2596 err = -ENOBUFS;
38308473 2597 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2598
339bf98f 2599 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2600 if (!skb)
21713ebc
TG
2601 goto errout;
2602
191cd582 2603 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2604 event, info->pid, seq, 0, 0, 0);
26932566
PM
2605 if (err < 0) {
2606 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2607 WARN_ON(err == -EMSGSIZE);
2608 kfree_skb(skb);
2609 goto errout;
2610 }
1ce85fe4
PNA
2611 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2612 info->nlh, gfp_any());
2613 return;
21713ebc
TG
2614errout:
2615 if (err < 0)
5578689a 2616 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2617}
2618
8ed67789
DL
2619static int ip6_route_dev_notify(struct notifier_block *this,
2620 unsigned long event, void *data)
2621{
2622 struct net_device *dev = (struct net_device *)data;
c346dca1 2623 struct net *net = dev_net(dev);
8ed67789
DL
2624
2625 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2626 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2627 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2628#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2629 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2630 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2631 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2632 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2633#endif
2634 }
2635
2636 return NOTIFY_OK;
2637}
2638
1da177e4
LT
2639/*
2640 * /proc
2641 */
2642
2643#ifdef CONFIG_PROC_FS
2644
1da177e4
LT
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): nothing in the visible part of this file uses this
 * struct (the /proc handlers below work on a seq_file) - confirm whether
 * it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2653
2654static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2655{
33120b30 2656 struct seq_file *m = p_arg;
69cce1d1 2657 struct neighbour *n;
1da177e4 2658
4b7a4274 2659 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2660
2661#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2662 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2663#else
33120b30 2664 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2665#endif
f2c31e32 2666 rcu_read_lock();
27217455 2667 n = dst_get_neighbour_noref(&rt->dst);
69cce1d1
DM
2668 if (n) {
2669 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2670 } else {
33120b30 2671 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2672 }
f2c31e32 2673 rcu_read_unlock();
33120b30 2674 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2675 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2676 rt->dst.__use, rt->rt6i_flags,
d1918542 2677 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2678 return 0;
2679}
2680
33120b30 2681static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2682{
f3db4851 2683 struct net *net = (struct net *)m->private;
32b293a5 2684 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2685 return 0;
2686}
1da177e4 2687
33120b30
AD
2688static int ipv6_route_open(struct inode *inode, struct file *file)
2689{
de05c557 2690 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2691}
2692
33120b30
AD
2693static const struct file_operations ipv6_route_proc_fops = {
2694 .owner = THIS_MODULE,
2695 .open = ipv6_route_open,
2696 .read = seq_read,
2697 .llseek = seq_lseek,
b6fcbdb4 2698 .release = single_release_net,
33120b30
AD
2699};
2700
1da177e4
LT
2701static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2702{
69ddb805 2703 struct net *net = (struct net *)seq->private;
1da177e4 2704 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2705 net->ipv6.rt6_stats->fib_nodes,
2706 net->ipv6.rt6_stats->fib_route_nodes,
2707 net->ipv6.rt6_stats->fib_rt_alloc,
2708 net->ipv6.rt6_stats->fib_rt_entries,
2709 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2710 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2711 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2712
2713 return 0;
2714}
2715
2716static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2717{
de05c557 2718 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2719}
2720
9a32144e 2721static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2722 .owner = THIS_MODULE,
2723 .open = rt6_stats_seq_open,
2724 .read = seq_read,
2725 .llseek = seq_lseek,
b6fcbdb4 2726 .release = single_release_net,
1da177e4
LT
2727};
2728#endif /* CONFIG_PROC_FS */
2729
2730#ifdef CONFIG_SYSCTL
2731
1da177e4 2732static
8d65af78 2733int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2734 void __user *buffer, size_t *lenp, loff_t *ppos)
2735{
c486da34
LAG
2736 struct net *net;
2737 int delay;
2738 if (!write)
1da177e4 2739 return -EINVAL;
c486da34
LAG
2740
2741 net = (struct net *)ctl->extra1;
2742 delay = net->ipv6.sysctl.flush_delay;
2743 proc_dointvec(ctl, write, buffer, lenp, ppos);
2744 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2745 return 0;
1da177e4
LT
2746}
2747
760f2d01 2748ctl_table ipv6_route_table_template[] = {
1ab1457c 2749 {
1da177e4 2750 .procname = "flush",
4990509f 2751 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2752 .maxlen = sizeof(int),
89c8b3a1 2753 .mode = 0200,
6d9f239a 2754 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2755 },
2756 {
1da177e4 2757 .procname = "gc_thresh",
9a7ec3a9 2758 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2759 .maxlen = sizeof(int),
2760 .mode = 0644,
6d9f239a 2761 .proc_handler = proc_dointvec,
1da177e4
LT
2762 },
2763 {
1da177e4 2764 .procname = "max_size",
4990509f 2765 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2766 .maxlen = sizeof(int),
2767 .mode = 0644,
6d9f239a 2768 .proc_handler = proc_dointvec,
1da177e4
LT
2769 },
2770 {
1da177e4 2771 .procname = "gc_min_interval",
4990509f 2772 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2773 .maxlen = sizeof(int),
2774 .mode = 0644,
6d9f239a 2775 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2776 },
2777 {
1da177e4 2778 .procname = "gc_timeout",
4990509f 2779 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2780 .maxlen = sizeof(int),
2781 .mode = 0644,
6d9f239a 2782 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2783 },
2784 {
1da177e4 2785 .procname = "gc_interval",
4990509f 2786 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2787 .maxlen = sizeof(int),
2788 .mode = 0644,
6d9f239a 2789 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2790 },
2791 {
1da177e4 2792 .procname = "gc_elasticity",
4990509f 2793 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2794 .maxlen = sizeof(int),
2795 .mode = 0644,
f3d3f616 2796 .proc_handler = proc_dointvec,
1da177e4
LT
2797 },
2798 {
1da177e4 2799 .procname = "mtu_expires",
4990509f 2800 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2801 .maxlen = sizeof(int),
2802 .mode = 0644,
6d9f239a 2803 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2804 },
2805 {
1da177e4 2806 .procname = "min_adv_mss",
4990509f 2807 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2808 .maxlen = sizeof(int),
2809 .mode = 0644,
f3d3f616 2810 .proc_handler = proc_dointvec,
1da177e4
LT
2811 },
2812 {
1da177e4 2813 .procname = "gc_min_interval_ms",
4990509f 2814 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2815 .maxlen = sizeof(int),
2816 .mode = 0644,
6d9f239a 2817 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2818 },
f8572d8f 2819 { }
1da177e4
LT
2820};
2821
2c8c1e72 2822struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2823{
2824 struct ctl_table *table;
2825
2826 table = kmemdup(ipv6_route_table_template,
2827 sizeof(ipv6_route_table_template),
2828 GFP_KERNEL);
5ee09105
YH
2829
2830 if (table) {
2831 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2832 table[0].extra1 = net;
86393e52 2833 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2834 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2835 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2836 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2837 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2838 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2839 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2840 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2841 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2842 }
2843
760f2d01
DL
2844 return table;
2845}
1da177e4
LT
2846#endif
2847
2c8c1e72 2848static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2849{
633d424b 2850 int ret = -ENOMEM;
8ed67789 2851
86393e52
AD
2852 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2853 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2854
fc66f95c
ED
2855 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2856 goto out_ip6_dst_ops;
2857
8ed67789
DL
2858 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2859 sizeof(*net->ipv6.ip6_null_entry),
2860 GFP_KERNEL);
2861 if (!net->ipv6.ip6_null_entry)
fc66f95c 2862 goto out_ip6_dst_entries;
d8d1f30b 2863 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2864 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2865 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2866 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2867 ip6_template_metrics, true);
8ed67789
DL
2868
2869#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2870 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2871 sizeof(*net->ipv6.ip6_prohibit_entry),
2872 GFP_KERNEL);
68fffc67
PZ
2873 if (!net->ipv6.ip6_prohibit_entry)
2874 goto out_ip6_null_entry;
d8d1f30b 2875 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2876 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2877 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2878 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2879 ip6_template_metrics, true);
8ed67789
DL
2880
2881 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2882 sizeof(*net->ipv6.ip6_blk_hole_entry),
2883 GFP_KERNEL);
68fffc67
PZ
2884 if (!net->ipv6.ip6_blk_hole_entry)
2885 goto out_ip6_prohibit_entry;
d8d1f30b 2886 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2887 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2888 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2889 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2890 ip6_template_metrics, true);
8ed67789
DL
2891#endif
2892
b339a47c
PZ
2893 net->ipv6.sysctl.flush_delay = 0;
2894 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2895 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2896 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2897 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2898 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2899 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2900 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2901
cdb18761
DL
2902#ifdef CONFIG_PROC_FS
2903 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2904 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2905#endif
6891a346
BT
2906 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2907
8ed67789
DL
2908 ret = 0;
2909out:
2910 return ret;
f2fc6a54 2911
68fffc67
PZ
2912#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2913out_ip6_prohibit_entry:
2914 kfree(net->ipv6.ip6_prohibit_entry);
2915out_ip6_null_entry:
2916 kfree(net->ipv6.ip6_null_entry);
2917#endif
fc66f95c
ED
2918out_ip6_dst_entries:
2919 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2920out_ip6_dst_ops:
f2fc6a54 2921 goto out;
cdb18761
DL
2922}
2923
2c8c1e72 2924static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2925{
2926#ifdef CONFIG_PROC_FS
2927 proc_net_remove(net, "ipv6_route");
2928 proc_net_remove(net, "rt6_stats");
2929#endif
8ed67789
DL
2930 kfree(net->ipv6.ip6_null_entry);
2931#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2932 kfree(net->ipv6.ip6_prohibit_entry);
2933 kfree(net->ipv6.ip6_blk_hole_entry);
2934#endif
41bb78b4 2935 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2936}
2937
2938static struct pernet_operations ip6_route_net_ops = {
2939 .init = ip6_route_net_init,
2940 .exit = ip6_route_net_exit,
2941};
2942
8ed67789
DL
2943static struct notifier_block ip6_route_dev_notifier = {
2944 .notifier_call = ip6_route_dev_notify,
2945 .priority = 0,
2946};
2947
433d49c3 2948int __init ip6_route_init(void)
1da177e4 2949{
433d49c3
DL
2950 int ret;
2951
9a7ec3a9
DL
2952 ret = -ENOMEM;
2953 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2954 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2955 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2956 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2957 goto out;
14e50e57 2958
fc66f95c 2959 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2960 if (ret)
bdb3289f 2961 goto out_kmem_cache;
bdb3289f 2962
fc66f95c
ED
2963 ret = register_pernet_subsys(&ip6_route_net_ops);
2964 if (ret)
2965 goto out_dst_entries;
2966
5dc121e9
AE
2967 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2968
8ed67789
DL
2969 /* Registering of the loopback is done before this portion of code,
2970 * the loopback reference in rt6_info will not be taken, do it
2971 * manually for init_net */
d8d1f30b 2972 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2973 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2974 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2975 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2976 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2977 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2978 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2979 #endif
433d49c3
DL
2980 ret = fib6_init();
2981 if (ret)
8ed67789 2982 goto out_register_subsys;
433d49c3 2983
433d49c3
DL
2984 ret = xfrm6_init();
2985 if (ret)
cdb18761 2986 goto out_fib6_init;
c35b7e72 2987
433d49c3
DL
2988 ret = fib6_rules_init();
2989 if (ret)
2990 goto xfrm6_init;
7e5449c2 2991
433d49c3 2992 ret = -ENOBUFS;
c7ac8679
GR
2993 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2994 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2995 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 2996 goto fib6_rules_init;
c127ea2c 2997
8ed67789 2998 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2999 if (ret)
3000 goto fib6_rules_init;
8ed67789 3001
433d49c3
DL
3002out:
3003 return ret;
3004
3005fib6_rules_init:
433d49c3
DL
3006 fib6_rules_cleanup();
3007xfrm6_init:
433d49c3 3008 xfrm6_fini();
433d49c3 3009out_fib6_init:
433d49c3 3010 fib6_gc_cleanup();
8ed67789
DL
3011out_register_subsys:
3012 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
3013out_dst_entries:
3014 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3015out_kmem_cache:
f2fc6a54 3016 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3017 goto out;
1da177e4
LT
3018}
3019
3020void ip6_route_cleanup(void)
3021{
8ed67789 3022 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 3023 fib6_rules_cleanup();
1da177e4 3024 xfrm6_fini();
1da177e4 3025 fib6_gc_cleanup();
8ed67789 3026 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3027 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3028 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3029}