net: Abstract away all dst_entry metrics accesses.
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
66
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
74
75 #define CLONE_OFFLINK_ROUTE 0
76
77 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void ip6_dst_destroy(struct dst_entry *);
81 static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
83 static int ip6_dst_gc(struct dst_ops *ops);
84
85 static int ip6_pkt_discard(struct sk_buff *skb);
86 static int ip6_pkt_discard_out(struct sk_buff *skb);
87 static void ip6_link_failure(struct sk_buff *skb);
88 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
95 static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
97 struct in6_addr *gwaddr, int ifindex);
98 #endif
99
/* dst_ops template for ordinary IPv6 routes.  The callbacks below hook
 * this file's cache management (GC, validity check, teardown, PMTU)
 * into the generic dst layer. */
static struct dst_ops ip6_dst_ops_template = {
    .family          = AF_INET6,
    .protocol        = cpu_to_be16(ETH_P_IPV6),
    .gc              = ip6_dst_gc,
    .gc_thresh       = 1024,
    .check           = ip6_dst_check,
    .destroy         = ip6_dst_destroy,
    .ifdown          = ip6_dst_ifdown,
    .negative_advice = ip6_negative_advice,
    .link_failure    = ip6_link_failure,
    .update_pmtu     = ip6_rt_update_pmtu,
    .local_out       = __ip6_local_out,
};
113
/* Intentionally empty: blackhole routes never transmit, so PMTU
 * updates are ignored. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
{
}
117
/* dst_ops for blackhole copies made by ip6_dst_blackhole(): no GC, no
 * ifdown/negative-advice/link-failure hooks, and a no-op PMTU update. */
static struct dst_ops ip6_dst_blackhole_ops = {
    .family      = AF_INET6,
    .protocol    = cpu_to_be16(ETH_P_IPV6),
    .destroy     = ip6_dst_destroy,
    .check       = ip6_dst_check,
    .update_pmtu = ip6_rt_blackhole_update_pmtu,
};
125
/* Terminal "no route" entry: lookups that find nothing return a per-ns
 * copy of this.  Packets hitting it are discarded with -ENETUNREACH.
 * Permanently referenced (__refcnt = 1) and worst possible metric. */
static struct rt6_info ip6_null_entry_template = {
    .dst = {
        .__refcnt = ATOMIC_INIT(1),
        .__use    = 1,
        .obsolete = -1,
        .error    = -ENETUNREACH,
        .input    = ip6_pkt_discard,
        .output   = ip6_pkt_discard_out,
    },
    .rt6i_flags    = (RTF_REJECT | RTF_NONEXTHOP),
    .rt6i_protocol = RTPROT_KERNEL,
    .rt6i_metric   = ~(u32) 0,
    .rt6i_ref      = ATOMIC_INIT(1),
};
140
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* Terminal route for "prohibit" policy rules: matching packets are
 * rejected with an administratively-prohibited error (-EACCES). */
static struct rt6_info ip6_prohibit_entry_template = {
    .dst = {
        .__refcnt = ATOMIC_INIT(1),
        .__use    = 1,
        .obsolete = -1,
        .error    = -EACCES,
        .input    = ip6_pkt_prohibit,
        .output   = ip6_pkt_prohibit_out,
    },
    .rt6i_flags    = (RTF_REJECT | RTF_NONEXTHOP),
    .rt6i_protocol = RTPROT_KERNEL,
    .rt6i_metric   = ~(u32) 0,
    .rt6i_ref      = ATOMIC_INIT(1),
};

/* Terminal route for "blackhole" policy rules: matching packets are
 * silently discarded (dst_discard for both directions, error -EINVAL). */
static struct rt6_info ip6_blk_hole_entry_template = {
    .dst = {
        .__refcnt = ATOMIC_INIT(1),
        .__use    = 1,
        .obsolete = -1,
        .error    = -EINVAL,
        .input    = dst_discard,
        .output   = dst_discard,
    },
    .rt6i_flags    = (RTF_REJECT | RTF_NONEXTHOP),
    .rt6i_protocol = RTPROT_KERNEL,
    .rt6i_metric   = ~(u32) 0,
    .rt6i_ref      = ATOMIC_INIT(1),
};

#endif
177
178 /* allocate dst with ip6_dst_ops */
/* Allocate a fresh dst entry from @ops and hand it back typed as an
 * IPv6 route. */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
    struct dst_entry *dst = dst_alloc(ops);

    return (struct rt6_info *)dst;
}
183
/*
 * dst_ops.destroy callback: release the references this rt6_info holds
 * on its inet6_dev and (for cached clones) its inet_peer before the dst
 * memory is freed.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
    struct rt6_info *rt = (struct rt6_info *)dst;
    struct inet6_dev *idev = rt->rt6i_idev;
    struct inet_peer *peer = rt->rt6i_peer;

    if (idev != NULL) {
        rt->rt6i_idev = NULL;
        in6_dev_put(idev);
    }
    if (peer) {
        /* Only RTF_CACHE entries ever bind a peer (see rt6_bind_peer). */
        BUG_ON(!(rt->rt6i_flags & RTF_CACHE));
        rt->rt6i_peer = NULL;
        inet_putpeer(peer);
    }
}
200
/*
 * Look up (optionally creating, per @create) the inet_peer for this
 * cached route's destination and attach it to rt->rt6i_peer.  Only
 * valid on RTF_CACHE entries.  The cmpxchg() resolves a race with a
 * concurrent binder: the loser drops its extra peer reference.
 */
void rt6_bind_peer(struct rt6_info *rt, int create)
{
    struct inet_peer *peer;

    if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE)))
        return;

    peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
    if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
        inet_putpeer(peer);
}
212
/*
 * dst_ops.ifdown callback: when @dev is going away, re-point the
 * route's inet6_dev reference at the namespace's loopback device so we
 * never keep a stale idev alive past the device teardown.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
                           int how)
{
    struct rt6_info *rt = (struct rt6_info *)dst;
    struct inet6_dev *idev = rt->rt6i_idev;
    struct net_device *loopback_dev =
        dev_net(dev)->loopback_dev;

    if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
        struct inet6_dev *loopback_idev =
            in6_dev_get(loopback_dev);
        if (loopback_idev != NULL) {
            /* Swap the reference: take loopback's, drop @dev's. */
            rt->rt6i_idev = loopback_idev;
            in6_dev_put(idev);
        }
    }
}
230
231 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
232 {
233 return (rt->rt6i_flags & RTF_EXPIRES) &&
234 time_after(jiffies, rt->rt6i_expires);
235 }
236
237 static inline int rt6_need_strict(struct in6_addr *daddr)
238 {
239 return ipv6_addr_type(daddr) &
240 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
241 }
242
243 /*
244 * Route lookup. Any table->tb6_lock is implied.
245 */
246
/*
 * Walk the sibling list of routes starting at @rt and pick the entry
 * matching the requested output interface (@oif) and/or source address
 * (@saddr).
 *
 * With RT6_LOOKUP_F_IFACE the interface match is strict: if no route
 * uses @oif, the per-ns ip6_null_entry is returned instead of any
 * fallback.  A loopback route whose idev does not match @oif is
 * remembered in @local and used only as a last resort.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
                                                struct rt6_info *rt,
                                                struct in6_addr *saddr,
                                                int oif,
                                                int flags)
{
    struct rt6_info *local = NULL;
    struct rt6_info *sprt;

    /* Nothing to constrain on: the head route is good enough. */
    if (!oif && ipv6_addr_any(saddr))
        goto out;

    for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
        struct net_device *dev = sprt->rt6i_dev;

        if (oif) {
            if (dev->ifindex == oif)
                return sprt;    /* exact device match */
            if (dev->flags & IFF_LOOPBACK) {
                if (sprt->rt6i_idev == NULL ||
                    sprt->rt6i_idev->dev->ifindex != oif) {
                    /* Skip a non-matching loopback candidate if we
                     * must match strictly, or if we already hold an
                     * equally good (or better) fallback. */
                    if (flags & RT6_LOOKUP_F_IFACE && oif)
                        continue;
                    if (local && (!oif ||
                        local->rt6i_idev->dev->ifindex == oif))
                        continue;
                }
                local = sprt;   /* remember as fallback */
            }
        } else {
            /* No oif given: match on the source address instead. */
            if (ipv6_chk_addr(net, saddr, dev,
                              flags & RT6_LOOKUP_F_IFACE))
                return sprt;
        }
    }

    if (oif) {
        if (local)
            return local;

        if (flags & RT6_LOOKUP_F_IFACE)
            return net->ipv6.ip6_null_entry;
    }
out:
    return rt;
}
293
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the next hop of @rt has no valid
 * neighbour state, send a Neighbour Solicitation so a dead router can
 * be detected and skipped by route selection.  Rate-limited via
 * neigh->updated and the per-device rtr_probe_interval.
 */
static void rt6_probe(struct rt6_info *rt)
{
    struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
    /*
     * Okay, this does not seem to be appropriate
     * for now, however, we need to check if it
     * is really so; aka Router Reachability Probing.
     *
     * Router Reachability Probe MUST be rate-limited
     * to no more than one per minute.
     */
    if (!neigh || (neigh->nud_state & NUD_VALID))
        return;
    read_lock_bh(&neigh->lock);
    /* Re-check state under the neighbour lock before probing. */
    if (!(neigh->nud_state & NUD_VALID) &&
        time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
        struct in6_addr mcaddr;
        struct in6_addr *target;

        /* Stamp first so concurrent probers stay rate-limited. */
        neigh->updated = jiffies;
        read_unlock_bh(&neigh->lock);

        target = (struct in6_addr *)&neigh->primary_key;
        addrconf_addr_solict_mult(target, &mcaddr);
        ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
    } else
        read_unlock_bh(&neigh->lock);
}
#else
/* Probing is a no-op without router-preference support. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
328
329 /*
330 * Default Router Selection (RFC 2461 6.3.6)
331 */
332 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
333 {
334 struct net_device *dev = rt->rt6i_dev;
335 if (!oif || dev->ifindex == oif)
336 return 2;
337 if ((dev->flags & IFF_LOOPBACK) &&
338 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
339 return 1;
340 return 0;
341 }
342
/*
 * Score next-hop reachability for route selection:
 *   2 - neighbour state is valid;
 *   1 - route has no gateway/next hop requirement, or neighbour state
 *       is unknown (and not FAILED, with router preferences enabled);
 *   0 - neighbour missing or known failed.
 * The neighbour state is read under its read lock.
 */
static inline int rt6_check_neigh(struct rt6_info *rt)
{
    struct neighbour *neigh = rt->rt6i_nexthop;
    int m;
    if (rt->rt6i_flags & RTF_NONEXTHOP ||
        !(rt->rt6i_flags & RTF_GATEWAY))
        m = 1;
    else if (neigh) {
        read_lock_bh(&neigh->lock);
        if (neigh->nud_state & NUD_VALID)
            m = 2;
#ifdef CONFIG_IPV6_ROUTER_PREF
        else if (neigh->nud_state & NUD_FAILED)
            m = 0;
#endif
        else
            m = 1;
        read_unlock_bh(&neigh->lock);
    } else
        m = 0;
    return m;
}
365
366 static int rt6_score_route(struct rt6_info *rt, int oif,
367 int strict)
368 {
369 int m, n;
370
371 m = rt6_check_dev(rt, oif);
372 if (!m && (strict & RT6_LOOKUP_F_IFACE))
373 return -1;
374 #ifdef CONFIG_IPV6_ROUTER_PREF
375 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
376 #endif
377 n = rt6_check_neigh(rt);
378 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
379 return -1;
380 return m;
381 }
382
/*
 * Compare @rt against the best candidate so far (@match, whose score is
 * in *mpri) and return the new best.  Expired routes are skipped.  When
 * reachability is required, the route that loses each comparison gets
 * probed (rt6_probe) so its neighbour state has a chance to recover.
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
                                   int *mpri, struct rt6_info *match)
{
    int m;

    if (rt6_check_expired(rt))
        goto out;

    m = rt6_score_route(rt, oif, strict);
    if (m < 0)
        goto out;

    if (m > *mpri) {
        /* @rt wins: probe the demoted previous best. */
        if (strict & RT6_LOOKUP_F_REACHABLE)
            rt6_probe(match);
        *mpri = m;
        match = rt;
    } else if (strict & RT6_LOOKUP_F_REACHABLE) {
        rt6_probe(rt);
    }

out:
    return match;
}
407
/*
 * Scan the routes of @fn that share @metric, starting at the
 * round-robin head @rr_head and wrapping around via fn->leaf, and
 * return the best-scoring one (or NULL).  Both loops stop as soon as
 * the metric changes, so the walk stays within the run of same-metric
 * routes.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
                                     struct rt6_info *rr_head,
                                     u32 metric, int oif, int strict)
{
    struct rt6_info *rt, *match;
    int mpri = -1;

    match = NULL;
    /* From the rr head to the end of the same-metric run... */
    for (rt = rr_head; rt && rt->rt6i_metric == metric;
         rt = rt->dst.rt6_next)
        match = find_match(rt, oif, strict, &mpri, match);
    /* ...then wrap: from the list head up to (not including) rr_head. */
    for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
         rt = rt->dst.rt6_next)
        match = find_match(rt, oif, strict, &mpri, match);

    return match;
}
425
/*
 * Route selection for one FIB node (cf. RFC 2461 6.3.6 default router
 * selection).  Starts at the node's round-robin pointer and picks the
 * best-scoring route of the same metric; if nothing matched under the
 * reachability requirement, the rr pointer is advanced so the next
 * lookup tries a different router.  Returns ip6_null_entry when no
 * route fits.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
    struct rt6_info *match, *rt0;
    struct net *net;

    RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
              __func__, fn->leaf, oif);

    rt0 = fn->rr_ptr;
    if (!rt0)
        fn->rr_ptr = rt0 = fn->leaf;

    match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);

    if (!match &&
        (strict & RT6_LOOKUP_F_REACHABLE)) {
        struct rt6_info *next = rt0->dst.rt6_next;

        /* no entries matched; do round-robin */
        if (!next || next->rt6i_metric != rt0->rt6i_metric)
            next = fn->leaf;

        if (next != rt0)
            fn->rr_ptr = next;
    }

    RT6_TRACE("%s() => %p\n",
              __func__, match);

    net = dev_net(rt0->rt6i_dev);
    return match ? match : net->ipv6.ip6_null_entry;
}
458
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received in a Router Advertisement
 * from @gwaddr on @dev.  After validating option length, prefix length
 * and preference, the corresponding RTF_ROUTEINFO route is:
 *   - deleted when the advertised lifetime is 0,
 *   - added (with the advertised preference) when absent,
 *   - refreshed (preference and expiry) when present.
 * Returns 0 on success, -EINVAL for malformed options.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
                  struct in6_addr *gwaddr)
{
    struct net *net = dev_net(dev);
    struct route_info *rinfo = (struct route_info *) opt;
    struct in6_addr prefix_buf, *prefix;
    unsigned int pref;
    unsigned long lifetime;
    struct rt6_info *rt;

    if (len < sizeof(struct route_info)) {
        return -EINVAL;
    }

    /* Sanity check for prefix_len and length */
    if (rinfo->length > 3) {
        return -EINVAL;
    } else if (rinfo->prefix_len > 128) {
        return -EINVAL;
    } else if (rinfo->prefix_len > 64) {
        /* >64-bit prefixes need at least 2 units of option length. */
        if (rinfo->length < 2) {
            return -EINVAL;
        }
    } else if (rinfo->prefix_len > 0) {
        if (rinfo->length < 1) {
            return -EINVAL;
        }
    }

    pref = rinfo->route_pref;
    if (pref == ICMPV6_ROUTER_PREF_INVALID)
        return -EINVAL;

    lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

    if (rinfo->length == 3)
        prefix = (struct in6_addr *)rinfo->prefix;
    else {
        /* this function is safe */
        ipv6_addr_prefix(&prefix_buf,
                         (struct in6_addr *)rinfo->prefix,
                         rinfo->prefix_len);
        prefix = &prefix_buf;
    }

    rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
                            dev->ifindex);

    if (rt && !lifetime) {
        /* Zero lifetime: the router withdrew this route. */
        ip6_del_rt(rt);
        rt = NULL;
    }

    if (!rt && lifetime)
        rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
                                pref);
    else if (rt)
        /* Existing route: refresh the preference bits. */
        rt->rt6i_flags = RTF_ROUTEINFO |
                         (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

    if (rt) {
        if (!addrconf_finite_timeout(lifetime)) {
            rt->rt6i_flags &= ~RTF_EXPIRES;
        } else {
            rt->rt6i_expires = jiffies + HZ * lifetime;
            rt->rt6i_flags |= RTF_EXPIRES;
        }
        dst_release(&rt->dst);
    }
    return 0;
}
#endif
532
/*
 * Lookup backtracking helper.  If the route found so far is the null
 * entry, climb towards the tree root looking for a less-specific node
 * that carries routes (RTN_RTINFO) and jump back to the caller's
 * "restart" label to retry there; bail out via the caller's "out"
 * label at the tree root.  Source-routed subtrees are re-entered via
 * FIB6_SUBTREE().  NOTE: relies on "rt", "fn", and the two labels
 * existing in the calling function.
 */
#define BACKTRACK(__net, saddr) \
do { \
    if (rt == __net->ipv6.ip6_null_entry) { \
        struct fib6_node *pn; \
        while (1) { \
            if (fn->fn_flags & RTN_TL_ROOT) \
                goto out; \
            pn = fn->parent; \
            if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
                fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
            else \
                fn = pn; \
            if (fn->fn_flags & RTN_RTINFO) \
                goto restart; \
        } \
    } \
} while(0)
550
/*
 * Simple (non-cloning) route lookup in one table, used by rt6_lookup()
 * via fib6_rule_lookup().  Finds the FIB node for the flow's
 * destination, narrows by device/source via rt6_device_match(), and
 * backtracks on a null result.  The returned route is use-counted
 * (dst_use) under the table lock.
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
                                             struct fib6_table *table,
                                             struct flowi *fl, int flags)
{
    struct fib6_node *fn;
    struct rt6_info *rt;

    read_lock_bh(&table->tb6_lock);
    fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
    rt = fn->leaf;
    rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
    BACKTRACK(net, &fl->fl6_src);
out:
    dst_use(&rt->dst, jiffies);
    read_unlock_bh(&table->tb6_lock);
    return rt;

}
570
/*
 * Public route lookup: find the route to @daddr (optionally constrained
 * by @saddr and @oif; @strict forces an exact interface match).
 * Returns a held rt6_info, or NULL when the lookup resolved to an
 * error route.  Caller must dst_release() the result.
 */
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
                            const struct in6_addr *saddr, int oif, int strict)
{
    struct flowi fl = {
        .oif = oif,
        .fl6_dst = *daddr,
    };
    struct dst_entry *dst;
    int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

    if (saddr) {
        memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
        flags |= RT6_LOOKUP_F_HAS_SADDR;
    }

    dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
    if (dst->error == 0)
        return (struct rt6_info *) dst;

    /* Error route (e.g. null entry): drop our reference, report NULL. */
    dst_release(dst);

    return NULL;
}

EXPORT_SYMBOL(rt6_lookup);
596
/* ip6_ins_rt is called with FREE table->tb6_lock.
   It takes new route entry, the addition fails by any reason the
   route is freed. In any case, if caller does not hold it, it may
   be destroyed.
 */

/*
 * Insert @rt into its FIB table under the table write lock.
 * Returns the fib6_add() result (0 on success).
 */
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
{
    int err;
    struct fib6_table *table;

    table = rt->rt6i_table;
    write_lock_bh(&table->tb6_lock);
    err = fib6_add(&table->tb6_root, rt, info);
    write_unlock_bh(&table->tb6_lock);

    return err;
}
615
616 int ip6_ins_rt(struct rt6_info *rt)
617 {
618 struct nl_info info = {
619 .nl_net = dev_net(rt->rt6i_dev),
620 };
621 return __ip6_ins_rt(rt, &info);
622 }
623
/*
 * Clone @ort into a per-destination RTF_CACHE host route for @daddr
 * (and, with subtrees, @saddr), then resolve its next-hop neighbour.
 * If the neighbour table overflows, route GC is forced once with
 * temporarily relaxed gc sysctls before retrying; the retry is not
 * attempted from softirq context (attempts = !in_softirq()).
 * Returns the clone, or NULL on failure (the clone is then freed).
 */
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
                                      struct in6_addr *saddr)
{
    struct rt6_info *rt;

    /*
     * Clone the route.
     */

    rt = ip6_rt_copy(ort);

    if (rt) {
        struct neighbour *neigh;
        int attempts = !in_softirq();

        if (!(rt->rt6i_flags&RTF_GATEWAY)) {
            /* On-link route: the destination itself is the next hop. */
            if (rt->rt6i_dst.plen != 128 &&
                ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
                rt->rt6i_flags |= RTF_ANYCAST;
            ipv6_addr_copy(&rt->rt6i_gateway, daddr);
        }

        /* Narrow the clone to a /128 cached host route. */
        ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
        rt->rt6i_dst.plen = 128;
        rt->rt6i_flags |= RTF_CACHE;
        rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
        if (rt->rt6i_src.plen && saddr) {
            ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
            rt->rt6i_src.plen = 128;
        }
#endif

    retry:
        neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
        if (IS_ERR(neigh)) {
            struct net *net = dev_net(rt->rt6i_dev);
            int saved_rt_min_interval =
                net->ipv6.sysctl.ip6_rt_gc_min_interval;
            int saved_rt_elasticity =
                net->ipv6.sysctl.ip6_rt_gc_elasticity;

            if (attempts-- > 0) {
                /* Force an aggressive GC pass, then restore the
                 * saved sysctl values and retry the neigh lookup. */
                net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
                net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;

                ip6_dst_gc(&net->ipv6.ip6_dst_ops);

                net->ipv6.sysctl.ip6_rt_gc_elasticity =
                    saved_rt_elasticity;
                net->ipv6.sysctl.ip6_rt_gc_min_interval =
                    saved_rt_min_interval;
                goto retry;
            }

            if (net_ratelimit())
                printk(KERN_WARNING
                       "ipv6: Neighbour table overflow.\n");
            dst_free(&rt->dst);
            return NULL;
        }
        rt->rt6i_nexthop = neigh;

    }

    return rt;
}
692
/*
 * Like rt6_alloc_cow() but for routes that already carry a next hop:
 * clone @ort into a /128 RTF_CACHE entry for @daddr, sharing (a clone
 * of) the original's neighbour instead of resolving a new one.
 */
static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
{
    struct rt6_info *rt = ip6_rt_copy(ort);
    if (rt) {
        ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
        rt->rt6i_dst.plen = 128;
        rt->rt6i_flags |= RTF_CACHE;
        rt->dst.flags |= DST_HOST;
        rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
    }
    return rt;
}
705
/*
 * Core policy-routing lookup shared by the input and output paths.
 *
 * Strategy:
 *  1. Look up fl->fl6_dst in @table, preferring reachable routers
 *     (unless forwarding is enabled, where reachability is skipped).
 *  2. If the best route is neither the null entry nor an RTF_CACHE
 *     host route, drop the table lock and clone it (rt6_alloc_cow for
 *     gateway-less routes; off-link cloning is compiled out by
 *     CLONE_OFFLINK_ROUTE), then insert the clone.  An insert race
 *     triggers a full relookup, bounded at 3 attempts.
 *  3. If nothing matched under the reachability requirement, redo the
 *     whole lookup without it (restart_2).
 *
 * Always returns a held dst; may be the per-ns null entry.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
                                      struct flowi *fl, int flags)
{
    struct fib6_node *fn;
    struct rt6_info *rt, *nrt;
    int strict = 0;
    int attempts = 3;
    int err;
    int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

    strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
    read_lock_bh(&table->tb6_lock);

restart_2:
    fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);

restart:
    rt = rt6_select(fn, oif, strict | reachable);

    BACKTRACK(net, &fl->fl6_src);
    if (rt == net->ipv6.ip6_null_entry ||
        rt->rt6i_flags & RTF_CACHE)
        goto out;

    /* Hold the route across the unlock while we clone it. */
    dst_hold(&rt->dst);
    read_unlock_bh(&table->tb6_lock);

    if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
        nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
    else {
#if CLONE_OFFLINK_ROUTE
        nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
#else
        goto out2;
#endif
    }

    dst_release(&rt->dst);
    rt = nrt ? : net->ipv6.ip6_null_entry;

    dst_hold(&rt->dst);
    if (nrt) {
        err = ip6_ins_rt(nrt);
        if (!err)
            goto out2;
    }

    if (--attempts <= 0)
        goto out2;

    /*
     * Race condition! In the gap, when table->tb6_lock was
     * released someone could insert this route. Relookup.
     */
    dst_release(&rt->dst);
    goto relookup;

out:
    if (reachable) {
        /* Nothing reachable matched: retry without the requirement. */
        reachable = 0;
        goto restart_2;
    }
    dst_hold(&rt->dst);
    read_unlock_bh(&table->tb6_lock);
out2:
    rt->dst.lastuse = jiffies;
    rt->dst.__use++;

    return rt;
}
778
779 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
780 struct flowi *fl, int flags)
781 {
782 return ip6_pol_route(net, table, fl->iif, fl, flags);
783 }
784
/*
 * Route an incoming packet: build a flow key from the IPv6 header and
 * attach the looked-up dst to @skb.  Scoped destinations force a strict
 * interface match, except on PIM register pseudo-devices.
 */
void ip6_route_input(struct sk_buff *skb)
{
    struct ipv6hdr *iph = ipv6_hdr(skb);
    struct net *net = dev_net(skb->dev);
    int flags = RT6_LOOKUP_F_HAS_SADDR;
    struct flowi fl = {
        .iif = skb->dev->ifindex,
        .fl6_dst = iph->daddr,
        .fl6_src = iph->saddr,
        /* Flow label: low 20+8 bits of the first header word. */
        .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
        .mark = skb->mark,
        .proto = iph->nexthdr,
    };

    if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
        flags |= RT6_LOOKUP_F_IFACE;

    skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
}
804
805 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
806 struct flowi *fl, int flags)
807 {
808 return ip6_pol_route(net, table, fl->oif, fl, flags);
809 }
810
811 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
812 struct flowi *fl)
813 {
814 int flags = 0;
815
816 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
817 flags |= RT6_LOOKUP_F_IFACE;
818
819 if (!ipv6_addr_any(&fl->fl6_src))
820 flags |= RT6_LOOKUP_F_HAS_SADDR;
821 else if (sk)
822 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
823
824 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
825 }
826
827 EXPORT_SYMBOL(ip6_route_output);
828
/*
 * Replace *dstp with a "blackhole" copy of the route: same device,
 * idev, gateway, keys and metrics, but input/output are dst_discard so
 * no packet is ever transmitted through it.  The original *dstp
 * reference is dropped.  Returns 0, or -ENOMEM if allocation failed
 * (*dstp is then NULL).
 *
 * NOTE(review): dst_free(new) is called while the new entry still has
 * refcnt 1 and is about to be returned — this mirrors the historical
 * upstream code (deferred-free discipline for blackhole dsts), but
 * confirm against the dst API before touching it.
 */
int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
{
    struct rt6_info *ort = (struct rt6_info *) *dstp;
    struct rt6_info *rt = (struct rt6_info *)
        dst_alloc(&ip6_dst_blackhole_ops);
    struct dst_entry *new = NULL;

    if (rt) {
        new = &rt->dst;

        atomic_set(&new->__refcnt, 1);
        new->__use = 1;
        /* Both directions drop silently. */
        new->input = dst_discard;
        new->output = dst_discard;

        dst_copy_metrics(new, &ort->dst);
        new->dev = ort->dst.dev;
        if (new->dev)
            dev_hold(new->dev);
        rt->rt6i_idev = ort->rt6i_idev;
        if (rt->rt6i_idev)
            in6_dev_hold(rt->rt6i_idev);
        rt->rt6i_expires = 0;

        ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
        rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
        rt->rt6i_metric = 0;

        memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
        memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

        dst_free(new);
    }

    dst_release(*dstp);
    *dstp = new;
    return new ? 0 : -ENOMEM;
}
EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
870
871 /*
872 * Destination cache support functions
873 */
874
875 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
876 {
877 struct rt6_info *rt;
878
879 rt = (struct rt6_info *) dst;
880
881 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
882 return dst;
883
884 return NULL;
885 }
886
887 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
888 {
889 struct rt6_info *rt = (struct rt6_info *) dst;
890
891 if (rt) {
892 if (rt->rt6i_flags & RTF_CACHE) {
893 if (rt6_check_expired(rt)) {
894 ip6_del_rt(rt);
895 dst = NULL;
896 }
897 } else {
898 dst_release(dst);
899 dst = NULL;
900 }
901 }
902 return dst;
903 }
904
/*
 * dst_ops.link_failure callback: report address unreachability to the
 * sender via ICMPv6, then invalidate the route — cached entries are
 * expired immediately; a failing default route bumps its FIB node's
 * serial number so cached lookups re-validate.
 */
static void ip6_link_failure(struct sk_buff *skb)
{
    struct rt6_info *rt;

    icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

    rt = (struct rt6_info *) skb_dst(skb);
    if (rt) {
        if (rt->rt6i_flags&RTF_CACHE) {
            /* Expire the cached clone right away. */
            dst_set_expires(&rt->dst, 0);
            rt->rt6i_flags |= RTF_EXPIRES;
        } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
            /* Invalidate cookies derived from this node. */
            rt->rt6i_node->fn_sernum = -1;
    }
}
920
/*
 * dst_ops.update_pmtu callback: record a smaller path MTU on a /128
 * (host) route.  MTUs below IPV6_MIN_MTU are clamped to the minimum
 * and the ALLFRAG feature is set so upper layers add fragment headers
 * on every packet (RFC 2460 behaviour for sub-1280 links).
 */
static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
{
    struct rt6_info *rt6 = (struct rt6_info*)dst;

    if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
        rt6->rt6i_flags |= RTF_MODIFIED;
        if (mtu < IPV6_MIN_MTU) {
            u32 features = dst_metric(dst, RTAX_FEATURES);
            mtu = IPV6_MIN_MTU;
            features |= RTAX_FEATURE_ALLFRAG;
            dst_metric_set(dst, RTAX_FEATURES, features);
        }
        dst_metric_set(dst, RTAX_MTU, mtu);
        /* Tell interested parties (e.g. routing daemons) about it. */
        call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
    }
}
937
938 static int ipv6_get_mtu(struct net_device *dev);
939
940 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
941 {
942 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
943
944 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
945 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
946
947 /*
948 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
949 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
950 * IPV6_MAXPLEN is also valid and means: "any MSS,
951 * rely only on pmtu discovery"
952 */
953 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
954 mtu = IPV6_MAXPLEN;
955 return mtu;
956 }
957
958 static struct dst_entry *icmp6_dst_gc_list;
959 static DEFINE_SPINLOCK(icmp6_dst_lock);
960
961 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
962 struct neighbour *neigh,
963 const struct in6_addr *addr)
964 {
965 struct rt6_info *rt;
966 struct inet6_dev *idev = in6_dev_get(dev);
967 struct net *net = dev_net(dev);
968
969 if (unlikely(idev == NULL))
970 return NULL;
971
972 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
973 if (unlikely(rt == NULL)) {
974 in6_dev_put(idev);
975 goto out;
976 }
977
978 dev_hold(dev);
979 if (neigh)
980 neigh_hold(neigh);
981 else {
982 neigh = ndisc_get_neigh(dev, addr);
983 if (IS_ERR(neigh))
984 neigh = NULL;
985 }
986
987 rt->rt6i_dev = dev;
988 rt->rt6i_idev = idev;
989 rt->rt6i_nexthop = neigh;
990 atomic_set(&rt->dst.__refcnt, 1);
991 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
992 dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev));
993 dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst)));
994 rt->dst.output = ip6_output;
995
996 #if 0 /* there's no chance to use these for ndisc */
997 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
998 ? DST_HOST
999 : 0;
1000 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1001 rt->rt6i_dst.plen = 128;
1002 #endif
1003
1004 spin_lock_bh(&icmp6_dst_lock);
1005 rt->dst.next = icmp6_dst_gc_list;
1006 icmp6_dst_gc_list = &rt->dst;
1007 spin_unlock_bh(&icmp6_dst_lock);
1008
1009 fib6_force_start_gc(net);
1010
1011 out:
1012 return &rt->dst;
1013 }
1014
/*
 * Reclaim unreferenced entries from icmp6_dst_gc_list (populated by
 * icmp6_dst_alloc).  Returns the number of entries still in use, so a
 * non-zero result tells the caller more GC passes are needed.
 */
int icmp6_dst_gc(void)
{
    struct dst_entry *dst, *next, **pprev;
    int more = 0;

    next = NULL;

    spin_lock_bh(&icmp6_dst_lock);
    pprev = &icmp6_dst_gc_list;

    while ((dst = *pprev) != NULL) {
        if (!atomic_read(&dst->__refcnt)) {
            /* Unlink and free the dead entry. */
            *pprev = dst->next;
            dst_free(dst);
        } else {
            pprev = &dst->next;
            ++more;
        }
    }

    spin_unlock_bh(&icmp6_dst_lock);

    return more;
}
1039
/*
 * Walk icmp6_dst_gc_list and free every entry for which @func returns
 * non-zero (e.g. entries tied to a disappearing device).  Same
 * unlink-and-free pattern as icmp6_dst_gc(), but predicate-driven.
 */
static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
                            void *arg)
{
    struct dst_entry *dst, **pprev;

    spin_lock_bh(&icmp6_dst_lock);
    pprev = &icmp6_dst_gc_list;
    while ((dst = *pprev) != NULL) {
        struct rt6_info *rt = (struct rt6_info *) dst;
        if (func(rt, arg)) {
            *pprev = dst->next;
            dst_free(dst);
        } else {
            pprev = &dst->next;
        }
    }
    spin_unlock_bh(&icmp6_dst_lock);
}
1058
/*
 * dst_ops.gc callback: run FIB garbage collection when the dst entry
 * count exceeds ip6_rt_max_size or the gc interval has elapsed.  The
 * per-ns ip6_rt_gc_expire value grows on each call and decays by
 * 1/2^elasticity, making collection progressively more aggressive
 * under pressure.  Returns non-zero when the table is still over its
 * size limit (i.e. allocation should fail).
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
    unsigned long now = jiffies;
    struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
    int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
    int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
    int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
    int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
    unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
    int entries;

    entries = dst_entries_get_fast(ops);
    /* Too soon since the last pass and not over the hard limit: skip. */
    if (time_after(rt_last_gc + rt_min_interval, now) &&
        entries <= rt_max_size)
        goto out;

    net->ipv6.ip6_rt_gc_expire++;
    fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
    net->ipv6.ip6_rt_last_gc = now;
    entries = dst_entries_get_slow(ops);
    if (entries < ops->gc_thresh)
        net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
    /* Exponential decay of the aggressiveness counter. */
    net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
    return entries > rt_max_size;
}
1085
1086 /* Clean host part of a prefix. Not necessary in radix tree,
1087 but results in cleaner routing tables.
1088
1089 Remove it only when all the things will work!
1090 */
1091
1092 static int ipv6_get_mtu(struct net_device *dev)
1093 {
1094 int mtu = IPV6_MIN_MTU;
1095 struct inet6_dev *idev;
1096
1097 rcu_read_lock();
1098 idev = __in6_dev_get(dev);
1099 if (idev)
1100 mtu = idev->cnf.mtu6;
1101 rcu_read_unlock();
1102 return mtu;
1103 }
1104
1105 int ip6_dst_hoplimit(struct dst_entry *dst)
1106 {
1107 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1108 if (hoplimit < 0) {
1109 struct net_device *dev = dst->dev;
1110 struct inet6_dev *idev;
1111
1112 rcu_read_lock();
1113 idev = __in6_dev_get(dev);
1114 if (idev)
1115 hoplimit = idev->cnf.hop_limit;
1116 else
1117 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1118 rcu_read_unlock();
1119 }
1120 return hoplimit;
1121 }
1122
1123 /*
1124 *
1125 */
1126
1127 int ip6_route_add(struct fib6_config *cfg)
1128 {
1129 int err;
1130 struct net *net = cfg->fc_nlinfo.nl_net;
1131 struct rt6_info *rt = NULL;
1132 struct net_device *dev = NULL;
1133 struct inet6_dev *idev = NULL;
1134 struct fib6_table *table;
1135 int addr_type;
1136
1137 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1138 return -EINVAL;
1139 #ifndef CONFIG_IPV6_SUBTREES
1140 if (cfg->fc_src_len)
1141 return -EINVAL;
1142 #endif
1143 if (cfg->fc_ifindex) {
1144 err = -ENODEV;
1145 dev = dev_get_by_index(net, cfg->fc_ifindex);
1146 if (!dev)
1147 goto out;
1148 idev = in6_dev_get(dev);
1149 if (!idev)
1150 goto out;
1151 }
1152
1153 if (cfg->fc_metric == 0)
1154 cfg->fc_metric = IP6_RT_PRIO_USER;
1155
1156 table = fib6_new_table(net, cfg->fc_table);
1157 if (table == NULL) {
1158 err = -ENOBUFS;
1159 goto out;
1160 }
1161
1162 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1163
1164 if (rt == NULL) {
1165 err = -ENOMEM;
1166 goto out;
1167 }
1168
1169 rt->dst.obsolete = -1;
1170 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1171 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1172 0;
1173
1174 if (cfg->fc_protocol == RTPROT_UNSPEC)
1175 cfg->fc_protocol = RTPROT_BOOT;
1176 rt->rt6i_protocol = cfg->fc_protocol;
1177
1178 addr_type = ipv6_addr_type(&cfg->fc_dst);
1179
1180 if (addr_type & IPV6_ADDR_MULTICAST)
1181 rt->dst.input = ip6_mc_input;
1182 else if (cfg->fc_flags & RTF_LOCAL)
1183 rt->dst.input = ip6_input;
1184 else
1185 rt->dst.input = ip6_forward;
1186
1187 rt->dst.output = ip6_output;
1188
1189 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1190 rt->rt6i_dst.plen = cfg->fc_dst_len;
1191 if (rt->rt6i_dst.plen == 128)
1192 rt->dst.flags = DST_HOST;
1193
1194 #ifdef CONFIG_IPV6_SUBTREES
1195 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1196 rt->rt6i_src.plen = cfg->fc_src_len;
1197 #endif
1198
1199 rt->rt6i_metric = cfg->fc_metric;
1200
1201 /* We cannot add true routes via loopback here,
1202 they would result in kernel looping; promote them to reject routes
1203 */
1204 if ((cfg->fc_flags & RTF_REJECT) ||
1205 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1206 && !(cfg->fc_flags&RTF_LOCAL))) {
1207 /* hold loopback dev/idev if we haven't done so. */
1208 if (dev != net->loopback_dev) {
1209 if (dev) {
1210 dev_put(dev);
1211 in6_dev_put(idev);
1212 }
1213 dev = net->loopback_dev;
1214 dev_hold(dev);
1215 idev = in6_dev_get(dev);
1216 if (!idev) {
1217 err = -ENODEV;
1218 goto out;
1219 }
1220 }
1221 rt->dst.output = ip6_pkt_discard_out;
1222 rt->dst.input = ip6_pkt_discard;
1223 rt->dst.error = -ENETUNREACH;
1224 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1225 goto install_route;
1226 }
1227
1228 if (cfg->fc_flags & RTF_GATEWAY) {
1229 struct in6_addr *gw_addr;
1230 int gwa_type;
1231
1232 gw_addr = &cfg->fc_gateway;
1233 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1234 gwa_type = ipv6_addr_type(gw_addr);
1235
1236 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1237 struct rt6_info *grt;
1238
1239 /* IPv6 strictly inhibits using not link-local
1240 addresses as nexthop address.
1241 Otherwise, router will not able to send redirects.
1242 It is very good, but in some (rare!) circumstances
1243 (SIT, PtP, NBMA NOARP links) it is handy to allow
1244 some exceptions. --ANK
1245 */
1246 err = -EINVAL;
1247 if (!(gwa_type&IPV6_ADDR_UNICAST))
1248 goto out;
1249
1250 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1251
1252 err = -EHOSTUNREACH;
1253 if (grt == NULL)
1254 goto out;
1255 if (dev) {
1256 if (dev != grt->rt6i_dev) {
1257 dst_release(&grt->dst);
1258 goto out;
1259 }
1260 } else {
1261 dev = grt->rt6i_dev;
1262 idev = grt->rt6i_idev;
1263 dev_hold(dev);
1264 in6_dev_hold(grt->rt6i_idev);
1265 }
1266 if (!(grt->rt6i_flags&RTF_GATEWAY))
1267 err = 0;
1268 dst_release(&grt->dst);
1269
1270 if (err)
1271 goto out;
1272 }
1273 err = -EINVAL;
1274 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1275 goto out;
1276 }
1277
1278 err = -ENODEV;
1279 if (dev == NULL)
1280 goto out;
1281
1282 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1283 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1284 if (IS_ERR(rt->rt6i_nexthop)) {
1285 err = PTR_ERR(rt->rt6i_nexthop);
1286 rt->rt6i_nexthop = NULL;
1287 goto out;
1288 }
1289 }
1290
1291 rt->rt6i_flags = cfg->fc_flags;
1292
1293 install_route:
1294 if (cfg->fc_mx) {
1295 struct nlattr *nla;
1296 int remaining;
1297
1298 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1299 int type = nla_type(nla);
1300
1301 if (type) {
1302 if (type > RTAX_MAX) {
1303 err = -EINVAL;
1304 goto out;
1305 }
1306
1307 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1308 }
1309 }
1310 }
1311
1312 if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
1313 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
1314 if (!dst_mtu(&rt->dst))
1315 dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev));
1316 if (!dst_metric(&rt->dst, RTAX_ADVMSS))
1317 dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst)));
1318 rt->dst.dev = dev;
1319 rt->rt6i_idev = idev;
1320 rt->rt6i_table = table;
1321
1322 cfg->fc_nlinfo.nl_net = dev_net(dev);
1323
1324 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1325
1326 out:
1327 if (dev)
1328 dev_put(dev);
1329 if (idev)
1330 in6_dev_put(idev);
1331 if (rt)
1332 dst_free(&rt->dst);
1333 return err;
1334 }
1335
/*
 * Remove @rt from its FIB table and drop the caller's reference.
 *
 * The shared ip6_null_entry must never be deleted; trying to do so
 * returns -ENOENT.  Otherwise returns the result of fib6_del().
 * Runs under the table's tb6_lock taken as writer.
 */
static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->rt6i_dev);

	if (rt == net->ipv6.ip6_null_entry)
		return -ENOENT;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);

	err = fib6_del(rt, info);
	/* Consume the reference the caller was holding on @rt. */
	dst_release(&rt->dst);

	write_unlock_bh(&table->tb6_lock);

	return err;
}
1355
1356 int ip6_del_rt(struct rt6_info *rt)
1357 {
1358 struct nl_info info = {
1359 .nl_net = dev_net(rt->rt6i_dev),
1360 };
1361 return __ip6_del_rt(rt, &info);
1362 }
1363
/*
 * Delete the route described by @cfg.
 *
 * Locates the FIB node for the destination/source prefix, then scans
 * its leaf list for an entry matching the optional ifindex, gateway
 * and metric filters.  Returns -ESRCH when no route matches.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (table == NULL)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			/* Each filter only applies when the caller set it. */
			if (cfg->fc_ifindex &&
			    (rt->rt6i_dev == NULL ||
			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			/* Hold the route across the unlock; __ip6_del_rt
			 * retakes the lock as writer and drops this ref. */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
1402
1403 /*
1404 * Handle redirects
1405 */
/* Flow key extended with the redirecting router's address; the
 * embedded flowi MUST stay first so __ip6_route_redirect() can cast
 * the struct flowi * it receives back to ip6rd_flowi. */
struct ip6rd_flowi {
	struct flowi fl;
	struct in6_addr gateway;
};
1410
/*
 * Table-lookup callback for redirect processing: find the route whose
 * nexthop matches the redirecting router carried in the ip6rd_flowi.
 * Falls back to ip6_null_entry (held) when nothing matches.
 */
static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi *fl,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/*
	 * Get the "current" route for this destination and
	 * check if the redirect has come from approriate router.
	 *
	 * RFC 2461 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		/*
		 * Current route is on-link; redirect is always invalid.
		 *
		 * Seems, previous statement is not true. It could
		 * be node, which looks for us as on-link (f.e. proxy ndisc)
		 * But then router serving it might decide, that we should
		 * know truth 8)8) --ANK (980726).
		 */
		if (rt6_check_expired(rt))
			continue;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl->oif != rt->rt6i_dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	/* BACKTRACK may jump back to the restart label on a subtree miss. */
	BACKTRACK(net, &fl->fl6_src);
out:
	/* Caller receives a held reference, even for the null entry. */
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	return rt;
};
1464
/*
 * Look up the route a redirect applies to, keyed by destination,
 * source and the redirecting gateway, scoped to @dev.
 */
static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
					   struct in6_addr *src,
					   struct in6_addr *gateway,
					   struct net_device *dev)
{
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct net *net = dev_net(dev);
	struct ip6rd_flowi rdfl = {
		.fl = {
			.oif = dev->ifindex,
			.fl6_dst = *dest,
			.fl6_src = *src,
		},
	};

	ipv6_addr_copy(&rdfl.gateway, gateway);

	/* Link-local / multicast destinations must match the interface. */
	if (rt6_need_strict(dest))
		flags |= RT6_LOOKUP_F_IFACE;

	return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
						   flags, __ip6_route_redirect);
}
1488
/*
 * Process a validated ICMPv6 redirect for @dest arriving via @neigh:
 * update the neighbour cache and install a cloned RTF_CACHE host
 * route whose gateway is the redirect target.
 */
void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
		  struct in6_addr *saddr,
		  struct neighbour *neigh, u8 *lladdr, int on_link)
{
	struct rt6_info *rt, *nrt = NULL;
	struct netevent_redirect netevent;
	struct net *net = dev_net(neigh->dev);

	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);

	if (rt == net->ipv6.ip6_null_entry) {
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
			       "for redirect target\n");
		goto out;
	}

	/*
	 * We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/*
	 * Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Duplicate redirect: silently ignore. */
	if (neigh == rt->dst.neighbour)
		goto out;

	nrt = ip6_rt_copy(rt);
	if (nrt == NULL)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
	nrt->rt6i_dst.plen = 128;
	nrt->dst.flags |= DST_HOST;

	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
	nrt->rt6i_nexthop = neigh_clone(neigh);
	/* Reset pmtu, it may be better */
	dst_metric_set(&nrt->dst, RTAX_MTU, ipv6_get_mtu(neigh->dev));
	dst_metric_set(&nrt->dst, RTAX_ADVMSS, ipv6_advmss(dev_net(neigh->dev),
							   dst_mtu(&nrt->dst)));

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* The superseded cache entry is removed; ip6_del_rt consumes
	 * the reference we hold on @rt, so return without dst_release. */
	if (rt->rt6i_flags&RTF_CACHE) {
		ip6_del_rt(rt);
		return;
	}

out:
	dst_release(&rt->dst);
}
1562
1563 /*
1564 * Handle ICMP "packet too big" messages
1565 * i.e. Path MTU discovery
1566 */
1567
/*
 * Apply a Packet Too Big report: lower the PMTU on the route to
 * @daddr looked up via @ifindex (0 = any interface).  Updates the
 * route in place when it is already an RTF_CACHE entry, otherwise
 * installs a COW/clone cache entry carrying the reduced MTU.
 */
static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
			     struct net *net, u32 pmtu, int ifindex)
{
	struct rt6_info *rt, *nrt;
	int allfrag = 0;

	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
	if (rt == NULL)
		return;

	/* Only ever shrink the PMTU; larger reports are ignored. */
	if (pmtu >= dst_mtu(&rt->dst))
		goto out;

	if (pmtu < IPV6_MIN_MTU) {
		/*
		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
		 * MTU (1280) and a fragment header should always be included
		 * after a node receiving Too Big message reporting PMTU is
		 * less than the IPv6 Minimum Link MTU.
		 */
		pmtu = IPV6_MIN_MTU;
		allfrag = 1;
	}

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Host route. If it is static, it would be better
	   not to override it, but add new one, so that
	   when cache entry will expire old pmtu
	   would return automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
		}
		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is connected route. Action: COW
	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
	 */
	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, daddr, saddr);
	else
		nrt = rt6_alloc_clone(rt, daddr);

	if (nrt) {
		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
		}

		/* According to RFC 1981, detecting PMTU increase shouldn't be
		 * happened within 5 mins, the recommended timer is 10 mins.
		 * Here this route expiration time is set to ip6_rt_mtu_expires
		 * which is 10 mins. After 10 mins the decreased pmtu is expired
		 * and detecting PMTU increase will be automatically happened.
		 */
		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;

		ip6_ins_rt(nrt);
	}
out:
	dst_release(&rt->dst);
}
1647
/*
 * Entry point for ICMPv6 Packet Too Big handling: propagate the new
 * PMTU both to the generic route and to the one scoped to the
 * receiving interface.
 */
void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
			struct net_device *dev, u32 pmtu)
{
	struct net *net = dev_net(dev);

	/*
	 * RFC 1981 states that a node "MUST reduce the size of the packets it
	 * is sending along the path" that caused the Packet Too Big message.
	 * Since it's not possible in the general case to determine which
	 * interface was used to send the original packet, we update the MTU
	 * on the interface that will be used to send future packets. We also
	 * update the MTU on the interface that received the Packet Too Big in
	 * case the original packet was forced out that interface with
	 * SO_BINDTODEVICE or similar. This is the next best thing to the
	 * correct behaviour, which would be to update the MTU on all
	 * interfaces.
	 */
	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
}
1668
1669 /*
1670 * Misc support functions
1671 */
1672
/*
 * Allocate a new rt6_info initialized from @ort: copies metrics,
 * device (with a fresh reference), idev, gateway and prefix keys.
 * The copy gets no expiry (RTF_EXPIRES cleared) and metric 0;
 * callers adjust flags/keys before insertion.  Returns NULL on
 * allocation failure.
 */
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
{
	struct net *net = dev_net(ort->rt6i_dev);
	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);

	if (rt) {
		rt->dst.input = ort->dst.input;
		rt->dst.output = ort->dst.output;

		dst_copy_metrics(&rt->dst, &ort->dst);
		rt->dst.error = ort->dst.error;
		rt->dst.dev = ort->dst.dev;
		if (rt->dst.dev)
			dev_hold(rt->dst.dev);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
		rt->dst.lastuse = jiffies;
		rt->rt6i_expires = 0;

		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
		rt->rt6i_table = ort->rt6i_table;
	}
	return rt;
}
1705
1706 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Find an existing RA route-information route for @prefix/@prefixlen
 * via @gwaddr on @ifindex in the RT6_TABLE_INFO table.  Returns the
 * route with a reference held, or NULL.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (table == NULL)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		/* Match device, RTF_ROUTEINFO|RTF_GATEWAY flags and gateway. */
		if (rt->rt6i_dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);
		break;
	}
out:
	write_unlock_bh(&table->tb6_lock);
	return rt;
}
1738
/*
 * Install a route learned from an RA Route Information option and
 * return it (held) via rt6_get_route_info().  A zero prefix length
 * is treated as a default route.
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex,
					   unsigned pref)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_INFO,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= ifindex,
		.fc_dst_len	= prefixlen,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
				  RTF_UP | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	ipv6_addr_copy(&cfg.fc_dst, prefix);
	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	/* Re-lookup afterwards rather than checking ip6_route_add()'s
	 * return: a failure simply yields NULL here. */
	ip6_route_add(&cfg);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
}
1767 #endif
1768
/*
 * Find the addrconf default route via router @addr on @dev in the
 * RT6_TABLE_DFLT table.  Returns the route with a reference held,
 * or NULL.
 */
struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
	if (table == NULL)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
		if (dev == rt->rt6i_dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);
	write_unlock_bh(&table->tb6_lock);
	return rt;
}
1790
/*
 * Install an addrconf default route via @gwaddr on @dev (learned from
 * a Router Advertisement) and return it held via rt6_get_dflt_router().
 */
struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_DFLT,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= dev->ifindex,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

	ip6_route_add(&cfg);

	return rt6_get_dflt_router(gwaddr, dev);
}
1812
/*
 * Delete every RA-learned (RTF_DEFAULT|RTF_ADDRCONF) route from the
 * default-router table.  ip6_del_rt() needs the lock dropped, so the
 * scan restarts from the top after each deletion.
 */
void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (table == NULL)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
			/* Hold across the unlock; ip6_del_rt drops it. */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}
1835
1836 static void rtmsg_to_fib6_config(struct net *net,
1837 struct in6_rtmsg *rtmsg,
1838 struct fib6_config *cfg)
1839 {
1840 memset(cfg, 0, sizeof(*cfg));
1841
1842 cfg->fc_table = RT6_TABLE_MAIN;
1843 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1844 cfg->fc_metric = rtmsg->rtmsg_metric;
1845 cfg->fc_expires = rtmsg->rtmsg_info;
1846 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1847 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1848 cfg->fc_flags = rtmsg->rtmsg_flags;
1849
1850 cfg->fc_nlinfo.nl_net = net;
1851
1852 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1853 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1854 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1855 }
1856
/*
 * SIOCADDRT/SIOCDELRT ioctl handler: copy the legacy in6_rtmsg from
 * userspace, convert it to a fib6_config and add/delete the route
 * under the RTNL.  Requires CAP_NET_ADMIN; other commands get
 * -EINVAL.
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	switch(cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return -EFAULT;

		rtmsg_to_fib6_config(net, &rtmsg, &cfg);

		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&cfg);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&cfg);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}
1893
1894 /*
1895 * Drop the packet on the floor
1896 */
1897
/*
 * Common drop path for reject/blackhole-style dst entries: bump the
 * appropriate SNMP counter, send an ICMPv6 destination-unreachable
 * with @code, and free the skb.  Always returns 0.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		/* Unspecified destination counts as an address error,
		 * not a routing failure. */
		if (type == IPV6_ADDR_ANY) {
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
1920
/* dst input handler for reject routes on the receive path. */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
1925
/* dst output handler for reject routes on the transmit path. */
static int ip6_pkt_discard_out(struct sk_buff *skb)
{
	/* icmpv6_send() needs skb->dev set for locally generated drops. */
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
1931
1932 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1933
/* dst input handler for administratively prohibited routes. */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
1938
/* dst output handler for administratively prohibited routes. */
static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
1944
1945 #endif
1946
1947 /*
1948 * Allocate a dst for local (unicast / anycast) address.
1949 */
1950
/*
 * Build the local host route for a configured unicast/anycast @addr
 * on @idev, bound to the loopback device and destined for the local
 * table.  Returns the new route with refcount 1, or an ERR_PTR on
 * allocation / neighbour-lookup failure.
 */
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    int anycast)
{
	struct net *net = dev_net(idev->dev);
	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
	struct neighbour *neigh;

	if (rt == NULL) {
		if (net_ratelimit())
			pr_warning("IPv6: Maximum number of routes reached,"
				   " consider increasing route/max_size.\n");
		return ERR_PTR(-ENOMEM);
	}

	dev_hold(net->loopback_dev);
	in6_dev_hold(idev);

	rt->dst.flags = DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	/* Local addresses are delivered through loopback. */
	rt->rt6i_dev = net->loopback_dev;
	rt->rt6i_idev = idev;
	dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev));
	dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst)));
	/* -1 hop limit means "use the interface/namespace default". */
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
	rt->dst.obsolete = -1;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;
	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
	if (IS_ERR(neigh)) {
		dst_free(&rt->dst);

		/* We are casting this because that is the return
		 * value type.  But an errno encoded pointer is the
		 * same regardless of the underlying pointer type,
		 * and that's what we are returning.  So this is OK.
		 */
		return (struct rt6_info *) neigh;
	}
	rt->rt6i_nexthop = neigh;

	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;
	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);

	atomic_set(&rt->dst.__refcnt, 1);

	return rt;
}
2005
/* Walker argument for fib6_ifdown(): device being torn down plus its
 * namespace (dev may be NULL to mean "all devices"). */
struct arg_dev_net {
	struct net_device *dev;
	struct net *net;
};
2010
2011 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2012 {
2013 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
2014 struct net *net = ((struct arg_dev_net *)arg)->net;
2015
2016 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2017 rt != net->ipv6.ip6_null_entry) {
2018 RT6_TRACE("deleted by ifdown %p\n", rt);
2019 return -1;
2020 }
2021 return 0;
2022 }
2023
2024 void rt6_ifdown(struct net *net, struct net_device *dev)
2025 {
2026 struct arg_dev_net adn = {
2027 .dev = dev,
2028 .net = net,
2029 };
2030
2031 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2032 icmp6_clean_all(fib6_ifdown, &adn);
2033 }
2034
/* Walker argument for rt6_mtu_change_route(): the device whose MTU
 * changed and its new value. */
struct rt6_mtu_change_arg
{
	struct net_device *dev;
	unsigned mtu;
};
2040
/*
 * fib6 walker callback: propagate a device MTU change to routes over
 * that device, unless RTAX_MTU is administratively locked.  Always
 * returns 0 so the walk continues.
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;
	struct net *net = dev_net(arg->dev);

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (idev == NULL)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discouvery.
	 */
	if (rt->rt6i_dev == arg->dev &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
	    (dst_mtu(&rt->dst) >= arg->mtu ||
	     (dst_mtu(&rt->dst) < arg->mtu &&
	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		/* Keep the advertised MSS consistent with the new MTU. */
		dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, arg->mtu));
	}
	return 0;
}
2081
2082 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2083 {
2084 struct rt6_mtu_change_arg arg = {
2085 .dev = dev,
2086 .mtu = mtu,
2087 };
2088
2089 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2090 }
2091
/* Netlink attribute validation policy for IPv6 RTM_* route messages. */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]               = { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
};
2099
/*
 * Parse an RTM_NEWROUTE/RTM_DELROUTE netlink message into a
 * fib6_config.  Returns 0 on success, -EINVAL on malformed
 * attributes, or the nlmsg_parse() error.
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;

	if (rtm->rtm_type == RTN_UNREACHABLE)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		/* Only rtm_dst_len bits of the prefix are present. */
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		/* Nested metrics are parsed later by ip6_route_add(). */
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	/* RTA_TABLE, when present, overrides rtm_table. */
	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	err = 0;
errout:
	return err;
}
2172
2173 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2174 {
2175 struct fib6_config cfg;
2176 int err;
2177
2178 err = rtm_to_fib6_config(skb, nlh, &cfg);
2179 if (err < 0)
2180 return err;
2181
2182 return ip6_route_del(&cfg);
2183 }
2184
2185 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2186 {
2187 struct fib6_config cfg;
2188 int err;
2189
2190 err = rtm_to_fib6_config(skb, nlh, &cfg);
2191 if (err < 0)
2192 return err;
2193
2194 return ip6_route_add(&cfg);
2195 }
2196
/* Worst-case netlink message size for one route dump entry, used to
 * size notification skbs. */
static inline size_t rt6_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
	       + nla_total_size(sizeof(struct rta_cacheinfo));
}
2211
/*
 * Serialize @rt into an RTM message on @skb.
 *
 * @dst/@src: when non-NULL, report these exact addresses (128-bit)
 *            instead of the route's stored prefixes.
 * @iif:      input interface for RTM_GETROUTE replies (0 = none).
 * @prefix:   when set, skip (return 1 for) non-RTF_PREFIX_RT routes.
 * @nowait:   passed through to ip6mr_get_route() for mcast routes.
 *
 * Returns the nlmsg length on success, 1 when filtered out, 0 for a
 * deferred multicast resolution, or -EMSGSIZE on overflow (the
 * NLA_PUT* macros jump to nla_put_failure).
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	NLA_PUT_U32(skb, RTA_TABLE, table);
	/* Derive the route type from flags / device. */
	if (rt->rt6i_flags&RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_flags&RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags&RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags&RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags&RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		NLA_PUT(skb, RTA_DST, 16, dst);
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		NLA_PUT(skb, RTA_SRC, 16, src);
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			/* Multicast: let ip6mr fill in (or defer) the
			 * resolved route information. */
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			NLA_PUT_U32(skb, RTA_IIF, iif);
	} else if (dst) {
		/* Output query: report the preferred source address. */
		struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
		struct in6_addr saddr_buf;
		if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
				       dst, 0, &saddr_buf) == 0)
			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	if (rt->dst.neighbour)
		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);

	if (rt->dst.dev)
		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);

	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);

	/* Clamp the remaining lifetime into the cacheinfo's int range. */
	if (!(rt->rt6i_flags & RTF_EXPIRES))
		expires = 0;
	else if (rt->rt6i_expires - jiffies < INT_MAX)
		expires = rt->rt6i_expires - jiffies;
	else
		expires = INT_MAX;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
			       expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2331
/*
 * Per-route callback for RTM_GETROUTE dumps: honour the RTM_F_PREFIX
 * filter from the request (when the header is large enough to carry
 * one) and emit the route via rt6_fill_node().
 */
int rt6_dump_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
	int prefix;

	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
	} else
		prefix = 0;

	return rt6_fill_node(arg->net,
		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
		     prefix, 0, NLM_F_MULTI);
}
2348
/*
 * RTM_GETROUTE handler: resolve the route for the requested flow and
 * unicast the serialized result back to the requester.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi fl;
	int err, iif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl, 0, sizeof(fl));

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		fl.oif = nla_get_u32(tb[RTA_OIF]);

	if (iif) {
		/* Validate the input interface exists; the device
		 * itself is not otherwise used here. */
		struct net_device *dev;
		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	/* NOTE(review): rt->dst.error is not checked here; a failed
	 * lookup is reported via the serialized route's error field. */
	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
	skb_dst_set(skb, &rt->dst);

	err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
errout:
	return err;
}
2422
2423 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2424 {
2425 struct sk_buff *skb;
2426 struct net *net = info->nl_net;
2427 u32 seq;
2428 int err;
2429
2430 err = -ENOBUFS;
2431 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2432
2433 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2434 if (skb == NULL)
2435 goto errout;
2436
2437 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2438 event, info->pid, seq, 0, 0, 0);
2439 if (err < 0) {
2440 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2441 WARN_ON(err == -EMSGSIZE);
2442 kfree_skb(skb);
2443 goto errout;
2444 }
2445 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2446 info->nlh, gfp_any());
2447 return;
2448 errout:
2449 if (err < 0)
2450 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2451 }
2452
2453 static int ip6_route_dev_notify(struct notifier_block *this,
2454 unsigned long event, void *data)
2455 {
2456 struct net_device *dev = (struct net_device *)data;
2457 struct net *net = dev_net(dev);
2458
2459 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2460 net->ipv6.ip6_null_entry->dst.dev = dev;
2461 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2462 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2463 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2464 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2465 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2466 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2467 #endif
2468 }
2469
2470 return NOTIFY_OK;
2471 }
2472
2473 /*
2474 * /proc
2475 */
2476
2477 #ifdef CONFIG_PROC_FS
2478
/* NOTE(review): looks like a leftover cursor for the pre-seq_file
 * /proc read interface; nothing in the visible code references it —
 * candidate for removal, confirm no out-of-view users first. */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2487
/*
 * fib6 walker callback: print one line of /proc/net/ipv6_route for @rt:
 * dst/plen, src/plen, nexthop (bare hex IPv6, no colons), then metric,
 * refcount, use count, flags and device name.  Always returns 0 so the
 * walk visits every route.
 */
static int rt6_info_route(struct rt6_info *rt, void *p_arg)
{
	struct seq_file *m = p_arg;

	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);

#ifdef CONFIG_IPV6_SUBTREES
	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
#else
	/* Subtrees not compiled in: emit an all-zero placeholder so the
	 * column layout stays fixed for parsers. */
	seq_puts(m, "00000000000000000000000000000000 00 ");
#endif

	if (rt->rt6i_nexthop) {
		seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
	} else {
		/* No neighbour entry: all-zero nexthop placeholder. */
		seq_puts(m, "00000000000000000000000000000000");
	}
	seq_printf(m, " %08x %08x %08x %08x %8s\n",
		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
		   rt->dst.__use, rt->rt6i_flags,
		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
	return 0;
}
2511
2512 static int ipv6_route_show(struct seq_file *m, void *v)
2513 {
2514 struct net *net = (struct net *)m->private;
2515 fib6_clean_all(net, rt6_info_route, 0, m);
2516 return 0;
2517 }
2518
/* open() for /proc/net/ipv6_route: single-shot seq_file bound to the
 * owning network namespace. */
static int ipv6_route_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ipv6_route_show);
}
2523
/* File operations for the read-only /proc/net/ipv6_route seq_file. */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release_net,
};
2531
/*
 * /proc/net/rt6_stats: one line of seven space-separated hex counters
 * for this namespace — fib nodes, route nodes, rt allocations, rt
 * entries, cached routes, live dst entries and discarded routes.
 */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}
2546
/* open() for /proc/net/rt6_stats: single-shot seq_file bound to the
 * owning network namespace. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
2551
/* File operations for the read-only /proc/net/rt6_stats seq_file. */
static const struct file_operations rt6_stats_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= rt6_stats_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release_net,
};
2559 #endif /* CONFIG_PROC_FS */
2560
2561 #ifdef CONFIG_SYSCTL
2562
2563 static
2564 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2565 void __user *buffer, size_t *lenp, loff_t *ppos)
2566 {
2567 struct net *net = current->nsproxy->net_ns;
2568 int delay = net->ipv6.sysctl.flush_delay;
2569 if (write) {
2570 proc_dointvec(ctl, write, buffer, lenp, ppos);
2571 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2572 return 0;
2573 } else
2574 return -EINVAL;
2575 }
2576
/*
 * Template for the per-namespace /proc/sys/net/ipv6/route/ directory.
 * ipv6_route_sysctl_init() kmemdup()s this table and rewrites each
 * .data pointer BY INDEX, so the entry order here must stay in sync
 * with the table[N].data assignments there.
 */
ctl_table ipv6_route_table_template[] = {
	{
		/* Write-only trigger (mode 0200): flush the route cache. */
		.procname	= "flush",
		.data		= &init_net.ipv6.sysctl.flush_delay,
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv6_sysctl_rtcache_flush
	},
	{
		/* Lives in the shared dst_ops template, not per-netns sysctl. */
		.procname	= "gc_thresh",
		.data		= &ip6_dst_ops_template.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "max_size",
		.data		= &init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_min_interval",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "mtu_expires",
		.data		= &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Same backing field as gc_min_interval, exposed in ms. */
		.procname	= "gc_min_interval_ms",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{ }	/* sentinel */
};
2650
/*
 * Clone the route sysctl template for namespace @net, repointing each
 * entry's .data from init_net/template storage to the per-namespace
 * field.  The numeric indices must match the entry order in
 * ipv6_route_table_template[] — update both together.
 *
 * Returns NULL on allocation failure; the caller owns the returned
 * table and must kfree() it on teardown.
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		/* gc_min_interval_ms shares gc_min_interval's storage. */
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
	}

	return table;
}
2674 #endif
2675
/*
 * Per-namespace init for IPv6 routing: clone the dst_ops template, set
 * up the dst entry counter, allocate the namespace's private copies of
 * the special routes (null entry, plus prohibit/blackhole with policy
 * routing), seed the GC sysctl defaults and create the /proc files.
 * Failures unwind in reverse order through the goto chain.
 *
 * Returns 0 on success, -ENOMEM on any allocation failure.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	/* Each namespace gets its own copy of the null route so its
	 * dst.dev/rt6i_idev can later be pointed at the namespace's
	 * loopback (see ip6_route_dev_notify()). */
	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255);
#endif

	/* Default GC tunables; adjustable via /proc/sys/net/ipv6/route/. */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

#ifdef CONFIG_PROC_FS
	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif
	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

	/* Error unwinding: reverse order of the allocations above. */
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
2748
/*
 * Per-namespace teardown: remove the /proc files, free the special
 * route copies allocated in ip6_route_net_init(), and release the dst
 * entry counter.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ipv6_route");
	proc_net_remove(net, "rt6_stats");
#endif
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
2762
/* Per-network-namespace setup/teardown hooks for IPv6 routing state. */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
2767
/* Catches loopback registration in each namespace so the special
 * routes can be wired to it (see ip6_route_dev_notify()). */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
2772
/*
 * Boot-time initialisation of the IPv6 routing subsystem: dst cache
 * slab, blackhole dst counters, per-namespace operations, fib6, xfrm6,
 * policy rules, the three RTM_*ROUTE rtnetlink handlers and the
 * loopback device notifier.  Each failure unwinds everything set up
 * before it through the reverse-order goto chain at the bottom.
 *
 * Returns 0 on success or a negative errno.
 */
int __init ip6_route_init(void)
{
	int ret;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_dst_entries;

	/* Blackhole dsts share the same slab as regular rt6_infos. */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
		goto fib6_rules_init;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto fib6_rules_init;

out:
	return ret;

	/* Error unwinding: reverse order of the registrations above. */
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
2844
/*
 * Tear down everything ip6_route_init() registered, in reverse order
 * of registration.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}