ipv6: Demark default hoplimit as zero.
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
/* Set to 3 to get tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
/* Debug helpers; at RT6_DEBUG < 3 they expand to nothing so the trace
 * calls sprinkled through this file cost nothing in production builds.
 */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif

/* When non-zero, ip6_pol_route() also clones off-link (gatewayed)
 * routes into the routing cache instead of using them directly.
 */
#define CLONE_OFFLINK_ROUTE 0
76
/* Forward declarations for the dst_ops callbacks and packet sinks
 * defined further down in this file.
 */
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);

#ifdef CONFIG_IPV6_ROUTE_INFO
/* RFC 4191 Route Information option handling (see rt6_route_rcv()). */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex,
					   unsigned pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex);
#endif
99
/* dst_ops for ordinary IPv6 routes; each namespace gets a copy of this
 * template (hence "_template") wired to its own GC state.
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.local_out		=	__ip6_local_out,
};
113
/* Blackhole routes deliberately ignore PMTU updates. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
{
}
117
/* dst_ops for the standalone blackhole copies made by
 * ip6_dst_blackhole(): no GC callback, PMTU updates are no-ops.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
};
125
/* Template for the per-namespace null entry, returned when a lookup
 * matches nothing: discards packets with -ENETUNREACH.  The maximal
 * metric keeps it sorted behind every real route.
 */
static struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
140
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* Template for "prohibit" policy-routing results: packets are refused
 * with -EACCES (administratively prohibited).
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/* Template for "blackhole" policy-routing results: packets are
 * silently discarded (dst_discard in both directions).
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
177
/* allocate dst with ip6_dst_ops */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	/* Thin wrapper: cast the generic dst entry to our rt6_info. */
	return (struct rt6_info *)dst_alloc(ops);
}
183
184 static void ip6_dst_destroy(struct dst_entry *dst)
185 {
186 struct rt6_info *rt = (struct rt6_info *)dst;
187 struct inet6_dev *idev = rt->rt6i_idev;
188 struct inet_peer *peer = rt->rt6i_peer;
189
190 if (idev != NULL) {
191 rt->rt6i_idev = NULL;
192 in6_dev_put(idev);
193 }
194 if (peer) {
195 BUG_ON(!(rt->rt6i_flags & RTF_CACHE));
196 rt->rt6i_peer = NULL;
197 inet_putpeer(peer);
198 }
199 }
200
/*
 * Attach an inet_peer to a cached route.  Only valid on RTF_CACHE
 * routes.  The cmpxchg() resolves the race where two CPUs bind
 * concurrently: the loser drops its freshly acquired reference.
 */
void rt6_bind_peer(struct rt6_info *rt, int create)
{
	struct inet_peer *peer;

	if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE)))
		return;

	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
		inet_putpeer(peer);
}
212
213 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
214 int how)
215 {
216 struct rt6_info *rt = (struct rt6_info *)dst;
217 struct inet6_dev *idev = rt->rt6i_idev;
218 struct net_device *loopback_dev =
219 dev_net(dev)->loopback_dev;
220
221 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
222 struct inet6_dev *loopback_idev =
223 in6_dev_get(loopback_dev);
224 if (loopback_idev != NULL) {
225 rt->rt6i_idev = loopback_idev;
226 in6_dev_put(idev);
227 }
228 }
229 }
230
231 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
232 {
233 return (rt->rt6i_flags & RTF_EXPIRES) &&
234 time_after(jiffies, rt->rt6i_expires);
235 }
236
237 static inline int rt6_need_strict(struct in6_addr *daddr)
238 {
239 return ipv6_addr_type(daddr) &
240 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
241 }
242
243 /*
244 * Route lookup. Any table->tb6_lock is implied.
245 */
246
/*
 * Walk the sibling route list of a fib6 node and pick the entry that
 * matches the requested device (@oif) and/or source address (@saddr).
 *
 * @flags: RT6_LOOKUP_F_* bits; F_IFACE makes the device match strict.
 *
 * Returns the matching route, the null entry when a strict device
 * match fails, or the list head when nothing was constrained.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* No constraints at all: keep the head route. */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->rt6i_dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;	/* exact device match */
			if (dev->flags & IFF_LOOPBACK) {
				/* Remember a loopback route as fallback,
				 * preferring one whose real idev is oif.
				 */
				if (sprt->rt6i_idev == NULL ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE && oif)
						continue;
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		} else {
			/* No oif: match on source address ownership. */
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		/* Strict interface match requested but not satisfied. */
		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
293
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the route's next-hop neighbour is
 * not known reachable, send it a neighbour solicitation, rate-limited
 * by the per-device rtr_probe_interval.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	read_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/* Stamp before dropping the lock so concurrent probers
		 * see the updated time and back off.
		 */
		neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		read_unlock_bh(&neigh->lock);
}
#else
/* Probing only exists with router-preference support. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
328
329 /*
330 * Default Router Selection (RFC 2461 6.3.6)
331 */
332 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
333 {
334 struct net_device *dev = rt->rt6i_dev;
335 if (!oif || dev->ifindex == oif)
336 return 2;
337 if ((dev->flags & IFF_LOOPBACK) &&
338 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
339 return 1;
340 return 0;
341 }
342
/*
 * Score next-hop reachability for router selection:
 *   2 - neighbour known reachable (NUD_VALID)
 *   1 - no next hop needed, or neighbour state undetermined
 *   0 - no neighbour entry (or NUD_FAILED, with router preference)
 */
static inline int rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh = rt->rt6i_nexthop;
	int m;
	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		m = 1;
	else if (neigh) {
		read_lock_bh(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			m = 2;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (neigh->nud_state & NUD_FAILED)
			m = 0;
#endif
		else
			m = 1;
		read_unlock_bh(&neigh->lock);
	} else
		m = 0;
	return m;
}
365
366 static int rt6_score_route(struct rt6_info *rt, int oif,
367 int strict)
368 {
369 int m, n;
370
371 m = rt6_check_dev(rt, oif);
372 if (!m && (strict & RT6_LOOKUP_F_IFACE))
373 return -1;
374 #ifdef CONFIG_IPV6_ROUTER_PREF
375 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
376 #endif
377 n = rt6_check_neigh(rt);
378 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
379 return -1;
380 return m;
381 }
382
/*
 * Compare @rt against the best candidate seen so far.
 *
 * @mpri holds the best score; it is updated when @rt wins.  Under
 * RT6_LOOKUP_F_REACHABLE the losing route (either the displaced match
 * or @rt itself) is probed so an unreachable router can recover.
 *
 * Returns the (possibly new) best candidate.
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match)
{
	int m;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m < 0)
		goto out;

	if (m > *mpri) {
		if (strict & RT6_LOOKUP_F_REACHABLE)
			rt6_probe(match);	/* probe the route we displace */
		*mpri = m;
		match = rt;
	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
		rt6_probe(rt);
	}

out:
	return match;
}
407
408 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
409 struct rt6_info *rr_head,
410 u32 metric, int oif, int strict)
411 {
412 struct rt6_info *rt, *match;
413 int mpri = -1;
414
415 match = NULL;
416 for (rt = rr_head; rt && rt->rt6i_metric == metric;
417 rt = rt->dst.rt6_next)
418 match = find_match(rt, oif, strict, &mpri, match);
419 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
420 rt = rt->dst.rt6_next)
421 match = find_match(rt, oif, strict, &mpri, match);
422
423 return match;
424 }
425
/*
 * Pick the best route from a fib6 node; when nothing scores as
 * reachable, advance the node's round-robin pointer so routers are
 * tried in turn (RFC 2461 6.3.6).  Called with the table lock held.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;

	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
		  __func__, fn->leaf, oif);

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);

	if (!match &&
	    (strict & RT6_LOOKUP_F_REACHABLE)) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	RT6_TRACE("%s() => %p\n",
		  __func__, match);

	net = dev_net(rt0->rt6i_dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
458
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option from a Router Advertisement
 * (RFC 4191): add, refresh or withdraw the matching RTF_ROUTEINFO
 * route via @gwaddr on @dev.
 *
 * @opt/@len: the raw option bytes.
 * Returns 0 on success, -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length.
	 * length is in units of 8 octets: 1 = header only, 2 = first
	 * 64 bits of prefix present, 3 = full 128-bit prefix present.
	 */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws the route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/* Drop the reference taken by get/add above. */
		dst_release(&rt->dst);
	}
	return 0;
}
#endif
532
/*
 * When a lookup landed on the null entry, climb toward the tree root
 * (descending into source-routed subtrees where present) and retry
 * from the first ancestor that carries route info.
 *
 * NOTE: expands in-place and relies on the "out:" and "restart:"
 * labels and the local variables "rt" and "fn" of the caller.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
550
/*
 * Simple (non-caching) lookup in one table: find the node, filter by
 * device/source address, backtrack on failure.  Takes a reference on
 * the returned route via dst_use(); "restart"/"out" serve BACKTRACK.
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi *fl, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
	BACKTRACK(net, &fl->fl6_src);
out:
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}
570
571 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
572 const struct in6_addr *saddr, int oif, int strict)
573 {
574 struct flowi fl = {
575 .oif = oif,
576 .fl6_dst = *daddr,
577 };
578 struct dst_entry *dst;
579 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
580
581 if (saddr) {
582 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
583 flags |= RT6_LOOKUP_F_HAS_SADDR;
584 }
585
586 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
587 if (dst->error == 0)
588 return (struct rt6_info *) dst;
589
590 dst_release(dst);
591
592 return NULL;
593 }
594
595 EXPORT_SYMBOL(rt6_lookup);
596
597 /* ip6_ins_rt is called with FREE table->tb6_lock.
598 It takes new route entry, the addition fails by any reason the
599 route is freed. In any case, if caller does not hold it, it may
600 be destroyed.
601 */
602
603 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
604 {
605 int err;
606 struct fib6_table *table;
607
608 table = rt->rt6i_table;
609 write_lock_bh(&table->tb6_lock);
610 err = fib6_add(&table->tb6_root, rt, info);
611 write_unlock_bh(&table->tb6_lock);
612
613 return err;
614 }
615
616 int ip6_ins_rt(struct rt6_info *rt)
617 {
618 struct nl_info info = {
619 .nl_net = dev_net(rt->rt6i_dev),
620 };
621 return __ip6_ins_rt(rt, &info);
622 }
623
/*
 * Make a per-destination (RTF_CACHE) clone of @ort for @daddr and bind
 * a neighbour entry for the next hop.  On neighbour-table overflow the
 * routing cache is forcibly GCed once with temporarily relaxed
 * thresholds, then the neighbour lookup is retried.
 *
 * Returns the clone, or NULL if copy or neighbour binding fails.
 */
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
				      struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort);

	if (rt) {
		struct neighbour *neigh;
		/* Only retry after a GC pass outside softirq context. */
		int attempts = !in_softirq();

		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
			/* On-link route: the destination is the next hop. */
			if (rt->rt6i_dst.plen != 128 &&
			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
				rt->rt6i_flags |= RTF_ANYCAST;
			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
		}

		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
		rt->rt6i_dst.plen = 128;
		rt->rt6i_flags |= RTF_CACHE;
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
			rt->rt6i_src.plen = 128;
		}
#endif

	retry:
		neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
		if (IS_ERR(neigh)) {
			struct net *net = dev_net(rt->rt6i_dev);
			int saved_rt_min_interval =
				net->ipv6.sysctl.ip6_rt_gc_min_interval;
			int saved_rt_elasticity =
				net->ipv6.sysctl.ip6_rt_gc_elasticity;

			if (attempts-- > 0) {
				/* Force an aggressive GC pass, restore the
				 * tunables, and try the lookup once more.
				 */
				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;

				ip6_dst_gc(&net->ipv6.ip6_dst_ops);

				net->ipv6.sysctl.ip6_rt_gc_elasticity =
					saved_rt_elasticity;
				net->ipv6.sysctl.ip6_rt_gc_min_interval =
					saved_rt_min_interval;
				goto retry;
			}

			if (net_ratelimit())
				printk(KERN_WARNING
				       "ipv6: Neighbour table overflow.\n");
			dst_free(&rt->dst);
			return NULL;
		}
		rt->rt6i_nexthop = neigh;

	}

	return rt;
}
692
693 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
694 {
695 struct rt6_info *rt = ip6_rt_copy(ort);
696 if (rt) {
697 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
698 rt->rt6i_dst.plen = 128;
699 rt->rt6i_flags |= RTF_CACHE;
700 rt->dst.flags |= DST_HOST;
701 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
702 }
703 return rt;
704 }
705
/*
 * Full route lookup with cloning: select the best route and, when it
 * is not yet an RTF_CACHE entry, clone it into the cache so that
 * per-destination state (neighbour, PMTU) can attach to it.
 *
 * Unless forwarding is enabled, a first pass insists on a reachable
 * router (RT6_LOOKUP_F_REACHABLE) and falls back to any router if
 * that pass finds nothing.  Returns the route with a reference held.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi *fl, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;	/* bounds the insert/relookup race loop */
	int err;
	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

	strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
	read_lock_bh(&table->tb6_lock);

restart_2:
	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);

restart:
	rt = rt6_select(fn, oif, strict | reachable);

	BACKTRACK(net, &fl->fl6_src);
	if (rt == net->ipv6.ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	/* Clone outside the table lock; hold rt across the unlock. */
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);

	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
	else {
#if CLONE_OFFLINK_ROUTE
		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
#else
		goto out2;
#endif
	}

	dst_release(&rt->dst);
	rt = nrt ? : net->ipv6.ip6_null_entry;

	dst_hold(&rt->dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->dst);
	goto relookup;

out:
	/* Nothing reachable: retry without the reachability bit. */
	if (reachable) {
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
out2:
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;

	return rt;
}
778
/* Table-lookup adaptor for input routing: keys on the ingress device. */
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi *fl, int flags)
{
	return ip6_pol_route(net, table, fl->iif, fl, flags);
}
784
/*
 * Route an incoming packet: build a flow key from the IPv6 header and
 * attach the lookup result to the skb's dst.
 */
void ip6_route_input(struct sk_buff *skb)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct flowi fl = {
		.iif = skb->dev->ifindex,
		.fl6_dst = iph->daddr,
		.fl6_src = iph->saddr,
		.fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
		.mark = skb->mark,
		.proto = iph->nexthdr,
	};

	/* Scoped destinations need a strict interface match; PIM
	 * register pseudo-devices are exempt.
	 */
	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
}
804
/* Table-lookup adaptor for output routing: keys on the egress device. */
static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
					     struct flowi *fl, int flags)
{
	return ip6_pol_route(net, table, fl->oif, fl, flags);
}
810
811 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
812 struct flowi *fl)
813 {
814 int flags = 0;
815
816 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
817 flags |= RT6_LOOKUP_F_IFACE;
818
819 if (!ipv6_addr_any(&fl->fl6_src))
820 flags |= RT6_LOOKUP_F_HAS_SADDR;
821 else if (sk)
822 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
823
824 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
825 }
826
827 EXPORT_SYMBOL(ip6_route_output);
828
/*
 * Replace *dstp with a standalone "blackhole" copy of the route: it
 * keeps the metrics, device and addressing of the original but drops
 * every packet (dst_discard both directions).
 *
 * Returns 0 on success, -ENOMEM when the copy cannot be allocated
 * (*dstp is set to NULL in that case).
 */
int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
{
	struct rt6_info *ort = (struct rt6_info *) *dstp;
	struct rt6_info *rt = (struct rt6_info *)
		dst_alloc(&ip6_dst_blackhole_ops);
	struct dst_entry *new = NULL;

	if (rt) {
		new = &rt->dst;

		atomic_set(&new->__refcnt, 1);
		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard;

		dst_copy_metrics(new, &ort->dst);
		new->dev = ort->dst.dev;
		if (new->dev)
			dev_hold(new->dev);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
		rt->rt6i_expires = 0;

		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		/* NOTE(review): dst_free() on the fresh entry appears to
		 * hand it to the dst GC while our __refcnt keeps it
		 * alive -- confirm against dst_alloc()/dst_free()
		 * semantics before touching this.
		 */
		dst_free(new);
	}

	dst_release(*dstp);
	*dstp = new;
	return new ? 0 : -ENOMEM;
}
EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
870
871 /*
872 * Destination cache support functions
873 */
874
875 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
876 {
877 struct rt6_info *rt;
878
879 rt = (struct rt6_info *) dst;
880
881 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
882 return dst;
883
884 return NULL;
885 }
886
887 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
888 {
889 struct rt6_info *rt = (struct rt6_info *) dst;
890
891 if (rt) {
892 if (rt->rt6i_flags & RTF_CACHE) {
893 if (rt6_check_expired(rt)) {
894 ip6_del_rt(rt);
895 dst = NULL;
896 }
897 } else {
898 dst_release(dst);
899 dst = NULL;
900 }
901 }
902 return dst;
903 }
904
/*
 * dst_ops->link_failure callback: report the destination unreachable
 * to the sender, then invalidate the route -- cached clones expire
 * immediately, default routes force a re-lookup by poisoning the fib6
 * node's serial number.
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags&RTF_CACHE) {
			dst_set_expires(&rt->dst, 0);
			rt->rt6i_flags |= RTF_EXPIRES;
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
			rt->rt6i_node->fn_sernum = -1;
	}
}
920
/*
 * dst_ops->update_pmtu callback: record a smaller path MTU on a
 * cached host route (plen == 128 only).  MTUs below IPV6_MIN_MTU are
 * clamped and RTAX_FEATURE_ALLFRAG set so fragment headers are always
 * emitted toward this destination.
 */
static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info*)dst;

	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
		rt6->rt6i_flags |= RTF_MODIFIED;
		if (mtu < IPV6_MIN_MTU) {
			u32 features = dst_metric(dst, RTAX_FEATURES);
			mtu = IPV6_MIN_MTU;
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(dst, RTAX_FEATURES, features);
		}
		dst_metric_set(dst, RTAX_MTU, mtu);
		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
	}
}
937
938 static int ipv6_get_mtu(struct net_device *dev);
939
940 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
941 {
942 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
943
944 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
945 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
946
947 /*
948 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
949 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
950 * IPV6_MAXPLEN is also valid and means: "any MSS,
951 * rely only on pmtu discovery"
952 */
953 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
954 mtu = IPV6_MAXPLEN;
955 return mtu;
956 }
957
/* Singly linked list of dst entries handed out by icmp6_dst_alloc();
 * reaped by icmp6_dst_gc().  icmp6_dst_lock protects the list.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
960
961 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
962 struct neighbour *neigh,
963 const struct in6_addr *addr)
964 {
965 struct rt6_info *rt;
966 struct inet6_dev *idev = in6_dev_get(dev);
967 struct net *net = dev_net(dev);
968
969 if (unlikely(idev == NULL))
970 return NULL;
971
972 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
973 if (unlikely(rt == NULL)) {
974 in6_dev_put(idev);
975 goto out;
976 }
977
978 dev_hold(dev);
979 if (neigh)
980 neigh_hold(neigh);
981 else {
982 neigh = ndisc_get_neigh(dev, addr);
983 if (IS_ERR(neigh))
984 neigh = NULL;
985 }
986
987 rt->rt6i_dev = dev;
988 rt->rt6i_idev = idev;
989 rt->rt6i_nexthop = neigh;
990 atomic_set(&rt->dst.__refcnt, 1);
991 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
992 dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev));
993 dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst)));
994 rt->dst.output = ip6_output;
995
996 #if 0 /* there's no chance to use these for ndisc */
997 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
998 ? DST_HOST
999 : 0;
1000 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1001 rt->rt6i_dst.plen = 128;
1002 #endif
1003
1004 spin_lock_bh(&icmp6_dst_lock);
1005 rt->dst.next = icmp6_dst_gc_list;
1006 icmp6_dst_gc_list = &rt->dst;
1007 spin_unlock_bh(&icmp6_dst_lock);
1008
1009 fib6_force_start_gc(net);
1010
1011 out:
1012 return &rt->dst;
1013 }
1014
1015 int icmp6_dst_gc(void)
1016 {
1017 struct dst_entry *dst, *next, **pprev;
1018 int more = 0;
1019
1020 next = NULL;
1021
1022 spin_lock_bh(&icmp6_dst_lock);
1023 pprev = &icmp6_dst_gc_list;
1024
1025 while ((dst = *pprev) != NULL) {
1026 if (!atomic_read(&dst->__refcnt)) {
1027 *pprev = dst->next;
1028 dst_free(dst);
1029 } else {
1030 pprev = &dst->next;
1031 ++more;
1032 }
1033 }
1034
1035 spin_unlock_bh(&icmp6_dst_lock);
1036
1037 return more;
1038 }
1039
1040 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1041 void *arg)
1042 {
1043 struct dst_entry *dst, **pprev;
1044
1045 spin_lock_bh(&icmp6_dst_lock);
1046 pprev = &icmp6_dst_gc_list;
1047 while ((dst = *pprev) != NULL) {
1048 struct rt6_info *rt = (struct rt6_info *) dst;
1049 if (func(rt, arg)) {
1050 *pprev = dst->next;
1051 dst_free(dst);
1052 } else {
1053 pprev = &dst->next;
1054 }
1055 }
1056 spin_unlock_bh(&icmp6_dst_lock);
1057 }
1058
/*
 * dst_ops->gc callback: run fib6 garbage collection when the last run
 * was long enough ago or the cache outgrew ip6_rt_max_size.  The
 * expiry horizon (ip6_rt_gc_expire) tightens under repeated pressure
 * and decays exponentially (>> elasticity) otherwise.
 *
 * Returns non-zero while the cache is still over rt_max_size, which
 * makes dst allocation fail.
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
	unsigned long now = jiffies;
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	/* Too soon since the last run and not over the hard cap: skip. */
	if (time_after(rt_last_gc + rt_min_interval, now) &&
	    entries <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
	net->ipv6.ip6_rt_last_gc = now;
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}
1085
1086 /* Clean host part of a prefix. Not necessary in radix tree,
1087 but results in cleaner routing tables.
1088
1089 Remove it only when all the things will work!
1090 */
1091
1092 static int ipv6_get_mtu(struct net_device *dev)
1093 {
1094 int mtu = IPV6_MIN_MTU;
1095 struct inet6_dev *idev;
1096
1097 rcu_read_lock();
1098 idev = __in6_dev_get(dev);
1099 if (idev)
1100 mtu = idev->cnf.mtu6;
1101 rcu_read_unlock();
1102 return mtu;
1103 }
1104
/*
 * Resolve the hop limit for a dst.  A zero RTAX_HOPLIMIT metric marks
 * "not explicitly set" (the raw metric default), in which case fall
 * back to the egress device's configured hop limit, or to the
 * namespace-wide default when the device has no inet6_dev.
 */
int ip6_dst_hoplimit(struct dst_entry *dst)
{
	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
	if (hoplimit == 0) {
		struct net_device *dev = dst->dev;
		struct inet6_dev *idev;

		rcu_read_lock();
		idev = __in6_dev_get(dev);
		if (idev)
			hoplimit = idev->cnf.hop_limit;
		else
			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
		rcu_read_unlock();
	}
	return hoplimit;
}
EXPORT_SYMBOL(ip6_dst_hoplimit);
1123
1124 /*
1125 *
1126 */
1127
1128 int ip6_route_add(struct fib6_config *cfg)
1129 {
1130 int err;
1131 struct net *net = cfg->fc_nlinfo.nl_net;
1132 struct rt6_info *rt = NULL;
1133 struct net_device *dev = NULL;
1134 struct inet6_dev *idev = NULL;
1135 struct fib6_table *table;
1136 int addr_type;
1137
1138 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1139 return -EINVAL;
1140 #ifndef CONFIG_IPV6_SUBTREES
1141 if (cfg->fc_src_len)
1142 return -EINVAL;
1143 #endif
1144 if (cfg->fc_ifindex) {
1145 err = -ENODEV;
1146 dev = dev_get_by_index(net, cfg->fc_ifindex);
1147 if (!dev)
1148 goto out;
1149 idev = in6_dev_get(dev);
1150 if (!idev)
1151 goto out;
1152 }
1153
1154 if (cfg->fc_metric == 0)
1155 cfg->fc_metric = IP6_RT_PRIO_USER;
1156
1157 table = fib6_new_table(net, cfg->fc_table);
1158 if (table == NULL) {
1159 err = -ENOBUFS;
1160 goto out;
1161 }
1162
1163 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1164
1165 if (rt == NULL) {
1166 err = -ENOMEM;
1167 goto out;
1168 }
1169
1170 rt->dst.obsolete = -1;
1171 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1172 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1173 0;
1174
1175 if (cfg->fc_protocol == RTPROT_UNSPEC)
1176 cfg->fc_protocol = RTPROT_BOOT;
1177 rt->rt6i_protocol = cfg->fc_protocol;
1178
1179 addr_type = ipv6_addr_type(&cfg->fc_dst);
1180
1181 if (addr_type & IPV6_ADDR_MULTICAST)
1182 rt->dst.input = ip6_mc_input;
1183 else if (cfg->fc_flags & RTF_LOCAL)
1184 rt->dst.input = ip6_input;
1185 else
1186 rt->dst.input = ip6_forward;
1187
1188 rt->dst.output = ip6_output;
1189
1190 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1191 rt->rt6i_dst.plen = cfg->fc_dst_len;
1192 if (rt->rt6i_dst.plen == 128)
1193 rt->dst.flags = DST_HOST;
1194
1195 #ifdef CONFIG_IPV6_SUBTREES
1196 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1197 rt->rt6i_src.plen = cfg->fc_src_len;
1198 #endif
1199
1200 rt->rt6i_metric = cfg->fc_metric;
1201
1202 /* We cannot add true routes via loopback here,
1203 they would result in kernel looping; promote them to reject routes
1204 */
1205 if ((cfg->fc_flags & RTF_REJECT) ||
1206 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1207 && !(cfg->fc_flags&RTF_LOCAL))) {
1208 /* hold loopback dev/idev if we haven't done so. */
1209 if (dev != net->loopback_dev) {
1210 if (dev) {
1211 dev_put(dev);
1212 in6_dev_put(idev);
1213 }
1214 dev = net->loopback_dev;
1215 dev_hold(dev);
1216 idev = in6_dev_get(dev);
1217 if (!idev) {
1218 err = -ENODEV;
1219 goto out;
1220 }
1221 }
1222 rt->dst.output = ip6_pkt_discard_out;
1223 rt->dst.input = ip6_pkt_discard;
1224 rt->dst.error = -ENETUNREACH;
1225 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1226 goto install_route;
1227 }
1228
1229 if (cfg->fc_flags & RTF_GATEWAY) {
1230 struct in6_addr *gw_addr;
1231 int gwa_type;
1232
1233 gw_addr = &cfg->fc_gateway;
1234 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1235 gwa_type = ipv6_addr_type(gw_addr);
1236
1237 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1238 struct rt6_info *grt;
1239
1240 /* IPv6 strictly inhibits using not link-local
1241 addresses as nexthop address.
1242 Otherwise, router will not able to send redirects.
1243 It is very good, but in some (rare!) circumstances
1244 (SIT, PtP, NBMA NOARP links) it is handy to allow
1245 some exceptions. --ANK
1246 */
1247 err = -EINVAL;
1248 if (!(gwa_type&IPV6_ADDR_UNICAST))
1249 goto out;
1250
1251 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1252
1253 err = -EHOSTUNREACH;
1254 if (grt == NULL)
1255 goto out;
1256 if (dev) {
1257 if (dev != grt->rt6i_dev) {
1258 dst_release(&grt->dst);
1259 goto out;
1260 }
1261 } else {
1262 dev = grt->rt6i_dev;
1263 idev = grt->rt6i_idev;
1264 dev_hold(dev);
1265 in6_dev_hold(grt->rt6i_idev);
1266 }
1267 if (!(grt->rt6i_flags&RTF_GATEWAY))
1268 err = 0;
1269 dst_release(&grt->dst);
1270
1271 if (err)
1272 goto out;
1273 }
1274 err = -EINVAL;
1275 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1276 goto out;
1277 }
1278
1279 err = -ENODEV;
1280 if (dev == NULL)
1281 goto out;
1282
1283 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1284 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1285 if (IS_ERR(rt->rt6i_nexthop)) {
1286 err = PTR_ERR(rt->rt6i_nexthop);
1287 rt->rt6i_nexthop = NULL;
1288 goto out;
1289 }
1290 }
1291
1292 rt->rt6i_flags = cfg->fc_flags;
1293
1294 install_route:
1295 if (cfg->fc_mx) {
1296 struct nlattr *nla;
1297 int remaining;
1298
1299 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1300 int type = nla_type(nla);
1301
1302 if (type) {
1303 if (type > RTAX_MAX) {
1304 err = -EINVAL;
1305 goto out;
1306 }
1307
1308 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1309 }
1310 }
1311 }
1312
1313 if (!dst_mtu(&rt->dst))
1314 dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev));
1315 if (!dst_metric(&rt->dst, RTAX_ADVMSS))
1316 dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst)));
1317 rt->dst.dev = dev;
1318 rt->rt6i_idev = idev;
1319 rt->rt6i_table = table;
1320
1321 cfg->fc_nlinfo.nl_net = dev_net(dev);
1322
1323 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1324
1325 out:
1326 if (dev)
1327 dev_put(dev);
1328 if (idev)
1329 in6_dev_put(idev);
1330 if (rt)
1331 dst_free(&rt->dst);
1332 return err;
1333 }
1334
1335 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1336 {
1337 int err;
1338 struct fib6_table *table;
1339 struct net *net = dev_net(rt->rt6i_dev);
1340
1341 if (rt == net->ipv6.ip6_null_entry)
1342 return -ENOENT;
1343
1344 table = rt->rt6i_table;
1345 write_lock_bh(&table->tb6_lock);
1346
1347 err = fib6_del(rt, info);
1348 dst_release(&rt->dst);
1349
1350 write_unlock_bh(&table->tb6_lock);
1351
1352 return err;
1353 }
1354
1355 int ip6_del_rt(struct rt6_info *rt)
1356 {
1357 struct nl_info info = {
1358 .nl_net = dev_net(rt->rt6i_dev),
1359 };
1360 return __ip6_del_rt(rt, &info);
1361 }
1362
/*
 * Delete the first route in cfg->fc_table matching the destination and
 * source prefixes and, when specified in @cfg, the output interface,
 * gateway address and metric.  Returns -ESRCH when nothing matches.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (table == NULL)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			/* Optional filters: zero/unset fields match anything. */
			if (cfg->fc_ifindex &&
			    (rt->rt6i_dev == NULL ||
			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			/* Hold @rt across the unlock; __ip6_del_rt() consumes
			 * this reference. */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
1401
1402 /*
1403 * Handle redirects
1404 */
/*
 * Flow key extended with the redirecting router's address, so the
 * lookup callback (__ip6_route_redirect) can match the current next hop
 * against the redirect's source.  The embedded flowi must stay first so
 * the struct can be passed where a plain flowi is expected.
 */
struct ip6rd_flowi {
	struct flowi fl;
	struct in6_addr gateway;
};
1409
/*
 * fib6_rule_lookup() callback used when validating a received redirect:
 * find the route currently used for the destination, but only accept it
 * if its next hop is the router that sent the redirect (carried in
 * rdfl->gateway).  Returns a held route; the null entry when no valid
 * candidate exists.  Called with no locks held; takes tb6_lock itself.
 */
static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi *fl,
					     int flags)
{
	/* Safe downcast: callers always pass an ip6rd_flowi whose first
	 * member is the flowi. */
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/*
	 * Get the "current" route for this destination and
	 * check if the redirect has come from approriate router.
	 *
	 * RFC 2461 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		/*
		 * Current route is on-link; redirect is always invalid.
		 *
		 * Seems, previous statement is not true. It could
		 * be node, which looks for us as on-link (f.e. proxy ndisc)
		 * But then router serving it might decide, that we should
		 * know truth 8)8) --ANK (980726).
		 */
		if (rt6_check_expired(rt))
			continue;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl->oif != rt->rt6i_dev->ifindex)
			continue;
		/* The redirect must come from our current next hop. */
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	/* BACKTRACK may jump back to restart: on a higher fib6 node. */
	BACKTRACK(net, &fl->fl6_src);
out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	return rt;
};
1463
1464 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1465 struct in6_addr *src,
1466 struct in6_addr *gateway,
1467 struct net_device *dev)
1468 {
1469 int flags = RT6_LOOKUP_F_HAS_SADDR;
1470 struct net *net = dev_net(dev);
1471 struct ip6rd_flowi rdfl = {
1472 .fl = {
1473 .oif = dev->ifindex,
1474 .fl6_dst = *dest,
1475 .fl6_src = *src,
1476 },
1477 };
1478
1479 ipv6_addr_copy(&rdfl.gateway, gateway);
1480
1481 if (rt6_need_strict(dest))
1482 flags |= RT6_LOOKUP_F_IFACE;
1483
1484 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1485 flags, __ip6_route_redirect);
1486 }
1487
/*
 * Process a validated ICMPv6 redirect for @dest received from @neigh.
 * Updates the neighbour cache with @lladdr, then installs a cloned
 * host (/128) cache route through the new next hop and announces the
 * change via a NETEVENT_REDIRECT notification.  @on_link means the
 * target is directly reachable (clear RTF_GATEWAY on the clone).
 */
void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
		  struct in6_addr *saddr,
		  struct neighbour *neigh, u8 *lladdr, int on_link)
{
	struct rt6_info *rt, *nrt = NULL;
	struct netevent_redirect netevent;
	struct net *net = dev_net(neigh->dev);

	/* Returns a held route, or the held null entry on failure. */
	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);

	if (rt == net->ipv6.ip6_null_entry) {
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
			       "for redirect target\n");
		goto out;
	}

	/*
	 * We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/*
	 * Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Duplicate redirect: silently ignore. */
	if (neigh == rt->dst.neighbour)
		goto out;

	nrt = ip6_rt_copy(rt);
	if (nrt == NULL)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	/* The clone is a host route pinned to the redirected destination. */
	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
	nrt->rt6i_dst.plen = 128;
	nrt->dst.flags |= DST_HOST;

	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
	nrt->rt6i_nexthop = neigh_clone(neigh);
	/* Reset pmtu, it may be better */
	dst_metric_set(&nrt->dst, RTAX_MTU, ipv6_get_mtu(neigh->dev));
	dst_metric_set(&nrt->dst, RTAX_ADVMSS, ipv6_advmss(dev_net(neigh->dev),
							   dst_mtu(&nrt->dst)));

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* An old cache entry is superseded; ip6_del_rt consumes our ref. */
	if (rt->rt6i_flags&RTF_CACHE) {
		ip6_del_rt(rt);
		return;
	}

out:
	dst_release(&rt->dst);
}
1561
1562 /*
1563 * Handle ICMP "packet too big" messages
1564 * i.e. Path MTU discovery
1565 */
1566
/*
 * Apply a Packet Too Big report: lower the path MTU toward @daddr to
 * @pmtu.  A cached host route is updated in place; otherwise a cloned
 * cache entry carrying the new MTU is inserted so the original route's
 * MTU survives expiry of the clone.  @ifindex of 0 means "any device".
 */
static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
			     struct net *net, u32 pmtu, int ifindex)
{
	struct rt6_info *rt, *nrt;
	int allfrag = 0;

	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
	if (rt == NULL)
		return;

	/* Only ever shrink the path MTU here. */
	if (pmtu >= dst_mtu(&rt->dst))
		goto out;

	if (pmtu < IPV6_MIN_MTU) {
		/*
		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
		 * MTU (1280) and a fragment header should always be included
		 * after a node receiving Too Big message reporting PMTU is
		 * less than the IPv6 Minimum Link MTU.
		 */
		pmtu = IPV6_MIN_MTU;
		allfrag = 1;
	}

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Host route. If it is static, it would be better
	   not to override it, but add new one, so that
	   when cache entry will expire old pmtu
	   would return automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
		}
		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is connected route. Action: COW
	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
	 */
	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, daddr, saddr);
	else
		nrt = rt6_alloc_clone(rt, daddr);

	if (nrt) {
		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
		}

		/* According to RFC 1981, detecting PMTU increase shouldn't be
		 * happened within 5 mins, the recommended timer is 10 mins.
		 * Here this route expiration time is set to ip6_rt_mtu_expires
		 * which is 10 mins. After 10 mins the decreased pmtu is expired
		 * and detecting PMTU increase will be automatically happened.
		 */
		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;

		ip6_ins_rt(nrt);
	}
out:
	dst_release(&rt->dst);
}
1646
/*
 * Entry point for ICMPv6 "Packet Too Big" handling: propagate @pmtu to
 * the routes toward @daddr, both unscoped and scoped to @dev.
 */
void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
			struct net_device *dev, u32 pmtu)
{
	struct net *net = dev_net(dev);

	/*
	 * RFC 1981 states that a node "MUST reduce the size of the packets it
	 * is sending along the path" that caused the Packet Too Big message.
	 * Since it's not possible in the general case to determine which
	 * interface was used to send the original packet, we update the MTU
	 * on the interface that will be used to send future packets. We also
	 * update the MTU on the interface that received the Packet Too Big in
	 * case the original packet was forced out that interface with
	 * SO_BINDTODEVICE or similar. This is the next best thing to the
	 * correct behaviour, which would be to update the MTU on all
	 * interfaces.
	 */
	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
}
1667
1668 /*
1669 * Misc support functions
1670 */
1671
/*
 * Allocate a new rt6_info initialized as a copy of @ort, used as the
 * starting point for redirect/pmtu cache clones.  Device and idev
 * references are taken; expiry and metric are reset so the caller
 * decides the clone's lifetime.  Returns NULL on allocation failure.
 */
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
{
	struct net *net = dev_net(ort->rt6i_dev);
	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);

	if (rt) {
		rt->dst.input = ort->dst.input;
		rt->dst.output = ort->dst.output;

		dst_copy_metrics(&rt->dst, &ort->dst);
		rt->dst.error = ort->dst.error;
		rt->dst.dev = ort->dst.dev;
		if (rt->dst.dev)
			dev_hold(rt->dst.dev);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
		rt->dst.lastuse = jiffies;
		/* Copies never inherit the original's expiry. */
		rt->rt6i_expires = 0;

		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
		rt->rt6i_table = ort->rt6i_table;
	}
	return rt;
}
1704
1705 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Look up an RA Route Information route for @prefix/@prefixlen learned
 * via @gwaddr on interface @ifindex in the RT6_TABLE_INFO table.
 * Returns a held route, or NULL if not present.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (table == NULL)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		/* Must match the learning interface, gateway and be a
		 * route-information entry. */
		if (rt->rt6i_dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);
		break;
	}
out:
	write_unlock_bh(&table->tb6_lock);
	return rt;
}
1737
/*
 * Install a route learned from an RA Route Information option into the
 * RT6_TABLE_INFO table and return a held pointer to the inserted entry
 * (re-looked-up via rt6_get_route_info), or NULL on failure.
 * @pref carries the RA router-preference bits.
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex,
					   unsigned pref)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_INFO,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= ifindex,
		.fc_dst_len	= prefixlen,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
				  RTF_UP | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	ipv6_addr_copy(&cfg.fc_dst, prefix);
	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	ip6_route_add(&cfg);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
}
1766 #endif
1767
/*
 * Find the autoconfigured default route via gateway @addr on @dev in
 * the RT6_TABLE_DFLT table.  Returns a held route or NULL.
 */
struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
	if (table == NULL)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
		/* Default routers live directly under the table root. */
		if (dev == rt->rt6i_dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);
	write_unlock_bh(&table->tb6_lock);
	return rt;
}
1789
/*
 * Install a default route via router @gwaddr on @dev, as learned from a
 * Router Advertisement (hence RTF_ADDRCONF|RTF_EXPIRES).  @pref carries
 * the RA router-preference bits.  Returns the held, freshly inserted
 * route (re-looked-up), or NULL on failure.
 */
struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_DFLT,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= dev->ifindex,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

	ip6_route_add(&cfg);

	return rt6_get_dflt_router(gwaddr, dev);
}
1811
/*
 * Remove every default/addrconf-learned route from the default-router
 * table, e.g. when RA acceptance is turned off.  The scan restarts from
 * the top after each deletion because the lock is dropped to delete.
 */
void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (table == NULL)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
			/* Hold across the unlock; ip6_del_rt consumes it. */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}
1834
1835 static void rtmsg_to_fib6_config(struct net *net,
1836 struct in6_rtmsg *rtmsg,
1837 struct fib6_config *cfg)
1838 {
1839 memset(cfg, 0, sizeof(*cfg));
1840
1841 cfg->fc_table = RT6_TABLE_MAIN;
1842 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1843 cfg->fc_metric = rtmsg->rtmsg_metric;
1844 cfg->fc_expires = rtmsg->rtmsg_info;
1845 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1846 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1847 cfg->fc_flags = rtmsg->rtmsg_flags;
1848
1849 cfg->fc_nlinfo.nl_net = net;
1850
1851 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1852 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1853 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1854 }
1855
/*
 * Handle the legacy SIOCADDRT/SIOCDELRT route ioctls.  Copies the
 * userspace in6_rtmsg, converts it to a fib6_config and dispatches to
 * ip6_route_add()/ip6_route_del() under the RTNL.  Requires
 * CAP_NET_ADMIN; any other command returns -EINVAL.
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	switch(cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return -EFAULT;

		rtmsg_to_fib6_config(net, &rtmsg, &cfg);

		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&cfg);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&cfg);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}
1892
1893 /*
1894 * Drop the packet on the floor
1895 */
1896
/*
 * Common sink for unroutable packets: bump the appropriate SNMP
 * no-route counter, send an ICMPv6 destination-unreachable with @code,
 * and free the skb.  @ipstats_mib_noroutes selects the input or output
 * counter.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY) {
			/* Unspecified destination is an address error,
			 * not a routing failure. */
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
1919
/* dst input handler for reject routes: no route for an incoming packet. */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
1924
/* dst output handler for reject routes: no route for an outgoing packet. */
static int ip6_pkt_discard_out(struct sk_buff *skb)
{
	/* icmpv6_send() needs skb->dev set to pick the reply source. */
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
1930
1931 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1932
/* dst input handler for prohibit routes (policy-routing tables). */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
1937
/* dst output handler for prohibit routes (policy-routing tables). */
static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
	/* icmpv6_send() needs skb->dev set to pick the reply source. */
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
1943
1944 #endif
1945
1946 /*
1947 * Allocate a dst for local (unicast / anycast) address.
1948 */
1949
1950 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1951 const struct in6_addr *addr,
1952 int anycast)
1953 {
1954 struct net *net = dev_net(idev->dev);
1955 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1956 struct neighbour *neigh;
1957
1958 if (rt == NULL) {
1959 if (net_ratelimit())
1960 pr_warning("IPv6: Maximum number of routes reached,"
1961 " consider increasing route/max_size.\n");
1962 return ERR_PTR(-ENOMEM);
1963 }
1964
1965 dev_hold(net->loopback_dev);
1966 in6_dev_hold(idev);
1967
1968 rt->dst.flags = DST_HOST;
1969 rt->dst.input = ip6_input;
1970 rt->dst.output = ip6_output;
1971 rt->rt6i_dev = net->loopback_dev;
1972 rt->rt6i_idev = idev;
1973 dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev));
1974 dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst)));
1975 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
1976 rt->dst.obsolete = -1;
1977
1978 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1979 if (anycast)
1980 rt->rt6i_flags |= RTF_ANYCAST;
1981 else
1982 rt->rt6i_flags |= RTF_LOCAL;
1983 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1984 if (IS_ERR(neigh)) {
1985 dst_free(&rt->dst);
1986
1987 /* We are casting this because that is the return
1988 * value type. But an errno encoded pointer is the
1989 * same regardless of the underlying pointer type,
1990 * and that's what we are returning. So this is OK.
1991 */
1992 return (struct rt6_info *) neigh;
1993 }
1994 rt->rt6i_nexthop = neigh;
1995
1996 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1997 rt->rt6i_dst.plen = 128;
1998 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1999
2000 atomic_set(&rt->dst.__refcnt, 1);
2001
2002 return rt;
2003 }
2004
/* Argument bundle for fib6_ifdown(): the device going down (or NULL for
 * "all devices") and the namespace being walked. */
struct arg_dev_net {
	struct net_device *dev;
	struct net *net;
};
2009
2010 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2011 {
2012 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
2013 struct net *net = ((struct arg_dev_net *)arg)->net;
2014
2015 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2016 rt != net->ipv6.ip6_null_entry) {
2017 RT6_TRACE("deleted by ifdown %p\n", rt);
2018 return -1;
2019 }
2020 return 0;
2021 }
2022
/*
 * Flush all routes referencing @dev from every FIB table and from the
 * ICMPv6 socket's cached routes.  @dev == NULL flushes everything
 * (except null entries) in @net.
 */
void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, 0, &adn);
	icmp6_clean_all(fib6_ifdown, &adn);
}
2033
/* Argument bundle for rt6_mtu_change_route(): the device whose MTU
 * changed and its new MTU. */
struct rt6_mtu_change_arg
{
	struct net_device *dev;
	unsigned mtu;
};
2039
/*
 * fib6_clean_all() callback invoked on a device MTU change: update the
 * route's cached MTU/ADVMSS where appropriate.  Always returns 0 (the
 * walk never deletes entries).
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;
	struct net *net = dev_net(arg->dev);

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (idev == NULL)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discouvery.
	 */
	if (rt->rt6i_dev == arg->dev &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
	    (dst_mtu(&rt->dst) >= arg->mtu ||
	     (dst_mtu(&rt->dst) < arg->mtu &&
	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, arg->mtu));
	}
	return 0;
}
2080
/*
 * Propagate a device MTU change (@dev now has MTU @mtu) to all routes
 * in that device's namespace via rt6_mtu_change_route().
 */
void rt6_mtu_change(struct net_device *dev, unsigned mtu)
{
	struct rt6_mtu_change_arg arg = {
		.dev = dev,
		.mtu = mtu,
	};

	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
}
2090
/* Netlink attribute validation policy for RTM_{NEW,DEL,GET}ROUTE.
 * Attributes absent here (e.g. RTA_DST/RTA_SRC) are length-checked by
 * hand in rtm_to_fib6_config(). */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]               = { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
};
2098
2099 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2100 struct fib6_config *cfg)
2101 {
2102 struct rtmsg *rtm;
2103 struct nlattr *tb[RTA_MAX+1];
2104 int err;
2105
2106 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2107 if (err < 0)
2108 goto errout;
2109
2110 err = -EINVAL;
2111 rtm = nlmsg_data(nlh);
2112 memset(cfg, 0, sizeof(*cfg));
2113
2114 cfg->fc_table = rtm->rtm_table;
2115 cfg->fc_dst_len = rtm->rtm_dst_len;
2116 cfg->fc_src_len = rtm->rtm_src_len;
2117 cfg->fc_flags = RTF_UP;
2118 cfg->fc_protocol = rtm->rtm_protocol;
2119
2120 if (rtm->rtm_type == RTN_UNREACHABLE)
2121 cfg->fc_flags |= RTF_REJECT;
2122
2123 if (rtm->rtm_type == RTN_LOCAL)
2124 cfg->fc_flags |= RTF_LOCAL;
2125
2126 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2127 cfg->fc_nlinfo.nlh = nlh;
2128 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2129
2130 if (tb[RTA_GATEWAY]) {
2131 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2132 cfg->fc_flags |= RTF_GATEWAY;
2133 }
2134
2135 if (tb[RTA_DST]) {
2136 int plen = (rtm->rtm_dst_len + 7) >> 3;
2137
2138 if (nla_len(tb[RTA_DST]) < plen)
2139 goto errout;
2140
2141 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2142 }
2143
2144 if (tb[RTA_SRC]) {
2145 int plen = (rtm->rtm_src_len + 7) >> 3;
2146
2147 if (nla_len(tb[RTA_SRC]) < plen)
2148 goto errout;
2149
2150 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2151 }
2152
2153 if (tb[RTA_OIF])
2154 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2155
2156 if (tb[RTA_PRIORITY])
2157 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2158
2159 if (tb[RTA_METRICS]) {
2160 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2161 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2162 }
2163
2164 if (tb[RTA_TABLE])
2165 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2166
2167 err = 0;
2168 errout:
2169 return err;
2170 }
2171
2172 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2173 {
2174 struct fib6_config cfg;
2175 int err;
2176
2177 err = rtm_to_fib6_config(skb, nlh, &cfg);
2178 if (err < 0)
2179 return err;
2180
2181 return ip6_route_del(&cfg);
2182 }
2183
2184 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2185 {
2186 struct fib6_config cfg;
2187 int err;
2188
2189 err = rtm_to_fib6_config(skb, nlh, &cfg);
2190 if (err < 0)
2191 return err;
2192
2193 return ip6_route_add(&cfg);
2194 }
2195
/*
 * Worst-case payload size of a single RTM_NEWROUTE message built by
 * rt6_fill_node(), used to size notification skbs.
 */
static inline size_t rt6_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
	       + nla_total_size(sizeof(struct rta_cacheinfo));
}
2210
/*
 * Serialize @rt into an RTM message on @skb.  @dst/@src, when non-NULL,
 * override the route's own prefixes (RTM_GETROUTE replies describe the
 * queried addresses as /128 hosts).  @prefix restricts output to
 * RTF_PREFIX_RT routes (returning 1, "success but skipped", otherwise).
 * Returns the message length, 1 when skipped, or -EMSGSIZE when @skb
 * is too small (the partial message is cancelled).
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	NLA_PUT_U32(skb, RTA_TABLE, table);
	/* Map route flags to the rtnetlink route type. */
	if (rt->rt6i_flags&RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_flags&RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	/* Synthesize the protocol for kernel-originated routes. */
	if (rt->rt6i_flags&RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags&RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags&RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		NLA_PUT(skb, RTA_DST, 16, dst);
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		NLA_PUT(skb, RTA_SRC, 16, src);
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			NLA_PUT_U32(skb, RTA_IIF, iif);
	} else if (dst) {
		/* Report the preferred source address we would pick. */
		struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
		struct in6_addr saddr_buf;
		if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
				       dst, 0, &saddr_buf) == 0)
			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	if (rt->dst.neighbour)
		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);

	if (rt->dst.dev)
		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);

	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);

	/* Clamp remaining lifetime into the cacheinfo's range. */
	if (!(rt->rt6i_flags & RTF_EXPIRES))
		expires = 0;
	else if (rt->rt6i_expires - jiffies < INT_MAX)
		expires = rt->rt6i_expires - jiffies;
	else
		expires = INT_MAX;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
			       expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2330
2331 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2332 {
2333 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2334 int prefix;
2335
2336 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2337 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2338 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2339 } else
2340 prefix = 0;
2341
2342 return rt6_fill_node(arg->net,
2343 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2344 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2345 prefix, 0, NLM_F_MULTI);
2346 }
2347
/*
 * RTM_GETROUTE handler: perform a routing lookup for the addresses in
 * the request and unicast the resulting route back to the requester.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi fl;
	int err, iif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl, 0, sizeof(fl));

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		fl.oif = nla_get_u32(tb[RTA_OIF]);

	if (iif) {
		/* Validate that the requested input interface exists. */
		struct net_device *dev;
		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	/* skb_dst_set takes ownership of the route's reference. */
	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
	skb_dst_set(skb, &rt->dst);

	err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
errout:
	return err;
}
2421
2422 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2423 {
2424 struct sk_buff *skb;
2425 struct net *net = info->nl_net;
2426 u32 seq;
2427 int err;
2428
2429 err = -ENOBUFS;
2430 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2431
2432 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2433 if (skb == NULL)
2434 goto errout;
2435
2436 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2437 event, info->pid, seq, 0, 0, 0);
2438 if (err < 0) {
2439 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2440 WARN_ON(err == -EMSGSIZE);
2441 kfree_skb(skb);
2442 goto errout;
2443 }
2444 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2445 info->nlh, gfp_any());
2446 return;
2447 errout:
2448 if (err < 0)
2449 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2450 }
2451
2452 static int ip6_route_dev_notify(struct notifier_block *this,
2453 unsigned long event, void *data)
2454 {
2455 struct net_device *dev = (struct net_device *)data;
2456 struct net *net = dev_net(dev);
2457
2458 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2459 net->ipv6.ip6_null_entry->dst.dev = dev;
2460 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2461 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2462 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2463 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2464 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2465 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2466 #endif
2467 }
2468
2469 return NOTIFY_OK;
2470 }
2471
2472 /*
2473 * /proc
2474 */
2475
2476 #ifdef CONFIG_PROC_FS
2477
/* Argument bundle for the old char-buffer /proc read interface.
 * NOTE(review): nothing in this part of the file references it any more —
 * the seq_file handlers below take a struct seq_file directly; looks
 * vestigial, confirm against the rest of the file before removing.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2486
2487 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2488 {
2489 struct seq_file *m = p_arg;
2490
2491 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2492
2493 #ifdef CONFIG_IPV6_SUBTREES
2494 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2495 #else
2496 seq_puts(m, "00000000000000000000000000000000 00 ");
2497 #endif
2498
2499 if (rt->rt6i_nexthop) {
2500 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2501 } else {
2502 seq_puts(m, "00000000000000000000000000000000");
2503 }
2504 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2505 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2506 rt->dst.__use, rt->rt6i_flags,
2507 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2508 return 0;
2509 }
2510
2511 static int ipv6_route_show(struct seq_file *m, void *v)
2512 {
2513 struct net *net = (struct net *)m->private;
2514 fib6_clean_all(net, rt6_info_route, 0, m);
2515 return 0;
2516 }
2517
/* open() for /proc/net/ipv6_route: bind the seq_file to the opener's netns. */
static int ipv6_route_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ipv6_route_show);
}
2522
/* File operations for /proc/net/ipv6_route (read-only, seq_file based). */
static const struct file_operations ipv6_route_proc_fops = {
	.owner = THIS_MODULE,
	.open = ipv6_route_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release_net,
};
2530
2531 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2532 {
2533 struct net *net = (struct net *)seq->private;
2534 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2535 net->ipv6.rt6_stats->fib_nodes,
2536 net->ipv6.rt6_stats->fib_route_nodes,
2537 net->ipv6.rt6_stats->fib_rt_alloc,
2538 net->ipv6.rt6_stats->fib_rt_entries,
2539 net->ipv6.rt6_stats->fib_rt_cache,
2540 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2541 net->ipv6.rt6_stats->fib_discarded_routes);
2542
2543 return 0;
2544 }
2545
/* open() for /proc/net/rt6_stats: bind the seq_file to the opener's netns. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
2550
/* File operations for /proc/net/rt6_stats (read-only, seq_file based). */
static const struct file_operations rt6_stats_seq_fops = {
	.owner = THIS_MODULE,
	.open = rt6_stats_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release_net,
};
2558 #endif /* CONFIG_PROC_FS */
2559
2560 #ifdef CONFIG_SYSCTL
2561
2562 static
2563 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2564 void __user *buffer, size_t *lenp, loff_t *ppos)
2565 {
2566 struct net *net = current->nsproxy->net_ns;
2567 int delay = net->ipv6.sysctl.flush_delay;
2568 if (write) {
2569 proc_dointvec(ctl, write, buffer, lenp, ppos);
2570 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2571 return 0;
2572 } else
2573 return -EINVAL;
2574 }
2575
/* Template for the per-namespace net.ipv6.route sysctl table.  The ->data
 * pointers below reference init_net and are re-pointed at each namespace's
 * own fields in ipv6_route_sysctl_init() — the entry ORDER is therefore
 * part of the contract with that function.
 */
ctl_table ipv6_route_table_template[] = {
	{
		/* Write-only trigger: flush the route cache (see
		 * ipv6_sysctl_rtcache_flush()). */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* Same variable as gc_min_interval, exposed in milliseconds. */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
2649
2650 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2651 {
2652 struct ctl_table *table;
2653
2654 table = kmemdup(ipv6_route_table_template,
2655 sizeof(ipv6_route_table_template),
2656 GFP_KERNEL);
2657
2658 if (table) {
2659 table[0].data = &net->ipv6.sysctl.flush_delay;
2660 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2661 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2662 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2663 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2664 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2665 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2666 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2667 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2668 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2669 }
2670
2671 return table;
2672 }
2673 #endif
2674
/* Per-namespace setup for the IPv6 routing engine: clone the dst_ops
 * template, allocate the special null (and optionally prohibit/blackhole)
 * route entries, seed the sysctl defaults and create the /proc files.
 * Errors unwind in reverse order through the goto chain at the bottom.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	/* Special entries are their own dst.path and use this netns' ops. */
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	/* NOTE(review): 255 pins an explicit hop limit on the special
	 * entries; whether 0 should now mean "use the default" depends on
	 * ip6_dst_hoplimit(), which is outside this chunk — confirm there.
	 */
	dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255);
#endif

	/* Default sysctl values; overridable via net.ipv6.route.*. */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

#ifdef CONFIG_PROC_FS
	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif
	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
2747
/* Per-namespace teardown: remove the /proc files first (so nothing can
 * walk the tables any more), then free the special route entries and the
 * dst entry counter — the mirror of ip6_route_net_init().
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ipv6_route");
	proc_net_remove(net, "rt6_stats");
#endif
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
2761
/* Per-network-namespace init/exit hooks for the IPv6 routing engine. */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
2766
/* Netdevice notifier catching loopback registration so each new netns'
 * special routes get wired to a device (see ip6_route_dev_notify()).
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
2771
/* Subsystem bring-up.  Order matters: the dst slab and pernet state must
 * exist before fib6/xfrm6/fib-rules hook into them, and rtnetlink handlers
 * are registered last.  Failures unwind in strict reverse order through
 * the label chain at the bottom.
 */
int __init ip6_route_init(void)
{
	int ret;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_dst_entries;

	/* Blackhole dsts come from the same slab as ordinary rt6_infos. */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	/* Netlink handlers last: once visible, userspace may call in. */
	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
		goto fib6_rules_init;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto fib6_rules_init;

out:
	return ret;

	/* Error unwinding, reverse order of the steps above. */
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
2843
/* Module teardown: exact reverse of ip6_route_init() — unhook external
 * entry points (notifier, rules, xfrm) before destroying the fib, pernet
 * state, dst counters and finally the slab cache.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}