net: replace remaining __FUNCTION__ occurrences
net/ipv6/route.c (GitHub/moto-9609/android_kernel_motorola_exynos9610.git)
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16 /* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 * Ville Nuorvala
26 * Fixed routing subtrees.
27 */
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <linux/nsproxy.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
66
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
74
75 #define CLONE_OFFLINK_ROUTE 0
76
77 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void ip6_dst_destroy(struct dst_entry *);
81 static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
83 static int ip6_dst_gc(struct dst_ops *ops);
84
85 static int ip6_pkt_discard(struct sk_buff *skb);
86 static int ip6_pkt_discard_out(struct sk_buff *skb);
87 static void ip6_link_failure(struct sk_buff *skb);
88 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
95 static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
97 struct in6_addr *gwaddr, int ifindex);
98 #endif
99
100 static struct dst_ops ip6_dst_ops_template = {
101 .family = AF_INET6,
102 .protocol = __constant_htons(ETH_P_IPV6),
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
111 .local_out = ip6_local_out,
112 .entry_size = sizeof(struct rt6_info),
113 .entries = ATOMIC_INIT(0),
114 };
115
116 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
117 {
118 }
119
120 static struct dst_ops ip6_dst_blackhole_ops = {
121 .family = AF_INET6,
122 .protocol = __constant_htons(ETH_P_IPV6),
123 .destroy = ip6_dst_destroy,
124 .check = ip6_dst_check,
125 .update_pmtu = ip6_rt_blackhole_update_pmtu,
126 .entry_size = sizeof(struct rt6_info),
127 .entries = ATOMIC_INIT(0),
128 };
129
130 static struct rt6_info ip6_null_entry_template = {
131 .u = {
132 .dst = {
133 .__refcnt = ATOMIC_INIT(1),
134 .__use = 1,
135 .obsolete = -1,
136 .error = -ENETUNREACH,
137 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
138 .input = ip6_pkt_discard,
139 .output = ip6_pkt_discard_out,
140 }
141 },
142 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
143 .rt6i_metric = ~(u32) 0,
144 .rt6i_ref = ATOMIC_INIT(1),
145 };
146
147 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
148
149 static int ip6_pkt_prohibit(struct sk_buff *skb);
150 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
151
152 struct rt6_info ip6_prohibit_entry_template = {
153 .u = {
154 .dst = {
155 .__refcnt = ATOMIC_INIT(1),
156 .__use = 1,
157 .obsolete = -1,
158 .error = -EACCES,
159 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
160 .input = ip6_pkt_prohibit,
161 .output = ip6_pkt_prohibit_out,
162 }
163 },
164 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
165 .rt6i_metric = ~(u32) 0,
166 .rt6i_ref = ATOMIC_INIT(1),
167 };
168
169 static struct rt6_info ip6_blk_hole_entry_template = {
170 .u = {
171 .dst = {
172 .__refcnt = ATOMIC_INIT(1),
173 .__use = 1,
174 .obsolete = -1,
175 .error = -EINVAL,
176 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
177 .input = dst_discard,
178 .output = dst_discard,
179 }
180 },
181 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
182 .rt6i_metric = ~(u32) 0,
183 .rt6i_ref = ATOMIC_INIT(1),
184 };
185
186 #endif
187
188 /* allocate dst with ip6_dst_ops */
189 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
190 {
191 return (struct rt6_info *)dst_alloc(ops);
192 }
193
194 static void ip6_dst_destroy(struct dst_entry *dst)
195 {
196 struct rt6_info *rt = (struct rt6_info *)dst;
197 struct inet6_dev *idev = rt->rt6i_idev;
198
199 if (idev != NULL) {
200 rt->rt6i_idev = NULL;
201 in6_dev_put(idev);
202 }
203 }
204
205 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
206 int how)
207 {
208 struct rt6_info *rt = (struct rt6_info *)dst;
209 struct inet6_dev *idev = rt->rt6i_idev;
210 struct net_device *loopback_dev =
211 dev->nd_net->loopback_dev;
212
213 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
214 struct inet6_dev *loopback_idev =
215 in6_dev_get(loopback_dev);
216 if (loopback_idev != NULL) {
217 rt->rt6i_idev = loopback_idev;
218 in6_dev_put(idev);
219 }
220 }
221 }
222
223 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
224 {
225 return (rt->rt6i_flags & RTF_EXPIRES &&
226 time_after(jiffies, rt->rt6i_expires));
227 }
228
229 static inline int rt6_need_strict(struct in6_addr *daddr)
230 {
231 return (ipv6_addr_type(daddr) &
232 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
233 }
234
235 /*
236 * Route lookup. Any table->tb6_lock is implied.
237 */
238
239 static inline struct rt6_info *rt6_device_match(struct net *net,
240 struct rt6_info *rt,
241 int oif,
242 int strict)
243 {
244 struct rt6_info *local = NULL;
245 struct rt6_info *sprt;
246
247 if (oif) {
248 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
249 struct net_device *dev = sprt->rt6i_dev;
250 if (dev->ifindex == oif)
251 return sprt;
252 if (dev->flags & IFF_LOOPBACK) {
253 if (sprt->rt6i_idev == NULL ||
254 sprt->rt6i_idev->dev->ifindex != oif) {
255 if (strict && oif)
256 continue;
257 if (local && (!oif ||
258 local->rt6i_idev->dev->ifindex == oif))
259 continue;
260 }
261 local = sprt;
262 }
263 }
264
265 if (local)
266 return local;
267
268 if (strict)
269 return net->ipv6.ip6_null_entry;
270 }
271 return rt;
272 }
273
274 #ifdef CONFIG_IPV6_ROUTER_PREF
275 static void rt6_probe(struct rt6_info *rt)
276 {
277 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
278 /*
279 * Okay, this does not seem to be appropriate
280 * for now, however, we need to check if it
281 * is really so; aka Router Reachability Probing.
282 *
283 * Router Reachability Probe MUST be rate-limited
284 * to no more than one per minute.
285 */
286 if (!neigh || (neigh->nud_state & NUD_VALID))
287 return;
288 read_lock_bh(&neigh->lock);
289 if (!(neigh->nud_state & NUD_VALID) &&
290 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
291 struct in6_addr mcaddr;
292 struct in6_addr *target;
293
294 neigh->updated = jiffies;
295 read_unlock_bh(&neigh->lock);
296
297 target = (struct in6_addr *)&neigh->primary_key;
298 addrconf_addr_solict_mult(target, &mcaddr);
299 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
300 } else
301 read_unlock_bh(&neigh->lock);
302 }
303 #else
304 static inline void rt6_probe(struct rt6_info *rt)
305 {
306 return;
307 }
308 #endif
309
310 /*
311 * Default Router Selection (RFC 2461 6.3.6)
312 */
313 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
314 {
315 struct net_device *dev = rt->rt6i_dev;
316 if (!oif || dev->ifindex == oif)
317 return 2;
318 if ((dev->flags & IFF_LOOPBACK) &&
319 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
320 return 1;
321 return 0;
322 }
323
324 static inline int rt6_check_neigh(struct rt6_info *rt)
325 {
326 struct neighbour *neigh = rt->rt6i_nexthop;
327 int m;
328 if (rt->rt6i_flags & RTF_NONEXTHOP ||
329 !(rt->rt6i_flags & RTF_GATEWAY))
330 m = 1;
331 else if (neigh) {
332 read_lock_bh(&neigh->lock);
333 if (neigh->nud_state & NUD_VALID)
334 m = 2;
335 #ifdef CONFIG_IPV6_ROUTER_PREF
336 else if (neigh->nud_state & NUD_FAILED)
337 m = 0;
338 #endif
339 else
340 m = 1;
341 read_unlock_bh(&neigh->lock);
342 } else
343 m = 0;
344 return m;
345 }
346
347 static int rt6_score_route(struct rt6_info *rt, int oif,
348 int strict)
349 {
350 int m, n;
351
352 m = rt6_check_dev(rt, oif);
353 if (!m && (strict & RT6_LOOKUP_F_IFACE))
354 return -1;
355 #ifdef CONFIG_IPV6_ROUTER_PREF
356 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
357 #endif
358 n = rt6_check_neigh(rt);
359 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
360 return -1;
361 return m;
362 }
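/*
 * Score composition, for reference: rt6_check_dev() supplies the low
 * bits (2 for an exact oif match, 1 for a loopback route bound to the
 * requested oif, 0 otherwise); with CONFIG_IPV6_ROUTER_PREF the decoded
 * router preference is or'ed in at bits 2-3.  The neighbour state only
 * acts as a filter: a route whose nexthop is unreachable is rejected
 * outright (-1) when the caller asked for RT6_LOOKUP_F_REACHABLE, as is
 * a non-matching device under RT6_LOOKUP_F_IFACE.
 */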
363
364 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
365 int *mpri, struct rt6_info *match)
366 {
367 int m;
368
369 if (rt6_check_expired(rt))
370 goto out;
371
372 m = rt6_score_route(rt, oif, strict);
373 if (m < 0)
374 goto out;
375
376 if (m > *mpri) {
377 if (strict & RT6_LOOKUP_F_REACHABLE)
378 rt6_probe(match);
379 *mpri = m;
380 match = rt;
381 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
382 rt6_probe(rt);
383 }
384
385 out:
386 return match;
387 }
388
389 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
390 struct rt6_info *rr_head,
391 u32 metric, int oif, int strict)
392 {
393 struct rt6_info *rt, *match;
394 int mpri = -1;
395
396 match = NULL;
397 for (rt = rr_head; rt && rt->rt6i_metric == metric;
398 rt = rt->u.dst.rt6_next)
399 match = find_match(rt, oif, strict, &mpri, match);
400 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
401 rt = rt->u.dst.rt6_next)
402 match = find_match(rt, oif, strict, &mpri, match);
403
404 return match;
405 }
406
407 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
408 {
409 struct rt6_info *match, *rt0;
410 struct net *net;
411
412 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
413 __func__, fn->leaf, oif);
414
415 rt0 = fn->rr_ptr;
416 if (!rt0)
417 fn->rr_ptr = rt0 = fn->leaf;
418
419 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
420
421 if (!match &&
422 (strict & RT6_LOOKUP_F_REACHABLE)) {
423 struct rt6_info *next = rt0->u.dst.rt6_next;
424
425 /* no entries matched; do round-robin */
426 if (!next || next->rt6i_metric != rt0->rt6i_metric)
427 next = fn->leaf;
428
429 if (next != rt0)
430 fn->rr_ptr = next;
431 }
432
433 RT6_TRACE("%s() => %p\n",
434 __func__, match);
435
436 net = rt0->rt6i_dev->nd_net;
437 return (match ? match : net->ipv6.ip6_null_entry);
438 }
439
440 #ifdef CONFIG_IPV6_ROUTE_INFO
441 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
442 struct in6_addr *gwaddr)
443 {
444 struct net *net = dev->nd_net;
445 struct route_info *rinfo = (struct route_info *) opt;
446 struct in6_addr prefix_buf, *prefix;
447 unsigned int pref;
448 u32 lifetime;
449 struct rt6_info *rt;
450
451 if (len < sizeof(struct route_info)) {
452 return -EINVAL;
453 }
454
455 /* Sanity check for prefix_len and length */
456 if (rinfo->length > 3) {
457 return -EINVAL;
458 } else if (rinfo->prefix_len > 128) {
459 return -EINVAL;
460 } else if (rinfo->prefix_len > 64) {
461 if (rinfo->length < 2) {
462 return -EINVAL;
463 }
464 } else if (rinfo->prefix_len > 0) {
465 if (rinfo->length < 1) {
466 return -EINVAL;
467 }
468 }
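/*
 * For reference: rinfo->length counts 8-octet units of the Route
 * Information option, so a value of 3 means the full 16-byte prefix is
 * present and can be used as-is; shorter options carry only part of the
 * prefix, which is why ipv6_addr_prefix() below masks the address down
 * to prefix_len bits.
 */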
469
470 pref = rinfo->route_pref;
471 if (pref == ICMPV6_ROUTER_PREF_INVALID)
472 pref = ICMPV6_ROUTER_PREF_MEDIUM;
473
474 lifetime = ntohl(rinfo->lifetime);
475 if (lifetime == 0xffffffff) {
476 /* infinity */
477 } else if (lifetime > 0x7fffffff/HZ) {
478 /* Avoid arithmetic overflow */
479 lifetime = 0x7fffffff/HZ - 1;
480 }
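/*
 * Worked example of the clamp above: lifetime is in seconds and is later
 * used as jiffies + HZ * lifetime, so anything above 0x7fffffff/HZ
 * (about 24.8 days with HZ=1000) could overflow a signed 32-bit jiffies
 * delta and is therefore capped just below that limit.
 */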
481
482 if (rinfo->length == 3)
483 prefix = (struct in6_addr *)rinfo->prefix;
484 else {
485 /* this function is safe */
486 ipv6_addr_prefix(&prefix_buf,
487 (struct in6_addr *)rinfo->prefix,
488 rinfo->prefix_len);
489 prefix = &prefix_buf;
490 }
491
492 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
493 dev->ifindex);
494
495 if (rt && !lifetime) {
496 ip6_del_rt(rt);
497 rt = NULL;
498 }
499
500 if (!rt && lifetime)
501 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
502 pref);
503 else if (rt)
504 rt->rt6i_flags = RTF_ROUTEINFO |
505 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
506
507 if (rt) {
508 if (lifetime == 0xffffffff) {
509 rt->rt6i_flags &= ~RTF_EXPIRES;
510 } else {
511 rt->rt6i_expires = jiffies + HZ * lifetime;
512 rt->rt6i_flags |= RTF_EXPIRES;
513 }
514 dst_release(&rt->u.dst);
515 }
516 return 0;
517 }
518 #endif
519
520 #define BACKTRACK(__net, saddr) \
521 do { \
522 if (rt == __net->ipv6.ip6_null_entry) { \
523 struct fib6_node *pn; \
524 while (1) { \
525 if (fn->fn_flags & RTN_TL_ROOT) \
526 goto out; \
527 pn = fn->parent; \
528 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
529 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
530 else \
531 fn = pn; \
532 if (fn->fn_flags & RTN_RTINFO) \
533 goto restart; \
534 } \
535 } \
536 } while(0)
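/*
 * Usage note: BACKTRACK() is only meaningful inside the lookup functions
 * below.  It expects a local 'fn' (the current fib6_node), an 'rt' that
 * may be the per-net null entry, and 'out'/'restart' labels in the
 * enclosing function; on a miss it walks back up the tree, re-descending
 * into source-address subtrees where present, and jumps to 'restart'
 * once it reaches a node carrying route info.
 */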
537
538 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
539 struct fib6_table *table,
540 struct flowi *fl, int flags)
541 {
542 struct fib6_node *fn;
543 struct rt6_info *rt;
544
545 read_lock_bh(&table->tb6_lock);
546 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
547 restart:
548 rt = fn->leaf;
549 rt = rt6_device_match(net, rt, fl->oif, flags);
550 BACKTRACK(net, &fl->fl6_src);
551 out:
552 dst_use(&rt->u.dst, jiffies);
553 read_unlock_bh(&table->tb6_lock);
554 return rt;
555
556 }
557
558 struct rt6_info *rt6_lookup(struct net *net, struct in6_addr *daddr,
559 struct in6_addr *saddr, int oif, int strict)
560 {
561 struct flowi fl = {
562 .oif = oif,
563 .nl_u = {
564 .ip6_u = {
565 .daddr = *daddr,
566 },
567 },
568 };
569 struct dst_entry *dst;
570 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
571
572 if (saddr) {
573 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
574 flags |= RT6_LOOKUP_F_HAS_SADDR;
575 }
576
577 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
578 if (dst->error == 0)
579 return (struct rt6_info *) dst;
580
581 dst_release(dst);
582
583 return NULL;
584 }
585
586 EXPORT_SYMBOL(rt6_lookup);
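/*
 * Illustrative sketch (not part of the original file): a minimal,
 * hypothetical caller of rt6_lookup().  The returned entry has had its
 * use count bumped, so it must be dropped with dst_release().
 */
#if 0
static void rt6_lookup_example(struct net *net, struct in6_addr *daddr,
			       int oif)
{
	struct rt6_info *rt;

	rt = rt6_lookup(net, daddr, NULL, oif, 0);
	if (rt) {
		/* inspect rt->rt6i_dev, rt->rt6i_gateway, ... */
		dst_release(&rt->u.dst);
	}
}
#endif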
587
588 /* ip6_ins_rt is called with FREE table->tb6_lock.
589 It takes the new route entry; if the addition fails for any
590 reason, the route is freed. In any case, if the caller does not
591 hold a reference, it may be destroyed.
592 */
593
594 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
595 {
596 int err;
597 struct fib6_table *table;
598
599 table = rt->rt6i_table;
600 write_lock_bh(&table->tb6_lock);
601 err = fib6_add(&table->tb6_root, rt, info);
602 write_unlock_bh(&table->tb6_lock);
603
604 return err;
605 }
606
607 int ip6_ins_rt(struct rt6_info *rt)
608 {
609 struct nl_info info = {
610 .nl_net = rt->rt6i_dev->nd_net,
611 };
612 return __ip6_ins_rt(rt, &info);
613 }
614
615 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
616 struct in6_addr *saddr)
617 {
618 struct rt6_info *rt;
619
620 /*
621 * Clone the route.
622 */
623
624 rt = ip6_rt_copy(ort);
625
626 if (rt) {
627 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
628 if (rt->rt6i_dst.plen != 128 &&
629 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
630 rt->rt6i_flags |= RTF_ANYCAST;
631 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
632 }
633
634 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
635 rt->rt6i_dst.plen = 128;
636 rt->rt6i_flags |= RTF_CACHE;
637 rt->u.dst.flags |= DST_HOST;
638
639 #ifdef CONFIG_IPV6_SUBTREES
640 if (rt->rt6i_src.plen && saddr) {
641 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
642 rt->rt6i_src.plen = 128;
643 }
644 #endif
645
646 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
647
648 }
649
650 return rt;
651 }
652
653 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
654 {
655 struct rt6_info *rt = ip6_rt_copy(ort);
656 if (rt) {
657 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
658 rt->rt6i_dst.plen = 128;
659 rt->rt6i_flags |= RTF_CACHE;
660 rt->u.dst.flags |= DST_HOST;
661 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
662 }
663 return rt;
664 }
665
666 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
667 struct flowi *fl, int flags)
668 {
669 struct fib6_node *fn;
670 struct rt6_info *rt, *nrt;
671 int strict = 0;
672 int attempts = 3;
673 int err;
674 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
675
676 strict |= flags & RT6_LOOKUP_F_IFACE;
677
678 relookup:
679 read_lock_bh(&table->tb6_lock);
680
681 restart_2:
682 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
683
684 restart:
685 rt = rt6_select(fn, oif, strict | reachable);
686
687 BACKTRACK(net, &fl->fl6_src);
688 if (rt == net->ipv6.ip6_null_entry ||
689 rt->rt6i_flags & RTF_CACHE)
690 goto out;
691
692 dst_hold(&rt->u.dst);
693 read_unlock_bh(&table->tb6_lock);
694
695 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
696 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
697 else {
698 #if CLONE_OFFLINK_ROUTE
699 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
700 #else
701 goto out2;
702 #endif
703 }
704
705 dst_release(&rt->u.dst);
706 rt = nrt ? : net->ipv6.ip6_null_entry;
707
708 dst_hold(&rt->u.dst);
709 if (nrt) {
710 err = ip6_ins_rt(nrt);
711 if (!err)
712 goto out2;
713 }
714
715 if (--attempts <= 0)
716 goto out2;
717
718 /*
719 * Race condition! In the gap, when table->tb6_lock was
720 * released someone could insert this route. Relookup.
721 */
722 dst_release(&rt->u.dst);
723 goto relookup;
724
725 out:
726 if (reachable) {
727 reachable = 0;
728 goto restart_2;
729 }
730 dst_hold(&rt->u.dst);
731 read_unlock_bh(&table->tb6_lock);
732 out2:
733 rt->u.dst.lastuse = jiffies;
734 rt->u.dst.__use++;
735
736 return rt;
737 }
738
739 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
740 struct flowi *fl, int flags)
741 {
742 return ip6_pol_route(net, table, fl->iif, fl, flags);
743 }
744
745 void ip6_route_input(struct sk_buff *skb)
746 {
747 struct ipv6hdr *iph = ipv6_hdr(skb);
748 struct net *net = skb->dev->nd_net;
749 int flags = RT6_LOOKUP_F_HAS_SADDR;
750 struct flowi fl = {
751 .iif = skb->dev->ifindex,
752 .nl_u = {
753 .ip6_u = {
754 .daddr = iph->daddr,
755 .saddr = iph->saddr,
756 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
757 },
758 },
759 .mark = skb->mark,
760 .proto = iph->nexthdr,
761 };
762
763 if (rt6_need_strict(&iph->daddr))
764 flags |= RT6_LOOKUP_F_IFACE;
765
766 skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
767 }
768
769 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
770 struct flowi *fl, int flags)
771 {
772 return ip6_pol_route(net, table, fl->oif, fl, flags);
773 }
774
775 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
776 struct flowi *fl)
777 {
778 int flags = 0;
779
780 if (rt6_need_strict(&fl->fl6_dst))
781 flags |= RT6_LOOKUP_F_IFACE;
782
783 if (!ipv6_addr_any(&fl->fl6_src))
784 flags |= RT6_LOOKUP_F_HAS_SADDR;
785
786 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
787 }
788
789 EXPORT_SYMBOL(ip6_route_output);
790
791 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
792 {
793 struct rt6_info *ort = (struct rt6_info *) *dstp;
794 struct rt6_info *rt = (struct rt6_info *)
795 dst_alloc(&ip6_dst_blackhole_ops);
796 struct dst_entry *new = NULL;
797
798 if (rt) {
799 new = &rt->u.dst;
800
801 atomic_set(&new->__refcnt, 1);
802 new->__use = 1;
803 new->input = dst_discard;
804 new->output = dst_discard;
805
806 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
807 new->dev = ort->u.dst.dev;
808 if (new->dev)
809 dev_hold(new->dev);
810 rt->rt6i_idev = ort->rt6i_idev;
811 if (rt->rt6i_idev)
812 in6_dev_hold(rt->rt6i_idev);
813 rt->rt6i_expires = 0;
814
815 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
816 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
817 rt->rt6i_metric = 0;
818
819 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
820 #ifdef CONFIG_IPV6_SUBTREES
821 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
822 #endif
823
824 dst_free(new);
825 }
826
827 dst_release(*dstp);
828 *dstp = new;
829 return (new ? 0 : -ENOMEM);
830 }
831 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
832
833 /*
834 * Destination cache support functions
835 */
836
837 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
838 {
839 struct rt6_info *rt;
840
841 rt = (struct rt6_info *) dst;
842
843 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
844 return dst;
845
846 return NULL;
847 }
848
849 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
850 {
851 struct rt6_info *rt = (struct rt6_info *) dst;
852
853 if (rt) {
854 if (rt->rt6i_flags & RTF_CACHE)
855 ip6_del_rt(rt);
856 else
857 dst_release(dst);
858 }
859 return NULL;
860 }
861
862 static void ip6_link_failure(struct sk_buff *skb)
863 {
864 struct rt6_info *rt;
865
866 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
867
868 rt = (struct rt6_info *) skb->dst;
869 if (rt) {
870 if (rt->rt6i_flags&RTF_CACHE) {
871 dst_set_expires(&rt->u.dst, 0);
872 rt->rt6i_flags |= RTF_EXPIRES;
873 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
874 rt->rt6i_node->fn_sernum = -1;
875 }
876 }
877
878 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
879 {
880 struct rt6_info *rt6 = (struct rt6_info*)dst;
881
882 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
883 rt6->rt6i_flags |= RTF_MODIFIED;
884 if (mtu < IPV6_MIN_MTU) {
885 mtu = IPV6_MIN_MTU;
886 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
887 }
888 dst->metrics[RTAX_MTU-1] = mtu;
889 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
890 }
891 }
892
893 static int ipv6_get_mtu(struct net_device *dev);
894
895 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
896 {
897 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
898
899 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
900 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
901
902 /*
903 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
904 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
905 * IPV6_MAXPLEN is also valid and means: "any MSS,
906 * rely only on pmtu discovery"
907 */
908 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
909 mtu = IPV6_MAXPLEN;
910 return mtu;
911 }
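/*
 * Worked example: for a 1500-byte link MTU the advertised MSS becomes
 * 1500 - 40 (IPv6 header) - 20 (TCP header) = 1440, assuming that is
 * not below the ip6_rt_min_advmss sysctl.
 */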
912
913 static struct dst_entry *icmp6_dst_gc_list;
914 static DEFINE_SPINLOCK(icmp6_dst_lock);
915
916 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
917 struct neighbour *neigh,
918 struct in6_addr *addr)
919 {
920 struct rt6_info *rt;
921 struct inet6_dev *idev = in6_dev_get(dev);
922 struct net *net = dev->nd_net;
923
924 if (unlikely(idev == NULL))
925 return NULL;
926
927 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
928 if (unlikely(rt == NULL)) {
929 in6_dev_put(idev);
930 goto out;
931 }
932
933 dev_hold(dev);
934 if (neigh)
935 neigh_hold(neigh);
936 else
937 neigh = ndisc_get_neigh(dev, addr);
938
939 rt->rt6i_dev = dev;
940 rt->rt6i_idev = idev;
941 rt->rt6i_nexthop = neigh;
942 atomic_set(&rt->u.dst.__refcnt, 1);
943 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
944 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
945 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
946 rt->u.dst.output = ip6_output;
947
948 #if 0 /* there's no chance to use these for ndisc */
949 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
950 ? DST_HOST
951 : 0;
952 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
953 rt->rt6i_dst.plen = 128;
954 #endif
955
956 spin_lock_bh(&icmp6_dst_lock);
957 rt->u.dst.next = icmp6_dst_gc_list;
958 icmp6_dst_gc_list = &rt->u.dst;
959 spin_unlock_bh(&icmp6_dst_lock);
960
961 fib6_force_start_gc(net);
962
963 out:
964 return &rt->u.dst;
965 }
966
967 int icmp6_dst_gc(int *more)
968 {
969 struct dst_entry *dst, *next, **pprev;
970 int freed;
971
972 next = NULL;
973 freed = 0;
974
975 spin_lock_bh(&icmp6_dst_lock);
976 pprev = &icmp6_dst_gc_list;
977
978 while ((dst = *pprev) != NULL) {
979 if (!atomic_read(&dst->__refcnt)) {
980 *pprev = dst->next;
981 dst_free(dst);
982 freed++;
983 } else {
984 pprev = &dst->next;
985 (*more)++;
986 }
987 }
988
989 spin_unlock_bh(&icmp6_dst_lock);
990
991 return freed;
992 }
993
994 static int ip6_dst_gc(struct dst_ops *ops)
995 {
996 unsigned long now = jiffies;
997 struct net *net = ops->dst_net;
998 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
999 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1000 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1001 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1002 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1003
1004 if (time_after(rt_last_gc + rt_min_interval, now) &&
1005 atomic_read(&ops->entries) <= rt_max_size)
1006 goto out;
1007
1008 net->ipv6.ip6_rt_gc_expire++;
1009 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1010 net->ipv6.ip6_rt_last_gc = now;
1011 if (atomic_read(&ops->entries) < ops->gc_thresh)
1012 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1013 out:
1014 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1015 return (atomic_read(&ops->entries) > rt_max_size);
1016 }
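/*
 * Note on the backoff above: ip6_rt_gc_expire grows by one on every
 * forced run, making fib6_run_gc() progressively more aggressive while
 * the table stays over the limits; once the entry count drops below
 * gc_thresh it is reset to half of ip6_rt_gc_timeout, and every call
 * also decays it by expire >> ip6_rt_gc_elasticity.
 */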
1017
1018 /* Clean the host part of a prefix. Not necessary for the radix tree,
1019 but it results in cleaner routing tables.
1020
1021 Remove this only once everything else is known to work!
1022 */
1023
1024 static int ipv6_get_mtu(struct net_device *dev)
1025 {
1026 int mtu = IPV6_MIN_MTU;
1027 struct inet6_dev *idev;
1028
1029 idev = in6_dev_get(dev);
1030 if (idev) {
1031 mtu = idev->cnf.mtu6;
1032 in6_dev_put(idev);
1033 }
1034 return mtu;
1035 }
1036
1037 int ipv6_get_hoplimit(struct net_device *dev)
1038 {
1039 int hoplimit = ipv6_devconf.hop_limit;
1040 struct inet6_dev *idev;
1041
1042 idev = in6_dev_get(dev);
1043 if (idev) {
1044 hoplimit = idev->cnf.hop_limit;
1045 in6_dev_put(idev);
1046 }
1047 return hoplimit;
1048 }
1049
1050 /*
1051 *
1052 */
1053
1054 int ip6_route_add(struct fib6_config *cfg)
1055 {
1056 int err;
1057 struct net *net = cfg->fc_nlinfo.nl_net;
1058 struct rt6_info *rt = NULL;
1059 struct net_device *dev = NULL;
1060 struct inet6_dev *idev = NULL;
1061 struct fib6_table *table;
1062 int addr_type;
1063
1064 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1065 return -EINVAL;
1066 #ifndef CONFIG_IPV6_SUBTREES
1067 if (cfg->fc_src_len)
1068 return -EINVAL;
1069 #endif
1070 if (cfg->fc_ifindex) {
1071 err = -ENODEV;
1072 dev = dev_get_by_index(net, cfg->fc_ifindex);
1073 if (!dev)
1074 goto out;
1075 idev = in6_dev_get(dev);
1076 if (!idev)
1077 goto out;
1078 }
1079
1080 if (cfg->fc_metric == 0)
1081 cfg->fc_metric = IP6_RT_PRIO_USER;
1082
1083 table = fib6_new_table(net, cfg->fc_table);
1084 if (table == NULL) {
1085 err = -ENOBUFS;
1086 goto out;
1087 }
1088
1089 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
1090
1091 if (rt == NULL) {
1092 err = -ENOMEM;
1093 goto out;
1094 }
1095
1096 rt->u.dst.obsolete = -1;
1097 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1098
1099 if (cfg->fc_protocol == RTPROT_UNSPEC)
1100 cfg->fc_protocol = RTPROT_BOOT;
1101 rt->rt6i_protocol = cfg->fc_protocol;
1102
1103 addr_type = ipv6_addr_type(&cfg->fc_dst);
1104
1105 if (addr_type & IPV6_ADDR_MULTICAST)
1106 rt->u.dst.input = ip6_mc_input;
1107 else
1108 rt->u.dst.input = ip6_forward;
1109
1110 rt->u.dst.output = ip6_output;
1111
1112 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1113 rt->rt6i_dst.plen = cfg->fc_dst_len;
1114 if (rt->rt6i_dst.plen == 128)
1115 rt->u.dst.flags = DST_HOST;
1116
1117 #ifdef CONFIG_IPV6_SUBTREES
1118 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1119 rt->rt6i_src.plen = cfg->fc_src_len;
1120 #endif
1121
1122 rt->rt6i_metric = cfg->fc_metric;
1123
1124 /* We cannot add true routes via loopback here,
1125 they would result in kernel looping; promote them to reject routes
1126 */
1127 if ((cfg->fc_flags & RTF_REJECT) ||
1128 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1129 /* hold loopback dev/idev if we haven't done so. */
1130 if (dev != net->loopback_dev) {
1131 if (dev) {
1132 dev_put(dev);
1133 in6_dev_put(idev);
1134 }
1135 dev = net->loopback_dev;
1136 dev_hold(dev);
1137 idev = in6_dev_get(dev);
1138 if (!idev) {
1139 err = -ENODEV;
1140 goto out;
1141 }
1142 }
1143 rt->u.dst.output = ip6_pkt_discard_out;
1144 rt->u.dst.input = ip6_pkt_discard;
1145 rt->u.dst.error = -ENETUNREACH;
1146 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1147 goto install_route;
1148 }
1149
1150 if (cfg->fc_flags & RTF_GATEWAY) {
1151 struct in6_addr *gw_addr;
1152 int gwa_type;
1153
1154 gw_addr = &cfg->fc_gateway;
1155 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1156 gwa_type = ipv6_addr_type(gw_addr);
1157
1158 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1159 struct rt6_info *grt;
1160
1161 /* IPv6 strictly prohibits using non-link-local
1162 addresses as the nexthop address.
1163 Otherwise, the router will not be able to send redirects.
1164 That is generally a good thing, but in some (rare!) circumstances
1165 (SIT, PtP, NBMA NOARP links) it is handy to allow
1166 some exceptions. --ANK
1167 */
1168 err = -EINVAL;
1169 if (!(gwa_type&IPV6_ADDR_UNICAST))
1170 goto out;
1171
1172 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1173
1174 err = -EHOSTUNREACH;
1175 if (grt == NULL)
1176 goto out;
1177 if (dev) {
1178 if (dev != grt->rt6i_dev) {
1179 dst_release(&grt->u.dst);
1180 goto out;
1181 }
1182 } else {
1183 dev = grt->rt6i_dev;
1184 idev = grt->rt6i_idev;
1185 dev_hold(dev);
1186 in6_dev_hold(grt->rt6i_idev);
1187 }
1188 if (!(grt->rt6i_flags&RTF_GATEWAY))
1189 err = 0;
1190 dst_release(&grt->u.dst);
1191
1192 if (err)
1193 goto out;
1194 }
1195 err = -EINVAL;
1196 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1197 goto out;
1198 }
1199
1200 err = -ENODEV;
1201 if (dev == NULL)
1202 goto out;
1203
1204 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1205 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1206 if (IS_ERR(rt->rt6i_nexthop)) {
1207 err = PTR_ERR(rt->rt6i_nexthop);
1208 rt->rt6i_nexthop = NULL;
1209 goto out;
1210 }
1211 }
1212
1213 rt->rt6i_flags = cfg->fc_flags;
1214
1215 install_route:
1216 if (cfg->fc_mx) {
1217 struct nlattr *nla;
1218 int remaining;
1219
1220 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1221 int type = nla_type(nla);
1222
1223 if (type) {
1224 if (type > RTAX_MAX) {
1225 err = -EINVAL;
1226 goto out;
1227 }
1228
1229 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1230 }
1231 }
1232 }
1233
1234 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1235 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1236 if (!rt->u.dst.metrics[RTAX_MTU-1])
1237 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1238 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1239 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1240 rt->u.dst.dev = dev;
1241 rt->rt6i_idev = idev;
1242 rt->rt6i_table = table;
1243
1244 cfg->fc_nlinfo.nl_net = dev->nd_net;
1245
1246 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1247
1248 out:
1249 if (dev)
1250 dev_put(dev);
1251 if (idev)
1252 in6_dev_put(idev);
1253 if (rt)
1254 dst_free(&rt->u.dst);
1255 return err;
1256 }
1257
1258 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1259 {
1260 int err;
1261 struct fib6_table *table;
1262 struct net *net = rt->rt6i_dev->nd_net;
1263
1264 if (rt == net->ipv6.ip6_null_entry)
1265 return -ENOENT;
1266
1267 table = rt->rt6i_table;
1268 write_lock_bh(&table->tb6_lock);
1269
1270 err = fib6_del(rt, info);
1271 dst_release(&rt->u.dst);
1272
1273 write_unlock_bh(&table->tb6_lock);
1274
1275 return err;
1276 }
1277
1278 int ip6_del_rt(struct rt6_info *rt)
1279 {
1280 struct nl_info info = {
1281 .nl_net = rt->rt6i_dev->nd_net,
1282 };
1283 return __ip6_del_rt(rt, &info);
1284 }
1285
1286 static int ip6_route_del(struct fib6_config *cfg)
1287 {
1288 struct fib6_table *table;
1289 struct fib6_node *fn;
1290 struct rt6_info *rt;
1291 int err = -ESRCH;
1292
1293 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1294 if (table == NULL)
1295 return err;
1296
1297 read_lock_bh(&table->tb6_lock);
1298
1299 fn = fib6_locate(&table->tb6_root,
1300 &cfg->fc_dst, cfg->fc_dst_len,
1301 &cfg->fc_src, cfg->fc_src_len);
1302
1303 if (fn) {
1304 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1305 if (cfg->fc_ifindex &&
1306 (rt->rt6i_dev == NULL ||
1307 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1308 continue;
1309 if (cfg->fc_flags & RTF_GATEWAY &&
1310 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1311 continue;
1312 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1313 continue;
1314 dst_hold(&rt->u.dst);
1315 read_unlock_bh(&table->tb6_lock);
1316
1317 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1318 }
1319 }
1320 read_unlock_bh(&table->tb6_lock);
1321
1322 return err;
1323 }
1324
1325 /*
1326 * Handle redirects
1327 */
1328 struct ip6rd_flowi {
1329 struct flowi fl;
1330 struct in6_addr gateway;
1331 };
1332
1333 static struct rt6_info *__ip6_route_redirect(struct net *net,
1334 struct fib6_table *table,
1335 struct flowi *fl,
1336 int flags)
1337 {
1338 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1339 struct rt6_info *rt;
1340 struct fib6_node *fn;
1341
1342 /*
1343 * Get the "current" route for this destination and
1344 * check if the redirect has come from an appropriate router.
1345 *
1346 * RFC 2461 specifies that redirects should only be
1347 * accepted if they come from the nexthop to the target.
1348 * Due to the way the routes are chosen, this notion
1349 * is a bit fuzzy and one might need to check all possible
1350 * routes.
1351 */
1352
1353 read_lock_bh(&table->tb6_lock);
1354 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1355 restart:
1356 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1357 /*
1358 * Current route is on-link; redirect is always invalid.
1359 *
1360 * It seems the previous statement is not always true: the sender
1361 * could be a node that considers us on-link (e.g. proxy ndisc),
1362 * but the router serving it might still decide that we should
1363 * know the truth 8)8) --ANK (980726).
1364 */
1365 if (rt6_check_expired(rt))
1366 continue;
1367 if (!(rt->rt6i_flags & RTF_GATEWAY))
1368 continue;
1369 if (fl->oif != rt->rt6i_dev->ifindex)
1370 continue;
1371 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1372 continue;
1373 break;
1374 }
1375
1376 if (!rt)
1377 rt = net->ipv6.ip6_null_entry;
1378 BACKTRACK(net, &fl->fl6_src);
1379 out:
1380 dst_hold(&rt->u.dst);
1381
1382 read_unlock_bh(&table->tb6_lock);
1383
1384 return rt;
1385 };
1386
1387 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1388 struct in6_addr *src,
1389 struct in6_addr *gateway,
1390 struct net_device *dev)
1391 {
1392 int flags = RT6_LOOKUP_F_HAS_SADDR;
1393 struct net *net = dev->nd_net;
1394 struct ip6rd_flowi rdfl = {
1395 .fl = {
1396 .oif = dev->ifindex,
1397 .nl_u = {
1398 .ip6_u = {
1399 .daddr = *dest,
1400 .saddr = *src,
1401 },
1402 },
1403 },
1404 .gateway = *gateway,
1405 };
1406
1407 if (rt6_need_strict(dest))
1408 flags |= RT6_LOOKUP_F_IFACE;
1409
1410 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1411 flags, __ip6_route_redirect);
1412 }
1413
1414 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1415 struct in6_addr *saddr,
1416 struct neighbour *neigh, u8 *lladdr, int on_link)
1417 {
1418 struct rt6_info *rt, *nrt = NULL;
1419 struct netevent_redirect netevent;
1420 struct net *net = neigh->dev->nd_net;
1421
1422 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1423
1424 if (rt == net->ipv6.ip6_null_entry) {
1425 if (net_ratelimit())
1426 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1427 "for redirect target\n");
1428 goto out;
1429 }
1430
1431 /*
1432 * We have finally decided to accept it.
1433 */
1434
1435 neigh_update(neigh, lladdr, NUD_STALE,
1436 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1437 NEIGH_UPDATE_F_OVERRIDE|
1438 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1439 NEIGH_UPDATE_F_ISROUTER))
1440 );
1441
1442 /*
1443 * Redirect received -> path was valid.
1444 * Look, redirects are sent only in response to data packets,
1445 * so that this nexthop apparently is reachable. --ANK
1446 */
1447 dst_confirm(&rt->u.dst);
1448
1449 /* Duplicate redirect: silently ignore. */
1450 if (neigh == rt->u.dst.neighbour)
1451 goto out;
1452
1453 nrt = ip6_rt_copy(rt);
1454 if (nrt == NULL)
1455 goto out;
1456
1457 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1458 if (on_link)
1459 nrt->rt6i_flags &= ~RTF_GATEWAY;
1460
1461 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1462 nrt->rt6i_dst.plen = 128;
1463 nrt->u.dst.flags |= DST_HOST;
1464
1465 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1466 nrt->rt6i_nexthop = neigh_clone(neigh);
1467 /* Reset pmtu, it may be better */
1468 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1469 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(neigh->dev->nd_net,
1470 dst_mtu(&nrt->u.dst));
1471
1472 if (ip6_ins_rt(nrt))
1473 goto out;
1474
1475 netevent.old = &rt->u.dst;
1476 netevent.new = &nrt->u.dst;
1477 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1478
1479 if (rt->rt6i_flags&RTF_CACHE) {
1480 ip6_del_rt(rt);
1481 return;
1482 }
1483
1484 out:
1485 dst_release(&rt->u.dst);
1486 return;
1487 }
1488
1489 /*
1490 * Handle ICMP "packet too big" messages
1491 * i.e. Path MTU discovery
1492 */
1493
1494 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1495 struct net_device *dev, u32 pmtu)
1496 {
1497 struct rt6_info *rt, *nrt;
1498 struct net *net = dev->nd_net;
1499 int allfrag = 0;
1500
1501 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
1502 if (rt == NULL)
1503 return;
1504
1505 if (pmtu >= dst_mtu(&rt->u.dst))
1506 goto out;
1507
1508 if (pmtu < IPV6_MIN_MTU) {
1509 /*
1510 * According to RFC 2460, when a node receives a Too Big message
1511 * reporting a PMTU below the IPv6 Minimum Link MTU (1280), it sets
1512 * the PMTU to that minimum and must include a Fragment header in
1513 * every subsequent packet to that destination.
1514 */
1515 pmtu = IPV6_MIN_MTU;
1516 allfrag = 1;
1517 }
1518
1519 /* New mtu received -> path was valid.
1520 They are sent only in response to data packets,
1521 so that this nexthop apparently is reachable. --ANK
1522 */
1523 dst_confirm(&rt->u.dst);
1524
1525 /* Host route. If it is static, it would be better
1526 not to override it but to add a new one, so that
1527 when the cache entry expires the old pmtu
1528 is restored automatically.
1529 */
1530 if (rt->rt6i_flags & RTF_CACHE) {
1531 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1532 if (allfrag)
1533 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1534 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1535 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1536 goto out;
1537 }
1538
1539 /* Network route.
1540 Two cases are possible:
1541 1. It is connected route. Action: COW
1542 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1543 */
1544 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1545 nrt = rt6_alloc_cow(rt, daddr, saddr);
1546 else
1547 nrt = rt6_alloc_clone(rt, daddr);
1548
1549 if (nrt) {
1550 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1551 if (allfrag)
1552 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1553
1554 /* According to RFC 1981, detection of a PMTU increase should not
1555 * be attempted within 5 minutes; the recommended timer is 10 minutes.
1556 * Here the route expiration time is set to ip6_rt_mtu_expires,
1557 * which is 10 minutes. After 10 minutes the decreased pmtu expires
1558 * and detection of a PMTU increase happens automatically.
1559 */
1560 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1561 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1562
1563 ip6_ins_rt(nrt);
1564 }
1565 out:
1566 dst_release(&rt->u.dst);
1567 }
1568
1569 /*
1570 * Misc support functions
1571 */
1572
1573 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1574 {
1575 struct net *net = ort->rt6i_dev->nd_net;
1576 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
1577
1578 if (rt) {
1579 rt->u.dst.input = ort->u.dst.input;
1580 rt->u.dst.output = ort->u.dst.output;
1581
1582 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1583 rt->u.dst.error = ort->u.dst.error;
1584 rt->u.dst.dev = ort->u.dst.dev;
1585 if (rt->u.dst.dev)
1586 dev_hold(rt->u.dst.dev);
1587 rt->rt6i_idev = ort->rt6i_idev;
1588 if (rt->rt6i_idev)
1589 in6_dev_hold(rt->rt6i_idev);
1590 rt->u.dst.lastuse = jiffies;
1591 rt->rt6i_expires = 0;
1592
1593 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1594 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1595 rt->rt6i_metric = 0;
1596
1597 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1598 #ifdef CONFIG_IPV6_SUBTREES
1599 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1600 #endif
1601 rt->rt6i_table = ort->rt6i_table;
1602 }
1603 return rt;
1604 }
1605
1606 #ifdef CONFIG_IPV6_ROUTE_INFO
1607 static struct rt6_info *rt6_get_route_info(struct net *net,
1608 struct in6_addr *prefix, int prefixlen,
1609 struct in6_addr *gwaddr, int ifindex)
1610 {
1611 struct fib6_node *fn;
1612 struct rt6_info *rt = NULL;
1613 struct fib6_table *table;
1614
1615 table = fib6_get_table(net, RT6_TABLE_INFO);
1616 if (table == NULL)
1617 return NULL;
1618
1619 write_lock_bh(&table->tb6_lock);
1620 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1621 if (!fn)
1622 goto out;
1623
1624 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1625 if (rt->rt6i_dev->ifindex != ifindex)
1626 continue;
1627 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1628 continue;
1629 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1630 continue;
1631 dst_hold(&rt->u.dst);
1632 break;
1633 }
1634 out:
1635 write_unlock_bh(&table->tb6_lock);
1636 return rt;
1637 }
1638
1639 static struct rt6_info *rt6_add_route_info(struct net *net,
1640 struct in6_addr *prefix, int prefixlen,
1641 struct in6_addr *gwaddr, int ifindex,
1642 unsigned pref)
1643 {
1644 struct fib6_config cfg = {
1645 .fc_table = RT6_TABLE_INFO,
1646 .fc_metric = IP6_RT_PRIO_USER,
1647 .fc_ifindex = ifindex,
1648 .fc_dst_len = prefixlen,
1649 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1650 RTF_UP | RTF_PREF(pref),
1651 .fc_nlinfo.pid = 0,
1652 .fc_nlinfo.nlh = NULL,
1653 .fc_nlinfo.nl_net = net,
1654 };
1655
1656 ipv6_addr_copy(&cfg.fc_dst, prefix);
1657 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1658
1659 /* We should treat it as a default route if prefix length is 0. */
1660 if (!prefixlen)
1661 cfg.fc_flags |= RTF_DEFAULT;
1662
1663 ip6_route_add(&cfg);
1664
1665 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1666 }
1667 #endif
1668
1669 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1670 {
1671 struct rt6_info *rt;
1672 struct fib6_table *table;
1673
1674 table = fib6_get_table(dev->nd_net, RT6_TABLE_DFLT);
1675 if (table == NULL)
1676 return NULL;
1677
1678 write_lock_bh(&table->tb6_lock);
1679 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1680 if (dev == rt->rt6i_dev &&
1681 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1682 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1683 break;
1684 }
1685 if (rt)
1686 dst_hold(&rt->u.dst);
1687 write_unlock_bh(&table->tb6_lock);
1688 return rt;
1689 }
1690
1691 EXPORT_SYMBOL(rt6_get_dflt_router);
1692
1693 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1694 struct net_device *dev,
1695 unsigned int pref)
1696 {
1697 struct fib6_config cfg = {
1698 .fc_table = RT6_TABLE_DFLT,
1699 .fc_metric = IP6_RT_PRIO_USER,
1700 .fc_ifindex = dev->ifindex,
1701 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1702 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1703 .fc_nlinfo.pid = 0,
1704 .fc_nlinfo.nlh = NULL,
1705 .fc_nlinfo.nl_net = dev->nd_net,
1706 };
1707
1708 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1709
1710 ip6_route_add(&cfg);
1711
1712 return rt6_get_dflt_router(gwaddr, dev);
1713 }
1714
1715 void rt6_purge_dflt_routers(struct net *net)
1716 {
1717 struct rt6_info *rt;
1718 struct fib6_table *table;
1719
1720 /* NOTE: Keep consistent with rt6_get_dflt_router */
1721 table = fib6_get_table(net, RT6_TABLE_DFLT);
1722 if (table == NULL)
1723 return;
1724
1725 restart:
1726 read_lock_bh(&table->tb6_lock);
1727 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1728 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1729 dst_hold(&rt->u.dst);
1730 read_unlock_bh(&table->tb6_lock);
1731 ip6_del_rt(rt);
1732 goto restart;
1733 }
1734 }
1735 read_unlock_bh(&table->tb6_lock);
1736 }
1737
1738 static void rtmsg_to_fib6_config(struct net *net,
1739 struct in6_rtmsg *rtmsg,
1740 struct fib6_config *cfg)
1741 {
1742 memset(cfg, 0, sizeof(*cfg));
1743
1744 cfg->fc_table = RT6_TABLE_MAIN;
1745 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1746 cfg->fc_metric = rtmsg->rtmsg_metric;
1747 cfg->fc_expires = rtmsg->rtmsg_info;
1748 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1749 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1750 cfg->fc_flags = rtmsg->rtmsg_flags;
1751
1752 cfg->fc_nlinfo.nl_net = net;
1753
1754 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1755 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1756 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1757 }
1758
1759 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1760 {
1761 struct fib6_config cfg;
1762 struct in6_rtmsg rtmsg;
1763 int err;
1764
1765 switch(cmd) {
1766 case SIOCADDRT: /* Add a route */
1767 case SIOCDELRT: /* Delete a route */
1768 if (!capable(CAP_NET_ADMIN))
1769 return -EPERM;
1770 err = copy_from_user(&rtmsg, arg,
1771 sizeof(struct in6_rtmsg));
1772 if (err)
1773 return -EFAULT;
1774
1775 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1776
1777 rtnl_lock();
1778 switch (cmd) {
1779 case SIOCADDRT:
1780 err = ip6_route_add(&cfg);
1781 break;
1782 case SIOCDELRT:
1783 err = ip6_route_del(&cfg);
1784 break;
1785 default:
1786 err = -EINVAL;
1787 }
1788 rtnl_unlock();
1789
1790 return err;
1791 }
1792
1793 return -EINVAL;
1794 }
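/*
 * Illustrative sketch (hypothetical userspace caller, error handling
 * omitted): adding an IPv6 route through this ioctl on an AF_INET6
 * socket.
 *
 *	struct in6_rtmsg rtmsg;
 *	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *
 *	memset(&rtmsg, 0, sizeof(rtmsg));
 *	rtmsg.rtmsg_dst = prefix;		// struct in6_addr of the destination
 *	rtmsg.rtmsg_dst_len = 64;
 *	rtmsg.rtmsg_ifindex = if_nametoindex("eth0");
 *	rtmsg.rtmsg_metric = 1;
 *	rtmsg.rtmsg_flags = RTF_UP;
 *	ioctl(fd, SIOCADDRT, &rtmsg);
 */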
1795
1796 /*
1797 * Drop the packet on the floor
1798 */
1799
1800 static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
1801 {
1802 int type;
1803 switch (ipstats_mib_noroutes) {
1804 case IPSTATS_MIB_INNOROUTES:
1805 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1806 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1807 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1808 break;
1809 }
1810 /* FALLTHROUGH */
1811 case IPSTATS_MIB_OUTNOROUTES:
1812 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1813 break;
1814 }
1815 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1816 kfree_skb(skb);
1817 return 0;
1818 }
1819
1820 static int ip6_pkt_discard(struct sk_buff *skb)
1821 {
1822 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1823 }
1824
1825 static int ip6_pkt_discard_out(struct sk_buff *skb)
1826 {
1827 skb->dev = skb->dst->dev;
1828 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1829 }
1830
1831 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1832
1833 static int ip6_pkt_prohibit(struct sk_buff *skb)
1834 {
1835 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1836 }
1837
1838 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1839 {
1840 skb->dev = skb->dst->dev;
1841 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1842 }
1843
1844 #endif
1845
1846 /*
1847 * Allocate a dst for local (unicast / anycast) address.
1848 */
1849
1850 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1851 const struct in6_addr *addr,
1852 int anycast)
1853 {
1854 struct net *net = idev->dev->nd_net;
1855 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
1856
1857 if (rt == NULL)
1858 return ERR_PTR(-ENOMEM);
1859
1860 dev_hold(net->loopback_dev);
1861 in6_dev_hold(idev);
1862
1863 rt->u.dst.flags = DST_HOST;
1864 rt->u.dst.input = ip6_input;
1865 rt->u.dst.output = ip6_output;
1866 rt->rt6i_dev = net->loopback_dev;
1867 rt->rt6i_idev = idev;
1868 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1869 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1870 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1871 rt->u.dst.obsolete = -1;
1872
1873 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1874 if (anycast)
1875 rt->rt6i_flags |= RTF_ANYCAST;
1876 else
1877 rt->rt6i_flags |= RTF_LOCAL;
1878 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1879 if (rt->rt6i_nexthop == NULL) {
1880 dst_free(&rt->u.dst);
1881 return ERR_PTR(-ENOMEM);
1882 }
1883
1884 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1885 rt->rt6i_dst.plen = 128;
1886 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1887
1888 atomic_set(&rt->u.dst.__refcnt, 1);
1889
1890 return rt;
1891 }
1892
1893 struct arg_dev_net {
1894 struct net_device *dev;
1895 struct net *net;
1896 };
1897
1898 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1899 {
1900 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1901 struct net *net = ((struct arg_dev_net *)arg)->net;
1902
1903 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1904 rt != net->ipv6.ip6_null_entry) {
1905 RT6_TRACE("deleted by ifdown %p\n", rt);
1906 return -1;
1907 }
1908 return 0;
1909 }
1910
1911 void rt6_ifdown(struct net *net, struct net_device *dev)
1912 {
1913 struct arg_dev_net adn = {
1914 .dev = dev,
1915 .net = net,
1916 };
1917
1918 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1919 }
1920
1921 struct rt6_mtu_change_arg
1922 {
1923 struct net_device *dev;
1924 unsigned mtu;
1925 };
1926
1927 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1928 {
1929 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1930 struct inet6_dev *idev;
1931 struct net *net = arg->dev->nd_net;
1932
1933 /* In IPv6, pmtu discovery is not optional,
1934 so the RTAX_MTU lock cannot disable it.
1935 We still use this lock to block changes
1936 caused by addrconf/ndisc.
1937 */
1938
1939 idev = __in6_dev_get(arg->dev);
1940 if (idev == NULL)
1941 return 0;
1942
1943 /* For an administrative MTU increase there is no way to discover
1944 an IPv6 PMTU increase, so the increased PMTU must be applied here.
1945 RFC 1981 does not cover administrative MTU increases, so updating
1946 for the increase is a MUST (e.g. jumbo frames).
1947 */
1948 /*
1949 If the new MTU is less than the route PMTU, the new MTU will be the
1950 lowest MTU in the path; update the route PMTU to reflect the
1951 decrease. If the new MTU is greater than the route PMTU, and the
1952 old MTU was the lowest MTU in the path, update the route PMTU
1953 to reflect the increase. In that case, if another node's MTU
1954 is now the lowest in the path, a Too Big message will trigger
1955 PMTU discovery.
1956 */
1957 if (rt->rt6i_dev == arg->dev &&
1958 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1959 (dst_mtu(&rt->u.dst) >= arg->mtu ||
1960 (dst_mtu(&rt->u.dst) < arg->mtu &&
1961 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1962 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1963 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
1964 }
1965 return 0;
1966 }
1967
1968 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1969 {
1970 struct rt6_mtu_change_arg arg = {
1971 .dev = dev,
1972 .mtu = mtu,
1973 };
1974
1975 fib6_clean_all(dev->nd_net, rt6_mtu_change_route, 0, &arg);
1976 }
1977
1978 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
1979 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
1980 [RTA_OIF] = { .type = NLA_U32 },
1981 [RTA_IIF] = { .type = NLA_U32 },
1982 [RTA_PRIORITY] = { .type = NLA_U32 },
1983 [RTA_METRICS] = { .type = NLA_NESTED },
1984 };
1985
1986 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1987 struct fib6_config *cfg)
1988 {
1989 struct rtmsg *rtm;
1990 struct nlattr *tb[RTA_MAX+1];
1991 int err;
1992
1993 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1994 if (err < 0)
1995 goto errout;
1996
1997 err = -EINVAL;
1998 rtm = nlmsg_data(nlh);
1999 memset(cfg, 0, sizeof(*cfg));
2000
2001 cfg->fc_table = rtm->rtm_table;
2002 cfg->fc_dst_len = rtm->rtm_dst_len;
2003 cfg->fc_src_len = rtm->rtm_src_len;
2004 cfg->fc_flags = RTF_UP;
2005 cfg->fc_protocol = rtm->rtm_protocol;
2006
2007 if (rtm->rtm_type == RTN_UNREACHABLE)
2008 cfg->fc_flags |= RTF_REJECT;
2009
2010 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2011 cfg->fc_nlinfo.nlh = nlh;
2012 cfg->fc_nlinfo.nl_net = skb->sk->sk_net;
2013
2014 if (tb[RTA_GATEWAY]) {
2015 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2016 cfg->fc_flags |= RTF_GATEWAY;
2017 }
2018
2019 if (tb[RTA_DST]) {
2020 int plen = (rtm->rtm_dst_len + 7) >> 3;
2021
2022 if (nla_len(tb[RTA_DST]) < plen)
2023 goto errout;
2024
2025 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2026 }
2027
2028 if (tb[RTA_SRC]) {
2029 int plen = (rtm->rtm_src_len + 7) >> 3;
2030
2031 if (nla_len(tb[RTA_SRC]) < plen)
2032 goto errout;
2033
2034 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2035 }
2036
2037 if (tb[RTA_OIF])
2038 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2039
2040 if (tb[RTA_PRIORITY])
2041 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2042
2043 if (tb[RTA_METRICS]) {
2044 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2045 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2046 }
2047
2048 if (tb[RTA_TABLE])
2049 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2050
2051 err = 0;
2052 errout:
2053 return err;
2054 }
2055
2056 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2057 {
2058 struct fib6_config cfg;
2059 int err;
2060
2061 err = rtm_to_fib6_config(skb, nlh, &cfg);
2062 if (err < 0)
2063 return err;
2064
2065 return ip6_route_del(&cfg);
2066 }
2067
2068 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2069 {
2070 struct fib6_config cfg;
2071 int err;
2072
2073 err = rtm_to_fib6_config(skb, nlh, &cfg);
2074 if (err < 0)
2075 return err;
2076
2077 return ip6_route_add(&cfg);
2078 }
2079
2080 static inline size_t rt6_nlmsg_size(void)
2081 {
2082 return NLMSG_ALIGN(sizeof(struct rtmsg))
2083 + nla_total_size(16) /* RTA_SRC */
2084 + nla_total_size(16) /* RTA_DST */
2085 + nla_total_size(16) /* RTA_GATEWAY */
2086 + nla_total_size(16) /* RTA_PREFSRC */
2087 + nla_total_size(4) /* RTA_TABLE */
2088 + nla_total_size(4) /* RTA_IIF */
2089 + nla_total_size(4) /* RTA_OIF */
2090 + nla_total_size(4) /* RTA_PRIORITY */
2091 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2092 + nla_total_size(sizeof(struct rta_cacheinfo));
2093 }
2094
2095 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2096 struct in6_addr *dst, struct in6_addr *src,
2097 int iif, int type, u32 pid, u32 seq,
2098 int prefix, unsigned int flags)
2099 {
2100 struct rtmsg *rtm;
2101 struct nlmsghdr *nlh;
2102 long expires;
2103 u32 table;
2104
2105 if (prefix) { /* user wants prefix routes only */
2106 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2107 /* success since this is not a prefix route */
2108 return 1;
2109 }
2110 }
2111
2112 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2113 if (nlh == NULL)
2114 return -EMSGSIZE;
2115
2116 rtm = nlmsg_data(nlh);
2117 rtm->rtm_family = AF_INET6;
2118 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2119 rtm->rtm_src_len = rt->rt6i_src.plen;
2120 rtm->rtm_tos = 0;
2121 if (rt->rt6i_table)
2122 table = rt->rt6i_table->tb6_id;
2123 else
2124 table = RT6_TABLE_UNSPEC;
2125 rtm->rtm_table = table;
2126 NLA_PUT_U32(skb, RTA_TABLE, table);
2127 if (rt->rt6i_flags&RTF_REJECT)
2128 rtm->rtm_type = RTN_UNREACHABLE;
2129 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2130 rtm->rtm_type = RTN_LOCAL;
2131 else
2132 rtm->rtm_type = RTN_UNICAST;
2133 rtm->rtm_flags = 0;
2134 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2135 rtm->rtm_protocol = rt->rt6i_protocol;
2136 if (rt->rt6i_flags&RTF_DYNAMIC)
2137 rtm->rtm_protocol = RTPROT_REDIRECT;
2138 else if (rt->rt6i_flags & RTF_ADDRCONF)
2139 rtm->rtm_protocol = RTPROT_KERNEL;
2140 else if (rt->rt6i_flags&RTF_DEFAULT)
2141 rtm->rtm_protocol = RTPROT_RA;
2142
2143 if (rt->rt6i_flags&RTF_CACHE)
2144 rtm->rtm_flags |= RTM_F_CLONED;
2145
2146 if (dst) {
2147 NLA_PUT(skb, RTA_DST, 16, dst);
2148 rtm->rtm_dst_len = 128;
2149 } else if (rtm->rtm_dst_len)
2150 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2151 #ifdef CONFIG_IPV6_SUBTREES
2152 if (src) {
2153 NLA_PUT(skb, RTA_SRC, 16, src);
2154 rtm->rtm_src_len = 128;
2155 } else if (rtm->rtm_src_len)
2156 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2157 #endif
2158 if (iif)
2159 NLA_PUT_U32(skb, RTA_IIF, iif);
2160 else if (dst) {
2161 struct in6_addr saddr_buf;
2162 if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
2163 dst, &saddr_buf) == 0)
2164 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2165 }
2166
2167 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2168 goto nla_put_failure;
2169
2170 if (rt->u.dst.neighbour)
2171 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2172
2173 if (rt->u.dst.dev)
2174 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2175
2176 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2177
2178 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2179 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2180 expires, rt->u.dst.error) < 0)
2181 goto nla_put_failure;
2182
2183 return nlmsg_end(skb, nlh);
2184
2185 nla_put_failure:
2186 nlmsg_cancel(skb, nlh);
2187 return -EMSGSIZE;
2188 }
2189
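/* Per-route callback used while dumping the FIB to userspace; honours
 * the RTM_F_PREFIX filter when the request carried a struct rtmsg.
 */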
2190 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2191 {
2192 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2193 int prefix;
2194
2195 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2196 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2197 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2198 } else
2199 prefix = 0;
2200
2201 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2202 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2203 prefix, NLM_F_MULTI);
2204 }
2205
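/* RTM_GETROUTE (single lookup, not a dump): build a flow from the
 * request attributes, resolve it through ip6_route_output() and
 * unicast the resulting route back to the requesting socket.
 */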
2206 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
2207 {
2208 struct net *net = in_skb->sk->sk_net;
2209 struct nlattr *tb[RTA_MAX+1];
2210 struct rt6_info *rt;
2211 struct sk_buff *skb;
2212 struct rtmsg *rtm;
2213 struct flowi fl;
2214 int err, iif = 0;
2215
2216 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2217 if (err < 0)
2218 goto errout;
2219
2220 err = -EINVAL;
2221 memset(&fl, 0, sizeof(fl));
2222
2223 if (tb[RTA_SRC]) {
2224 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2225 goto errout;
2226
2227 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2228 }
2229
2230 if (tb[RTA_DST]) {
2231 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2232 goto errout;
2233
2234 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2235 }
2236
2237 if (tb[RTA_IIF])
2238 iif = nla_get_u32(tb[RTA_IIF]);
2239
2240 if (tb[RTA_OIF])
2241 fl.oif = nla_get_u32(tb[RTA_OIF]);
2242
2243 if (iif) {
2244 struct net_device *dev;
2245 dev = __dev_get_by_index(net, iif);
2246 if (!dev) {
2247 err = -ENODEV;
2248 goto errout;
2249 }
2250 }
2251
2252 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2253 if (skb == NULL) {
2254 err = -ENOBUFS;
2255 goto errout;
2256 }
2257
2258 /* Reserve room for dummy headers; this skb can pass
2259  * through a good chunk of the routing engine.
2260  */
2261 skb_reset_mac_header(skb);
2262 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2263
2264 rt = (struct rt6_info *) ip6_route_output(&init_net, NULL, &fl);
2265 skb->dst = &rt->u.dst;
2266
2267 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2268 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2269 nlh->nlmsg_seq, 0, 0);
2270 if (err < 0) {
2271 kfree_skb(skb);
2272 goto errout;
2273 }
2274
2275 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2276 errout:
2277 return err;
2278 }
2279
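/* Notify RTNLGRP_IPV6_ROUTE listeners about a route change (@event is
 * typically RTM_NEWROUTE or RTM_DELROUTE).
 */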
2280 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2281 {
2282 struct sk_buff *skb;
2283 struct net *net = info->nl_net;
2284 u32 seq;
2285 int err;
2286
2287 err = -ENOBUFS;
2288 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2289
2290 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2291 if (skb == NULL)
2292 goto errout;
2293
2294 err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2295 event, info->pid, seq, 0, 0);
2296 if (err < 0) {
2297 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2298 WARN_ON(err == -EMSGSIZE);
2299 kfree_skb(skb);
2300 goto errout;
2301 }
2302 err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2303 info->nlh, gfp_any());
2304 errout:
2305 if (err < 0)
2306 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2307 }
2308
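/* When a namespace's loopback device registers, point the special
 * null (and, with CONFIG_IPV6_MULTIPLE_TABLES, prohibit/blackhole)
 * route entries at it so they carry a valid device reference.
 */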
2309 static int ip6_route_dev_notify(struct notifier_block *this,
2310 unsigned long event, void *data)
2311 {
2312 struct net_device *dev = (struct net_device *)data;
2313 struct net *net = dev->nd_net;
2314
2315 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2316 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2317 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2318 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2319 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2320 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2321 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2322 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2323 #endif
2324 }
2325
2326 return NOTIFY_OK;
2327 }
2328
2329 /*
2330 * /proc
2331 */
2332
2333 #ifdef CONFIG_PROC_FS
2334
2335 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2336
2337 struct rt6_proc_arg
2338 {
2339 char *buffer;
2340 int offset;
2341 int length;
2342 int skip;
2343 int len;
2344 };
2345
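/* Emit one /proc/net/ipv6_route line:
 *   dst dst_plen src src_plen next_hop metric refcnt use flags device
 * with addresses and numeric fields printed in hex.
 */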
2346 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2347 {
2348 struct seq_file *m = p_arg;
2349
2350 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2351 rt->rt6i_dst.plen);
2352
2353 #ifdef CONFIG_IPV6_SUBTREES
2354 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2355 rt->rt6i_src.plen);
2356 #else
2357 seq_puts(m, "00000000000000000000000000000000 00 ");
2358 #endif
2359
2360 if (rt->rt6i_nexthop) {
2361 seq_printf(m, NIP6_SEQFMT,
2362 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2363 } else {
2364 seq_puts(m, "00000000000000000000000000000000");
2365 }
2366 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2367 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2368 rt->u.dst.__use, rt->rt6i_flags,
2369 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2370 return 0;
2371 }
2372
2373 static int ipv6_route_show(struct seq_file *m, void *v)
2374 {
2375 struct net *net = (struct net *)m->private;
2376 fib6_clean_all(net, rt6_info_route, 0, m);
2377 return 0;
2378 }
2379
2380 static int ipv6_route_open(struct inode *inode, struct file *file)
2381 {
2382 struct net *net = get_proc_net(inode);
2383 if (!net)
2384 return -ENXIO;
2385 return single_open(file, ipv6_route_show, net);
2386 }
2387
2388 static int ipv6_route_release(struct inode *inode, struct file *file)
2389 {
2390 struct seq_file *seq = file->private_data;
2391 struct net *net = seq->private;
2392 put_net(net);
2393 return single_release(inode, file);
2394 }
2395
2396 static const struct file_operations ipv6_route_proc_fops = {
2397 .owner = THIS_MODULE,
2398 .open = ipv6_route_open,
2399 .read = seq_read,
2400 .llseek = seq_lseek,
2401 .release = ipv6_route_release,
2402 };
2403
2404 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2405 {
2406 struct net *net = (struct net *)seq->private;
2407 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2408 net->ipv6.rt6_stats->fib_nodes,
2409 net->ipv6.rt6_stats->fib_route_nodes,
2410 net->ipv6.rt6_stats->fib_rt_alloc,
2411 net->ipv6.rt6_stats->fib_rt_entries,
2412 net->ipv6.rt6_stats->fib_rt_cache,
2413 atomic_read(&net->ipv6.ip6_dst_ops->entries),
2414 net->ipv6.rt6_stats->fib_discarded_routes);
2415
2416 return 0;
2417 }
2418
2419 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2420 {
2421 struct net *net = get_proc_net(inode);
if (!net)
	return -ENXIO;
2422 return single_open(file, rt6_stats_seq_show, net);
2423 }
2424
2425 static int rt6_stats_seq_release(struct inode *inode, struct file *file)
2426 {
2427 struct seq_file *seq = file->private_data;
2428 struct net *net = (struct net *)seq->private;
2429 put_net(net);
2430 return single_release(inode, file);
2431 }
2432
2433 static const struct file_operations rt6_stats_seq_fops = {
2434 .owner = THIS_MODULE,
2435 .open = rt6_stats_seq_open,
2436 .read = seq_read,
2437 .llseek = seq_lseek,
2438 .release = rt6_stats_seq_release,
2439 };
2440 #endif /* CONFIG_PROC_FS */
2441
2442 #ifdef CONFIG_SYSCTL
2443
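/* Handler for the write-only "flush" entry in the route sysctl table
 * below: let proc_dointvec() store the written delay, then kick the
 * FIB garbage collector with it.
 */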
2444 static
2445 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file *filp,
2446 void __user *buffer, size_t *lenp, loff_t *ppos)
2447 {
2448 struct net *net = current->nsproxy->net_ns;
2449 int delay;
2450 if (write) {
2451 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
delay = net->ipv6.sysctl.flush_delay;
2452 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2453 return 0;
2454 } else
2455 return -EINVAL;
2456 }
2457
2458 ctl_table ipv6_route_table_template[] = {
2459 {
2460 .procname = "flush",
2461 .data = &init_net.ipv6.sysctl.flush_delay,
2462 .maxlen = sizeof(int),
2463 .mode = 0200,
2464 .proc_handler = &ipv6_sysctl_rtcache_flush
2465 },
2466 {
2467 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2468 .procname = "gc_thresh",
2469 .data = &ip6_dst_ops_template.gc_thresh,
2470 .maxlen = sizeof(int),
2471 .mode = 0644,
2472 .proc_handler = &proc_dointvec,
2473 },
2474 {
2475 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2476 .procname = "max_size",
2477 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2478 .maxlen = sizeof(int),
2479 .mode = 0644,
2480 .proc_handler = &proc_dointvec,
2481 },
2482 {
2483 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2484 .procname = "gc_min_interval",
2485 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2486 .maxlen = sizeof(int),
2487 .mode = 0644,
2488 .proc_handler = &proc_dointvec_jiffies,
2489 .strategy = &sysctl_jiffies,
2490 },
2491 {
2492 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2493 .procname = "gc_timeout",
2494 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2495 .maxlen = sizeof(int),
2496 .mode = 0644,
2497 .proc_handler = &proc_dointvec_jiffies,
2498 .strategy = &sysctl_jiffies,
2499 },
2500 {
2501 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2502 .procname = "gc_interval",
2503 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2504 .maxlen = sizeof(int),
2505 .mode = 0644,
2506 .proc_handler = &proc_dointvec_jiffies,
2507 .strategy = &sysctl_jiffies,
2508 },
2509 {
2510 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2511 .procname = "gc_elasticity",
2512 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2513 .maxlen = sizeof(int),
2514 .mode = 0644,
2515 .proc_handler = &proc_dointvec_jiffies,
2516 .strategy = &sysctl_jiffies,
2517 },
2518 {
2519 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2520 .procname = "mtu_expires",
2521 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2522 .maxlen = sizeof(int),
2523 .mode = 0644,
2524 .proc_handler = &proc_dointvec_jiffies,
2525 .strategy = &sysctl_jiffies,
2526 },
2527 {
2528 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2529 .procname = "min_adv_mss",
2530 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2531 .maxlen = sizeof(int),
2532 .mode = 0644,
2533 .proc_handler = &proc_dointvec_jiffies,
2534 .strategy = &sysctl_jiffies,
2535 },
2536 {
2537 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2538 .procname = "gc_min_interval_ms",
2539 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2540 .maxlen = sizeof(int),
2541 .mode = 0644,
2542 .proc_handler = &proc_dointvec_ms_jiffies,
2543 .strategy = &sysctl_ms_jiffies,
2544 },
2545 { .ctl_name = 0 }
2546 };
2547
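/* Duplicate the sysctl template for a namespace and point each entry
 * at that namespace's own data.
 */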
2548 struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2549 {
2550 struct ctl_table *table;
2551
2552 table = kmemdup(ipv6_route_table_template,
2553 sizeof(ipv6_route_table_template),
2554 GFP_KERNEL);
2555
2556 if (table) {
2557 table[0].data = &net->ipv6.sysctl.flush_delay;
2558 table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh;
2559 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2560 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2561 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2562 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2563 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2564 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2565 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2566 }
2567
2568 return table;
2569 }
2570 #endif
2571
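/* Per-namespace setup: clone the dst_ops template and the special
 * route entries for this namespace and, with CONFIG_PROC_FS, create
 * the ipv6_route and rt6_stats /proc entries.
 */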
2572 static int ip6_route_net_init(struct net *net)
2573 {
2574 int ret = 0;
2575
2576 ret = -ENOMEM;
2577 net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template,
2578 sizeof(*net->ipv6.ip6_dst_ops),
2579 GFP_KERNEL);
2580 if (!net->ipv6.ip6_dst_ops)
2581 goto out;
2582 net->ipv6.ip6_dst_ops->dst_net = net;
2583
2584 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2585 sizeof(*net->ipv6.ip6_null_entry),
2586 GFP_KERNEL);
2587 if (!net->ipv6.ip6_null_entry)
2588 goto out_ip6_dst_ops;
2589 net->ipv6.ip6_null_entry->u.dst.path =
2590 (struct dst_entry *)net->ipv6.ip6_null_entry;
2591 net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
2592
2593 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2594 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2595 sizeof(*net->ipv6.ip6_prohibit_entry),
2596 GFP_KERNEL);
2597 if (!net->ipv6.ip6_prohibit_entry) {
2598 kfree(net->ipv6.ip6_null_entry);
2599 goto out_ip6_dst_ops;
2600 }
2601 net->ipv6.ip6_prohibit_entry->u.dst.path =
2602 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2603 net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
2604
2605 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2606 sizeof(*net->ipv6.ip6_blk_hole_entry),
2607 GFP_KERNEL);
2608 if (!net->ipv6.ip6_blk_hole_entry) {
2609 kfree(net->ipv6.ip6_null_entry);
2610 kfree(net->ipv6.ip6_prohibit_entry);
2611 goto out_ip6_dst_ops;
2612 }
2613 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2614 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2615 net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
2616 #endif
2617
2618 #ifdef CONFIG_PROC_FS
2619 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2620 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2621 #endif
2622 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2623
2624 ret = 0;
2625 out:
2626 return ret;
2627
2628 out_ip6_dst_ops:
2629 kfree(net->ipv6.ip6_dst_ops);
2630 goto out;
2631 }
2632
2633 static void ip6_route_net_exit(struct net *net)
2634 {
2635 #ifdef CONFIG_PROC_FS
2636 proc_net_remove(net, "ipv6_route");
2637 proc_net_remove(net, "rt6_stats");
2638 #endif
2639 kfree(net->ipv6.ip6_null_entry);
2640 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2641 kfree(net->ipv6.ip6_prohibit_entry);
2642 kfree(net->ipv6.ip6_blk_hole_entry);
2643 #endif
2644 kfree(net->ipv6.ip6_dst_ops);
2645 }
2646
2647 static struct pernet_operations ip6_route_net_ops = {
2648 .init = ip6_route_net_init,
2649 .exit = ip6_route_net_exit,
2650 };
2651
2652 static struct notifier_block ip6_route_dev_notifier = {
2653 .notifier_call = ip6_route_dev_notify,
2654 .priority = 0,
2655 };
2656
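/* Subsystem initialisation: dst kmem cache, pernet state, fib6, xfrm6,
 * policy rules, rtnetlink handlers and the device notifier, unwound in
 * reverse order on failure.
 */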
2657 int __init ip6_route_init(void)
2658 {
2659 int ret;
2660
2661 ret = -ENOMEM;
2662 ip6_dst_ops_template.kmem_cachep =
2663 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2664 SLAB_HWCACHE_ALIGN, NULL);
2665 if (!ip6_dst_ops_template.kmem_cachep)
2666 goto out;
2667
2668 ret = register_pernet_subsys(&ip6_route_net_ops);
2669 if (ret)
2670 goto out_kmem_cache;
2671
2672 /* The loopback device is registered before this code runs, so the
2673  * loopback reference in rt6_info is not taken automatically; take it
2674  * manually for init_net. */
2675 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2676 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2677 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2678 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2679 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2680 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2681 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2682 #endif
2683 ret = fib6_init();
2684 if (ret)
2685 goto out_register_subsys;
2686
2687 ret = xfrm6_init();
2688 if (ret)
2689 goto out_fib6_init;
2690
2691 ret = fib6_rules_init();
2692 if (ret)
2693 goto xfrm6_init;
2694
2695 ret = -ENOBUFS;
2696 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2697 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2698 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2699 goto fib6_rules_init;
2700
2701 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2702 if (ret)
2703 goto fib6_rules_init;
2704
2705 out:
2706 return ret;
2707
2708 fib6_rules_init:
2709 fib6_rules_cleanup();
2710 xfrm6_init:
2711 xfrm6_fini();
2712 out_fib6_init:
2713 fib6_gc_cleanup();
2714 out_register_subsys:
2715 unregister_pernet_subsys(&ip6_route_net_ops);
2716 out_kmem_cache:
2717 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2718 goto out;
2719 }
2720
2721 void ip6_route_cleanup(void)
2722 {
2723 unregister_netdevice_notifier(&ip6_route_dev_notifier);
2724 fib6_rules_cleanup();
2725 xfrm6_fini();
2726 fib6_gc_cleanup();
2727 unregister_pernet_subsys(&ip6_route_net_ops);
2728 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2729 }