1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60
61 #include <asm/uaccess.h>
62
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68 const struct in6_addr *dest);
69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void ip6_dst_destroy(struct dst_entry *);
74 static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
76 static int ip6_dst_gc(struct dst_ops *ops);
77
78 static int ip6_pkt_discard(struct sk_buff *skb);
79 static int ip6_pkt_discard_out(struct sk_buff *skb);
80 static void ip6_link_failure(struct sk_buff *skb);
81 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
82 struct sk_buff *skb, u32 mtu);
83 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
84 struct sk_buff *skb);
85
86 #ifdef CONFIG_IPV6_ROUTE_INFO
87 static struct rt6_info *rt6_add_route_info(struct net *net,
88 const struct in6_addr *prefix, int prefixlen,
89 const struct in6_addr *gwaddr, int ifindex,
90 unsigned int pref);
91 static struct rt6_info *rt6_get_route_info(struct net *net,
92 const struct in6_addr *prefix, int prefixlen,
93 const struct in6_addr *gwaddr, int ifindex);
94 #endif
95
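/*
 * Copy-on-write of dst metrics for host routes: the old (possibly read-only)
 * metrics are copied into the route's inet_peer and dst->_metrics is switched
 * over with cmpxchg; non-host routes keep the shared metrics.
 */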
96 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
97 {
98 struct rt6_info *rt = (struct rt6_info *) dst;
99 struct inet_peer *peer;
100 u32 *p = NULL;
101
102 if (!(rt->dst.flags & DST_HOST))
103 return NULL;
104
105 peer = rt6_get_peer_create(rt);
106 if (peer) {
107 u32 *old_p = __DST_METRICS_PTR(old);
108 unsigned long prev, new;
109
110 p = peer->metrics;
111 if (inet_metrics_new(peer))
112 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113
114 new = (unsigned long) p;
115 prev = cmpxchg(&dst->_metrics, old, new);
116
117 if (prev != old) {
118 p = __DST_METRICS_PTR(prev);
119 if (prev & DST_METRICS_READ_ONLY)
120 p = NULL;
121 }
122 }
123 return p;
124 }
125
126 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
127 struct sk_buff *skb,
128 const void *daddr)
129 {
130 struct in6_addr *p = &rt->rt6i_gateway;
131
132 if (!ipv6_addr_any(p))
133 return (const void *) p;
134 else if (skb)
135 return &ipv6_hdr(skb)->daddr;
136 return daddr;
137 }
138
139 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
140 struct sk_buff *skb,
141 const void *daddr)
142 {
143 struct rt6_info *rt = (struct rt6_info *) dst;
144 struct neighbour *n;
145
146 daddr = choose_neigh_daddr(rt, skb, daddr);
147 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
148 if (n)
149 return n;
150 return neigh_create(&nd_tbl, daddr, dst->dev);
151 }
152
153 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
154 {
155 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
156 if (!n) {
157 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
158 if (IS_ERR(n))
159 return PTR_ERR(n);
160 }
161 rt->n = n;
162
163 return 0;
164 }
165
166 static struct dst_ops ip6_dst_ops_template = {
167 .family = AF_INET6,
168 .protocol = cpu_to_be16(ETH_P_IPV6),
169 .gc = ip6_dst_gc,
170 .gc_thresh = 1024,
171 .check = ip6_dst_check,
172 .default_advmss = ip6_default_advmss,
173 .mtu = ip6_mtu,
174 .cow_metrics = ipv6_cow_metrics,
175 .destroy = ip6_dst_destroy,
176 .ifdown = ip6_dst_ifdown,
177 .negative_advice = ip6_negative_advice,
178 .link_failure = ip6_link_failure,
179 .update_pmtu = ip6_rt_update_pmtu,
180 .redirect = rt6_do_redirect,
181 .local_out = __ip6_local_out,
182 .neigh_lookup = ip6_neigh_lookup,
183 };
184
185 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
186 {
187 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
188
189 return mtu ? : dst->dev->mtu;
190 }
191
192 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
193 struct sk_buff *skb, u32 mtu)
194 {
195 }
196
197 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
198 struct sk_buff *skb)
199 {
200 }
201
202 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
203 unsigned long old)
204 {
205 return NULL;
206 }
207
208 static struct dst_ops ip6_dst_blackhole_ops = {
209 .family = AF_INET6,
210 .protocol = cpu_to_be16(ETH_P_IPV6),
211 .destroy = ip6_dst_destroy,
212 .check = ip6_dst_check,
213 .mtu = ip6_blackhole_mtu,
214 .default_advmss = ip6_default_advmss,
215 .update_pmtu = ip6_rt_blackhole_update_pmtu,
216 .redirect = ip6_rt_blackhole_redirect,
217 .cow_metrics = ip6_rt_blackhole_cow_metrics,
218 .neigh_lookup = ip6_neigh_lookup,
219 };
220
221 static const u32 ip6_template_metrics[RTAX_MAX] = {
222 [RTAX_HOPLIMIT - 1] = 255,
223 };
224
225 static struct rt6_info ip6_null_entry_template = {
226 .dst = {
227 .__refcnt = ATOMIC_INIT(1),
228 .__use = 1,
229 .obsolete = -1,
230 .error = -ENETUNREACH,
231 .input = ip6_pkt_discard,
232 .output = ip6_pkt_discard_out,
233 },
234 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
235 .rt6i_protocol = RTPROT_KERNEL,
236 .rt6i_metric = ~(u32) 0,
237 .rt6i_ref = ATOMIC_INIT(1),
238 };
239
240 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
241
242 static int ip6_pkt_prohibit(struct sk_buff *skb);
243 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
244
245 static struct rt6_info ip6_prohibit_entry_template = {
246 .dst = {
247 .__refcnt = ATOMIC_INIT(1),
248 .__use = 1,
249 .obsolete = -1,
250 .error = -EACCES,
251 .input = ip6_pkt_prohibit,
252 .output = ip6_pkt_prohibit_out,
253 },
254 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
255 .rt6i_protocol = RTPROT_KERNEL,
256 .rt6i_metric = ~(u32) 0,
257 .rt6i_ref = ATOMIC_INIT(1),
258 };
259
260 static struct rt6_info ip6_blk_hole_entry_template = {
261 .dst = {
262 .__refcnt = ATOMIC_INIT(1),
263 .__use = 1,
264 .obsolete = -1,
265 .error = -EINVAL,
266 .input = dst_discard,
267 .output = dst_discard,
268 },
269 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
270 .rt6i_protocol = RTPROT_KERNEL,
271 .rt6i_metric = ~(u32) 0,
272 .rt6i_ref = ATOMIC_INIT(1),
273 };
274
275 #endif
276
277 /* allocate dst with ip6_dst_ops */
278 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
279 struct net_device *dev,
280 int flags,
281 struct fib6_table *table)
282 {
283 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
284 0, DST_OBSOLETE_NONE, flags);
285
286 if (rt) {
287 struct dst_entry *dst = &rt->dst;
288
289 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
290 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
291 }
292 return rt;
293 }
294
295 static void ip6_dst_destroy(struct dst_entry *dst)
296 {
297 struct rt6_info *rt = (struct rt6_info *)dst;
298 struct inet6_dev *idev = rt->rt6i_idev;
299
300 if (rt->n)
301 neigh_release(rt->n);
302
303 if (!(rt->dst.flags & DST_HOST))
304 dst_destroy_metrics_generic(dst);
305
306 if (idev) {
307 rt->rt6i_idev = NULL;
308 in6_dev_put(idev);
309 }
310
311 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
312 dst_release(dst->from);
313
314 if (rt6_has_peer(rt)) {
315 struct inet_peer *peer = rt6_peer_ptr(rt);
316 inet_putpeer(peer);
317 }
318 }
319
320 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
321
322 static u32 rt6_peer_genid(void)
323 {
324 return atomic_read(&__rt6_peer_genid);
325 }
326
327 void rt6_bind_peer(struct rt6_info *rt, int create)
328 {
329 struct inet_peer_base *base;
330 struct inet_peer *peer;
331
332 base = inetpeer_base_ptr(rt->_rt6i_peer);
333 if (!base)
334 return;
335
336 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
337 if (peer) {
338 if (!rt6_set_peer(rt, peer))
339 inet_putpeer(peer);
340 else
341 rt->rt6i_peer_genid = rt6_peer_genid();
342 }
343 }
344
345 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
346 int how)
347 {
348 struct rt6_info *rt = (struct rt6_info *)dst;
349 struct inet6_dev *idev = rt->rt6i_idev;
350 struct net_device *loopback_dev =
351 dev_net(dev)->loopback_dev;
352
353 if (dev != loopback_dev) {
354 if (idev && idev->dev == dev) {
355 struct inet6_dev *loopback_idev =
356 in6_dev_get(loopback_dev);
357 if (loopback_idev) {
358 rt->rt6i_idev = loopback_idev;
359 in6_dev_put(idev);
360 }
361 }
362 if (rt->n && rt->n->dev == dev) {
363 rt->n->dev = loopback_dev;
364 dev_hold(loopback_dev);
365 dev_put(dev);
366 }
367 }
368 }
369
370 static bool rt6_check_expired(const struct rt6_info *rt)
371 {
372 struct rt6_info *ort = NULL;
373
374 if (rt->rt6i_flags & RTF_EXPIRES) {
375 if (time_after(jiffies, rt->dst.expires))
376 return true;
377 } else if (rt->dst.from) {
378 ort = (struct rt6_info *) rt->dst.from;
379 return (ort->rt6i_flags & RTF_EXPIRES) &&
380 time_after(jiffies, ort->dst.expires);
381 }
382 return false;
383 }
384
385 static bool rt6_need_strict(const struct in6_addr *daddr)
386 {
387 return ipv6_addr_type(daddr) &
388 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
389 }
390
391 /*
392 * Route lookup. Any table->tb6_lock is implied.
393 */
394
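/*
 * Pick, among the routes hanging off one fib node, the one that matches the
 * requested output interface or, for source-based lookups, the device owning
 * saddr; falls back to a local/loopback route, or to the null entry under
 * strict interface matching.
 */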
395 static inline struct rt6_info *rt6_device_match(struct net *net,
396 struct rt6_info *rt,
397 const struct in6_addr *saddr,
398 int oif,
399 int flags)
400 {
401 struct rt6_info *local = NULL;
402 struct rt6_info *sprt;
403
404 if (!oif && ipv6_addr_any(saddr))
405 goto out;
406
407 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
408 struct net_device *dev = sprt->dst.dev;
409
410 if (oif) {
411 if (dev->ifindex == oif)
412 return sprt;
413 if (dev->flags & IFF_LOOPBACK) {
414 if (!sprt->rt6i_idev ||
415 sprt->rt6i_idev->dev->ifindex != oif) {
416 if (flags & RT6_LOOKUP_F_IFACE && oif)
417 continue;
418 if (local && (!oif ||
419 local->rt6i_idev->dev->ifindex == oif))
420 continue;
421 }
422 local = sprt;
423 }
424 } else {
425 if (ipv6_chk_addr(net, saddr, dev,
426 flags & RT6_LOOKUP_F_IFACE))
427 return sprt;
428 }
429 }
430
431 if (oif) {
432 if (local)
433 return local;
434
435 if (flags & RT6_LOOKUP_F_IFACE)
436 return net->ipv6.ip6_null_entry;
437 }
438 out:
439 return rt;
440 }
441
442 #ifdef CONFIG_IPV6_ROUTER_PREF
443 static void rt6_probe(struct rt6_info *rt)
444 {
445 struct neighbour *neigh;
446 /*
447 * Okay, this does not seem to be appropriate
448 * for now, however, we need to check if it
449 * is really so; aka Router Reachability Probing.
450 *
451 * Router Reachability Probe MUST be rate-limited
452 * to no more than one per minute.
453 */
454 rcu_read_lock();
455 neigh = rt ? rt->n : NULL;
456 if (!neigh || (neigh->nud_state & NUD_VALID))
457 goto out;
458 read_lock_bh(&neigh->lock);
459 if (!(neigh->nud_state & NUD_VALID) &&
460 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
461 struct in6_addr mcaddr;
462 struct in6_addr *target;
463
464 neigh->updated = jiffies;
465 read_unlock_bh(&neigh->lock);
466
467 target = (struct in6_addr *)&neigh->primary_key;
468 addrconf_addr_solict_mult(target, &mcaddr);
469 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
470 } else {
471 read_unlock_bh(&neigh->lock);
472 }
473 out:
474 rcu_read_unlock();
475 }
476 #else
477 static inline void rt6_probe(struct rt6_info *rt)
478 {
479 }
480 #endif
481
482 /*
483 * Default Router Selection (RFC 2461 6.3.6)
484 */
485 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
486 {
487 struct net_device *dev = rt->dst.dev;
488 if (!oif || dev->ifindex == oif)
489 return 2;
490 if ((dev->flags & IFF_LOOPBACK) &&
491 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
492 return 1;
493 return 0;
494 }
495
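/*
 * Neighbour reachability score used for router selection:
 * 2 - neighbour entry is valid, 1 - no next hop needed or state unknown,
 * 0 - neighbour has failed or does not exist.
 */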
496 static inline int rt6_check_neigh(struct rt6_info *rt)
497 {
498 struct neighbour *neigh;
499 int m;
500
501 rcu_read_lock();
502 neigh = rt->n;
503 if (rt->rt6i_flags & RTF_NONEXTHOP ||
504 !(rt->rt6i_flags & RTF_GATEWAY))
505 m = 1;
506 else if (neigh) {
507 read_lock_bh(&neigh->lock);
508 if (neigh->nud_state & NUD_VALID)
509 m = 2;
510 #ifdef CONFIG_IPV6_ROUTER_PREF
511 else if (neigh->nud_state & NUD_FAILED)
512 m = 0;
513 #endif
514 else
515 m = 1;
516 read_unlock_bh(&neigh->lock);
517 } else
518 m = 0;
519 rcu_read_unlock();
520 return m;
521 }
522
523 static int rt6_score_route(struct rt6_info *rt, int oif,
524 int strict)
525 {
526 int m, n;
527
528 m = rt6_check_dev(rt, oif);
529 if (!m && (strict & RT6_LOOKUP_F_IFACE))
530 return -1;
531 #ifdef CONFIG_IPV6_ROUTER_PREF
532 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
533 #endif
534 n = rt6_check_neigh(rt);
535 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
536 return -1;
537 return m;
538 }
539
540 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
541 int *mpri, struct rt6_info *match)
542 {
543 int m;
544
545 if (rt6_check_expired(rt))
546 goto out;
547
548 m = rt6_score_route(rt, oif, strict);
549 if (m < 0)
550 goto out;
551
552 if (m > *mpri) {
553 if (strict & RT6_LOOKUP_F_REACHABLE)
554 rt6_probe(match);
555 *mpri = m;
556 match = rt;
557 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
558 rt6_probe(rt);
559 }
560
561 out:
562 return match;
563 }
564
565 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
566 struct rt6_info *rr_head,
567 u32 metric, int oif, int strict)
568 {
569 struct rt6_info *rt, *match;
570 int mpri = -1;
571
572 match = NULL;
573 for (rt = rr_head; rt && rt->rt6i_metric == metric;
574 rt = rt->dst.rt6_next)
575 match = find_match(rt, oif, strict, &mpri, match);
576 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
577 rt = rt->dst.rt6_next)
578 match = find_match(rt, oif, strict, &mpri, match);
579
580 return match;
581 }
582
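/*
 * Score the same-metric routes starting at the node's round-robin pointer and
 * return the best one; if none is reachable, advance the round-robin pointer
 * so the next lookup tries another router.
 */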
583 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
584 {
585 struct rt6_info *match, *rt0;
586 struct net *net;
587
588 rt0 = fn->rr_ptr;
589 if (!rt0)
590 fn->rr_ptr = rt0 = fn->leaf;
591
592 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
593
594 if (!match &&
595 (strict & RT6_LOOKUP_F_REACHABLE)) {
596 struct rt6_info *next = rt0->dst.rt6_next;
597
598 /* no entries matched; do round-robin */
599 if (!next || next->rt6i_metric != rt0->rt6i_metric)
600 next = fn->leaf;
601
602 if (next != rt0)
603 fn->rr_ptr = next;
604 }
605
606 net = dev_net(rt0->dst.dev);
607 return match ? match : net->ipv6.ip6_null_entry;
608 }
609
610 #ifdef CONFIG_IPV6_ROUTE_INFO
611 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
612 const struct in6_addr *gwaddr)
613 {
614 struct net *net = dev_net(dev);
615 struct route_info *rinfo = (struct route_info *) opt;
616 struct in6_addr prefix_buf, *prefix;
617 unsigned int pref;
618 unsigned long lifetime;
619 struct rt6_info *rt;
620
621 if (len < sizeof(struct route_info)) {
622 return -EINVAL;
623 }
624
625 /* Sanity check for prefix_len and length */
626 if (rinfo->length > 3) {
627 return -EINVAL;
628 } else if (rinfo->prefix_len > 128) {
629 return -EINVAL;
630 } else if (rinfo->prefix_len > 64) {
631 if (rinfo->length < 2) {
632 return -EINVAL;
633 }
634 } else if (rinfo->prefix_len > 0) {
635 if (rinfo->length < 1) {
636 return -EINVAL;
637 }
638 }
639
640 pref = rinfo->route_pref;
641 if (pref == ICMPV6_ROUTER_PREF_INVALID)
642 return -EINVAL;
643
644 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
645
646 if (rinfo->length == 3)
647 prefix = (struct in6_addr *)rinfo->prefix;
648 else {
649 /* this function is safe */
650 ipv6_addr_prefix(&prefix_buf,
651 (struct in6_addr *)rinfo->prefix,
652 rinfo->prefix_len);
653 prefix = &prefix_buf;
654 }
655
656 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
657 dev->ifindex);
658
659 if (rt && !lifetime) {
660 ip6_del_rt(rt);
661 rt = NULL;
662 }
663
664 if (!rt && lifetime)
665 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
666 pref);
667 else if (rt)
668 rt->rt6i_flags = RTF_ROUTEINFO |
669 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
670
671 if (rt) {
672 if (!addrconf_finite_timeout(lifetime))
673 rt6_clean_expires(rt);
674 else
675 rt6_set_expires(rt, jiffies + HZ * lifetime);
676
677 dst_release(&rt->dst);
678 }
679 return 0;
680 }
681 #endif
682
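/*
 * If the lookup resolved to the null entry, walk back up the fib tree
 * (descending into source-address subtrees where present) until a node
 * carrying route info is found, then jump back to the restart label.
 */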
683 #define BACKTRACK(__net, saddr) \
684 do { \
685 if (rt == __net->ipv6.ip6_null_entry) { \
686 struct fib6_node *pn; \
687 while (1) { \
688 if (fn->fn_flags & RTN_TL_ROOT) \
689 goto out; \
690 pn = fn->parent; \
691 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
692 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
693 else \
694 fn = pn; \
695 if (fn->fn_flags & RTN_RTINFO) \
696 goto restart; \
697 } \
698 } \
699 } while (0)
700
701 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
702 struct fib6_table *table,
703 struct flowi6 *fl6, int flags)
704 {
705 struct fib6_node *fn;
706 struct rt6_info *rt;
707
708 read_lock_bh(&table->tb6_lock);
709 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
710 restart:
711 rt = fn->leaf;
712 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
713 BACKTRACK(net, &fl6->saddr);
714 out:
715 dst_use(&rt->dst, jiffies);
716 read_unlock_bh(&table->tb6_lock);
717 return rt;
718
719 }
720
721 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
722 int flags)
723 {
724 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
725 }
726 EXPORT_SYMBOL_GPL(ip6_route_lookup);
727
728 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
729 const struct in6_addr *saddr, int oif, int strict)
730 {
731 struct flowi6 fl6 = {
732 .flowi6_oif = oif,
733 .daddr = *daddr,
734 };
735 struct dst_entry *dst;
736 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
737
738 if (saddr) {
739 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
740 flags |= RT6_LOOKUP_F_HAS_SADDR;
741 }
742
743 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
744 if (dst->error == 0)
745 return (struct rt6_info *) dst;
746
747 dst_release(dst);
748
749 return NULL;
750 }
751
752 EXPORT_SYMBOL(rt6_lookup);
753
754 /* ip6_ins_rt is called with FREE table->tb6_lock.
755 It takes a new route entry; if the addition fails for any reason, the
756 route is freed. In any case, if the caller does not hold a reference, it may
757 be destroyed.
758 */
759
760 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
761 {
762 int err;
763 struct fib6_table *table;
764
765 table = rt->rt6i_table;
766 write_lock_bh(&table->tb6_lock);
767 err = fib6_add(&table->tb6_root, rt, info);
768 write_unlock_bh(&table->tb6_lock);
769
770 return err;
771 }
772
773 int ip6_ins_rt(struct rt6_info *rt)
774 {
775 struct nl_info info = {
776 .nl_net = dev_net(rt->dst.dev),
777 };
778 return __ip6_ins_rt(rt, &info);
779 }
780
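/*
 * Create a per-destination RTF_CACHE copy of a route and bind its neighbour
 * entry. On neighbour table overflow the GC sysctls are temporarily relaxed
 * and a dst GC pass is forced before retrying once (only outside softirq).
 */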
781 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
782 const struct in6_addr *daddr,
783 const struct in6_addr *saddr)
784 {
785 struct rt6_info *rt;
786
787 /*
788 * Clone the route.
789 */
790
791 rt = ip6_rt_copy(ort, daddr);
792
793 if (rt) {
794 int attempts = !in_softirq();
795
796 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
797 if (ort->rt6i_dst.plen != 128 &&
798 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
799 rt->rt6i_flags |= RTF_ANYCAST;
800 rt->rt6i_gateway = *daddr;
801 }
802
803 rt->rt6i_flags |= RTF_CACHE;
804
805 #ifdef CONFIG_IPV6_SUBTREES
806 if (rt->rt6i_src.plen && saddr) {
807 rt->rt6i_src.addr = *saddr;
808 rt->rt6i_src.plen = 128;
809 }
810 #endif
811
812 retry:
813 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
814 struct net *net = dev_net(rt->dst.dev);
815 int saved_rt_min_interval =
816 net->ipv6.sysctl.ip6_rt_gc_min_interval;
817 int saved_rt_elasticity =
818 net->ipv6.sysctl.ip6_rt_gc_elasticity;
819
820 if (attempts-- > 0) {
821 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
822 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
823
824 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
825
826 net->ipv6.sysctl.ip6_rt_gc_elasticity =
827 saved_rt_elasticity;
828 net->ipv6.sysctl.ip6_rt_gc_min_interval =
829 saved_rt_min_interval;
830 goto retry;
831 }
832
833 net_warn_ratelimited("Neighbour table overflow\n");
834 dst_free(&rt->dst);
835 return NULL;
836 }
837 }
838
839 return rt;
840 }
841
842 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
843 const struct in6_addr *daddr)
844 {
845 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
846
847 if (rt) {
848 rt->rt6i_flags |= RTF_CACHE;
849 rt->n = neigh_clone(ort->n);
850 }
851 return rt;
852 }
853
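/*
 * Core lookup for the input and output paths: select a route from the table
 * and, unless it is already a cached host route, install a per-destination
 * copy (rt6_alloc_cow when a neighbour still has to be bound, rt6_alloc_clone
 * otherwise), relooking up if the insertion race is lost.
 */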
854 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
855 struct flowi6 *fl6, int flags)
856 {
857 struct fib6_node *fn;
858 struct rt6_info *rt, *nrt;
859 int strict = 0;
860 int attempts = 3;
861 int err;
862 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
863
864 strict |= flags & RT6_LOOKUP_F_IFACE;
865
866 relookup:
867 read_lock_bh(&table->tb6_lock);
868
869 restart_2:
870 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
871
872 restart:
873 rt = rt6_select(fn, oif, strict | reachable);
874
875 BACKTRACK(net, &fl6->saddr);
876 if (rt == net->ipv6.ip6_null_entry ||
877 rt->rt6i_flags & RTF_CACHE)
878 goto out;
879
880 dst_hold(&rt->dst);
881 read_unlock_bh(&table->tb6_lock);
882
883 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
884 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
885 else if (!(rt->dst.flags & DST_HOST))
886 nrt = rt6_alloc_clone(rt, &fl6->daddr);
887 else
888 goto out2;
889
890 dst_release(&rt->dst);
891 rt = nrt ? : net->ipv6.ip6_null_entry;
892
893 dst_hold(&rt->dst);
894 if (nrt) {
895 err = ip6_ins_rt(nrt);
896 if (!err)
897 goto out2;
898 }
899
900 if (--attempts <= 0)
901 goto out2;
902
903 /*
904 * Race condition! In the gap, when table->tb6_lock was
905 * released someone could insert this route. Relookup.
906 */
907 dst_release(&rt->dst);
908 goto relookup;
909
910 out:
911 if (reachable) {
912 reachable = 0;
913 goto restart_2;
914 }
915 dst_hold(&rt->dst);
916 read_unlock_bh(&table->tb6_lock);
917 out2:
918 rt->dst.lastuse = jiffies;
919 rt->dst.__use++;
920
921 return rt;
922 }
923
924 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
925 struct flowi6 *fl6, int flags)
926 {
927 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
928 }
929
930 static struct dst_entry *ip6_route_input_lookup(struct net *net,
931 struct net_device *dev,
932 struct flowi6 *fl6, int flags)
933 {
934 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
935 flags |= RT6_LOOKUP_F_IFACE;
936
937 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
938 }
939
940 void ip6_route_input(struct sk_buff *skb)
941 {
942 const struct ipv6hdr *iph = ipv6_hdr(skb);
943 struct net *net = dev_net(skb->dev);
944 int flags = RT6_LOOKUP_F_HAS_SADDR;
945 struct flowi6 fl6 = {
946 .flowi6_iif = skb->dev->ifindex,
947 .daddr = iph->daddr,
948 .saddr = iph->saddr,
949 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
950 .flowi6_mark = skb->mark,
951 .flowi6_proto = iph->nexthdr,
952 };
953
954 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
955 }
956
957 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
958 struct flowi6 *fl6, int flags)
959 {
960 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
961 }
962
963 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
964 struct flowi6 *fl6)
965 {
966 int flags = 0;
967
968 fl6->flowi6_iif = LOOPBACK_IFINDEX;
969
970 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
971 flags |= RT6_LOOKUP_F_IFACE;
972
973 if (!ipv6_addr_any(&fl6->saddr))
974 flags |= RT6_LOOKUP_F_HAS_SADDR;
975 else if (sk)
976 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
977
978 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
979 }
980
981 EXPORT_SYMBOL(ip6_route_output);
982
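/*
 * Make a standalone copy of a route backed by ip6_dst_blackhole_ops, whose
 * PMTU-update and redirect handlers are no-ops, so the copy is never modified
 * by those events.
 */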
983 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
984 {
985 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
986 struct dst_entry *new = NULL;
987
988 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
989 if (rt) {
990 new = &rt->dst;
991
992 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
993 rt6_init_peer(rt, net->ipv6.peers);
994
995 new->__use = 1;
996 new->input = dst_discard;
997 new->output = dst_discard;
998
999 if (dst_metrics_read_only(&ort->dst))
1000 new->_metrics = ort->dst._metrics;
1001 else
1002 dst_copy_metrics(new, &ort->dst);
1003 rt->rt6i_idev = ort->rt6i_idev;
1004 if (rt->rt6i_idev)
1005 in6_dev_hold(rt->rt6i_idev);
1006
1007 rt->rt6i_gateway = ort->rt6i_gateway;
1008 rt->rt6i_flags = ort->rt6i_flags;
1009 rt6_clean_expires(rt);
1010 rt->rt6i_metric = 0;
1011
1012 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1013 #ifdef CONFIG_IPV6_SUBTREES
1014 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1015 #endif
1016
1017 dst_free(new);
1018 }
1019
1020 dst_release(dst_orig);
1021 return new ? new : ERR_PTR(-ENOMEM);
1022 }
1023
1024 /*
1025 * Destination cache support functions
1026 */
1027
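/*
 * A cached dst stays valid only while its fib node's serial number matches
 * the cookie the caller saved; the inet_peer binding is refreshed when the
 * peer generation has changed.
 */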
1028 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1029 {
1030 struct rt6_info *rt;
1031
1032 rt = (struct rt6_info *) dst;
1033
1034 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1035 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1036 if (!rt6_has_peer(rt))
1037 rt6_bind_peer(rt, 0);
1038 rt->rt6i_peer_genid = rt6_peer_genid();
1039 }
1040 return dst;
1041 }
1042 return NULL;
1043 }
1044
1045 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1046 {
1047 struct rt6_info *rt = (struct rt6_info *) dst;
1048
1049 if (rt) {
1050 if (rt->rt6i_flags & RTF_CACHE) {
1051 if (rt6_check_expired(rt)) {
1052 ip6_del_rt(rt);
1053 dst = NULL;
1054 }
1055 } else {
1056 dst_release(dst);
1057 dst = NULL;
1058 }
1059 }
1060 return dst;
1061 }
1062
1063 static void ip6_link_failure(struct sk_buff *skb)
1064 {
1065 struct rt6_info *rt;
1066
1067 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1068
1069 rt = (struct rt6_info *) skb_dst(skb);
1070 if (rt) {
1071 if (rt->rt6i_flags & RTF_CACHE)
1072 rt6_update_expires(rt, 0);
1073 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1074 rt->rt6i_node->fn_sernum = -1;
1075 }
1076 }
1077
1078 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1079 struct sk_buff *skb, u32 mtu)
1080 {
1081 struct rt6_info *rt6 = (struct rt6_info*)dst;
1082
1083 dst_confirm(dst);
1084 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1085 struct net *net = dev_net(dst->dev);
1086
1087 rt6->rt6i_flags |= RTF_MODIFIED;
1088 if (mtu < IPV6_MIN_MTU) {
1089 u32 features = dst_metric(dst, RTAX_FEATURES);
1090 mtu = IPV6_MIN_MTU;
1091 features |= RTAX_FEATURE_ALLFRAG;
1092 dst_metric_set(dst, RTAX_FEATURES, features);
1093 }
1094 dst_metric_set(dst, RTAX_MTU, mtu);
1095 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1096 }
1097 }
1098
1099 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1100 int oif, u32 mark)
1101 {
1102 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1103 struct dst_entry *dst;
1104 struct flowi6 fl6;
1105
1106 memset(&fl6, 0, sizeof(fl6));
1107 fl6.flowi6_oif = oif;
1108 fl6.flowi6_mark = mark;
1109 fl6.flowi6_flags = 0;
1110 fl6.daddr = iph->daddr;
1111 fl6.saddr = iph->saddr;
1112 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1113
1114 dst = ip6_route_output(net, NULL, &fl6);
1115 if (!dst->error)
1116 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1117 dst_release(dst);
1118 }
1119 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1120
1121 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1122 {
1123 ip6_update_pmtu(skb, sock_net(sk), mtu,
1124 sk->sk_bound_dev_if, sk->sk_mark);
1125 }
1126 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1127
1128 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1129 {
1130 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1131 struct dst_entry *dst;
1132 struct flowi6 fl6;
1133
1134 memset(&fl6, 0, sizeof(fl6));
1135 fl6.flowi6_oif = oif;
1136 fl6.flowi6_mark = mark;
1137 fl6.flowi6_flags = 0;
1138 fl6.daddr = iph->daddr;
1139 fl6.saddr = iph->saddr;
1140 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1141
1142 dst = ip6_route_output(net, NULL, &fl6);
1143 if (!dst->error)
1144 rt6_do_redirect(dst, NULL, skb);
1145 dst_release(dst);
1146 }
1147 EXPORT_SYMBOL_GPL(ip6_redirect);
1148
1149 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1150 {
1151 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1152 }
1153 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1154
1155 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1156 {
1157 struct net_device *dev = dst->dev;
1158 unsigned int mtu = dst_mtu(dst);
1159 struct net *net = dev_net(dev);
1160
1161 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1162
1163 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1164 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1165
1166 /*
1167 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1168 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1169 * IPV6_MAXPLEN is also valid and means: "any MSS,
1170 * rely only on pmtu discovery"
1171 */
1172 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1173 mtu = IPV6_MAXPLEN;
1174 return mtu;
1175 }
1176
1177 static unsigned int ip6_mtu(const struct dst_entry *dst)
1178 {
1179 struct inet6_dev *idev;
1180 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1181
1182 if (mtu)
1183 return mtu;
1184
1185 mtu = IPV6_MIN_MTU;
1186
1187 rcu_read_lock();
1188 idev = __in6_dev_get(dst->dev);
1189 if (idev)
1190 mtu = idev->cnf.mtu6;
1191 rcu_read_unlock();
1192
1193 return mtu;
1194 }
1195
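/*
 * dsts allocated for ICMPv6 are not attached to the fib; they are chained on
 * this list and reaped by icmp6_dst_gc() / icmp6_clean_all().
 */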
1196 static struct dst_entry *icmp6_dst_gc_list;
1197 static DEFINE_SPINLOCK(icmp6_dst_lock);
1198
1199 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1200 struct neighbour *neigh,
1201 struct flowi6 *fl6)
1202 {
1203 struct dst_entry *dst;
1204 struct rt6_info *rt;
1205 struct inet6_dev *idev = in6_dev_get(dev);
1206 struct net *net = dev_net(dev);
1207
1208 if (unlikely(!idev))
1209 return ERR_PTR(-ENODEV);
1210
1211 rt = ip6_dst_alloc(net, dev, 0, NULL);
1212 if (unlikely(!rt)) {
1213 in6_dev_put(idev);
1214 dst = ERR_PTR(-ENOMEM);
1215 goto out;
1216 }
1217
1218 if (neigh)
1219 neigh_hold(neigh);
1220 else {
1221 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1222 if (IS_ERR(neigh)) {
1223 in6_dev_put(idev);
1224 dst_free(&rt->dst);
1225 return ERR_CAST(neigh);
1226 }
1227 }
1228
1229 rt->dst.flags |= DST_HOST;
1230 rt->dst.output = ip6_output;
1231 rt->n = neigh;
1232 atomic_set(&rt->dst.__refcnt, 1);
1233 rt->rt6i_dst.addr = fl6->daddr;
1234 rt->rt6i_dst.plen = 128;
1235 rt->rt6i_idev = idev;
1236 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1237
1238 spin_lock_bh(&icmp6_dst_lock);
1239 rt->dst.next = icmp6_dst_gc_list;
1240 icmp6_dst_gc_list = &rt->dst;
1241 spin_unlock_bh(&icmp6_dst_lock);
1242
1243 fib6_force_start_gc(net);
1244
1245 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1246
1247 out:
1248 return dst;
1249 }
1250
1251 int icmp6_dst_gc(void)
1252 {
1253 struct dst_entry *dst, **pprev;
1254 int more = 0;
1255
1256 spin_lock_bh(&icmp6_dst_lock);
1257 pprev = &icmp6_dst_gc_list;
1258
1259 while ((dst = *pprev) != NULL) {
1260 if (!atomic_read(&dst->__refcnt)) {
1261 *pprev = dst->next;
1262 dst_free(dst);
1263 } else {
1264 pprev = &dst->next;
1265 ++more;
1266 }
1267 }
1268
1269 spin_unlock_bh(&icmp6_dst_lock);
1270
1271 return more;
1272 }
1273
1274 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1275 void *arg)
1276 {
1277 struct dst_entry *dst, **pprev;
1278
1279 spin_lock_bh(&icmp6_dst_lock);
1280 pprev = &icmp6_dst_gc_list;
1281 while ((dst = *pprev) != NULL) {
1282 struct rt6_info *rt = (struct rt6_info *) dst;
1283 if (func(rt, arg)) {
1284 *pprev = dst->next;
1285 dst_free(dst);
1286 } else {
1287 pprev = &dst->next;
1288 }
1289 }
1290 spin_unlock_bh(&icmp6_dst_lock);
1291 }
1292
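/*
 * dst garbage collection, rate-limited by ip6_rt_gc_min_interval: each run
 * bumps the expiry handed to fib6_run_gc() and lets it decay again by the
 * elasticity factor, so GC pressure rises only while the table stays large.
 */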
1293 static int ip6_dst_gc(struct dst_ops *ops)
1294 {
1295 unsigned long now = jiffies;
1296 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1297 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1298 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1299 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1300 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1301 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1302 int entries;
1303
1304 entries = dst_entries_get_fast(ops);
1305 if (time_after(rt_last_gc + rt_min_interval, now) &&
1306 entries <= rt_max_size)
1307 goto out;
1308
1309 net->ipv6.ip6_rt_gc_expire++;
1310 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1311 net->ipv6.ip6_rt_last_gc = now;
1312 entries = dst_entries_get_slow(ops);
1313 if (entries < ops->gc_thresh)
1314 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1315 out:
1316 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1317 return entries > rt_max_size;
1318 }
1319
1320 /* Clean host part of a prefix. Not necessary in radix tree,
1321 but results in cleaner routing tables.
1322
1323 Remove it only when all the things will work!
1324 */
1325
1326 int ip6_dst_hoplimit(struct dst_entry *dst)
1327 {
1328 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1329 if (hoplimit == 0) {
1330 struct net_device *dev = dst->dev;
1331 struct inet6_dev *idev;
1332
1333 rcu_read_lock();
1334 idev = __in6_dev_get(dev);
1335 if (idev)
1336 hoplimit = idev->cnf.hop_limit;
1337 else
1338 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1339 rcu_read_unlock();
1340 }
1341 return hoplimit;
1342 }
1343 EXPORT_SYMBOL(ip6_dst_hoplimit);
1344
1345 /*
1346 *
1347 */
1348
1349 int ip6_route_add(struct fib6_config *cfg)
1350 {
1351 int err;
1352 struct net *net = cfg->fc_nlinfo.nl_net;
1353 struct rt6_info *rt = NULL;
1354 struct net_device *dev = NULL;
1355 struct inet6_dev *idev = NULL;
1356 struct fib6_table *table;
1357 int addr_type;
1358
1359 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1360 return -EINVAL;
1361 #ifndef CONFIG_IPV6_SUBTREES
1362 if (cfg->fc_src_len)
1363 return -EINVAL;
1364 #endif
1365 if (cfg->fc_ifindex) {
1366 err = -ENODEV;
1367 dev = dev_get_by_index(net, cfg->fc_ifindex);
1368 if (!dev)
1369 goto out;
1370 idev = in6_dev_get(dev);
1371 if (!idev)
1372 goto out;
1373 }
1374
1375 if (cfg->fc_metric == 0)
1376 cfg->fc_metric = IP6_RT_PRIO_USER;
1377
1378 err = -ENOBUFS;
1379 if (cfg->fc_nlinfo.nlh &&
1380 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1381 table = fib6_get_table(net, cfg->fc_table);
1382 if (!table) {
1383 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1384 table = fib6_new_table(net, cfg->fc_table);
1385 }
1386 } else {
1387 table = fib6_new_table(net, cfg->fc_table);
1388 }
1389
1390 if (!table)
1391 goto out;
1392
1393 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1394
1395 if (!rt) {
1396 err = -ENOMEM;
1397 goto out;
1398 }
1399
1400 rt->dst.obsolete = -1;
1401
1402 if (cfg->fc_flags & RTF_EXPIRES)
1403 rt6_set_expires(rt, jiffies +
1404 clock_t_to_jiffies(cfg->fc_expires));
1405 else
1406 rt6_clean_expires(rt);
1407
1408 if (cfg->fc_protocol == RTPROT_UNSPEC)
1409 cfg->fc_protocol = RTPROT_BOOT;
1410 rt->rt6i_protocol = cfg->fc_protocol;
1411
1412 addr_type = ipv6_addr_type(&cfg->fc_dst);
1413
1414 if (addr_type & IPV6_ADDR_MULTICAST)
1415 rt->dst.input = ip6_mc_input;
1416 else if (cfg->fc_flags & RTF_LOCAL)
1417 rt->dst.input = ip6_input;
1418 else
1419 rt->dst.input = ip6_forward;
1420
1421 rt->dst.output = ip6_output;
1422
1423 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1424 rt->rt6i_dst.plen = cfg->fc_dst_len;
1425 if (rt->rt6i_dst.plen == 128)
1426 rt->dst.flags |= DST_HOST;
1427
1428 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1429 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1430 if (!metrics) {
1431 err = -ENOMEM;
1432 goto out;
1433 }
1434 dst_init_metrics(&rt->dst, metrics, 0);
1435 }
1436 #ifdef CONFIG_IPV6_SUBTREES
1437 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1438 rt->rt6i_src.plen = cfg->fc_src_len;
1439 #endif
1440
1441 rt->rt6i_metric = cfg->fc_metric;
1442
1443 /* We cannot add true routes via loopback here,
1444 they would result in kernel looping; promote them to reject routes
1445 */
1446 if ((cfg->fc_flags & RTF_REJECT) ||
1447 (dev && (dev->flags & IFF_LOOPBACK) &&
1448 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1449 !(cfg->fc_flags & RTF_LOCAL))) {
1450 /* hold loopback dev/idev if we haven't done so. */
1451 if (dev != net->loopback_dev) {
1452 if (dev) {
1453 dev_put(dev);
1454 in6_dev_put(idev);
1455 }
1456 dev = net->loopback_dev;
1457 dev_hold(dev);
1458 idev = in6_dev_get(dev);
1459 if (!idev) {
1460 err = -ENODEV;
1461 goto out;
1462 }
1463 }
1464 rt->dst.output = ip6_pkt_discard_out;
1465 rt->dst.input = ip6_pkt_discard;
1466 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1467 switch (cfg->fc_type) {
1468 case RTN_BLACKHOLE:
1469 rt->dst.error = -EINVAL;
1470 break;
1471 case RTN_PROHIBIT:
1472 rt->dst.error = -EACCES;
1473 break;
1474 case RTN_THROW:
1475 rt->dst.error = -EAGAIN;
1476 break;
1477 default:
1478 rt->dst.error = -ENETUNREACH;
1479 break;
1480 }
1481 goto install_route;
1482 }
1483
1484 if (cfg->fc_flags & RTF_GATEWAY) {
1485 const struct in6_addr *gw_addr;
1486 int gwa_type;
1487
1488 gw_addr = &cfg->fc_gateway;
1489 rt->rt6i_gateway = *gw_addr;
1490 gwa_type = ipv6_addr_type(gw_addr);
1491
1492 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1493 struct rt6_info *grt;
1494
1495 /* IPv6 strictly inhibits using non-link-local
1496 addresses as a nexthop address.
1497 Otherwise, the router will not be able to send redirects.
1498 It is very good, but in some (rare!) circumstances
1499 (SIT, PtP, NBMA NOARP links) it is handy to allow
1500 some exceptions. --ANK
1501 */
1502 err = -EINVAL;
1503 if (!(gwa_type & IPV6_ADDR_UNICAST))
1504 goto out;
1505
1506 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1507
1508 err = -EHOSTUNREACH;
1509 if (!grt)
1510 goto out;
1511 if (dev) {
1512 if (dev != grt->dst.dev) {
1513 dst_release(&grt->dst);
1514 goto out;
1515 }
1516 } else {
1517 dev = grt->dst.dev;
1518 idev = grt->rt6i_idev;
1519 dev_hold(dev);
1520 in6_dev_hold(grt->rt6i_idev);
1521 }
1522 if (!(grt->rt6i_flags & RTF_GATEWAY))
1523 err = 0;
1524 dst_release(&grt->dst);
1525
1526 if (err)
1527 goto out;
1528 }
1529 err = -EINVAL;
1530 if (!dev || (dev->flags & IFF_LOOPBACK))
1531 goto out;
1532 }
1533
1534 err = -ENODEV;
1535 if (!dev)
1536 goto out;
1537
1538 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1539 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1540 err = -EINVAL;
1541 goto out;
1542 }
1543 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1544 rt->rt6i_prefsrc.plen = 128;
1545 } else
1546 rt->rt6i_prefsrc.plen = 0;
1547
1548 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1549 err = rt6_bind_neighbour(rt, dev);
1550 if (err)
1551 goto out;
1552 }
1553
1554 rt->rt6i_flags = cfg->fc_flags;
1555
1556 install_route:
1557 if (cfg->fc_mx) {
1558 struct nlattr *nla;
1559 int remaining;
1560
1561 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1562 int type = nla_type(nla);
1563
1564 if (type) {
1565 if (type > RTAX_MAX) {
1566 err = -EINVAL;
1567 goto out;
1568 }
1569
1570 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1571 }
1572 }
1573 }
1574
1575 rt->dst.dev = dev;
1576 rt->rt6i_idev = idev;
1577 rt->rt6i_table = table;
1578
1579 cfg->fc_nlinfo.nl_net = dev_net(dev);
1580
1581 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1582
1583 out:
1584 if (dev)
1585 dev_put(dev);
1586 if (idev)
1587 in6_dev_put(idev);
1588 if (rt)
1589 dst_free(&rt->dst);
1590 return err;
1591 }
1592
1593 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1594 {
1595 int err;
1596 struct fib6_table *table;
1597 struct net *net = dev_net(rt->dst.dev);
1598
1599 if (rt == net->ipv6.ip6_null_entry)
1600 return -ENOENT;
1601
1602 table = rt->rt6i_table;
1603 write_lock_bh(&table->tb6_lock);
1604
1605 err = fib6_del(rt, info);
1606 dst_release(&rt->dst);
1607
1608 write_unlock_bh(&table->tb6_lock);
1609
1610 return err;
1611 }
1612
1613 int ip6_del_rt(struct rt6_info *rt)
1614 {
1615 struct nl_info info = {
1616 .nl_net = dev_net(rt->dst.dev),
1617 };
1618 return __ip6_del_rt(rt, &info);
1619 }
1620
1621 static int ip6_route_del(struct fib6_config *cfg)
1622 {
1623 struct fib6_table *table;
1624 struct fib6_node *fn;
1625 struct rt6_info *rt;
1626 int err = -ESRCH;
1627
1628 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1629 if (!table)
1630 return err;
1631
1632 read_lock_bh(&table->tb6_lock);
1633
1634 fn = fib6_locate(&table->tb6_root,
1635 &cfg->fc_dst, cfg->fc_dst_len,
1636 &cfg->fc_src, cfg->fc_src_len);
1637
1638 if (fn) {
1639 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1640 if (cfg->fc_ifindex &&
1641 (!rt->dst.dev ||
1642 rt->dst.dev->ifindex != cfg->fc_ifindex))
1643 continue;
1644 if (cfg->fc_flags & RTF_GATEWAY &&
1645 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1646 continue;
1647 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1648 continue;
1649 dst_hold(&rt->dst);
1650 read_unlock_bh(&table->tb6_lock);
1651
1652 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1653 }
1654 }
1655 read_unlock_bh(&table->tb6_lock);
1656
1657 return err;
1658 }
1659
1660 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1661 {
1662 struct net *net = dev_net(skb->dev);
1663 struct netevent_redirect netevent;
1664 struct rt6_info *rt, *nrt = NULL;
1665 const struct in6_addr *target;
1666 struct ndisc_options ndopts;
1667 const struct in6_addr *dest;
1668 struct neighbour *old_neigh;
1669 struct inet6_dev *in6_dev;
1670 struct neighbour *neigh;
1671 struct icmp6hdr *icmph;
1672 int optlen, on_link;
1673 u8 *lladdr;
1674
1675 optlen = skb->tail - skb->transport_header;
1676 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1677
1678 if (optlen < 0) {
1679 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1680 return;
1681 }
1682
1683 icmph = icmp6_hdr(skb);
1684 target = (const struct in6_addr *) (icmph + 1);
1685 dest = target + 1;
1686
1687 if (ipv6_addr_is_multicast(dest)) {
1688 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1689 return;
1690 }
1691
1692 on_link = 0;
1693 if (ipv6_addr_equal(dest, target)) {
1694 on_link = 1;
1695 } else if (ipv6_addr_type(target) !=
1696 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1697 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1698 return;
1699 }
1700
1701 in6_dev = __in6_dev_get(skb->dev);
1702 if (!in6_dev)
1703 return;
1704 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1705 return;
1706
1707 /* RFC2461 8.1:
1708 * The IP source address of the Redirect MUST be the same as the current
1709 * first-hop router for the specified ICMP Destination Address.
1710 */
1711
1712 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1713 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1714 return;
1715 }
1716
1717 lladdr = NULL;
1718 if (ndopts.nd_opts_tgt_lladdr) {
1719 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1720 skb->dev);
1721 if (!lladdr) {
1722 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1723 return;
1724 }
1725 }
1726
1727 rt = (struct rt6_info *) dst;
1728 if (rt == net->ipv6.ip6_null_entry) {
1729 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1730 return;
1731 }
1732
1733 /* Redirect received -> path was valid.
1734 * Look, redirects are sent only in response to data packets,
1735 * so that this nexthop apparently is reachable. --ANK
1736 */
1737 dst_confirm(&rt->dst);
1738
1739 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1740 if (!neigh)
1741 return;
1742
1743 /* Duplicate redirect: silently ignore. */
1744 old_neigh = rt->n;
1745 if (neigh == old_neigh)
1746 goto out;
1747
1748 /*
1749 * We have finally decided to accept it.
1750 */
1751
1752 neigh_update(neigh, lladdr, NUD_STALE,
1753 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1754 NEIGH_UPDATE_F_OVERRIDE|
1755 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1756 NEIGH_UPDATE_F_ISROUTER))
1757 );
1758
1759 nrt = ip6_rt_copy(rt, dest);
1760 if (!nrt)
1761 goto out;
1762
1763 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1764 if (on_link)
1765 nrt->rt6i_flags &= ~RTF_GATEWAY;
1766
1767 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1768 nrt->n = neigh_clone(neigh);
1769
1770 if (ip6_ins_rt(nrt))
1771 goto out;
1772
1773 netevent.old = &rt->dst;
1774 netevent.old_neigh = old_neigh;
1775 netevent.new = &nrt->dst;
1776 netevent.new_neigh = neigh;
1777 netevent.daddr = dest;
1778 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1779
1780 if (rt->rt6i_flags & RTF_CACHE) {
1781 rt = (struct rt6_info *) dst_clone(&rt->dst);
1782 ip6_del_rt(rt);
1783 }
1784
1785 out:
1786 neigh_release(neigh);
1787 }
1788
1789 /*
1790 * Misc support functions
1791 */
1792
1793 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1794 const struct in6_addr *dest)
1795 {
1796 struct net *net = dev_net(ort->dst.dev);
1797 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1798 ort->rt6i_table);
1799
1800 if (rt) {
1801 rt->dst.input = ort->dst.input;
1802 rt->dst.output = ort->dst.output;
1803 rt->dst.flags |= DST_HOST;
1804
1805 rt->rt6i_dst.addr = *dest;
1806 rt->rt6i_dst.plen = 128;
1807 dst_copy_metrics(&rt->dst, &ort->dst);
1808 rt->dst.error = ort->dst.error;
1809 rt->rt6i_idev = ort->rt6i_idev;
1810 if (rt->rt6i_idev)
1811 in6_dev_hold(rt->rt6i_idev);
1812 rt->dst.lastuse = jiffies;
1813
1814 rt->rt6i_gateway = ort->rt6i_gateway;
1815 rt->rt6i_flags = ort->rt6i_flags;
1816 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1817 (RTF_DEFAULT | RTF_ADDRCONF))
1818 rt6_set_from(rt, ort);
1819 else
1820 rt6_clean_expires(rt);
1821 rt->rt6i_metric = 0;
1822
1823 #ifdef CONFIG_IPV6_SUBTREES
1824 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1825 #endif
1826 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1827 rt->rt6i_table = ort->rt6i_table;
1828 }
1829 return rt;
1830 }
1831
1832 #ifdef CONFIG_IPV6_ROUTE_INFO
1833 static struct rt6_info *rt6_get_route_info(struct net *net,
1834 const struct in6_addr *prefix, int prefixlen,
1835 const struct in6_addr *gwaddr, int ifindex)
1836 {
1837 struct fib6_node *fn;
1838 struct rt6_info *rt = NULL;
1839 struct fib6_table *table;
1840
1841 table = fib6_get_table(net, RT6_TABLE_INFO);
1842 if (!table)
1843 return NULL;
1844
1845 write_lock_bh(&table->tb6_lock);
1846 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1847 if (!fn)
1848 goto out;
1849
1850 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1851 if (rt->dst.dev->ifindex != ifindex)
1852 continue;
1853 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1854 continue;
1855 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1856 continue;
1857 dst_hold(&rt->dst);
1858 break;
1859 }
1860 out:
1861 write_unlock_bh(&table->tb6_lock);
1862 return rt;
1863 }
1864
1865 static struct rt6_info *rt6_add_route_info(struct net *net,
1866 const struct in6_addr *prefix, int prefixlen,
1867 const struct in6_addr *gwaddr, int ifindex,
1868 unsigned int pref)
1869 {
1870 struct fib6_config cfg = {
1871 .fc_table = RT6_TABLE_INFO,
1872 .fc_metric = IP6_RT_PRIO_USER,
1873 .fc_ifindex = ifindex,
1874 .fc_dst_len = prefixlen,
1875 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1876 RTF_UP | RTF_PREF(pref),
1877 .fc_nlinfo.portid = 0,
1878 .fc_nlinfo.nlh = NULL,
1879 .fc_nlinfo.nl_net = net,
1880 };
1881
1882 cfg.fc_dst = *prefix;
1883 cfg.fc_gateway = *gwaddr;
1884
1885 /* We should treat it as a default route if prefix length is 0. */
1886 if (!prefixlen)
1887 cfg.fc_flags |= RTF_DEFAULT;
1888
1889 ip6_route_add(&cfg);
1890
1891 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1892 }
1893 #endif
1894
1895 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1896 {
1897 struct rt6_info *rt;
1898 struct fib6_table *table;
1899
1900 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1901 if (!table)
1902 return NULL;
1903
1904 write_lock_bh(&table->tb6_lock);
1905 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1906 if (dev == rt->dst.dev &&
1907 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1908 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1909 break;
1910 }
1911 if (rt)
1912 dst_hold(&rt->dst);
1913 write_unlock_bh(&table->tb6_lock);
1914 return rt;
1915 }
1916
1917 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1918 struct net_device *dev,
1919 unsigned int pref)
1920 {
1921 struct fib6_config cfg = {
1922 .fc_table = RT6_TABLE_DFLT,
1923 .fc_metric = IP6_RT_PRIO_USER,
1924 .fc_ifindex = dev->ifindex,
1925 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1926 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1927 .fc_nlinfo.portid = 0,
1928 .fc_nlinfo.nlh = NULL,
1929 .fc_nlinfo.nl_net = dev_net(dev),
1930 };
1931
1932 cfg.fc_gateway = *gwaddr;
1933
1934 ip6_route_add(&cfg);
1935
1936 return rt6_get_dflt_router(gwaddr, dev);
1937 }
1938
1939 void rt6_purge_dflt_routers(struct net *net)
1940 {
1941 struct rt6_info *rt;
1942 struct fib6_table *table;
1943
1944 /* NOTE: Keep consistent with rt6_get_dflt_router */
1945 table = fib6_get_table(net, RT6_TABLE_DFLT);
1946 if (!table)
1947 return;
1948
1949 restart:
1950 read_lock_bh(&table->tb6_lock);
1951 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1952 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1953 dst_hold(&rt->dst);
1954 read_unlock_bh(&table->tb6_lock);
1955 ip6_del_rt(rt);
1956 goto restart;
1957 }
1958 }
1959 read_unlock_bh(&table->tb6_lock);
1960 }
1961
1962 static void rtmsg_to_fib6_config(struct net *net,
1963 struct in6_rtmsg *rtmsg,
1964 struct fib6_config *cfg)
1965 {
1966 memset(cfg, 0, sizeof(*cfg));
1967
1968 cfg->fc_table = RT6_TABLE_MAIN;
1969 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1970 cfg->fc_metric = rtmsg->rtmsg_metric;
1971 cfg->fc_expires = rtmsg->rtmsg_info;
1972 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1973 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1974 cfg->fc_flags = rtmsg->rtmsg_flags;
1975
1976 cfg->fc_nlinfo.nl_net = net;
1977
1978 cfg->fc_dst = rtmsg->rtmsg_dst;
1979 cfg->fc_src = rtmsg->rtmsg_src;
1980 cfg->fc_gateway = rtmsg->rtmsg_gateway;
1981 }
1982
1983 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1984 {
1985 struct fib6_config cfg;
1986 struct in6_rtmsg rtmsg;
1987 int err;
1988
1989 switch(cmd) {
1990 case SIOCADDRT: /* Add a route */
1991 case SIOCDELRT: /* Delete a route */
1992 if (!capable(CAP_NET_ADMIN))
1993 return -EPERM;
1994 err = copy_from_user(&rtmsg, arg,
1995 sizeof(struct in6_rtmsg));
1996 if (err)
1997 return -EFAULT;
1998
1999 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2000
2001 rtnl_lock();
2002 switch (cmd) {
2003 case SIOCADDRT:
2004 err = ip6_route_add(&cfg);
2005 break;
2006 case SIOCDELRT:
2007 err = ip6_route_del(&cfg);
2008 break;
2009 default:
2010 err = -EINVAL;
2011 }
2012 rtnl_unlock();
2013
2014 return err;
2015 }
2016
2017 return -EINVAL;
2018 }
2019
2020 /*
2021 * Drop the packet on the floor
2022 */
2023
2024 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2025 {
2026 int type;
2027 struct dst_entry *dst = skb_dst(skb);
2028 switch (ipstats_mib_noroutes) {
2029 case IPSTATS_MIB_INNOROUTES:
2030 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2031 if (type == IPV6_ADDR_ANY) {
2032 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2033 IPSTATS_MIB_INADDRERRORS);
2034 break;
2035 }
2036 /* FALLTHROUGH */
2037 case IPSTATS_MIB_OUTNOROUTES:
2038 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2039 ipstats_mib_noroutes);
2040 break;
2041 }
2042 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2043 kfree_skb(skb);
2044 return 0;
2045 }
2046
2047 static int ip6_pkt_discard(struct sk_buff *skb)
2048 {
2049 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2050 }
2051
2052 static int ip6_pkt_discard_out(struct sk_buff *skb)
2053 {
2054 skb->dev = skb_dst(skb)->dev;
2055 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2056 }
2057
2058 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2059
2060 static int ip6_pkt_prohibit(struct sk_buff *skb)
2061 {
2062 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2063 }
2064
2065 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2066 {
2067 skb->dev = skb_dst(skb)->dev;
2068 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2069 }
2070
2071 #endif
2072
2073 /*
2074 * Allocate a dst for local (unicast / anycast) address.
2075 */
2076
2077 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2078 const struct in6_addr *addr,
2079 bool anycast)
2080 {
2081 struct net *net = dev_net(idev->dev);
2082 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2083 int err;
2084
2085 if (!rt) {
2086 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2087 return ERR_PTR(-ENOMEM);
2088 }
2089
2090 in6_dev_hold(idev);
2091
2092 rt->dst.flags |= DST_HOST;
2093 rt->dst.input = ip6_input;
2094 rt->dst.output = ip6_output;
2095 rt->rt6i_idev = idev;
2096 rt->dst.obsolete = -1;
2097
2098 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2099 if (anycast)
2100 rt->rt6i_flags |= RTF_ANYCAST;
2101 else
2102 rt->rt6i_flags |= RTF_LOCAL;
2103 err = rt6_bind_neighbour(rt, rt->dst.dev);
2104 if (err) {
2105 dst_free(&rt->dst);
2106 return ERR_PTR(err);
2107 }
2108
2109 rt->rt6i_dst.addr = *addr;
2110 rt->rt6i_dst.plen = 128;
2111 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2112
2113 atomic_set(&rt->dst.__refcnt, 1);
2114
2115 return rt;
2116 }
2117
2118 int ip6_route_get_saddr(struct net *net,
2119 struct rt6_info *rt,
2120 const struct in6_addr *daddr,
2121 unsigned int prefs,
2122 struct in6_addr *saddr)
2123 {
2124 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2125 int err = 0;
2126 if (rt->rt6i_prefsrc.plen)
2127 *saddr = rt->rt6i_prefsrc.addr;
2128 else
2129 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2130 daddr, prefs, saddr);
2131 return err;
2132 }
2133
2134 /* remove deleted ip from prefsrc entries */
2135 struct arg_dev_net_ip {
2136 struct net_device *dev;
2137 struct net *net;
2138 struct in6_addr *addr;
2139 };
2140
2141 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2142 {
2143 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2144 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2145 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2146
2147 if (((void *)rt->dst.dev == dev || !dev) &&
2148 rt != net->ipv6.ip6_null_entry &&
2149 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2150 /* remove prefsrc entry */
2151 rt->rt6i_prefsrc.plen = 0;
2152 }
2153 return 0;
2154 }
2155
2156 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2157 {
2158 struct net *net = dev_net(ifp->idev->dev);
2159 struct arg_dev_net_ip adni = {
2160 .dev = ifp->idev->dev,
2161 .net = net,
2162 .addr = &ifp->addr,
2163 };
2164 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2165 }
2166
2167 struct arg_dev_net {
2168 struct net_device *dev;
2169 struct net *net;
2170 };
2171
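/* fib6_clean_all() callback: returning -1 asks the tree walker to drop
 * routes bound to the device going down (or to any device when dev is
 * NULL), sparing the per-namespace null entry.
 */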
2172 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2173 {
2174 const struct arg_dev_net *adn = arg;
2175 const struct net_device *dev = adn->dev;
2176
2177 if ((rt->dst.dev == dev || !dev) &&
2178 rt != adn->net->ipv6.ip6_null_entry)
2179 return -1;
2180
2181 return 0;
2182 }
2183
2184 void rt6_ifdown(struct net *net, struct net_device *dev)
2185 {
2186 struct arg_dev_net adn = {
2187 .dev = dev,
2188 .net = net,
2189 };
2190
2191 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2192 icmp6_clean_all(fib6_ifdown, &adn);
2193 }
2194
2195 struct rt6_mtu_change_arg {
2196 struct net_device *dev;
2197 unsigned int mtu;
2198 };
2199
2200 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2201 {
2202 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2203 struct inet6_dev *idev;
2204
2205 /* In IPv6, PMTU discovery is not optional,
2206 so the RTAX_MTU lock cannot disable it.
2207 We still use this lock to block changes
2208 caused by addrconf/ndisc.
2209 */
2210
2211 idev = __in6_dev_get(arg->dev);
2212 if (!idev)
2213 return 0;
2214
2215 /* For an administrative MTU increase there is no way to discover
2216 an IPv6 PMTU increase, so the PMTU increase must be applied here.
2217 Since RFC 1981 doesn't cover administrative MTU increases
2218 (e.g. enabling jumbo frames), updating the PMTU here is a MUST.
2219 */
2220 /*
2221 If the new MTU is less than the route PMTU, the new MTU will be the
2222 lowest MTU in the path; update the route PMTU to reflect the PMTU
2223 decrease. If the new MTU is greater than the route PMTU, and the
2224 old MTU is the lowest MTU in the path, update the route PMTU
2225 to reflect the increase. In that case, if another node now has the
2226 lowest MTU in the path, a Packet Too Big message will trigger
2227 PMTU discovery again.
2228 */
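/* Illustrative example (assumed figures): when the device MTU drops from
 * 1500 to 1280, any cached PMTU >= 1280 is lowered to 1280; when the MTU
 * later returns to 1500 and the cached PMTU still equals the old device
 * MTU (idev->cnf.mtu6, not yet updated at this point), it is raised back.
 */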
2229 if (rt->dst.dev == arg->dev &&
2230 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2231 (dst_mtu(&rt->dst) >= arg->mtu ||
2232 (dst_mtu(&rt->dst) < arg->mtu &&
2233 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2234 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2235 }
2236 return 0;
2237 }
2238
2239 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2240 {
2241 struct rt6_mtu_change_arg arg = {
2242 .dev = dev,
2243 .mtu = mtu,
2244 };
2245
2246 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2247 }
2248
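/* Attribute validation policy for RTM_{NEW,DEL,GET}ROUTE requests,
 * enforced by nlmsg_parse() in rtm_to_fib6_config() and
 * inet6_rtm_getroute().
 */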
2249 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2250 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2251 [RTA_OIF] = { .type = NLA_U32 },
2252 [RTA_IIF] = { .type = NLA_U32 },
2253 [RTA_PRIORITY] = { .type = NLA_U32 },
2254 [RTA_METRICS] = { .type = NLA_NESTED },
2255 };
2256
2257 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2258 struct fib6_config *cfg)
2259 {
2260 struct rtmsg *rtm;
2261 struct nlattr *tb[RTA_MAX+1];
2262 int err;
2263
2264 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2265 if (err < 0)
2266 goto errout;
2267
2268 err = -EINVAL;
2269 rtm = nlmsg_data(nlh);
2270 memset(cfg, 0, sizeof(*cfg));
2271
2272 cfg->fc_table = rtm->rtm_table;
2273 cfg->fc_dst_len = rtm->rtm_dst_len;
2274 cfg->fc_src_len = rtm->rtm_src_len;
2275 cfg->fc_flags = RTF_UP;
2276 cfg->fc_protocol = rtm->rtm_protocol;
2277 cfg->fc_type = rtm->rtm_type;
2278
2279 if (rtm->rtm_type == RTN_UNREACHABLE ||
2280 rtm->rtm_type == RTN_BLACKHOLE ||
2281 rtm->rtm_type == RTN_PROHIBIT ||
2282 rtm->rtm_type == RTN_THROW)
2283 cfg->fc_flags |= RTF_REJECT;
2284
2285 if (rtm->rtm_type == RTN_LOCAL)
2286 cfg->fc_flags |= RTF_LOCAL;
2287
2288 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2289 cfg->fc_nlinfo.nlh = nlh;
2290 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2291
2292 if (tb[RTA_GATEWAY]) {
2293 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2294 cfg->fc_flags |= RTF_GATEWAY;
2295 }
2296
2297 if (tb[RTA_DST]) {
2298 int plen = (rtm->rtm_dst_len + 7) >> 3;
2299
2300 if (nla_len(tb[RTA_DST]) < plen)
2301 goto errout;
2302
2303 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2304 }
2305
2306 if (tb[RTA_SRC]) {
2307 int plen = (rtm->rtm_src_len + 7) >> 3;
2308
2309 if (nla_len(tb[RTA_SRC]) < plen)
2310 goto errout;
2311
2312 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2313 }
2314
2315 if (tb[RTA_PREFSRC])
2316 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2317
2318 if (tb[RTA_OIF])
2319 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2320
2321 if (tb[RTA_PRIORITY])
2322 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2323
2324 if (tb[RTA_METRICS]) {
2325 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2326 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2327 }
2328
2329 if (tb[RTA_TABLE])
2330 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2331
2332 err = 0;
2333 errout:
2334 return err;
2335 }
2336
2337 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2338 {
2339 struct fib6_config cfg;
2340 int err;
2341
2342 err = rtm_to_fib6_config(skb, nlh, &cfg);
2343 if (err < 0)
2344 return err;
2345
2346 return ip6_route_del(&cfg);
2347 }
2348
2349 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2350 {
2351 struct fib6_config cfg;
2352 int err;
2353
2354 err = rtm_to_fib6_config(skb, nlh, &cfg);
2355 if (err < 0)
2356 return err;
2357
2358 return ip6_route_add(&cfg);
2359 }
2360
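/* Worst-case payload size of a single route message; used to size the
 * notification skb allocated in inet6_rt_notify().
 */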
2361 static inline size_t rt6_nlmsg_size(void)
2362 {
2363 return NLMSG_ALIGN(sizeof(struct rtmsg))
2364 + nla_total_size(16) /* RTA_SRC */
2365 + nla_total_size(16) /* RTA_DST */
2366 + nla_total_size(16) /* RTA_GATEWAY */
2367 + nla_total_size(16) /* RTA_PREFSRC */
2368 + nla_total_size(4) /* RTA_TABLE */
2369 + nla_total_size(4) /* RTA_IIF */
2370 + nla_total_size(4) /* RTA_OIF */
2371 + nla_total_size(4) /* RTA_PRIORITY */
2372 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2373 + nla_total_size(sizeof(struct rta_cacheinfo));
2374 }
2375
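/* Fill one rtnetlink route message for @rt into @skb. Non-prefix routes
 * are skipped (returning a positive value) when the caller asked for
 * prefix routes only; -EMSGSIZE means @skb ran out of room and the
 * partially built message was cancelled.
 */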
2376 static int rt6_fill_node(struct net *net,
2377 struct sk_buff *skb, struct rt6_info *rt,
2378 struct in6_addr *dst, struct in6_addr *src,
2379 int iif, int type, u32 portid, u32 seq,
2380 int prefix, int nowait, unsigned int flags)
2381 {
2382 struct rtmsg *rtm;
2383 struct nlmsghdr *nlh;
2384 long expires;
2385 u32 table;
2386 struct neighbour *n;
2387
2388 if (prefix) { /* user wants prefix routes only */
2389 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2390 /* success since this is not a prefix route */
2391 return 1;
2392 }
2393 }
2394
2395 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2396 if (!nlh)
2397 return -EMSGSIZE;
2398
2399 rtm = nlmsg_data(nlh);
2400 rtm->rtm_family = AF_INET6;
2401 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2402 rtm->rtm_src_len = rt->rt6i_src.plen;
2403 rtm->rtm_tos = 0;
2404 if (rt->rt6i_table)
2405 table = rt->rt6i_table->tb6_id;
2406 else
2407 table = RT6_TABLE_UNSPEC;
2408 rtm->rtm_table = table;
2409 if (nla_put_u32(skb, RTA_TABLE, table))
2410 goto nla_put_failure;
2411 if (rt->rt6i_flags & RTF_REJECT) {
2412 switch (rt->dst.error) {
2413 case -EINVAL:
2414 rtm->rtm_type = RTN_BLACKHOLE;
2415 break;
2416 case -EACCES:
2417 rtm->rtm_type = RTN_PROHIBIT;
2418 break;
2419 case -EAGAIN:
2420 rtm->rtm_type = RTN_THROW;
2421 break;
2422 default:
2423 rtm->rtm_type = RTN_UNREACHABLE;
2424 break;
2425 }
2426 }
2427 else if (rt->rt6i_flags & RTF_LOCAL)
2428 rtm->rtm_type = RTN_LOCAL;
2429 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2430 rtm->rtm_type = RTN_LOCAL;
2431 else
2432 rtm->rtm_type = RTN_UNICAST;
2433 rtm->rtm_flags = 0;
2434 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2435 rtm->rtm_protocol = rt->rt6i_protocol;
2436 if (rt->rt6i_flags & RTF_DYNAMIC)
2437 rtm->rtm_protocol = RTPROT_REDIRECT;
2438 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2439 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2440 rtm->rtm_protocol = RTPROT_RA;
2441 else
2442 rtm->rtm_protocol = RTPROT_KERNEL;
2443 }
2444
2445 if (rt->rt6i_flags & RTF_CACHE)
2446 rtm->rtm_flags |= RTM_F_CLONED;
2447
2448 if (dst) {
2449 if (nla_put(skb, RTA_DST, 16, dst))
2450 goto nla_put_failure;
2451 rtm->rtm_dst_len = 128;
2452 } else if (rtm->rtm_dst_len)
2453 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2454 goto nla_put_failure;
2455 #ifdef CONFIG_IPV6_SUBTREES
2456 if (src) {
2457 if (nla_put(skb, RTA_SRC, 16, src))
2458 goto nla_put_failure;
2459 rtm->rtm_src_len = 128;
2460 } else if (rtm->rtm_src_len &&
2461 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2462 goto nla_put_failure;
2463 #endif
2464 if (iif) {
2465 #ifdef CONFIG_IPV6_MROUTE
2466 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2467 int err = ip6mr_get_route(net, skb, rtm, nowait);
2468 if (err <= 0) {
2469 if (!nowait) {
2470 if (err == 0)
2471 return 0;
2472 goto nla_put_failure;
2473 } else {
2474 if (err == -EMSGSIZE)
2475 goto nla_put_failure;
2476 }
2477 }
2478 } else
2479 #endif
2480 if (nla_put_u32(skb, RTA_IIF, iif))
2481 goto nla_put_failure;
2482 } else if (dst) {
2483 struct in6_addr saddr_buf;
2484 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2485 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2486 goto nla_put_failure;
2487 }
2488
2489 if (rt->rt6i_prefsrc.plen) {
2490 struct in6_addr saddr_buf;
2491 saddr_buf = rt->rt6i_prefsrc.addr;
2492 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2493 goto nla_put_failure;
2494 }
2495
2496 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2497 goto nla_put_failure;
2498
2499 rcu_read_lock();
2500 n = rt->n;
2501 if (n) {
2502 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2503 rcu_read_unlock();
2504 goto nla_put_failure;
2505 }
2506 }
2507 rcu_read_unlock();
2508
2509 if (rt->dst.dev &&
2510 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2511 goto nla_put_failure;
2512 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2513 goto nla_put_failure;
2514
2515 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2516
2517 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2518 goto nla_put_failure;
2519
2520 return nlmsg_end(skb, nlh);
2521
2522 nla_put_failure:
2523 nlmsg_cancel(skb, nlh);
2524 return -EMSGSIZE;
2525 }
2526
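/* Per-route callback for route dumps: emit one RTM_NEWROUTE message for
 * @rt, honouring the RTM_F_PREFIX filter carried in the request.
 */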
2527 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2528 {
2529 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2530 int prefix;
2531
2532 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2533 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2534 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2535 } else
2536 prefix = 0;
2537
2538 return rt6_fill_node(arg->net,
2539 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2540 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2541 prefix, 0, NLM_F_MULTI);
2542 }
2543
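/* RTM_GETROUTE handler: look up the route for the addresses in the
 * request (input lookup when RTA_IIF is given, output lookup otherwise)
 * and unicast the result back to the requesting socket.
 */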
2544 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2545 {
2546 struct net *net = sock_net(in_skb->sk);
2547 struct nlattr *tb[RTA_MAX+1];
2548 struct rt6_info *rt;
2549 struct sk_buff *skb;
2550 struct rtmsg *rtm;
2551 struct flowi6 fl6;
2552 int err, iif = 0, oif = 0;
2553
2554 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2555 if (err < 0)
2556 goto errout;
2557
2558 err = -EINVAL;
2559 memset(&fl6, 0, sizeof(fl6));
2560
2561 if (tb[RTA_SRC]) {
2562 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2563 goto errout;
2564
2565 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2566 }
2567
2568 if (tb[RTA_DST]) {
2569 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2570 goto errout;
2571
2572 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2573 }
2574
2575 if (tb[RTA_IIF])
2576 iif = nla_get_u32(tb[RTA_IIF]);
2577
2578 if (tb[RTA_OIF])
2579 oif = nla_get_u32(tb[RTA_OIF]);
2580
2581 if (iif) {
2582 struct net_device *dev;
2583 int flags = 0;
2584
2585 dev = __dev_get_by_index(net, iif);
2586 if (!dev) {
2587 err = -ENODEV;
2588 goto errout;
2589 }
2590
2591 fl6.flowi6_iif = iif;
2592
2593 if (!ipv6_addr_any(&fl6.saddr))
2594 flags |= RT6_LOOKUP_F_HAS_SADDR;
2595
2596 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2597 flags);
2598 } else {
2599 fl6.flowi6_oif = oif;
2600
2601 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2602 }
2603
2604 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2605 if (!skb) {
2606 dst_release(&rt->dst);
2607 err = -ENOBUFS;
2608 goto errout;
2609 }
2610
2611 /* Reserve room for dummy headers; this skb can pass
2612 through a good chunk of the routing engine.
2613 */
2614 skb_reset_mac_header(skb);
2615 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2616
2617 skb_dst_set(skb, &rt->dst);
2618
2619 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2620 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2621 nlh->nlmsg_seq, 0, 0, 0);
2622 if (err < 0) {
2623 kfree_skb(skb);
2624 goto errout;
2625 }
2626
2627 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2628 errout:
2629 return err;
2630 }
2631
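/* Broadcast a route change (@event, typically RTM_NEWROUTE or
 * RTM_DELROUTE) to RTNLGRP_IPV6_ROUTE listeners.
 */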
2632 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2633 {
2634 struct sk_buff *skb;
2635 struct net *net = info->nl_net;
2636 u32 seq;
2637 int err;
2638
2639 err = -ENOBUFS;
2640 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2641
2642 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2643 if (!skb)
2644 goto errout;
2645
2646 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2647 event, info->portid, seq, 0, 0, 0);
2648 if (err < 0) {
2649 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2650 WARN_ON(err == -EMSGSIZE);
2651 kfree_skb(skb);
2652 goto errout;
2653 }
2654 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2655 info->nlh, gfp_any());
2656 return;
2657 errout:
2658 if (err < 0)
2659 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2660 }
2661
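/* When the loopback device registers in a namespace, attach the special
 * null (and, with multiple tables, prohibit/blackhole) routes to it so
 * they always reference a live device.
 */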
2662 static int ip6_route_dev_notify(struct notifier_block *this,
2663 unsigned long event, void *data)
2664 {
2665 struct net_device *dev = (struct net_device *)data;
2666 struct net *net = dev_net(dev);
2667
2668 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2669 net->ipv6.ip6_null_entry->dst.dev = dev;
2670 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2671 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2672 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2673 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2674 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2675 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2676 #endif
2677 }
2678
2679 return NOTIFY_OK;
2680 }
2681
2682 /*
2683 * /proc
2684 */
2685
2686 #ifdef CONFIG_PROC_FS
2687
2688 struct rt6_proc_arg
2689 {
2690 char *buffer;
2691 int offset;
2692 int length;
2693 int skip;
2694 int len;
2695 };
2696
2697 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2698 {
2699 struct seq_file *m = p_arg;
2700 struct neighbour *n;
2701
2702 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2703
2704 #ifdef CONFIG_IPV6_SUBTREES
2705 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2706 #else
2707 seq_puts(m, "00000000000000000000000000000000 00 ");
2708 #endif
2709 rcu_read_lock();
2710 n = rt->n;
2711 if (n) {
2712 seq_printf(m, "%pi6", n->primary_key);
2713 } else {
2714 seq_puts(m, "00000000000000000000000000000000");
2715 }
2716 rcu_read_unlock();
2717 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2718 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2719 rt->dst.__use, rt->rt6i_flags,
2720 rt->dst.dev ? rt->dst.dev->name : "");
2721 return 0;
2722 }
2723
2724 static int ipv6_route_show(struct seq_file *m, void *v)
2725 {
2726 struct net *net = (struct net *)m->private;
2727 fib6_clean_all_ro(net, rt6_info_route, 0, m);
2728 return 0;
2729 }
2730
2731 static int ipv6_route_open(struct inode *inode, struct file *file)
2732 {
2733 return single_open_net(inode, file, ipv6_route_show);
2734 }
2735
2736 static const struct file_operations ipv6_route_proc_fops = {
2737 .owner = THIS_MODULE,
2738 .open = ipv6_route_open,
2739 .read = seq_read,
2740 .llseek = seq_lseek,
2741 .release = single_release_net,
2742 };
2743
2744 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2745 {
2746 struct net *net = (struct net *)seq->private;
2747 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2748 net->ipv6.rt6_stats->fib_nodes,
2749 net->ipv6.rt6_stats->fib_route_nodes,
2750 net->ipv6.rt6_stats->fib_rt_alloc,
2751 net->ipv6.rt6_stats->fib_rt_entries,
2752 net->ipv6.rt6_stats->fib_rt_cache,
2753 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2754 net->ipv6.rt6_stats->fib_discarded_routes);
2755
2756 return 0;
2757 }
2758
2759 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2760 {
2761 return single_open_net(inode, file, rt6_stats_seq_show);
2762 }
2763
2764 static const struct file_operations rt6_stats_seq_fops = {
2765 .owner = THIS_MODULE,
2766 .open = rt6_stats_seq_open,
2767 .read = seq_read,
2768 .llseek = seq_lseek,
2769 .release = single_release_net,
2770 };
2771 #endif /* CONFIG_PROC_FS */
2772
2773 #ifdef CONFIG_SYSCTL
2774
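/* sysctl handler for the route "flush" entry: a write kicks off a fib6
 * garbage-collection pass for the owning namespace.
 */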
2775 static
2776 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2777 void __user *buffer, size_t *lenp, loff_t *ppos)
2778 {
2779 struct net *net;
2780 int delay;
2781 if (!write)
2782 return -EINVAL;
2783
2784 net = (struct net *)ctl->extra1;
2785 delay = net->ipv6.sysctl.flush_delay;
2786 proc_dointvec(ctl, write, buffer, lenp, ppos);
2787 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2788 return 0;
2789 }
2790
2791 ctl_table ipv6_route_table_template[] = {
2792 {
2793 .procname = "flush",
2794 .data = &init_net.ipv6.sysctl.flush_delay,
2795 .maxlen = sizeof(int),
2796 .mode = 0200,
2797 .proc_handler = ipv6_sysctl_rtcache_flush
2798 },
2799 {
2800 .procname = "gc_thresh",
2801 .data = &ip6_dst_ops_template.gc_thresh,
2802 .maxlen = sizeof(int),
2803 .mode = 0644,
2804 .proc_handler = proc_dointvec,
2805 },
2806 {
2807 .procname = "max_size",
2808 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2809 .maxlen = sizeof(int),
2810 .mode = 0644,
2811 .proc_handler = proc_dointvec,
2812 },
2813 {
2814 .procname = "gc_min_interval",
2815 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2816 .maxlen = sizeof(int),
2817 .mode = 0644,
2818 .proc_handler = proc_dointvec_jiffies,
2819 },
2820 {
2821 .procname = "gc_timeout",
2822 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2823 .maxlen = sizeof(int),
2824 .mode = 0644,
2825 .proc_handler = proc_dointvec_jiffies,
2826 },
2827 {
2828 .procname = "gc_interval",
2829 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2830 .maxlen = sizeof(int),
2831 .mode = 0644,
2832 .proc_handler = proc_dointvec_jiffies,
2833 },
2834 {
2835 .procname = "gc_elasticity",
2836 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2837 .maxlen = sizeof(int),
2838 .mode = 0644,
2839 .proc_handler = proc_dointvec,
2840 },
2841 {
2842 .procname = "mtu_expires",
2843 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2844 .maxlen = sizeof(int),
2845 .mode = 0644,
2846 .proc_handler = proc_dointvec_jiffies,
2847 },
2848 {
2849 .procname = "min_adv_mss",
2850 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2851 .maxlen = sizeof(int),
2852 .mode = 0644,
2853 .proc_handler = proc_dointvec,
2854 },
2855 {
2856 .procname = "gc_min_interval_ms",
2857 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2858 .maxlen = sizeof(int),
2859 .mode = 0644,
2860 .proc_handler = proc_dointvec_ms_jiffies,
2861 },
2862 { }
2863 };
2864
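/* Duplicate the sysctl template for a namespace and re-point each
 * entry's ->data at the corresponding field of @net. The table is
 * registered elsewhere, under net.ipv6.route.
 */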
2865 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2866 {
2867 struct ctl_table *table;
2868
2869 table = kmemdup(ipv6_route_table_template,
2870 sizeof(ipv6_route_table_template),
2871 GFP_KERNEL);
2872
2873 if (table) {
2874 table[0].data = &net->ipv6.sysctl.flush_delay;
2875 table[0].extra1 = net;
2876 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2877 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2878 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2879 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2880 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2881 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2882 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2883 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2884 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2885 }
2886
2887 return table;
2888 }
2889 #endif
2890
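/* Per-namespace setup: clone the dst_ops template and the special route
 * templates (null, plus prohibit/blackhole with multiple tables), then
 * seed the per-namespace sysctl defaults.
 */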
2891 static int __net_init ip6_route_net_init(struct net *net)
2892 {
2893 int ret = -ENOMEM;
2894
2895 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2896 sizeof(net->ipv6.ip6_dst_ops));
2897
2898 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2899 goto out_ip6_dst_ops;
2900
2901 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2902 sizeof(*net->ipv6.ip6_null_entry),
2903 GFP_KERNEL);
2904 if (!net->ipv6.ip6_null_entry)
2905 goto out_ip6_dst_entries;
2906 net->ipv6.ip6_null_entry->dst.path =
2907 (struct dst_entry *)net->ipv6.ip6_null_entry;
2908 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2909 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2910 ip6_template_metrics, true);
2911
2912 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2913 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2914 sizeof(*net->ipv6.ip6_prohibit_entry),
2915 GFP_KERNEL);
2916 if (!net->ipv6.ip6_prohibit_entry)
2917 goto out_ip6_null_entry;
2918 net->ipv6.ip6_prohibit_entry->dst.path =
2919 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2920 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2921 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2922 ip6_template_metrics, true);
2923
2924 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2925 sizeof(*net->ipv6.ip6_blk_hole_entry),
2926 GFP_KERNEL);
2927 if (!net->ipv6.ip6_blk_hole_entry)
2928 goto out_ip6_prohibit_entry;
2929 net->ipv6.ip6_blk_hole_entry->dst.path =
2930 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2931 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2932 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2933 ip6_template_metrics, true);
2934 #endif
2935
2936 net->ipv6.sysctl.flush_delay = 0;
2937 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2938 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2939 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2940 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2941 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2942 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2943 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2944
2945 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2946
2947 ret = 0;
2948 out:
2949 return ret;
2950
2951 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2952 out_ip6_prohibit_entry:
2953 kfree(net->ipv6.ip6_prohibit_entry);
2954 out_ip6_null_entry:
2955 kfree(net->ipv6.ip6_null_entry);
2956 #endif
2957 out_ip6_dst_entries:
2958 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2959 out_ip6_dst_ops:
2960 goto out;
2961 }
2962
2963 static void __net_exit ip6_route_net_exit(struct net *net)
2964 {
2965 kfree(net->ipv6.ip6_null_entry);
2966 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2967 kfree(net->ipv6.ip6_prohibit_entry);
2968 kfree(net->ipv6.ip6_blk_hole_entry);
2969 #endif
2970 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2971 }
2972
2973 static int __net_init ip6_route_net_init_late(struct net *net)
2974 {
2975 #ifdef CONFIG_PROC_FS
2976 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2977 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2978 #endif
2979 return 0;
2980 }
2981
2982 static void __net_exit ip6_route_net_exit_late(struct net *net)
2983 {
2984 #ifdef CONFIG_PROC_FS
2985 proc_net_remove(net, "ipv6_route");
2986 proc_net_remove(net, "rt6_stats");
2987 #endif
2988 }
2989
2990 static struct pernet_operations ip6_route_net_ops = {
2991 .init = ip6_route_net_init,
2992 .exit = ip6_route_net_exit,
2993 };
2994
2995 static int __net_init ipv6_inetpeer_init(struct net *net)
2996 {
2997 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2998
2999 if (!bp)
3000 return -ENOMEM;
3001 inet_peer_base_init(bp);
3002 net->ipv6.peers = bp;
3003 return 0;
3004 }
3005
3006 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3007 {
3008 struct inet_peer_base *bp = net->ipv6.peers;
3009
3010 net->ipv6.peers = NULL;
3011 inetpeer_invalidate_tree(bp);
3012 kfree(bp);
3013 }
3014
3015 static struct pernet_operations ipv6_inetpeer_ops = {
3016 .init = ipv6_inetpeer_init,
3017 .exit = ipv6_inetpeer_exit,
3018 };
3019
3020 static struct pernet_operations ip6_route_net_late_ops = {
3021 .init = ip6_route_net_init_late,
3022 .exit = ip6_route_net_exit_late,
3023 };
3024
3025 static struct notifier_block ip6_route_dev_notifier = {
3026 .notifier_call = ip6_route_dev_notify,
3027 .priority = 0,
3028 };
3029
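/* Subsystem init: create the rt6_info slab cache, register the per-net
 * operations, wire init_net's special routes to the loopback device,
 * then bring up fib6, xfrm6, policy rules, the rtnetlink handlers and
 * the netdevice notifier. The error labels unwind in reverse order.
 */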
3030 int __init ip6_route_init(void)
3031 {
3032 int ret;
3033
3034 ret = -ENOMEM;
3035 ip6_dst_ops_template.kmem_cachep =
3036 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3037 SLAB_HWCACHE_ALIGN, NULL);
3038 if (!ip6_dst_ops_template.kmem_cachep)
3039 goto out;
3040
3041 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3042 if (ret)
3043 goto out_kmem_cache;
3044
3045 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3046 if (ret)
3047 goto out_dst_entries;
3048
3049 ret = register_pernet_subsys(&ip6_route_net_ops);
3050 if (ret)
3051 goto out_register_inetpeer;
3052
3053 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3054
3055 /* The loopback device is registered before this portion of code runs,
3056 * so the loopback reference in rt6_info is not taken automatically;
3057 * take it manually for init_net. */
3058 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3059 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3060 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3061 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3062 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3063 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3064 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3065 #endif
3066 ret = fib6_init();
3067 if (ret)
3068 goto out_register_subsys;
3069
3070 ret = xfrm6_init();
3071 if (ret)
3072 goto out_fib6_init;
3073
3074 ret = fib6_rules_init();
3075 if (ret)
3076 goto xfrm6_init;
3077
3078 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3079 if (ret)
3080 goto fib6_rules_init;
3081
3082 ret = -ENOBUFS;
3083 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3084 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3085 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3086 goto out_register_late_subsys;
3087
3088 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3089 if (ret)
3090 goto out_register_late_subsys;
3091
3092 out:
3093 return ret;
3094
3095 out_register_late_subsys:
3096 unregister_pernet_subsys(&ip6_route_net_late_ops);
3097 fib6_rules_init:
3098 fib6_rules_cleanup();
3099 xfrm6_init:
3100 xfrm6_fini();
3101 out_fib6_init:
3102 fib6_gc_cleanup();
3103 out_register_subsys:
3104 unregister_pernet_subsys(&ip6_route_net_ops);
3105 out_register_inetpeer:
3106 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3107 out_dst_entries:
3108 dst_entries_destroy(&ip6_dst_blackhole_ops);
3109 out_kmem_cache:
3110 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3111 goto out;
3112 }
3113
3114 void ip6_route_cleanup(void)
3115 {
3116 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3117 unregister_pernet_subsys(&ip6_route_net_late_ops);
3118 fib6_rules_cleanup();
3119 xfrm6_fini();
3120 fib6_gc_cleanup();
3121 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3122 unregister_pernet_subsys(&ip6_route_net_ops);
3123 dst_entries_destroy(&ip6_dst_blackhole_ops);
3124 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3125 }