net/ipv6/route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60
61 #include <asm/uaccess.h>
62
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68 const struct in6_addr *dest);
69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void ip6_dst_destroy(struct dst_entry *);
74 static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
76 static int ip6_dst_gc(struct dst_ops *ops);
77
78 static int ip6_pkt_discard(struct sk_buff *skb);
79 static int ip6_pkt_discard_out(struct sk_buff *skb);
80 static void ip6_link_failure(struct sk_buff *skb);
81 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
82
83 #ifdef CONFIG_IPV6_ROUTE_INFO
84 static struct rt6_info *rt6_add_route_info(struct net *net,
85 const struct in6_addr *prefix, int prefixlen,
86 const struct in6_addr *gwaddr, int ifindex,
87 unsigned int pref);
88 static struct rt6_info *rt6_get_route_info(struct net *net,
89 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex);
91 #endif
92
93 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
94 {
95 struct rt6_info *rt = (struct rt6_info *) dst;
96 struct inet_peer *peer;
97 u32 *p = NULL;
98
99 if (!(rt->dst.flags & DST_HOST))
100 return NULL;
101
102 peer = rt6_get_peer_create(rt);
103 if (peer) {
104 u32 *old_p = __DST_METRICS_PTR(old);
105 unsigned long prev, new;
106
107 p = peer->metrics;
108 if (inet_metrics_new(peer))
109 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
110
111 new = (unsigned long) p;
112 prev = cmpxchg(&dst->_metrics, old, new);
113
114 if (prev != old) {
115 p = __DST_METRICS_PTR(prev);
116 if (prev & DST_METRICS_READ_ONLY)
117 p = NULL;
118 }
119 }
120 return p;
121 }
122
123 static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
124 {
125 struct in6_addr *p = &rt->rt6i_gateway;
126
127 if (!ipv6_addr_any(p))
128 return (const void *) p;
129 return daddr;
130 }
131
132 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
133 {
134 struct rt6_info *rt = (struct rt6_info *) dst;
135 struct neighbour *n;
136
137 daddr = choose_neigh_daddr(rt, daddr);
138 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
139 if (n)
140 return n;
141 return neigh_create(&nd_tbl, daddr, dst->dev);
142 }
143
144 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
145 {
146 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
147 if (!n) {
148 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
149 if (IS_ERR(n))
150 return PTR_ERR(n);
151 }
152 dst_set_neighbour(&rt->dst, n);
153
154 return 0;
155 }
156
157 static struct dst_ops ip6_dst_ops_template = {
158 .family = AF_INET6,
159 .protocol = cpu_to_be16(ETH_P_IPV6),
160 .gc = ip6_dst_gc,
161 .gc_thresh = 1024,
162 .check = ip6_dst_check,
163 .default_advmss = ip6_default_advmss,
164 .mtu = ip6_mtu,
165 .cow_metrics = ipv6_cow_metrics,
166 .destroy = ip6_dst_destroy,
167 .ifdown = ip6_dst_ifdown,
168 .negative_advice = ip6_negative_advice,
169 .link_failure = ip6_link_failure,
170 .update_pmtu = ip6_rt_update_pmtu,
171 .local_out = __ip6_local_out,
172 .neigh_lookup = ip6_neigh_lookup,
173 };
174
175 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
176 {
177 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
178
179 return mtu ? : dst->dev->mtu;
180 }
181
182 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
183 {
184 }
185
186 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
187 unsigned long old)
188 {
189 return NULL;
190 }
191
192 static struct dst_ops ip6_dst_blackhole_ops = {
193 .family = AF_INET6,
194 .protocol = cpu_to_be16(ETH_P_IPV6),
195 .destroy = ip6_dst_destroy,
196 .check = ip6_dst_check,
197 .mtu = ip6_blackhole_mtu,
198 .default_advmss = ip6_default_advmss,
199 .update_pmtu = ip6_rt_blackhole_update_pmtu,
200 .cow_metrics = ip6_rt_blackhole_cow_metrics,
201 .neigh_lookup = ip6_neigh_lookup,
202 };
203
204 static const u32 ip6_template_metrics[RTAX_MAX] = {
205 [RTAX_HOPLIMIT - 1] = 255,
206 };
207
208 static struct rt6_info ip6_null_entry_template = {
209 .dst = {
210 .__refcnt = ATOMIC_INIT(1),
211 .__use = 1,
212 .obsolete = -1,
213 .error = -ENETUNREACH,
214 .input = ip6_pkt_discard,
215 .output = ip6_pkt_discard_out,
216 },
217 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
218 .rt6i_protocol = RTPROT_KERNEL,
219 .rt6i_metric = ~(u32) 0,
220 .rt6i_ref = ATOMIC_INIT(1),
221 };
222
223 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
224
225 static int ip6_pkt_prohibit(struct sk_buff *skb);
226 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
227
228 static struct rt6_info ip6_prohibit_entry_template = {
229 .dst = {
230 .__refcnt = ATOMIC_INIT(1),
231 .__use = 1,
232 .obsolete = -1,
233 .error = -EACCES,
234 .input = ip6_pkt_prohibit,
235 .output = ip6_pkt_prohibit_out,
236 },
237 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
238 .rt6i_protocol = RTPROT_KERNEL,
239 .rt6i_metric = ~(u32) 0,
240 .rt6i_ref = ATOMIC_INIT(1),
241 };
242
243 static struct rt6_info ip6_blk_hole_entry_template = {
244 .dst = {
245 .__refcnt = ATOMIC_INIT(1),
246 .__use = 1,
247 .obsolete = -1,
248 .error = -EINVAL,
249 .input = dst_discard,
250 .output = dst_discard,
251 },
252 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
253 .rt6i_protocol = RTPROT_KERNEL,
254 .rt6i_metric = ~(u32) 0,
255 .rt6i_ref = ATOMIC_INIT(1),
256 };
257
258 #endif
259
260 /* allocate dst with ip6_dst_ops */
261 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
262 struct net_device *dev,
263 int flags,
264 struct fib6_table *table)
265 {
266 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
267 0, 0, flags);
268
269 if (rt) {
270 memset(&rt->rt6i_table, 0,
271 sizeof(*rt) - sizeof(struct dst_entry));
272 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
273 }
274 return rt;
275 }
276
277 static void ip6_dst_destroy(struct dst_entry *dst)
278 {
279 struct rt6_info *rt = (struct rt6_info *)dst;
280 struct inet6_dev *idev = rt->rt6i_idev;
281
282 if (!(rt->dst.flags & DST_HOST))
283 dst_destroy_metrics_generic(dst);
284
285 if (idev) {
286 rt->rt6i_idev = NULL;
287 in6_dev_put(idev);
288 }
289
290 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
291 dst_release(dst->from);
292
293 if (rt6_has_peer(rt)) {
294 struct inet_peer *peer = rt6_peer_ptr(rt);
295 inet_putpeer(peer);
296 }
297 }
298
299 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
300
301 static u32 rt6_peer_genid(void)
302 {
303 return atomic_read(&__rt6_peer_genid);
304 }
305
306 void rt6_bind_peer(struct rt6_info *rt, int create)
307 {
308 struct inet_peer_base *base;
309 struct inet_peer *peer;
310
311 base = inetpeer_base_ptr(rt->_rt6i_peer);
312 if (!base)
313 return;
314
315 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
316 if (peer) {
317 if (!rt6_set_peer(rt, peer))
318 inet_putpeer(peer);
319 else
320 rt->rt6i_peer_genid = rt6_peer_genid();
321 }
322 }
323
324 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
325 int how)
326 {
327 struct rt6_info *rt = (struct rt6_info *)dst;
328 struct inet6_dev *idev = rt->rt6i_idev;
329 struct net_device *loopback_dev =
330 dev_net(dev)->loopback_dev;
331
332 if (dev != loopback_dev && idev && idev->dev == dev) {
333 struct inet6_dev *loopback_idev =
334 in6_dev_get(loopback_dev);
335 if (loopback_idev) {
336 rt->rt6i_idev = loopback_idev;
337 in6_dev_put(idev);
338 }
339 }
340 }
341
342 static bool rt6_check_expired(const struct rt6_info *rt)
343 {
344 struct rt6_info *ort = NULL;
345
346 if (rt->rt6i_flags & RTF_EXPIRES) {
347 if (time_after(jiffies, rt->dst.expires))
348 return true;
349 } else if (rt->dst.from) {
350 ort = (struct rt6_info *) rt->dst.from;
351 return (ort->rt6i_flags & RTF_EXPIRES) &&
352 time_after(jiffies, ort->dst.expires);
353 }
354 return false;
355 }
356
357 static bool rt6_need_strict(const struct in6_addr *daddr)
358 {
359 return ipv6_addr_type(daddr) &
360 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
361 }
362
363 /*
364 * Route lookup. Any table->tb6_lock is implied.
365 */
366
367 static inline struct rt6_info *rt6_device_match(struct net *net,
368 struct rt6_info *rt,
369 const struct in6_addr *saddr,
370 int oif,
371 int flags)
372 {
373 struct rt6_info *local = NULL;
374 struct rt6_info *sprt;
375
376 if (!oif && ipv6_addr_any(saddr))
377 goto out;
378
379 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
380 struct net_device *dev = sprt->dst.dev;
381
382 if (oif) {
383 if (dev->ifindex == oif)
384 return sprt;
385 if (dev->flags & IFF_LOOPBACK) {
386 if (!sprt->rt6i_idev ||
387 sprt->rt6i_idev->dev->ifindex != oif) {
388 if (flags & RT6_LOOKUP_F_IFACE && oif)
389 continue;
390 if (local && (!oif ||
391 local->rt6i_idev->dev->ifindex == oif))
392 continue;
393 }
394 local = sprt;
395 }
396 } else {
397 if (ipv6_chk_addr(net, saddr, dev,
398 flags & RT6_LOOKUP_F_IFACE))
399 return sprt;
400 }
401 }
402
403 if (oif) {
404 if (local)
405 return local;
406
407 if (flags & RT6_LOOKUP_F_IFACE)
408 return net->ipv6.ip6_null_entry;
409 }
410 out:
411 return rt;
412 }
413
414 #ifdef CONFIG_IPV6_ROUTER_PREF
415 static void rt6_probe(struct rt6_info *rt)
416 {
417 struct neighbour *neigh;
418 /*
419 * Okay, this does not seem to be appropriate
420 * for now; however, we need to check whether it
421 * really is, aka Router Reachability Probing.
422 *
423 * Router Reachability Probe MUST be rate-limited
424 * to no more than one per minute.
425 */
426 rcu_read_lock();
427 neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
428 if (!neigh || (neigh->nud_state & NUD_VALID))
429 goto out;
430 read_lock_bh(&neigh->lock);
431 if (!(neigh->nud_state & NUD_VALID) &&
432 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
433 struct in6_addr mcaddr;
434 struct in6_addr *target;
435
436 neigh->updated = jiffies;
437 read_unlock_bh(&neigh->lock);
438
439 target = (struct in6_addr *)&neigh->primary_key;
440 addrconf_addr_solict_mult(target, &mcaddr);
441 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
442 } else {
443 read_unlock_bh(&neigh->lock);
444 }
445 out:
446 rcu_read_unlock();
447 }
448 #else
449 static inline void rt6_probe(struct rt6_info *rt)
450 {
451 }
452 #endif
453
454 /*
455 * Default Router Selection (RFC 2461 6.3.6)
456 */
457 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
458 {
459 struct net_device *dev = rt->dst.dev;
460 if (!oif || dev->ifindex == oif)
461 return 2;
462 if ((dev->flags & IFF_LOOPBACK) &&
463 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
464 return 1;
465 return 0;
466 }
467
468 static inline int rt6_check_neigh(struct rt6_info *rt)
469 {
470 struct neighbour *neigh;
471 int m;
472
473 rcu_read_lock();
474 neigh = dst_get_neighbour_noref(&rt->dst);
475 if (rt->rt6i_flags & RTF_NONEXTHOP ||
476 !(rt->rt6i_flags & RTF_GATEWAY))
477 m = 1;
478 else if (neigh) {
479 read_lock_bh(&neigh->lock);
480 if (neigh->nud_state & NUD_VALID)
481 m = 2;
482 #ifdef CONFIG_IPV6_ROUTER_PREF
483 else if (neigh->nud_state & NUD_FAILED)
484 m = 0;
485 #endif
486 else
487 m = 1;
488 read_unlock_bh(&neigh->lock);
489 } else
490 m = 0;
491 rcu_read_unlock();
492 return m;
493 }
494
495 static int rt6_score_route(struct rt6_info *rt, int oif,
496 int strict)
497 {
498 int m, n;
499
500 m = rt6_check_dev(rt, oif);
501 if (!m && (strict & RT6_LOOKUP_F_IFACE))
502 return -1;
503 #ifdef CONFIG_IPV6_ROUTER_PREF
504 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
505 #endif
506 n = rt6_check_neigh(rt);
507 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
508 return -1;
509 return m;
510 }
511
512 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
513 int *mpri, struct rt6_info *match)
514 {
515 int m;
516
517 if (rt6_check_expired(rt))
518 goto out;
519
520 m = rt6_score_route(rt, oif, strict);
521 if (m < 0)
522 goto out;
523
524 if (m > *mpri) {
525 if (strict & RT6_LOOKUP_F_REACHABLE)
526 rt6_probe(match);
527 *mpri = m;
528 match = rt;
529 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
530 rt6_probe(rt);
531 }
532
533 out:
534 return match;
535 }
536
537 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
538 struct rt6_info *rr_head,
539 u32 metric, int oif, int strict)
540 {
541 struct rt6_info *rt, *match;
542 int mpri = -1;
543
544 match = NULL;
545 for (rt = rr_head; rt && rt->rt6i_metric == metric;
546 rt = rt->dst.rt6_next)
547 match = find_match(rt, oif, strict, &mpri, match);
548 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
549 rt = rt->dst.rt6_next)
550 match = find_match(rt, oif, strict, &mpri, match);
551
552 return match;
553 }
554
555 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
556 {
557 struct rt6_info *match, *rt0;
558 struct net *net;
559
560 rt0 = fn->rr_ptr;
561 if (!rt0)
562 fn->rr_ptr = rt0 = fn->leaf;
563
564 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
565
566 if (!match &&
567 (strict & RT6_LOOKUP_F_REACHABLE)) {
568 struct rt6_info *next = rt0->dst.rt6_next;
569
570 /* no entries matched; do round-robin */
571 if (!next || next->rt6i_metric != rt0->rt6i_metric)
572 next = fn->leaf;
573
574 if (next != rt0)
575 fn->rr_ptr = next;
576 }
577
578 net = dev_net(rt0->dst.dev);
579 return match ? match : net->ipv6.ip6_null_entry;
580 }
581
582 #ifdef CONFIG_IPV6_ROUTE_INFO
583 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
584 const struct in6_addr *gwaddr)
585 {
586 struct net *net = dev_net(dev);
587 struct route_info *rinfo = (struct route_info *) opt;
588 struct in6_addr prefix_buf, *prefix;
589 unsigned int pref;
590 unsigned long lifetime;
591 struct rt6_info *rt;
592
593 if (len < sizeof(struct route_info)) {
594 return -EINVAL;
595 }
596
597 /* Sanity check for prefix_len and length */
598 if (rinfo->length > 3) {
599 return -EINVAL;
600 } else if (rinfo->prefix_len > 128) {
601 return -EINVAL;
602 } else if (rinfo->prefix_len > 64) {
603 if (rinfo->length < 2) {
604 return -EINVAL;
605 }
606 } else if (rinfo->prefix_len > 0) {
607 if (rinfo->length < 1) {
608 return -EINVAL;
609 }
610 }
611
612 pref = rinfo->route_pref;
613 if (pref == ICMPV6_ROUTER_PREF_INVALID)
614 return -EINVAL;
615
616 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
617
618 if (rinfo->length == 3)
619 prefix = (struct in6_addr *)rinfo->prefix;
620 else {
621 /* this function is safe */
622 ipv6_addr_prefix(&prefix_buf,
623 (struct in6_addr *)rinfo->prefix,
624 rinfo->prefix_len);
625 prefix = &prefix_buf;
626 }
627
628 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
629 dev->ifindex);
630
631 if (rt && !lifetime) {
632 ip6_del_rt(rt);
633 rt = NULL;
634 }
635
636 if (!rt && lifetime)
637 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
638 pref);
639 else if (rt)
640 rt->rt6i_flags = RTF_ROUTEINFO |
641 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
642
643 if (rt) {
644 if (!addrconf_finite_timeout(lifetime))
645 rt6_clean_expires(rt);
646 else
647 rt6_set_expires(rt, jiffies + HZ * lifetime);
648
649 dst_release(&rt->dst);
650 }
651 return 0;
652 }
653 #endif
654
655 #define BACKTRACK(__net, saddr) \
656 do { \
657 if (rt == __net->ipv6.ip6_null_entry) { \
658 struct fib6_node *pn; \
659 while (1) { \
660 if (fn->fn_flags & RTN_TL_ROOT) \
661 goto out; \
662 pn = fn->parent; \
663 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
664 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
665 else \
666 fn = pn; \
667 if (fn->fn_flags & RTN_RTINFO) \
668 goto restart; \
669 } \
670 } \
671 } while (0)
672
673 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
674 struct fib6_table *table,
675 struct flowi6 *fl6, int flags)
676 {
677 struct fib6_node *fn;
678 struct rt6_info *rt;
679
680 read_lock_bh(&table->tb6_lock);
681 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
682 restart:
683 rt = fn->leaf;
684 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
685 BACKTRACK(net, &fl6->saddr);
686 out:
687 dst_use(&rt->dst, jiffies);
688 read_unlock_bh(&table->tb6_lock);
689 return rt;
690
691 }
692
693 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
694 int flags)
695 {
696 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
697 }
698 EXPORT_SYMBOL_GPL(ip6_route_lookup);
699
700 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
701 const struct in6_addr *saddr, int oif, int strict)
702 {
703 struct flowi6 fl6 = {
704 .flowi6_oif = oif,
705 .daddr = *daddr,
706 };
707 struct dst_entry *dst;
708 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
709
710 if (saddr) {
711 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
712 flags |= RT6_LOOKUP_F_HAS_SADDR;
713 }
714
715 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
716 if (dst->error == 0)
717 return (struct rt6_info *) dst;
718
719 dst_release(dst);
720
721 return NULL;
722 }
723
724 EXPORT_SYMBOL(rt6_lookup);
725
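
/*
 * Illustrative aside (not part of the original file): a minimal sketch of
 * how a caller typically uses rt6_lookup() above -- pass the destination
 * (and optionally source/oif), then drop the reference with dst_release().
 * The wrapper function name below is invented for the example.
 */
static bool example_dst_is_gatewayed(struct net *net,
				     const struct in6_addr *daddr, int oif)
{
	struct rt6_info *rt;
	bool via_gw = false;

	rt = rt6_lookup(net, daddr, NULL, oif, 0);
	if (rt) {
		via_gw = !!(rt->rt6i_flags & RTF_GATEWAY);
		dst_release(&rt->dst);
	}
	return via_gw;
}
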
726 /* ip6_ins_rt is called with table->tb6_lock FREE (not held).
727 It takes a new route entry; if the addition fails for any reason, the
728 route is freed. In any case, if the caller does not hold a reference,
729 it may be destroyed.
730 */
731
732 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
733 {
734 int err;
735 struct fib6_table *table;
736
737 table = rt->rt6i_table;
738 write_lock_bh(&table->tb6_lock);
739 err = fib6_add(&table->tb6_root, rt, info);
740 write_unlock_bh(&table->tb6_lock);
741
742 return err;
743 }
744
745 int ip6_ins_rt(struct rt6_info *rt)
746 {
747 struct nl_info info = {
748 .nl_net = dev_net(rt->dst.dev),
749 };
750 return __ip6_ins_rt(rt, &info);
751 }
752
753 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
754 const struct in6_addr *daddr,
755 const struct in6_addr *saddr)
756 {
757 struct rt6_info *rt;
758
759 /*
760 * Clone the route.
761 */
762
763 rt = ip6_rt_copy(ort, daddr);
764
765 if (rt) {
766 int attempts = !in_softirq();
767
768 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
769 if (ort->rt6i_dst.plen != 128 &&
770 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
771 rt->rt6i_flags |= RTF_ANYCAST;
772 rt->rt6i_gateway = *daddr;
773 }
774
775 rt->rt6i_flags |= RTF_CACHE;
776
777 #ifdef CONFIG_IPV6_SUBTREES
778 if (rt->rt6i_src.plen && saddr) {
779 rt->rt6i_src.addr = *saddr;
780 rt->rt6i_src.plen = 128;
781 }
782 #endif
783
784 retry:
785 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
786 struct net *net = dev_net(rt->dst.dev);
787 int saved_rt_min_interval =
788 net->ipv6.sysctl.ip6_rt_gc_min_interval;
789 int saved_rt_elasticity =
790 net->ipv6.sysctl.ip6_rt_gc_elasticity;
791
792 if (attempts-- > 0) {
793 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
794 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
795
796 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
797
798 net->ipv6.sysctl.ip6_rt_gc_elasticity =
799 saved_rt_elasticity;
800 net->ipv6.sysctl.ip6_rt_gc_min_interval =
801 saved_rt_min_interval;
802 goto retry;
803 }
804
805 net_warn_ratelimited("Neighbour table overflow\n");
806 dst_free(&rt->dst);
807 return NULL;
808 }
809 }
810
811 return rt;
812 }
813
814 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
815 const struct in6_addr *daddr)
816 {
817 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
818
819 if (rt) {
820 rt->rt6i_flags |= RTF_CACHE;
821 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
822 }
823 return rt;
824 }
825
826 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
827 struct flowi6 *fl6, int flags)
828 {
829 struct fib6_node *fn;
830 struct rt6_info *rt, *nrt;
831 int strict = 0;
832 int attempts = 3;
833 int err;
834 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
835
836 strict |= flags & RT6_LOOKUP_F_IFACE;
837
838 relookup:
839 read_lock_bh(&table->tb6_lock);
840
841 restart_2:
842 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
843
844 restart:
845 rt = rt6_select(fn, oif, strict | reachable);
846
847 BACKTRACK(net, &fl6->saddr);
848 if (rt == net->ipv6.ip6_null_entry ||
849 rt->rt6i_flags & RTF_CACHE)
850 goto out;
851
852 dst_hold(&rt->dst);
853 read_unlock_bh(&table->tb6_lock);
854
855 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
856 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
857 else if (!(rt->dst.flags & DST_HOST))
858 nrt = rt6_alloc_clone(rt, &fl6->daddr);
859 else
860 goto out2;
861
862 dst_release(&rt->dst);
863 rt = nrt ? : net->ipv6.ip6_null_entry;
864
865 dst_hold(&rt->dst);
866 if (nrt) {
867 err = ip6_ins_rt(nrt);
868 if (!err)
869 goto out2;
870 }
871
872 if (--attempts <= 0)
873 goto out2;
874
875 /*
876 * Race condition! In the gap, when table->tb6_lock was
877 * released someone could insert this route. Relookup.
878 */
879 dst_release(&rt->dst);
880 goto relookup;
881
882 out:
883 if (reachable) {
884 reachable = 0;
885 goto restart_2;
886 }
887 dst_hold(&rt->dst);
888 read_unlock_bh(&table->tb6_lock);
889 out2:
890 rt->dst.lastuse = jiffies;
891 rt->dst.__use++;
892
893 return rt;
894 }
895
896 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
897 struct flowi6 *fl6, int flags)
898 {
899 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
900 }
901
902 static struct dst_entry *ip6_route_input_lookup(struct net *net,
903 struct net_device *dev,
904 struct flowi6 *fl6, int flags)
905 {
906 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
907 flags |= RT6_LOOKUP_F_IFACE;
908
909 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
910 }
911
912 void ip6_route_input(struct sk_buff *skb)
913 {
914 const struct ipv6hdr *iph = ipv6_hdr(skb);
915 struct net *net = dev_net(skb->dev);
916 int flags = RT6_LOOKUP_F_HAS_SADDR;
917 struct flowi6 fl6 = {
918 .flowi6_iif = skb->dev->ifindex,
919 .daddr = iph->daddr,
920 .saddr = iph->saddr,
921 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
922 .flowi6_mark = skb->mark,
923 .flowi6_proto = iph->nexthdr,
924 };
925
926 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
927 }
928
929 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
930 struct flowi6 *fl6, int flags)
931 {
932 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
933 }
934
935 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
936 struct flowi6 *fl6)
937 {
938 int flags = 0;
939
940 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
941 flags |= RT6_LOOKUP_F_IFACE;
942
943 if (!ipv6_addr_any(&fl6->saddr))
944 flags |= RT6_LOOKUP_F_HAS_SADDR;
945 else if (sk)
946 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
947
948 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
949 }
950
951 EXPORT_SYMBOL(ip6_route_output);
952
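
/*
 * Illustrative aside (not part of the original file): the usual calling
 * pattern for ip6_route_output().  It always returns a dst; callers must
 * check dst->error and release the reference.  The wrapper below is a
 * sketch with an invented name; it reads the path MTU for a destination.
 */
static int example_output_route_mtu(struct net *net,
				    const struct in6_addr *daddr,
				    unsigned int *mtu)
{
	struct flowi6 fl6;
	struct dst_entry *dst;
	int err = 0;

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = *daddr;

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error)
		err = dst->error;
	else
		*mtu = dst_mtu(dst);
	dst_release(dst);
	return err;
}
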
953 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
954 {
955 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
956 struct dst_entry *new = NULL;
957
958 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
959 if (rt) {
960 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
961 rt6_init_peer(rt, net->ipv6.peers);
962
963 new = &rt->dst;
964
965 new->__use = 1;
966 new->input = dst_discard;
967 new->output = dst_discard;
968
969 if (dst_metrics_read_only(&ort->dst))
970 new->_metrics = ort->dst._metrics;
971 else
972 dst_copy_metrics(new, &ort->dst);
973 rt->rt6i_idev = ort->rt6i_idev;
974 if (rt->rt6i_idev)
975 in6_dev_hold(rt->rt6i_idev);
976
977 rt->rt6i_gateway = ort->rt6i_gateway;
978 rt->rt6i_flags = ort->rt6i_flags;
979 rt6_clean_expires(rt);
980 rt->rt6i_metric = 0;
981
982 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
983 #ifdef CONFIG_IPV6_SUBTREES
984 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
985 #endif
986
987 dst_free(new);
988 }
989
990 dst_release(dst_orig);
991 return new ? new : ERR_PTR(-ENOMEM);
992 }
993
994 /*
995 * Destination cache support functions
996 */
997
998 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
999 {
1000 struct rt6_info *rt;
1001
1002 rt = (struct rt6_info *) dst;
1003
1004 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1005 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1006 if (!rt6_has_peer(rt))
1007 rt6_bind_peer(rt, 0);
1008 rt->rt6i_peer_genid = rt6_peer_genid();
1009 }
1010 return dst;
1011 }
1012 return NULL;
1013 }
1014
1015 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1016 {
1017 struct rt6_info *rt = (struct rt6_info *) dst;
1018
1019 if (rt) {
1020 if (rt->rt6i_flags & RTF_CACHE) {
1021 if (rt6_check_expired(rt)) {
1022 ip6_del_rt(rt);
1023 dst = NULL;
1024 }
1025 } else {
1026 dst_release(dst);
1027 dst = NULL;
1028 }
1029 }
1030 return dst;
1031 }
1032
1033 static void ip6_link_failure(struct sk_buff *skb)
1034 {
1035 struct rt6_info *rt;
1036
1037 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1038
1039 rt = (struct rt6_info *) skb_dst(skb);
1040 if (rt) {
1041 if (rt->rt6i_flags & RTF_CACHE)
1042 rt6_update_expires(rt, 0);
1043 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1044 rt->rt6i_node->fn_sernum = -1;
1045 }
1046 }
1047
1048 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1049 {
1050 struct rt6_info *rt6 = (struct rt6_info*)dst;
1051
1052 dst_confirm(dst);
1053 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1054 struct net *net = dev_net(dst->dev);
1055
1056 rt6->rt6i_flags |= RTF_MODIFIED;
1057 if (mtu < IPV6_MIN_MTU) {
1058 u32 features = dst_metric(dst, RTAX_FEATURES);
1059 mtu = IPV6_MIN_MTU;
1060 features |= RTAX_FEATURE_ALLFRAG;
1061 dst_metric_set(dst, RTAX_FEATURES, features);
1062 }
1063 dst_metric_set(dst, RTAX_MTU, mtu);
1064 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1065 }
1066 }
1067
1068 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1069 int oif, u32 mark)
1070 {
1071 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1072 struct dst_entry *dst;
1073 struct flowi6 fl6;
1074
1075 memset(&fl6, 0, sizeof(fl6));
1076 fl6.flowi6_oif = oif;
1077 fl6.flowi6_mark = mark;
1078 fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS;
1079 fl6.daddr = iph->daddr;
1080 fl6.saddr = iph->saddr;
1081 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1082
1083 dst = ip6_route_output(net, NULL, &fl6);
1084 if (!dst->error)
1085 ip6_rt_update_pmtu(dst, ntohl(mtu));
1086 dst_release(dst);
1087 }
1088 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1089
1090 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1091 {
1092 ip6_update_pmtu(skb, sock_net(sk), mtu,
1093 sk->sk_bound_dev_if, sk->sk_mark);
1094 }
1095 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1096
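
/*
 * Illustrative aside (not part of the original file): a sketch of how a
 * transport protocol's ICMPv6 error handler can use ip6_sk_update_pmtu()
 * when a Packet Too Big message arrives.  The real err_handler signature
 * carries more arguments; "info" is the MTU field from the ICMPv6 header,
 * still in network byte order, which is exactly what ip6_sk_update_pmtu()
 * expects.
 */
static void example_err_handler(struct sk_buff *skb, struct sock *sk,
				u8 type, __be32 info)
{
	if (type == ICMPV6_PKT_TOOBIG)
		ip6_sk_update_pmtu(skb, sk, info);
}
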
1097 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1098 {
1099 struct net_device *dev = dst->dev;
1100 unsigned int mtu = dst_mtu(dst);
1101 struct net *net = dev_net(dev);
1102
1103 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1104
1105 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1106 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1107
1108 /*
1109 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1110 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1111 * IPV6_MAXPLEN is also valid and means: "any MSS,
1112 * rely only on pmtu discovery"
1113 */
1114 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1115 mtu = IPV6_MAXPLEN;
1116 return mtu;
1117 }
1118
1119 static unsigned int ip6_mtu(const struct dst_entry *dst)
1120 {
1121 struct inet6_dev *idev;
1122 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1123
1124 if (mtu)
1125 return mtu;
1126
1127 mtu = IPV6_MIN_MTU;
1128
1129 rcu_read_lock();
1130 idev = __in6_dev_get(dst->dev);
1131 if (idev)
1132 mtu = idev->cnf.mtu6;
1133 rcu_read_unlock();
1134
1135 return mtu;
1136 }
1137
1138 static struct dst_entry *icmp6_dst_gc_list;
1139 static DEFINE_SPINLOCK(icmp6_dst_lock);
1140
1141 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1142 struct neighbour *neigh,
1143 struct flowi6 *fl6)
1144 {
1145 struct dst_entry *dst;
1146 struct rt6_info *rt;
1147 struct inet6_dev *idev = in6_dev_get(dev);
1148 struct net *net = dev_net(dev);
1149
1150 if (unlikely(!idev))
1151 return ERR_PTR(-ENODEV);
1152
1153 rt = ip6_dst_alloc(net, dev, 0, NULL);
1154 if (unlikely(!rt)) {
1155 in6_dev_put(idev);
1156 dst = ERR_PTR(-ENOMEM);
1157 goto out;
1158 }
1159
1160 if (neigh)
1161 neigh_hold(neigh);
1162 else {
1163 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
1164 if (IS_ERR(neigh)) {
1165 in6_dev_put(idev);
1166 dst_free(&rt->dst);
1167 return ERR_CAST(neigh);
1168 }
1169 }
1170
1171 rt->dst.flags |= DST_HOST;
1172 rt->dst.output = ip6_output;
1173 dst_set_neighbour(&rt->dst, neigh);
1174 atomic_set(&rt->dst.__refcnt, 1);
1175 rt->rt6i_dst.addr = fl6->daddr;
1176 rt->rt6i_dst.plen = 128;
1177 rt->rt6i_idev = idev;
1178 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1179
1180 spin_lock_bh(&icmp6_dst_lock);
1181 rt->dst.next = icmp6_dst_gc_list;
1182 icmp6_dst_gc_list = &rt->dst;
1183 spin_unlock_bh(&icmp6_dst_lock);
1184
1185 fib6_force_start_gc(net);
1186
1187 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1188
1189 out:
1190 return dst;
1191 }
1192
1193 int icmp6_dst_gc(void)
1194 {
1195 struct dst_entry *dst, **pprev;
1196 int more = 0;
1197
1198 spin_lock_bh(&icmp6_dst_lock);
1199 pprev = &icmp6_dst_gc_list;
1200
1201 while ((dst = *pprev) != NULL) {
1202 if (!atomic_read(&dst->__refcnt)) {
1203 *pprev = dst->next;
1204 dst_free(dst);
1205 } else {
1206 pprev = &dst->next;
1207 ++more;
1208 }
1209 }
1210
1211 spin_unlock_bh(&icmp6_dst_lock);
1212
1213 return more;
1214 }
1215
1216 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1217 void *arg)
1218 {
1219 struct dst_entry *dst, **pprev;
1220
1221 spin_lock_bh(&icmp6_dst_lock);
1222 pprev = &icmp6_dst_gc_list;
1223 while ((dst = *pprev) != NULL) {
1224 struct rt6_info *rt = (struct rt6_info *) dst;
1225 if (func(rt, arg)) {
1226 *pprev = dst->next;
1227 dst_free(dst);
1228 } else {
1229 pprev = &dst->next;
1230 }
1231 }
1232 spin_unlock_bh(&icmp6_dst_lock);
1233 }
1234
1235 static int ip6_dst_gc(struct dst_ops *ops)
1236 {
1237 unsigned long now = jiffies;
1238 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1239 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1240 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1241 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1242 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1243 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1244 int entries;
1245
1246 entries = dst_entries_get_fast(ops);
1247 if (time_after(rt_last_gc + rt_min_interval, now) &&
1248 entries <= rt_max_size)
1249 goto out;
1250
1251 net->ipv6.ip6_rt_gc_expire++;
1252 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1253 net->ipv6.ip6_rt_last_gc = now;
1254 entries = dst_entries_get_slow(ops);
1255 if (entries < ops->gc_thresh)
1256 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1257 out:
1258 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1259 return entries > rt_max_size;
1260 }
1261
1262 /* Clean host part of a prefix. Not necessary in radix tree,
1263 but results in cleaner routing tables.
1264
1265 Remove it only when everything is known to work!
1266 */
1267
1268 int ip6_dst_hoplimit(struct dst_entry *dst)
1269 {
1270 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1271 if (hoplimit == 0) {
1272 struct net_device *dev = dst->dev;
1273 struct inet6_dev *idev;
1274
1275 rcu_read_lock();
1276 idev = __in6_dev_get(dev);
1277 if (idev)
1278 hoplimit = idev->cnf.hop_limit;
1279 else
1280 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1281 rcu_read_unlock();
1282 }
1283 return hoplimit;
1284 }
1285 EXPORT_SYMBOL(ip6_dst_hoplimit);
1286
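
/*
 * Illustrative aside (not part of the original file): how a sender
 * typically chooses the Hop Limit for an outgoing packet.  A negative
 * per-socket hop_limit means "not set", in which case ip6_dst_hoplimit()
 * supplies the per-device / per-namespace default.  The helper name is
 * invented for the sketch.
 */
static int example_pick_hoplimit(const struct ipv6_pinfo *np,
				 struct dst_entry *dst)
{
	int hlimit = np ? np->hop_limit : -1;

	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);
	return hlimit;
}
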
1287 /*
1288 *
1289 */
1290
1291 int ip6_route_add(struct fib6_config *cfg)
1292 {
1293 int err;
1294 struct net *net = cfg->fc_nlinfo.nl_net;
1295 struct rt6_info *rt = NULL;
1296 struct net_device *dev = NULL;
1297 struct inet6_dev *idev = NULL;
1298 struct fib6_table *table;
1299 int addr_type;
1300
1301 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1302 return -EINVAL;
1303 #ifndef CONFIG_IPV6_SUBTREES
1304 if (cfg->fc_src_len)
1305 return -EINVAL;
1306 #endif
1307 if (cfg->fc_ifindex) {
1308 err = -ENODEV;
1309 dev = dev_get_by_index(net, cfg->fc_ifindex);
1310 if (!dev)
1311 goto out;
1312 idev = in6_dev_get(dev);
1313 if (!idev)
1314 goto out;
1315 }
1316
1317 if (cfg->fc_metric == 0)
1318 cfg->fc_metric = IP6_RT_PRIO_USER;
1319
1320 err = -ENOBUFS;
1321 if (cfg->fc_nlinfo.nlh &&
1322 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1323 table = fib6_get_table(net, cfg->fc_table);
1324 if (!table) {
1325 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1326 table = fib6_new_table(net, cfg->fc_table);
1327 }
1328 } else {
1329 table = fib6_new_table(net, cfg->fc_table);
1330 }
1331
1332 if (!table)
1333 goto out;
1334
1335 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1336
1337 if (!rt) {
1338 err = -ENOMEM;
1339 goto out;
1340 }
1341
1342 rt->dst.obsolete = -1;
1343
1344 if (cfg->fc_flags & RTF_EXPIRES)
1345 rt6_set_expires(rt, jiffies +
1346 clock_t_to_jiffies(cfg->fc_expires));
1347 else
1348 rt6_clean_expires(rt);
1349
1350 if (cfg->fc_protocol == RTPROT_UNSPEC)
1351 cfg->fc_protocol = RTPROT_BOOT;
1352 rt->rt6i_protocol = cfg->fc_protocol;
1353
1354 addr_type = ipv6_addr_type(&cfg->fc_dst);
1355
1356 if (addr_type & IPV6_ADDR_MULTICAST)
1357 rt->dst.input = ip6_mc_input;
1358 else if (cfg->fc_flags & RTF_LOCAL)
1359 rt->dst.input = ip6_input;
1360 else
1361 rt->dst.input = ip6_forward;
1362
1363 rt->dst.output = ip6_output;
1364
1365 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1366 rt->rt6i_dst.plen = cfg->fc_dst_len;
1367 if (rt->rt6i_dst.plen == 128)
1368 rt->dst.flags |= DST_HOST;
1369
1370 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1371 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1372 if (!metrics) {
1373 err = -ENOMEM;
1374 goto out;
1375 }
1376 dst_init_metrics(&rt->dst, metrics, 0);
1377 }
1378 #ifdef CONFIG_IPV6_SUBTREES
1379 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1380 rt->rt6i_src.plen = cfg->fc_src_len;
1381 #endif
1382
1383 rt->rt6i_metric = cfg->fc_metric;
1384
1385 /* We cannot add true routes via loopback here,
1386 they would result in kernel looping; promote them to reject routes
1387 */
1388 if ((cfg->fc_flags & RTF_REJECT) ||
1389 (dev && (dev->flags & IFF_LOOPBACK) &&
1390 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1391 !(cfg->fc_flags & RTF_LOCAL))) {
1392 /* hold loopback dev/idev if we haven't done so. */
1393 if (dev != net->loopback_dev) {
1394 if (dev) {
1395 dev_put(dev);
1396 in6_dev_put(idev);
1397 }
1398 dev = net->loopback_dev;
1399 dev_hold(dev);
1400 idev = in6_dev_get(dev);
1401 if (!idev) {
1402 err = -ENODEV;
1403 goto out;
1404 }
1405 }
1406 rt->dst.output = ip6_pkt_discard_out;
1407 rt->dst.input = ip6_pkt_discard;
1408 rt->dst.error = -ENETUNREACH;
1409 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1410 goto install_route;
1411 }
1412
1413 if (cfg->fc_flags & RTF_GATEWAY) {
1414 const struct in6_addr *gw_addr;
1415 int gwa_type;
1416
1417 gw_addr = &cfg->fc_gateway;
1418 rt->rt6i_gateway = *gw_addr;
1419 gwa_type = ipv6_addr_type(gw_addr);
1420
1421 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1422 struct rt6_info *grt;
1423
1424 /* IPv6 strictly forbids using non-link-local
1425 addresses as the nexthop address.
1426 Otherwise, the router will not be able to send redirects.
1427 This is very good, but in some (rare!) circumstances
1428 (SIT, PtP, NBMA NOARP links) it is handy to allow
1429 some exceptions. --ANK
1430 */
1431 err = -EINVAL;
1432 if (!(gwa_type & IPV6_ADDR_UNICAST))
1433 goto out;
1434
1435 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1436
1437 err = -EHOSTUNREACH;
1438 if (!grt)
1439 goto out;
1440 if (dev) {
1441 if (dev != grt->dst.dev) {
1442 dst_release(&grt->dst);
1443 goto out;
1444 }
1445 } else {
1446 dev = grt->dst.dev;
1447 idev = grt->rt6i_idev;
1448 dev_hold(dev);
1449 in6_dev_hold(grt->rt6i_idev);
1450 }
1451 if (!(grt->rt6i_flags & RTF_GATEWAY))
1452 err = 0;
1453 dst_release(&grt->dst);
1454
1455 if (err)
1456 goto out;
1457 }
1458 err = -EINVAL;
1459 if (!dev || (dev->flags & IFF_LOOPBACK))
1460 goto out;
1461 }
1462
1463 err = -ENODEV;
1464 if (!dev)
1465 goto out;
1466
1467 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1468 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1469 err = -EINVAL;
1470 goto out;
1471 }
1472 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1473 rt->rt6i_prefsrc.plen = 128;
1474 } else
1475 rt->rt6i_prefsrc.plen = 0;
1476
1477 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1478 err = rt6_bind_neighbour(rt, dev);
1479 if (err)
1480 goto out;
1481 }
1482
1483 rt->rt6i_flags = cfg->fc_flags;
1484
1485 install_route:
1486 if (cfg->fc_mx) {
1487 struct nlattr *nla;
1488 int remaining;
1489
1490 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1491 int type = nla_type(nla);
1492
1493 if (type) {
1494 if (type > RTAX_MAX) {
1495 err = -EINVAL;
1496 goto out;
1497 }
1498
1499 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1500 }
1501 }
1502 }
1503
1504 rt->dst.dev = dev;
1505 rt->rt6i_idev = idev;
1506 rt->rt6i_table = table;
1507
1508 cfg->fc_nlinfo.nl_net = dev_net(dev);
1509
1510 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1511
1512 out:
1513 if (dev)
1514 dev_put(dev);
1515 if (idev)
1516 in6_dev_put(idev);
1517 if (rt)
1518 dst_free(&rt->dst);
1519 return err;
1520 }
1521
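
/*
 * Illustrative aside (not part of the original file): how an in-kernel
 * caller might fill a struct fib6_config and hand it to ip6_route_add(),
 * mirroring rt6_add_route_info()/rt6_add_dflt_router() below.  The prefix,
 * ifindex and net are assumed to come from the caller; the function name
 * is invented for the sketch.
 */
static int example_add_onlink_route(struct net *net, int ifindex,
				    const struct in6_addr *prefix, int plen)
{
	struct fib6_config cfg = {
		.fc_table	  = RT6_TABLE_MAIN,
		.fc_metric	  = IP6_RT_PRIO_USER,
		.fc_ifindex	  = ifindex,
		.fc_dst_len	  = plen,
		.fc_flags	  = RTF_UP,
		.fc_nlinfo.nl_net = net,
	};

	cfg.fc_dst = *prefix;

	return ip6_route_add(&cfg);
}
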
1522 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1523 {
1524 int err;
1525 struct fib6_table *table;
1526 struct net *net = dev_net(rt->dst.dev);
1527
1528 if (rt == net->ipv6.ip6_null_entry)
1529 return -ENOENT;
1530
1531 table = rt->rt6i_table;
1532 write_lock_bh(&table->tb6_lock);
1533
1534 err = fib6_del(rt, info);
1535 dst_release(&rt->dst);
1536
1537 write_unlock_bh(&table->tb6_lock);
1538
1539 return err;
1540 }
1541
1542 int ip6_del_rt(struct rt6_info *rt)
1543 {
1544 struct nl_info info = {
1545 .nl_net = dev_net(rt->dst.dev),
1546 };
1547 return __ip6_del_rt(rt, &info);
1548 }
1549
1550 static int ip6_route_del(struct fib6_config *cfg)
1551 {
1552 struct fib6_table *table;
1553 struct fib6_node *fn;
1554 struct rt6_info *rt;
1555 int err = -ESRCH;
1556
1557 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1558 if (!table)
1559 return err;
1560
1561 read_lock_bh(&table->tb6_lock);
1562
1563 fn = fib6_locate(&table->tb6_root,
1564 &cfg->fc_dst, cfg->fc_dst_len,
1565 &cfg->fc_src, cfg->fc_src_len);
1566
1567 if (fn) {
1568 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1569 if (cfg->fc_ifindex &&
1570 (!rt->dst.dev ||
1571 rt->dst.dev->ifindex != cfg->fc_ifindex))
1572 continue;
1573 if (cfg->fc_flags & RTF_GATEWAY &&
1574 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1575 continue;
1576 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1577 continue;
1578 dst_hold(&rt->dst);
1579 read_unlock_bh(&table->tb6_lock);
1580
1581 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1582 }
1583 }
1584 read_unlock_bh(&table->tb6_lock);
1585
1586 return err;
1587 }
1588
1589 /*
1590 * Handle redirects
1591 */
1592 struct ip6rd_flowi {
1593 struct flowi6 fl6;
1594 struct in6_addr gateway;
1595 };
1596
1597 static struct rt6_info *__ip6_route_redirect(struct net *net,
1598 struct fib6_table *table,
1599 struct flowi6 *fl6,
1600 int flags)
1601 {
1602 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1603 struct rt6_info *rt;
1604 struct fib6_node *fn;
1605
1606 /*
1607 * Get the "current" route for this destination and
1608 * check if the redirect has come from an appropriate router.
1609 *
1610 * RFC 2461 specifies that redirects should only be
1611 * accepted if they come from the nexthop to the target.
1612 * Due to the way the routes are chosen, this notion
1613 * is a bit fuzzy and one might need to check all possible
1614 * routes.
1615 */
1616
1617 read_lock_bh(&table->tb6_lock);
1618 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1619 restart:
1620 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1621 /*
1622 * Current route is on-link; redirect is always invalid.
1623 *
1624 * It seems the previous statement is not true. It could
1625 * be a node which regards us as on-link (e.g. a proxy ndisc entry),
1626 * but then the router serving it might decide that we should
1627 * know the truth 8)8) --ANK (980726).
1628 */
1629 if (rt6_check_expired(rt))
1630 continue;
1631 if (!(rt->rt6i_flags & RTF_GATEWAY))
1632 continue;
1633 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1634 continue;
1635 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1636 continue;
1637 break;
1638 }
1639
1640 if (!rt)
1641 rt = net->ipv6.ip6_null_entry;
1642 BACKTRACK(net, &fl6->saddr);
1643 out:
1644 dst_hold(&rt->dst);
1645
1646 read_unlock_bh(&table->tb6_lock);
1647
1648 return rt;
1649 };
1650
1651 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1652 const struct in6_addr *src,
1653 const struct in6_addr *gateway,
1654 struct net_device *dev)
1655 {
1656 int flags = RT6_LOOKUP_F_HAS_SADDR;
1657 struct net *net = dev_net(dev);
1658 struct ip6rd_flowi rdfl = {
1659 .fl6 = {
1660 .flowi6_oif = dev->ifindex,
1661 .daddr = *dest,
1662 .saddr = *src,
1663 },
1664 };
1665
1666 rdfl.gateway = *gateway;
1667
1668 if (rt6_need_strict(dest))
1669 flags |= RT6_LOOKUP_F_IFACE;
1670
1671 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1672 flags, __ip6_route_redirect);
1673 }
1674
1675 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1676 const struct in6_addr *saddr,
1677 struct neighbour *neigh, u8 *lladdr, int on_link)
1678 {
1679 struct rt6_info *rt, *nrt = NULL;
1680 struct netevent_redirect netevent;
1681 struct net *net = dev_net(neigh->dev);
1682
1683 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1684
1685 if (rt == net->ipv6.ip6_null_entry) {
1686 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1687 goto out;
1688 }
1689
1690 /*
1691 * We have finally decided to accept it.
1692 */
1693
1694 neigh_update(neigh, lladdr, NUD_STALE,
1695 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1696 NEIGH_UPDATE_F_OVERRIDE|
1697 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1698 NEIGH_UPDATE_F_ISROUTER))
1699 );
1700
1701 /*
1702 * Redirect received -> path was valid.
1703 * Look, redirects are sent only in response to data packets,
1704 * so that this nexthop apparently is reachable. --ANK
1705 */
1706 dst_confirm(&rt->dst);
1707
1708 /* Duplicate redirect: silently ignore. */
1709 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1710 goto out;
1711
1712 nrt = ip6_rt_copy(rt, dest);
1713 if (!nrt)
1714 goto out;
1715
1716 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1717 if (on_link)
1718 nrt->rt6i_flags &= ~RTF_GATEWAY;
1719
1720 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1721 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1722
1723 if (ip6_ins_rt(nrt))
1724 goto out;
1725
1726 netevent.old = &rt->dst;
1727 netevent.new = &nrt->dst;
1728 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1729
1730 if (rt->rt6i_flags & RTF_CACHE) {
1731 ip6_del_rt(rt);
1732 return;
1733 }
1734
1735 out:
1736 dst_release(&rt->dst);
1737 }
1738
1739 /*
1740 * Misc support functions
1741 */
1742
1743 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1744 const struct in6_addr *dest)
1745 {
1746 struct net *net = dev_net(ort->dst.dev);
1747 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1748 ort->rt6i_table);
1749
1750 if (rt) {
1751 rt->dst.input = ort->dst.input;
1752 rt->dst.output = ort->dst.output;
1753 rt->dst.flags |= DST_HOST;
1754
1755 rt->rt6i_dst.addr = *dest;
1756 rt->rt6i_dst.plen = 128;
1757 dst_copy_metrics(&rt->dst, &ort->dst);
1758 rt->dst.error = ort->dst.error;
1759 rt->rt6i_idev = ort->rt6i_idev;
1760 if (rt->rt6i_idev)
1761 in6_dev_hold(rt->rt6i_idev);
1762 rt->dst.lastuse = jiffies;
1763
1764 rt->rt6i_gateway = ort->rt6i_gateway;
1765 rt->rt6i_flags = ort->rt6i_flags;
1766 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1767 (RTF_DEFAULT | RTF_ADDRCONF))
1768 rt6_set_from(rt, ort);
1769 else
1770 rt6_clean_expires(rt);
1771 rt->rt6i_metric = 0;
1772
1773 #ifdef CONFIG_IPV6_SUBTREES
1774 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1775 #endif
1776 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1777 rt->rt6i_table = ort->rt6i_table;
1778 }
1779 return rt;
1780 }
1781
1782 #ifdef CONFIG_IPV6_ROUTE_INFO
1783 static struct rt6_info *rt6_get_route_info(struct net *net,
1784 const struct in6_addr *prefix, int prefixlen,
1785 const struct in6_addr *gwaddr, int ifindex)
1786 {
1787 struct fib6_node *fn;
1788 struct rt6_info *rt = NULL;
1789 struct fib6_table *table;
1790
1791 table = fib6_get_table(net, RT6_TABLE_INFO);
1792 if (!table)
1793 return NULL;
1794
1795 write_lock_bh(&table->tb6_lock);
1796 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1797 if (!fn)
1798 goto out;
1799
1800 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1801 if (rt->dst.dev->ifindex != ifindex)
1802 continue;
1803 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1804 continue;
1805 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1806 continue;
1807 dst_hold(&rt->dst);
1808 break;
1809 }
1810 out:
1811 write_unlock_bh(&table->tb6_lock);
1812 return rt;
1813 }
1814
1815 static struct rt6_info *rt6_add_route_info(struct net *net,
1816 const struct in6_addr *prefix, int prefixlen,
1817 const struct in6_addr *gwaddr, int ifindex,
1818 unsigned int pref)
1819 {
1820 struct fib6_config cfg = {
1821 .fc_table = RT6_TABLE_INFO,
1822 .fc_metric = IP6_RT_PRIO_USER,
1823 .fc_ifindex = ifindex,
1824 .fc_dst_len = prefixlen,
1825 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1826 RTF_UP | RTF_PREF(pref),
1827 .fc_nlinfo.pid = 0,
1828 .fc_nlinfo.nlh = NULL,
1829 .fc_nlinfo.nl_net = net,
1830 };
1831
1832 cfg.fc_dst = *prefix;
1833 cfg.fc_gateway = *gwaddr;
1834
1835 /* We should treat it as a default route if prefix length is 0. */
1836 if (!prefixlen)
1837 cfg.fc_flags |= RTF_DEFAULT;
1838
1839 ip6_route_add(&cfg);
1840
1841 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1842 }
1843 #endif
1844
1845 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1846 {
1847 struct rt6_info *rt;
1848 struct fib6_table *table;
1849
1850 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1851 if (!table)
1852 return NULL;
1853
1854 write_lock_bh(&table->tb6_lock);
1855 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1856 if (dev == rt->dst.dev &&
1857 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1858 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1859 break;
1860 }
1861 if (rt)
1862 dst_hold(&rt->dst);
1863 write_unlock_bh(&table->tb6_lock);
1864 return rt;
1865 }
1866
1867 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1868 struct net_device *dev,
1869 unsigned int pref)
1870 {
1871 struct fib6_config cfg = {
1872 .fc_table = RT6_TABLE_DFLT,
1873 .fc_metric = IP6_RT_PRIO_USER,
1874 .fc_ifindex = dev->ifindex,
1875 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1876 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1877 .fc_nlinfo.pid = 0,
1878 .fc_nlinfo.nlh = NULL,
1879 .fc_nlinfo.nl_net = dev_net(dev),
1880 };
1881
1882 cfg.fc_gateway = *gwaddr;
1883
1884 ip6_route_add(&cfg);
1885
1886 return rt6_get_dflt_router(gwaddr, dev);
1887 }
1888
1889 void rt6_purge_dflt_routers(struct net *net)
1890 {
1891 struct rt6_info *rt;
1892 struct fib6_table *table;
1893
1894 /* NOTE: Keep consistent with rt6_get_dflt_router */
1895 table = fib6_get_table(net, RT6_TABLE_DFLT);
1896 if (!table)
1897 return;
1898
1899 restart:
1900 read_lock_bh(&table->tb6_lock);
1901 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1902 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1903 dst_hold(&rt->dst);
1904 read_unlock_bh(&table->tb6_lock);
1905 ip6_del_rt(rt);
1906 goto restart;
1907 }
1908 }
1909 read_unlock_bh(&table->tb6_lock);
1910 }
1911
1912 static void rtmsg_to_fib6_config(struct net *net,
1913 struct in6_rtmsg *rtmsg,
1914 struct fib6_config *cfg)
1915 {
1916 memset(cfg, 0, sizeof(*cfg));
1917
1918 cfg->fc_table = RT6_TABLE_MAIN;
1919 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1920 cfg->fc_metric = rtmsg->rtmsg_metric;
1921 cfg->fc_expires = rtmsg->rtmsg_info;
1922 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1923 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1924 cfg->fc_flags = rtmsg->rtmsg_flags;
1925
1926 cfg->fc_nlinfo.nl_net = net;
1927
1928 cfg->fc_dst = rtmsg->rtmsg_dst;
1929 cfg->fc_src = rtmsg->rtmsg_src;
1930 cfg->fc_gateway = rtmsg->rtmsg_gateway;
1931 }
1932
1933 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1934 {
1935 struct fib6_config cfg;
1936 struct in6_rtmsg rtmsg;
1937 int err;
1938
1939 switch(cmd) {
1940 case SIOCADDRT: /* Add a route */
1941 case SIOCDELRT: /* Delete a route */
1942 if (!capable(CAP_NET_ADMIN))
1943 return -EPERM;
1944 err = copy_from_user(&rtmsg, arg,
1945 sizeof(struct in6_rtmsg));
1946 if (err)
1947 return -EFAULT;
1948
1949 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1950
1951 rtnl_lock();
1952 switch (cmd) {
1953 case SIOCADDRT:
1954 err = ip6_route_add(&cfg);
1955 break;
1956 case SIOCDELRT:
1957 err = ip6_route_del(&cfg);
1958 break;
1959 default:
1960 err = -EINVAL;
1961 }
1962 rtnl_unlock();
1963
1964 return err;
1965 }
1966
1967 return -EINVAL;
1968 }
1969
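
/*
 * Illustrative aside (not part of the original file, userspace): the ioctl
 * interface handled above is the one used by route(8).  A minimal sketch of
 * adding an IPv6 gateway route with SIOCADDRT; header needs may vary by
 * libc, and addresses/error handling are deliberately simplistic.
 */
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <net/route.h>		/* struct in6_rtmsg, RTF_UP, RTF_GATEWAY */

static int example_add_route(const char *dst, int plen,
			     const char *gw, int ifindex)
{
	struct in6_rtmsg rt;
	int fd, ret;

	fd = socket(AF_INET6, SOCK_DGRAM, 0);
	if (fd < 0)
		return -1;

	memset(&rt, 0, sizeof(rt));
	inet_pton(AF_INET6, dst, &rt.rtmsg_dst);
	inet_pton(AF_INET6, gw, &rt.rtmsg_gateway);
	rt.rtmsg_dst_len = plen;
	rt.rtmsg_metric  = 1;
	rt.rtmsg_ifindex = ifindex;
	rt.rtmsg_flags   = RTF_UP | RTF_GATEWAY;

	ret = ioctl(fd, SIOCADDRT, &rt);
	close(fd);
	return ret;
}
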
1970 /*
1971 * Drop the packet on the floor
1972 */
1973
1974 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1975 {
1976 int type;
1977 struct dst_entry *dst = skb_dst(skb);
1978 switch (ipstats_mib_noroutes) {
1979 case IPSTATS_MIB_INNOROUTES:
1980 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1981 if (type == IPV6_ADDR_ANY) {
1982 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1983 IPSTATS_MIB_INADDRERRORS);
1984 break;
1985 }
1986 /* FALLTHROUGH */
1987 case IPSTATS_MIB_OUTNOROUTES:
1988 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1989 ipstats_mib_noroutes);
1990 break;
1991 }
1992 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1993 kfree_skb(skb);
1994 return 0;
1995 }
1996
1997 static int ip6_pkt_discard(struct sk_buff *skb)
1998 {
1999 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2000 }
2001
2002 static int ip6_pkt_discard_out(struct sk_buff *skb)
2003 {
2004 skb->dev = skb_dst(skb)->dev;
2005 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2006 }
2007
2008 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2009
2010 static int ip6_pkt_prohibit(struct sk_buff *skb)
2011 {
2012 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2013 }
2014
2015 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2016 {
2017 skb->dev = skb_dst(skb)->dev;
2018 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2019 }
2020
2021 #endif
2022
2023 /*
2024 * Allocate a dst for local (unicast / anycast) address.
2025 */
2026
2027 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2028 const struct in6_addr *addr,
2029 bool anycast)
2030 {
2031 struct net *net = dev_net(idev->dev);
2032 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2033 int err;
2034
2035 if (!rt) {
2036 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2037 return ERR_PTR(-ENOMEM);
2038 }
2039
2040 in6_dev_hold(idev);
2041
2042 rt->dst.flags |= DST_HOST;
2043 rt->dst.input = ip6_input;
2044 rt->dst.output = ip6_output;
2045 rt->rt6i_idev = idev;
2046 rt->dst.obsolete = -1;
2047
2048 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2049 if (anycast)
2050 rt->rt6i_flags |= RTF_ANYCAST;
2051 else
2052 rt->rt6i_flags |= RTF_LOCAL;
2053 err = rt6_bind_neighbour(rt, rt->dst.dev);
2054 if (err) {
2055 dst_free(&rt->dst);
2056 return ERR_PTR(err);
2057 }
2058
2059 rt->rt6i_dst.addr = *addr;
2060 rt->rt6i_dst.plen = 128;
2061 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2062
2063 atomic_set(&rt->dst.__refcnt, 1);
2064
2065 return rt;
2066 }
2067
2068 int ip6_route_get_saddr(struct net *net,
2069 struct rt6_info *rt,
2070 const struct in6_addr *daddr,
2071 unsigned int prefs,
2072 struct in6_addr *saddr)
2073 {
2074 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2075 int err = 0;
2076 if (rt->rt6i_prefsrc.plen)
2077 *saddr = rt->rt6i_prefsrc.addr;
2078 else
2079 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2080 daddr, prefs, saddr);
2081 return err;
2082 }
2083
2084 /* remove deleted ip from prefsrc entries */
2085 struct arg_dev_net_ip {
2086 struct net_device *dev;
2087 struct net *net;
2088 struct in6_addr *addr;
2089 };
2090
2091 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2092 {
2093 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2094 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2095 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2096
2097 if (((void *)rt->dst.dev == dev || !dev) &&
2098 rt != net->ipv6.ip6_null_entry &&
2099 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2100 /* remove prefsrc entry */
2101 rt->rt6i_prefsrc.plen = 0;
2102 }
2103 return 0;
2104 }
2105
2106 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2107 {
2108 struct net *net = dev_net(ifp->idev->dev);
2109 struct arg_dev_net_ip adni = {
2110 .dev = ifp->idev->dev,
2111 .net = net,
2112 .addr = &ifp->addr,
2113 };
2114 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2115 }
2116
2117 struct arg_dev_net {
2118 struct net_device *dev;
2119 struct net *net;
2120 };
2121
2122 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2123 {
2124 const struct arg_dev_net *adn = arg;
2125 const struct net_device *dev = adn->dev;
2126
2127 if ((rt->dst.dev == dev || !dev) &&
2128 rt != adn->net->ipv6.ip6_null_entry)
2129 return -1;
2130
2131 return 0;
2132 }
2133
2134 void rt6_ifdown(struct net *net, struct net_device *dev)
2135 {
2136 struct arg_dev_net adn = {
2137 .dev = dev,
2138 .net = net,
2139 };
2140
2141 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2142 icmp6_clean_all(fib6_ifdown, &adn);
2143 }
2144
2145 struct rt6_mtu_change_arg {
2146 struct net_device *dev;
2147 unsigned int mtu;
2148 };
2149
2150 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2151 {
2152 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2153 struct inet6_dev *idev;
2154
2155 /* In IPv6, PMTU discovery is not optional,
2156 so the RTAX_MTU lock cannot disable it.
2157 We still use this lock to block changes
2158 caused by addrconf/ndisc.
2159 */
2160
2161 idev = __in6_dev_get(arg->dev);
2162 if (!idev)
2163 return 0;
2164
2165 /* For an administrative MTU increase there is no way to discover
2166 an IPv6 PMTU increase, so the PMTU must be updated here.
2167 Since RFC 1981 does not cover administrative MTU increases
2168 (e.g. enabling jumbo frames), updating the PMTU on increase is a MUST.
2169 */
2170 /*
2171 If the new MTU is less than the route PMTU, the new MTU will be the
2172 lowest MTU in the path; update the route PMTU to reflect the
2173 decrease. If the new MTU is greater than the route PMTU, and the
2174 old MTU was the lowest MTU in the path, update the route PMTU
2175 to reflect the increase. In that case, if another node's MTU is now
2176 the lowest in the path, a Packet Too Big message will trigger
2177 PMTU discovery.
2178 */
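/* Illustration with hypothetical numbers, assuming idev->cnf.mtu6 still
 * holds the old device MTU when this callback runs: raising a device MTU
 * from 1500 to 9000 bumps a route whose PMTU is 1500 (== cnf.mtu6) up to
 * 9000, while a route whose 1280-byte PMTU was learnt from a Packet Too
 * Big message is left untouched.
 */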
2179 if (rt->dst.dev == arg->dev &&
2180 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2181 (dst_mtu(&rt->dst) >= arg->mtu ||
2182 (dst_mtu(&rt->dst) < arg->mtu &&
2183 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2184 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2185 }
2186 return 0;
2187 }
2188
2189 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2190 {
2191 struct rt6_mtu_change_arg arg = {
2192 .dev = dev,
2193 .mtu = mtu,
2194 };
2195
2196 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2197 }
2198
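/* Netlink attribute policy used when parsing RTM_*ROUTE requests. */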
2199 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2200 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2201 [RTA_OIF] = { .type = NLA_U32 },
2202 [RTA_IIF] = { .type = NLA_U32 },
2203 [RTA_PRIORITY] = { .type = NLA_U32 },
2204 [RTA_METRICS] = { .type = NLA_NESTED },
2205 };
2206
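/* Convert a routing netlink message into the fib6_config that
 * ip6_route_add()/ip6_route_del() operate on.
 */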
2207 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2208 struct fib6_config *cfg)
2209 {
2210 struct rtmsg *rtm;
2211 struct nlattr *tb[RTA_MAX+1];
2212 int err;
2213
2214 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2215 if (err < 0)
2216 goto errout;
2217
2218 err = -EINVAL;
2219 rtm = nlmsg_data(nlh);
2220 memset(cfg, 0, sizeof(*cfg));
2221
2222 cfg->fc_table = rtm->rtm_table;
2223 cfg->fc_dst_len = rtm->rtm_dst_len;
2224 cfg->fc_src_len = rtm->rtm_src_len;
2225 cfg->fc_flags = RTF_UP;
2226 cfg->fc_protocol = rtm->rtm_protocol;
2227
2228 if (rtm->rtm_type == RTN_UNREACHABLE)
2229 cfg->fc_flags |= RTF_REJECT;
2230
2231 if (rtm->rtm_type == RTN_LOCAL)
2232 cfg->fc_flags |= RTF_LOCAL;
2233
2234 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2235 cfg->fc_nlinfo.nlh = nlh;
2236 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2237
2238 if (tb[RTA_GATEWAY]) {
2239 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2240 cfg->fc_flags |= RTF_GATEWAY;
2241 }
2242
2243 if (tb[RTA_DST]) {
2244 int plen = (rtm->rtm_dst_len + 7) >> 3;
2245
2246 if (nla_len(tb[RTA_DST]) < plen)
2247 goto errout;
2248
2249 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2250 }
2251
2252 if (tb[RTA_SRC]) {
2253 int plen = (rtm->rtm_src_len + 7) >> 3;
2254
2255 if (nla_len(tb[RTA_SRC]) < plen)
2256 goto errout;
2257
2258 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2259 }
2260
2261 if (tb[RTA_PREFSRC])
2262 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2263
2264 if (tb[RTA_OIF])
2265 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2266
2267 if (tb[RTA_PRIORITY])
2268 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2269
2270 if (tb[RTA_METRICS]) {
2271 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2272 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2273 }
2274
2275 if (tb[RTA_TABLE])
2276 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2277
2278 err = 0;
2279 errout:
2280 return err;
2281 }
2282
2283 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2284 {
2285 struct fib6_config cfg;
2286 int err;
2287
2288 err = rtm_to_fib6_config(skb, nlh, &cfg);
2289 if (err < 0)
2290 return err;
2291
2292 return ip6_route_del(&cfg);
2293 }
2294
2295 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2296 {
2297 struct fib6_config cfg;
2298 int err;
2299
2300 err = rtm_to_fib6_config(skb, nlh, &cfg);
2301 if (err < 0)
2302 return err;
2303
2304 return ip6_route_add(&cfg);
2305 }
2306
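/* Worst-case size of a single route notification; used to size the skb
 * allocated in inet6_rt_notify().
 */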
2307 static inline size_t rt6_nlmsg_size(void)
2308 {
2309 return NLMSG_ALIGN(sizeof(struct rtmsg))
2310 + nla_total_size(16) /* RTA_SRC */
2311 + nla_total_size(16) /* RTA_DST */
2312 + nla_total_size(16) /* RTA_GATEWAY */
2313 + nla_total_size(16) /* RTA_PREFSRC */
2314 + nla_total_size(4) /* RTA_TABLE */
2315 + nla_total_size(4) /* RTA_IIF */
2316 + nla_total_size(4) /* RTA_OIF */
2317 + nla_total_size(4) /* RTA_PRIORITY */
2318 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2319 + nla_total_size(sizeof(struct rta_cacheinfo));
2320 }
2321
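/* Fill one route message for @rt into @skb.  A prefix-only dump returns 1
 * (skip) for non-prefix routes; -EMSGSIZE means the message did not fit.
 */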
2322 static int rt6_fill_node(struct net *net,
2323 struct sk_buff *skb, struct rt6_info *rt,
2324 struct in6_addr *dst, struct in6_addr *src,
2325 int iif, int type, u32 pid, u32 seq,
2326 int prefix, int nowait, unsigned int flags)
2327 {
2328 const struct inet_peer *peer;
2329 struct rtmsg *rtm;
2330 struct nlmsghdr *nlh;
2331 long expires;
2332 u32 table;
2333 struct neighbour *n;
2334 u32 ts, tsage;
2335
2336 if (prefix) { /* user wants prefix routes only */
2337 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2338 /* success since this is not a prefix route */
2339 return 1;
2340 }
2341 }
2342
2343 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2344 if (!nlh)
2345 return -EMSGSIZE;
2346
2347 rtm = nlmsg_data(nlh);
2348 rtm->rtm_family = AF_INET6;
2349 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2350 rtm->rtm_src_len = rt->rt6i_src.plen;
2351 rtm->rtm_tos = 0;
2352 if (rt->rt6i_table)
2353 table = rt->rt6i_table->tb6_id;
2354 else
2355 table = RT6_TABLE_UNSPEC;
2356 rtm->rtm_table = table;
2357 if (nla_put_u32(skb, RTA_TABLE, table))
2358 goto nla_put_failure;
2359 if (rt->rt6i_flags & RTF_REJECT)
2360 rtm->rtm_type = RTN_UNREACHABLE;
2361 else if (rt->rt6i_flags & RTF_LOCAL)
2362 rtm->rtm_type = RTN_LOCAL;
2363 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2364 rtm->rtm_type = RTN_LOCAL;
2365 else
2366 rtm->rtm_type = RTN_UNICAST;
2367 rtm->rtm_flags = 0;
2368 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2369 rtm->rtm_protocol = rt->rt6i_protocol;
2370 if (rt->rt6i_flags & RTF_DYNAMIC)
2371 rtm->rtm_protocol = RTPROT_REDIRECT;
2372 else if (rt->rt6i_flags & RTF_ADDRCONF)
2373 rtm->rtm_protocol = RTPROT_KERNEL;
2374 else if (rt->rt6i_flags & RTF_DEFAULT)
2375 rtm->rtm_protocol = RTPROT_RA;
2376
2377 if (rt->rt6i_flags & RTF_CACHE)
2378 rtm->rtm_flags |= RTM_F_CLONED;
2379
2380 if (dst) {
2381 if (nla_put(skb, RTA_DST, 16, dst))
2382 goto nla_put_failure;
2383 rtm->rtm_dst_len = 128;
2384 } else if (rtm->rtm_dst_len)
2385 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2386 goto nla_put_failure;
2387 #ifdef CONFIG_IPV6_SUBTREES
2388 if (src) {
2389 if (nla_put(skb, RTA_SRC, 16, src))
2390 goto nla_put_failure;
2391 rtm->rtm_src_len = 128;
2392 } else if (rtm->rtm_src_len &&
2393 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2394 goto nla_put_failure;
2395 #endif
2396 if (iif) {
2397 #ifdef CONFIG_IPV6_MROUTE
2398 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2399 int err = ip6mr_get_route(net, skb, rtm, nowait);
2400 if (err <= 0) {
2401 if (!nowait) {
2402 if (err == 0)
2403 return 0;
2404 goto nla_put_failure;
2405 } else {
2406 if (err == -EMSGSIZE)
2407 goto nla_put_failure;
2408 }
2409 }
2410 } else
2411 #endif
2412 if (nla_put_u32(skb, RTA_IIF, iif))
2413 goto nla_put_failure;
2414 } else if (dst) {
2415 struct in6_addr saddr_buf;
2416 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2417 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2418 goto nla_put_failure;
2419 }
2420
2421 if (rt->rt6i_prefsrc.plen) {
2422 struct in6_addr saddr_buf;
2423 saddr_buf = rt->rt6i_prefsrc.addr;
2424 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2425 goto nla_put_failure;
2426 }
2427
2428 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2429 goto nla_put_failure;
2430
2431 rcu_read_lock();
2432 n = dst_get_neighbour_noref(&rt->dst);
2433 if (n) {
2434 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2435 rcu_read_unlock();
2436 goto nla_put_failure;
2437 }
2438 }
2439 rcu_read_unlock();
2440
2441 if (rt->dst.dev &&
2442 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2443 goto nla_put_failure;
2444 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2445 goto nla_put_failure;
2446 if (!(rt->rt6i_flags & RTF_EXPIRES))
2447 expires = 0;
2448 else if (rt->dst.expires - jiffies < INT_MAX)
2449 expires = rt->dst.expires - jiffies;
2450 else
2451 expires = INT_MAX;
2452
2453 peer = NULL;
2454 if (rt6_has_peer(rt))
2455 peer = rt6_peer_ptr(rt);
2456 ts = tsage = 0;
2457 if (peer && peer->tcp_ts_stamp) {
2458 ts = peer->tcp_ts;
2459 tsage = get_seconds() - peer->tcp_ts_stamp;
2460 }
2461
2462 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
2463 expires, rt->dst.error) < 0)
2464 goto nla_put_failure;
2465
2466 return nlmsg_end(skb, nlh);
2467
2468 nla_put_failure:
2469 nlmsg_cancel(skb, nlh);
2470 return -EMSGSIZE;
2471 }
2472
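/* Dump callback used when the full routing table is dumped: emit one
 * RTM_NEWROUTE message per FIB entry, honouring the RTM_F_PREFIX filter
 * if the request carried a full rtmsg header.
 */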
2473 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2474 {
2475 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2476 int prefix;
2477
2478 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2479 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2480 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2481 } else
2482 prefix = 0;
2483
2484 return rt6_fill_node(arg->net,
2485 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2486 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2487 prefix, 0, NLM_F_MULTI);
2488 }
2489
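/* RTM_GETROUTE handler for a single lookup: resolve the route for the
 * requested source/destination (via an input or output lookup depending
 * on whether RTA_IIF is given) and unicast one RTM_NEWROUTE reply.
 */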
2490 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2491 {
2492 struct net *net = sock_net(in_skb->sk);
2493 struct nlattr *tb[RTA_MAX+1];
2494 struct rt6_info *rt;
2495 struct sk_buff *skb;
2496 struct rtmsg *rtm;
2497 struct flowi6 fl6;
2498 int err, iif = 0, oif = 0;
2499
2500 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2501 if (err < 0)
2502 goto errout;
2503
2504 err = -EINVAL;
2505 memset(&fl6, 0, sizeof(fl6));
2506
2507 if (tb[RTA_SRC]) {
2508 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2509 goto errout;
2510
2511 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2512 }
2513
2514 if (tb[RTA_DST]) {
2515 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2516 goto errout;
2517
2518 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2519 }
2520
2521 if (tb[RTA_IIF])
2522 iif = nla_get_u32(tb[RTA_IIF]);
2523
2524 if (tb[RTA_OIF])
2525 oif = nla_get_u32(tb[RTA_OIF]);
2526
2527 if (iif) {
2528 struct net_device *dev;
2529 int flags = 0;
2530
2531 dev = __dev_get_by_index(net, iif);
2532 if (!dev) {
2533 err = -ENODEV;
2534 goto errout;
2535 }
2536
2537 fl6.flowi6_iif = iif;
2538
2539 if (!ipv6_addr_any(&fl6.saddr))
2540 flags |= RT6_LOOKUP_F_HAS_SADDR;
2541
2542 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2543 flags);
2544 } else {
2545 fl6.flowi6_oif = oif;
2546
2547 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2548 }
2549
2550 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2551 if (!skb) {
2552 dst_release(&rt->dst);
2553 err = -ENOBUFS;
2554 goto errout;
2555 }
2556
2557 /* Reserve room for dummy headers; this skb can pass
2558 through a good chunk of the routing engine.
2559 */
2560 skb_reset_mac_header(skb);
2561 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2562
2563 skb_dst_set(skb, &rt->dst);
2564
2565 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2566 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2567 nlh->nlmsg_seq, 0, 0, 0);
2568 if (err < 0) {
2569 kfree_skb(skb);
2570 goto errout;
2571 }
2572
2573 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2574 errout:
2575 return err;
2576 }
2577
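/* Notify RTNLGRP_IPV6_ROUTE listeners about a route change; @event is
 * RTM_NEWROUTE or RTM_DELROUTE.
 */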
2578 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2579 {
2580 struct sk_buff *skb;
2581 struct net *net = info->nl_net;
2582 u32 seq;
2583 int err;
2584
2585 err = -ENOBUFS;
2586 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2587
2588 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2589 if (!skb)
2590 goto errout;
2591
2592 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2593 event, info->pid, seq, 0, 0, 0);
2594 if (err < 0) {
2595 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2596 WARN_ON(err == -EMSGSIZE);
2597 kfree_skb(skb);
2598 goto errout;
2599 }
2600 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2601 info->nlh, gfp_any());
2602 return;
2603 errout:
2604 if (err < 0)
2605 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2606 }
2607
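/* Once the loopback device registers in a namespace, point the special
 * null (and, with multiple tables, prohibit/blackhole) routes at it.
 */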
2608 static int ip6_route_dev_notify(struct notifier_block *this,
2609 unsigned long event, void *data)
2610 {
2611 struct net_device *dev = (struct net_device *)data;
2612 struct net *net = dev_net(dev);
2613
2614 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2615 net->ipv6.ip6_null_entry->dst.dev = dev;
2616 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2617 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2618 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2619 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2620 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2621 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2622 #endif
2623 }
2624
2625 return NOTIFY_OK;
2626 }
2627
2628 /*
2629 * /proc
2630 */
2631
2632 #ifdef CONFIG_PROC_FS
2633
2634 struct rt6_proc_arg
2635 {
2636 char *buffer;
2637 int offset;
2638 int length;
2639 int skip;
2640 int len;
2641 };
2642
2643 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2644 {
2645 struct seq_file *m = p_arg;
2646 struct neighbour *n;
2647
2648 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2649
2650 #ifdef CONFIG_IPV6_SUBTREES
2651 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2652 #else
2653 seq_puts(m, "00000000000000000000000000000000 00 ");
2654 #endif
2655 rcu_read_lock();
2656 n = dst_get_neighbour_noref(&rt->dst);
2657 if (n) {
2658 seq_printf(m, "%pi6", n->primary_key);
2659 } else {
2660 seq_puts(m, "00000000000000000000000000000000");
2661 }
2662 rcu_read_unlock();
2663 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2664 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2665 rt->dst.__use, rt->rt6i_flags,
2666 rt->dst.dev ? rt->dst.dev->name : "");
2667 return 0;
2668 }
2669
2670 static int ipv6_route_show(struct seq_file *m, void *v)
2671 {
2672 struct net *net = (struct net *)m->private;
2673 fib6_clean_all_ro(net, rt6_info_route, 0, m);
2674 return 0;
2675 }
2676
2677 static int ipv6_route_open(struct inode *inode, struct file *file)
2678 {
2679 return single_open_net(inode, file, ipv6_route_show);
2680 }
2681
2682 static const struct file_operations ipv6_route_proc_fops = {
2683 .owner = THIS_MODULE,
2684 .open = ipv6_route_open,
2685 .read = seq_read,
2686 .llseek = seq_lseek,
2687 .release = single_release_net,
2688 };
2689
2690 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2691 {
2692 struct net *net = (struct net *)seq->private;
2693 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2694 net->ipv6.rt6_stats->fib_nodes,
2695 net->ipv6.rt6_stats->fib_route_nodes,
2696 net->ipv6.rt6_stats->fib_rt_alloc,
2697 net->ipv6.rt6_stats->fib_rt_entries,
2698 net->ipv6.rt6_stats->fib_rt_cache,
2699 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2700 net->ipv6.rt6_stats->fib_discarded_routes);
2701
2702 return 0;
2703 }
2704
2705 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2706 {
2707 return single_open_net(inode, file, rt6_stats_seq_show);
2708 }
2709
2710 static const struct file_operations rt6_stats_seq_fops = {
2711 .owner = THIS_MODULE,
2712 .open = rt6_stats_seq_open,
2713 .read = seq_read,
2714 .llseek = seq_lseek,
2715 .release = single_release_net,
2716 };
2717 #endif /* CONFIG_PROC_FS */
2718
2719 #ifdef CONFIG_SYSCTL
2720
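/* Write-only "flush" sysctl: any write triggers a garbage collection
 * pass over the FIB via fib6_run_gc().
 */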
2721 static
2722 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2723 void __user *buffer, size_t *lenp, loff_t *ppos)
2724 {
2725 struct net *net;
2726 int delay;
2727 if (!write)
2728 return -EINVAL;
2729
2730 net = (struct net *)ctl->extra1;
2731 delay = net->ipv6.sysctl.flush_delay;
2732 proc_dointvec(ctl, write, buffer, lenp, ppos);
2733 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2734 return 0;
2735 }
2736
2737 ctl_table ipv6_route_table_template[] = {
2738 {
2739 .procname = "flush",
2740 .data = &init_net.ipv6.sysctl.flush_delay,
2741 .maxlen = sizeof(int),
2742 .mode = 0200,
2743 .proc_handler = ipv6_sysctl_rtcache_flush
2744 },
2745 {
2746 .procname = "gc_thresh",
2747 .data = &ip6_dst_ops_template.gc_thresh,
2748 .maxlen = sizeof(int),
2749 .mode = 0644,
2750 .proc_handler = proc_dointvec,
2751 },
2752 {
2753 .procname = "max_size",
2754 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2755 .maxlen = sizeof(int),
2756 .mode = 0644,
2757 .proc_handler = proc_dointvec,
2758 },
2759 {
2760 .procname = "gc_min_interval",
2761 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2762 .maxlen = sizeof(int),
2763 .mode = 0644,
2764 .proc_handler = proc_dointvec_jiffies,
2765 },
2766 {
2767 .procname = "gc_timeout",
2768 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2769 .maxlen = sizeof(int),
2770 .mode = 0644,
2771 .proc_handler = proc_dointvec_jiffies,
2772 },
2773 {
2774 .procname = "gc_interval",
2775 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2776 .maxlen = sizeof(int),
2777 .mode = 0644,
2778 .proc_handler = proc_dointvec_jiffies,
2779 },
2780 {
2781 .procname = "gc_elasticity",
2782 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2783 .maxlen = sizeof(int),
2784 .mode = 0644,
2785 .proc_handler = proc_dointvec,
2786 },
2787 {
2788 .procname = "mtu_expires",
2789 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2790 .maxlen = sizeof(int),
2791 .mode = 0644,
2792 .proc_handler = proc_dointvec_jiffies,
2793 },
2794 {
2795 .procname = "min_adv_mss",
2796 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2797 .maxlen = sizeof(int),
2798 .mode = 0644,
2799 .proc_handler = proc_dointvec,
2800 },
2801 {
2802 .procname = "gc_min_interval_ms",
2803 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2804 .maxlen = sizeof(int),
2805 .mode = 0644,
2806 .proc_handler = proc_dointvec_ms_jiffies,
2807 },
2808 { }
2809 };
2810
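/* Duplicate the sysctl template for a new namespace and repoint the
 * .data fields from init_net at the namespace's own variables.
 */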
2811 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2812 {
2813 struct ctl_table *table;
2814
2815 table = kmemdup(ipv6_route_table_template,
2816 sizeof(ipv6_route_table_template),
2817 GFP_KERNEL);
2818
2819 if (table) {
2820 table[0].data = &net->ipv6.sysctl.flush_delay;
2821 table[0].extra1 = net;
2822 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2823 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2824 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2825 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2826 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2827 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2828 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2829 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2830 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2831 }
2832
2833 return table;
2834 }
2835 #endif
2836
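/* Per-namespace initialisation: clone the dst_ops template and the
 * special null/prohibit/blackhole route templates, then set the default
 * route sysctl values and register the /proc entries.
 */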
2837 static int __net_init ip6_route_net_init(struct net *net)
2838 {
2839 int ret = -ENOMEM;
2840
2841 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2842 sizeof(net->ipv6.ip6_dst_ops));
2843
2844 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2845 goto out_ip6_dst_ops;
2846
2847 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2848 sizeof(*net->ipv6.ip6_null_entry),
2849 GFP_KERNEL);
2850 if (!net->ipv6.ip6_null_entry)
2851 goto out_ip6_dst_entries;
2852 net->ipv6.ip6_null_entry->dst.path =
2853 (struct dst_entry *)net->ipv6.ip6_null_entry;
2854 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2855 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2856 ip6_template_metrics, true);
2857
2858 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2859 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2860 sizeof(*net->ipv6.ip6_prohibit_entry),
2861 GFP_KERNEL);
2862 if (!net->ipv6.ip6_prohibit_entry)
2863 goto out_ip6_null_entry;
2864 net->ipv6.ip6_prohibit_entry->dst.path =
2865 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2866 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2867 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2868 ip6_template_metrics, true);
2869
2870 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2871 sizeof(*net->ipv6.ip6_blk_hole_entry),
2872 GFP_KERNEL);
2873 if (!net->ipv6.ip6_blk_hole_entry)
2874 goto out_ip6_prohibit_entry;
2875 net->ipv6.ip6_blk_hole_entry->dst.path =
2876 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2877 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2878 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2879 ip6_template_metrics, true);
2880 #endif
2881
2882 net->ipv6.sysctl.flush_delay = 0;
2883 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2884 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2885 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2886 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2887 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2888 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2889 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2890
2891 #ifdef CONFIG_PROC_FS
2892 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2893 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2894 #endif
2895 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2896
2897 ret = 0;
2898 out:
2899 return ret;
2900
2901 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2902 out_ip6_prohibit_entry:
2903 kfree(net->ipv6.ip6_prohibit_entry);
2904 out_ip6_null_entry:
2905 kfree(net->ipv6.ip6_null_entry);
2906 #endif
2907 out_ip6_dst_entries:
2908 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2909 out_ip6_dst_ops:
2910 goto out;
2911 }
2912
2913 static void __net_exit ip6_route_net_exit(struct net *net)
2914 {
2915 #ifdef CONFIG_PROC_FS
2916 proc_net_remove(net, "ipv6_route");
2917 proc_net_remove(net, "rt6_stats");
2918 #endif
2919 kfree(net->ipv6.ip6_null_entry);
2920 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2921 kfree(net->ipv6.ip6_prohibit_entry);
2922 kfree(net->ipv6.ip6_blk_hole_entry);
2923 #endif
2924 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2925 }
2926
2927 static struct pernet_operations ip6_route_net_ops = {
2928 .init = ip6_route_net_init,
2929 .exit = ip6_route_net_exit,
2930 };
2931
2932 static int __net_init ipv6_inetpeer_init(struct net *net)
2933 {
2934 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2935
2936 if (!bp)
2937 return -ENOMEM;
2938 inet_peer_base_init(bp);
2939 net->ipv6.peers = bp;
2940 return 0;
2941 }
2942
2943 static void __net_exit ipv6_inetpeer_exit(struct net *net)
2944 {
2945 struct inet_peer_base *bp = net->ipv6.peers;
2946
2947 net->ipv6.peers = NULL;
2948 inetpeer_invalidate_tree(bp);
2949 kfree(bp);
2950 }
2951
2952 static struct pernet_operations ipv6_inetpeer_ops = {
2953 .init = ipv6_inetpeer_init,
2954 .exit = ipv6_inetpeer_exit,
2955 };
2956
2957 static struct notifier_block ip6_route_dev_notifier = {
2958 .notifier_call = ip6_route_dev_notify,
2959 .priority = 0,
2960 };
2961
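/* Subsystem start-up: create the rt6_info slab cache, register the
 * per-net operations, initialise the FIB, xfrm6 and policy rules, and
 * hook up the RTM_*ROUTE netlink handlers; errors unwind in reverse.
 */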
2962 int __init ip6_route_init(void)
2963 {
2964 int ret;
2965
2966 ret = -ENOMEM;
2967 ip6_dst_ops_template.kmem_cachep =
2968 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2969 SLAB_HWCACHE_ALIGN, NULL);
2970 if (!ip6_dst_ops_template.kmem_cachep)
2971 goto out;
2972
2973 ret = dst_entries_init(&ip6_dst_blackhole_ops);
2974 if (ret)
2975 goto out_kmem_cache;
2976
2977 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
2978 if (ret)
2979 goto out_dst_entries;
2980
2981 ret = register_pernet_subsys(&ip6_route_net_ops);
2982 if (ret)
2983 goto out_register_inetpeer;
2984
2985 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2986
2987 /* The loopback device is registered before this code runs, so the
2988 * loopback reference in rt6_info is not taken automatically; take it
2989 * manually for init_net */
2990 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2991 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2992 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2993 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2994 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2995 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2996 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2997 #endif
2998 ret = fib6_init();
2999 if (ret)
3000 goto out_register_subsys;
3001
3002 ret = xfrm6_init();
3003 if (ret)
3004 goto out_fib6_init;
3005
3006 ret = fib6_rules_init();
3007 if (ret)
3008 goto xfrm6_init;
3009
3010 ret = -ENOBUFS;
3011 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3012 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3013 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3014 goto fib6_rules_init;
3015
3016 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3017 if (ret)
3018 goto fib6_rules_init;
3019
3020 out:
3021 return ret;
3022
3023 fib6_rules_init:
3024 fib6_rules_cleanup();
3025 xfrm6_init:
3026 xfrm6_fini();
3027 out_fib6_init:
3028 fib6_gc_cleanup();
3029 out_register_subsys:
3030 unregister_pernet_subsys(&ip6_route_net_ops);
3031 out_register_inetpeer:
3032 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3033 out_dst_entries:
3034 dst_entries_destroy(&ip6_dst_blackhole_ops);
3035 out_kmem_cache:
3036 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3037 goto out;
3038 }
3039
3040 void ip6_route_cleanup(void)
3041 {
3042 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3043 fib6_rules_cleanup();
3044 xfrm6_fini();
3045 fib6_gc_cleanup();
3046 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3047 unregister_pernet_subsys(&ip6_route_net_ops);
3048 dst_entries_destroy(&ip6_dst_blackhole_ops);
3049 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3050 }