net/ipv6/route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60
61 #include <asm/uaccess.h>
62
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68 const struct in6_addr *dest);
69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void ip6_dst_destroy(struct dst_entry *);
74 static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
76 static int ip6_dst_gc(struct dst_ops *ops);
77
78 static int ip6_pkt_discard(struct sk_buff *skb);
79 static int ip6_pkt_discard_out(struct sk_buff *skb);
80 static void ip6_link_failure(struct sk_buff *skb);
81 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
82
83 #ifdef CONFIG_IPV6_ROUTE_INFO
84 static struct rt6_info *rt6_add_route_info(struct net *net,
85 const struct in6_addr *prefix, int prefixlen,
86 const struct in6_addr *gwaddr, int ifindex,
87 unsigned int pref);
88 static struct rt6_info *rt6_get_route_info(struct net *net,
89 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex);
91 #endif
92
93 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
94 {
95 struct rt6_info *rt = (struct rt6_info *) dst;
96 struct inet_peer *peer;
97 u32 *p = NULL;
98
99 if (!(rt->dst.flags & DST_HOST))
100 return NULL;
101
102 peer = rt6_get_peer_create(rt);
103 if (peer) {
104 u32 *old_p = __DST_METRICS_PTR(old);
105 unsigned long prev, new;
106
107 p = peer->metrics;
108 if (inet_metrics_new(peer))
109 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
110
111 new = (unsigned long) p;
112 prev = cmpxchg(&dst->_metrics, old, new);
113
114 if (prev != old) {
115 p = __DST_METRICS_PTR(prev);
116 if (prev & DST_METRICS_READ_ONLY)
117 p = NULL;
118 }
119 }
120 return p;
121 }
122
123 static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
124 {
125 struct in6_addr *p = &rt->rt6i_gateway;
126
127 if (!ipv6_addr_any(p))
128 return (const void *) p;
129 return daddr;
130 }
131
132 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
133 {
134 struct rt6_info *rt = (struct rt6_info *) dst;
135 struct neighbour *n;
136
137 daddr = choose_neigh_daddr(rt, daddr);
138 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
139 if (n)
140 return n;
141 return neigh_create(&nd_tbl, daddr, dst->dev);
142 }
143
144 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
145 {
146 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
147 if (!n) {
148 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
149 if (IS_ERR(n))
150 return PTR_ERR(n);
151 }
152 dst_set_neighbour(&rt->dst, n);
153
154 return 0;
155 }
156
157 static struct dst_ops ip6_dst_ops_template = {
158 .family = AF_INET6,
159 .protocol = cpu_to_be16(ETH_P_IPV6),
160 .gc = ip6_dst_gc,
161 .gc_thresh = 1024,
162 .check = ip6_dst_check,
163 .default_advmss = ip6_default_advmss,
164 .mtu = ip6_mtu,
165 .cow_metrics = ipv6_cow_metrics,
166 .destroy = ip6_dst_destroy,
167 .ifdown = ip6_dst_ifdown,
168 .negative_advice = ip6_negative_advice,
169 .link_failure = ip6_link_failure,
170 .update_pmtu = ip6_rt_update_pmtu,
171 .local_out = __ip6_local_out,
172 .neigh_lookup = ip6_neigh_lookup,
173 };
174
175 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
176 {
177 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
178
179 return mtu ? : dst->dev->mtu;
180 }
181
182 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
183 {
184 }
185
186 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
187 unsigned long old)
188 {
189 return NULL;
190 }
191
192 static struct dst_ops ip6_dst_blackhole_ops = {
193 .family = AF_INET6,
194 .protocol = cpu_to_be16(ETH_P_IPV6),
195 .destroy = ip6_dst_destroy,
196 .check = ip6_dst_check,
197 .mtu = ip6_blackhole_mtu,
198 .default_advmss = ip6_default_advmss,
199 .update_pmtu = ip6_rt_blackhole_update_pmtu,
200 .cow_metrics = ip6_rt_blackhole_cow_metrics,
201 .neigh_lookup = ip6_neigh_lookup,
202 };
203
204 static const u32 ip6_template_metrics[RTAX_MAX] = {
205 [RTAX_HOPLIMIT - 1] = 255,
206 };
207
208 static struct rt6_info ip6_null_entry_template = {
209 .dst = {
210 .__refcnt = ATOMIC_INIT(1),
211 .__use = 1,
212 .obsolete = -1,
213 .error = -ENETUNREACH,
214 .input = ip6_pkt_discard,
215 .output = ip6_pkt_discard_out,
216 },
217 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
218 .rt6i_protocol = RTPROT_KERNEL,
219 .rt6i_metric = ~(u32) 0,
220 .rt6i_ref = ATOMIC_INIT(1),
221 };
222
223 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
224
225 static int ip6_pkt_prohibit(struct sk_buff *skb);
226 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
227
228 static struct rt6_info ip6_prohibit_entry_template = {
229 .dst = {
230 .__refcnt = ATOMIC_INIT(1),
231 .__use = 1,
232 .obsolete = -1,
233 .error = -EACCES,
234 .input = ip6_pkt_prohibit,
235 .output = ip6_pkt_prohibit_out,
236 },
237 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
238 .rt6i_protocol = RTPROT_KERNEL,
239 .rt6i_metric = ~(u32) 0,
240 .rt6i_ref = ATOMIC_INIT(1),
241 };
242
243 static struct rt6_info ip6_blk_hole_entry_template = {
244 .dst = {
245 .__refcnt = ATOMIC_INIT(1),
246 .__use = 1,
247 .obsolete = -1,
248 .error = -EINVAL,
249 .input = dst_discard,
250 .output = dst_discard,
251 },
252 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
253 .rt6i_protocol = RTPROT_KERNEL,
254 .rt6i_metric = ~(u32) 0,
255 .rt6i_ref = ATOMIC_INIT(1),
256 };
257
258 #endif
259
260 /* allocate dst with ip6_dst_ops */
261 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
262 struct net_device *dev,
263 int flags,
264 struct fib6_table *table)
265 {
266 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
267 0, 0, flags);
268
269 if (rt) {
270 memset(&rt->rt6i_table, 0,
271 sizeof(*rt) - sizeof(struct dst_entry));
272 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
273 }
274 return rt;
275 }
276
277 static void ip6_dst_destroy(struct dst_entry *dst)
278 {
279 struct rt6_info *rt = (struct rt6_info *)dst;
280 struct inet6_dev *idev = rt->rt6i_idev;
281
282 if (!(rt->dst.flags & DST_HOST))
283 dst_destroy_metrics_generic(dst);
284
285 if (idev) {
286 rt->rt6i_idev = NULL;
287 in6_dev_put(idev);
288 }
289
290 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
291 dst_release(dst->from);
292
293 if (rt6_has_peer(rt)) {
294 struct inet_peer *peer = rt6_peer_ptr(rt);
295 inet_putpeer(peer);
296 }
297 }
298
299 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
300
301 static u32 rt6_peer_genid(void)
302 {
303 return atomic_read(&__rt6_peer_genid);
304 }
305
306 void rt6_bind_peer(struct rt6_info *rt, int create)
307 {
308 struct inet_peer_base *base;
309 struct inet_peer *peer;
310
311 base = inetpeer_base_ptr(rt->_rt6i_peer);
312 if (!base)
313 return;
314
315 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
316 if (peer) {
317 if (!rt6_set_peer(rt, peer))
318 inet_putpeer(peer);
319 else
320 rt->rt6i_peer_genid = rt6_peer_genid();
321 }
322 }
323
324 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
325 int how)
326 {
327 struct rt6_info *rt = (struct rt6_info *)dst;
328 struct inet6_dev *idev = rt->rt6i_idev;
329 struct net_device *loopback_dev =
330 dev_net(dev)->loopback_dev;
331
332 if (dev != loopback_dev && idev && idev->dev == dev) {
333 struct inet6_dev *loopback_idev =
334 in6_dev_get(loopback_dev);
335 if (loopback_idev) {
336 rt->rt6i_idev = loopback_idev;
337 in6_dev_put(idev);
338 }
339 }
340 }
341
342 static bool rt6_check_expired(const struct rt6_info *rt)
343 {
344 struct rt6_info *ort = NULL;
345
346 if (rt->rt6i_flags & RTF_EXPIRES) {
347 if (time_after(jiffies, rt->dst.expires))
348 return true;
349 } else if (rt->dst.from) {
350 ort = (struct rt6_info *) rt->dst.from;
351 return (ort->rt6i_flags & RTF_EXPIRES) &&
352 time_after(jiffies, ort->dst.expires);
353 }
354 return false;
355 }
356
357 static bool rt6_need_strict(const struct in6_addr *daddr)
358 {
359 return ipv6_addr_type(daddr) &
360 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
361 }
362
363 /*
364 * Route lookup. Any table->tb6_lock is implied.
365 */
366
367 static inline struct rt6_info *rt6_device_match(struct net *net,
368 struct rt6_info *rt,
369 const struct in6_addr *saddr,
370 int oif,
371 int flags)
372 {
373 struct rt6_info *local = NULL;
374 struct rt6_info *sprt;
375
376 if (!oif && ipv6_addr_any(saddr))
377 goto out;
378
379 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
380 struct net_device *dev = sprt->dst.dev;
381
382 if (oif) {
383 if (dev->ifindex == oif)
384 return sprt;
385 if (dev->flags & IFF_LOOPBACK) {
386 if (!sprt->rt6i_idev ||
387 sprt->rt6i_idev->dev->ifindex != oif) {
388 if (flags & RT6_LOOKUP_F_IFACE && oif)
389 continue;
390 if (local && (!oif ||
391 local->rt6i_idev->dev->ifindex == oif))
392 continue;
393 }
394 local = sprt;
395 }
396 } else {
397 if (ipv6_chk_addr(net, saddr, dev,
398 flags & RT6_LOOKUP_F_IFACE))
399 return sprt;
400 }
401 }
402
403 if (oif) {
404 if (local)
405 return local;
406
407 if (flags & RT6_LOOKUP_F_IFACE)
408 return net->ipv6.ip6_null_entry;
409 }
410 out:
411 return rt;
412 }
413
414 #ifdef CONFIG_IPV6_ROUTER_PREF
415 static void rt6_probe(struct rt6_info *rt)
416 {
417 struct neighbour *neigh;
418 /*
419 * Okay, this does not seem to be appropriate
420 * for now, however, we need to check if it
421 * is really so; aka Router Reachability Probing.
422 *
423 * Router Reachability Probe MUST be rate-limited
424 * to no more than one per minute.
425 */
426 rcu_read_lock();
427 neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
428 if (!neigh || (neigh->nud_state & NUD_VALID))
429 goto out;
430 read_lock_bh(&neigh->lock);
431 if (!(neigh->nud_state & NUD_VALID) &&
432 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
433 struct in6_addr mcaddr;
434 struct in6_addr *target;
435
436 neigh->updated = jiffies;
437 read_unlock_bh(&neigh->lock);
438
439 target = (struct in6_addr *)&neigh->primary_key;
440 addrconf_addr_solict_mult(target, &mcaddr);
441 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
442 } else {
443 read_unlock_bh(&neigh->lock);
444 }
445 out:
446 rcu_read_unlock();
447 }
448 #else
449 static inline void rt6_probe(struct rt6_info *rt)
450 {
451 }
452 #endif
453
454 /*
455 * Default Router Selection (RFC 2461 6.3.6)
456 */
457 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
458 {
459 struct net_device *dev = rt->dst.dev;
460 if (!oif || dev->ifindex == oif)
461 return 2;
462 if ((dev->flags & IFF_LOOPBACK) &&
463 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
464 return 1;
465 return 0;
466 }
467
468 static inline int rt6_check_neigh(struct rt6_info *rt)
469 {
470 struct neighbour *neigh;
471 int m;
472
473 rcu_read_lock();
474 neigh = dst_get_neighbour_noref(&rt->dst);
475 if (rt->rt6i_flags & RTF_NONEXTHOP ||
476 !(rt->rt6i_flags & RTF_GATEWAY))
477 m = 1;
478 else if (neigh) {
479 read_lock_bh(&neigh->lock);
480 if (neigh->nud_state & NUD_VALID)
481 m = 2;
482 #ifdef CONFIG_IPV6_ROUTER_PREF
483 else if (neigh->nud_state & NUD_FAILED)
484 m = 0;
485 #endif
486 else
487 m = 1;
488 read_unlock_bh(&neigh->lock);
489 } else
490 m = 0;
491 rcu_read_unlock();
492 return m;
493 }
494
495 static int rt6_score_route(struct rt6_info *rt, int oif,
496 int strict)
497 {
498 int m, n;
499
500 m = rt6_check_dev(rt, oif);
501 if (!m && (strict & RT6_LOOKUP_F_IFACE))
502 return -1;
503 #ifdef CONFIG_IPV6_ROUTER_PREF
504 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
505 #endif
506 n = rt6_check_neigh(rt);
507 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
508 return -1;
509 return m;
510 }
511
512 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
513 int *mpri, struct rt6_info *match)
514 {
515 int m;
516
517 if (rt6_check_expired(rt))
518 goto out;
519
520 m = rt6_score_route(rt, oif, strict);
521 if (m < 0)
522 goto out;
523
524 if (m > *mpri) {
525 if (strict & RT6_LOOKUP_F_REACHABLE)
526 rt6_probe(match);
527 *mpri = m;
528 match = rt;
529 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
530 rt6_probe(rt);
531 }
532
533 out:
534 return match;
535 }
536
537 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
538 struct rt6_info *rr_head,
539 u32 metric, int oif, int strict)
540 {
541 struct rt6_info *rt, *match;
542 int mpri = -1;
543
544 match = NULL;
545 for (rt = rr_head; rt && rt->rt6i_metric == metric;
546 rt = rt->dst.rt6_next)
547 match = find_match(rt, oif, strict, &mpri, match);
548 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
549 rt = rt->dst.rt6_next)
550 match = find_match(rt, oif, strict, &mpri, match);
551
552 return match;
553 }
554
555 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
556 {
557 struct rt6_info *match, *rt0;
558 struct net *net;
559
560 rt0 = fn->rr_ptr;
561 if (!rt0)
562 fn->rr_ptr = rt0 = fn->leaf;
563
564 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
565
566 if (!match &&
567 (strict & RT6_LOOKUP_F_REACHABLE)) {
568 struct rt6_info *next = rt0->dst.rt6_next;
569
570 /* no entries matched; do round-robin */
571 if (!next || next->rt6i_metric != rt0->rt6i_metric)
572 next = fn->leaf;
573
574 if (next != rt0)
575 fn->rr_ptr = next;
576 }
577
578 net = dev_net(rt0->dst.dev);
579 return match ? match : net->ipv6.ip6_null_entry;
580 }
581
582 #ifdef CONFIG_IPV6_ROUTE_INFO
583 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
584 const struct in6_addr *gwaddr)
585 {
586 struct net *net = dev_net(dev);
587 struct route_info *rinfo = (struct route_info *) opt;
588 struct in6_addr prefix_buf, *prefix;
589 unsigned int pref;
590 unsigned long lifetime;
591 struct rt6_info *rt;
592
593 if (len < sizeof(struct route_info)) {
594 return -EINVAL;
595 }
596
597 /* Sanity check for prefix_len and length */
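/*
 * The Route Information option (RFC 4191) carries its length in units of
 * 8 octets: length 1 means no prefix bytes follow, length 2 means 8 prefix
 * bytes (enough for prefix_len <= 64), and length 3 means the full 16
 * bytes.  Reject options whose advertised prefix_len cannot fit in the
 * bytes that are actually present.
 */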
598 if (rinfo->length > 3) {
599 return -EINVAL;
600 } else if (rinfo->prefix_len > 128) {
601 return -EINVAL;
602 } else if (rinfo->prefix_len > 64) {
603 if (rinfo->length < 2) {
604 return -EINVAL;
605 }
606 } else if (rinfo->prefix_len > 0) {
607 if (rinfo->length < 1) {
608 return -EINVAL;
609 }
610 }
611
612 pref = rinfo->route_pref;
613 if (pref == ICMPV6_ROUTER_PREF_INVALID)
614 return -EINVAL;
615
616 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
617
618 if (rinfo->length == 3)
619 prefix = (struct in6_addr *)rinfo->prefix;
620 else {
621 /* this function is safe */
622 ipv6_addr_prefix(&prefix_buf,
623 (struct in6_addr *)rinfo->prefix,
624 rinfo->prefix_len);
625 prefix = &prefix_buf;
626 }
627
628 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
629 dev->ifindex);
630
631 if (rt && !lifetime) {
632 ip6_del_rt(rt);
633 rt = NULL;
634 }
635
636 if (!rt && lifetime)
637 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
638 pref);
639 else if (rt)
640 rt->rt6i_flags = RTF_ROUTEINFO |
641 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
642
643 if (rt) {
644 if (!addrconf_finite_timeout(lifetime))
645 rt6_clean_expires(rt);
646 else
647 rt6_set_expires(rt, jiffies + HZ * lifetime);
648
649 dst_release(&rt->dst);
650 }
651 return 0;
652 }
653 #endif
654
655 #define BACKTRACK(__net, saddr) \
656 do { \
657 if (rt == __net->ipv6.ip6_null_entry) { \
658 struct fib6_node *pn; \
659 while (1) { \
660 if (fn->fn_flags & RTN_TL_ROOT) \
661 goto out; \
662 pn = fn->parent; \
663 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
664 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
665 else \
666 fn = pn; \
667 if (fn->fn_flags & RTN_RTINFO) \
668 goto restart; \
669 } \
670 } \
671 } while (0)
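/*
 * BACKTRACK walks back up the fib6 tree when the lookup at the current
 * node yielded only the null entry: it climbs to the parent, descends into
 * the parent's source-address subtree when one exists, and jumps back to
 * the caller's "restart" label as soon as it reaches a node that carries
 * routes (RTN_RTINFO).  The lookup functions below rely on this, so each
 * of them provides the "restart" and "out" labels the macro expects.
 */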
672
673 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
674 struct fib6_table *table,
675 struct flowi6 *fl6, int flags)
676 {
677 struct fib6_node *fn;
678 struct rt6_info *rt;
679
680 read_lock_bh(&table->tb6_lock);
681 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
682 restart:
683 rt = fn->leaf;
684 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
685 BACKTRACK(net, &fl6->saddr);
686 out:
687 dst_use(&rt->dst, jiffies);
688 read_unlock_bh(&table->tb6_lock);
689 return rt;
690
691 }
692
693 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
694 int flags)
695 {
696 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
697 }
698 EXPORT_SYMBOL_GPL(ip6_route_lookup);
699
700 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
701 const struct in6_addr *saddr, int oif, int strict)
702 {
703 struct flowi6 fl6 = {
704 .flowi6_oif = oif,
705 .daddr = *daddr,
706 };
707 struct dst_entry *dst;
708 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
709
710 if (saddr) {
711 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
712 flags |= RT6_LOOKUP_F_HAS_SADDR;
713 }
714
715 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
716 if (dst->error == 0)
717 return (struct rt6_info *) dst;
718
719 dst_release(dst);
720
721 return NULL;
722 }
723
724 EXPORT_SYMBOL(rt6_lookup);
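/*
 * Usage sketch (illustrative, not a call site in this file): a caller that
 * only needs the best route towards a destination, without creating a
 * cached clone, typically does something like
 *
 *	struct rt6_info *rt = rt6_lookup(net, &daddr, NULL, ifindex, 0);
 *
 *	if (rt) {
 *		... inspect rt->dst.dev, rt->rt6i_gateway, ...
 *		dst_release(&rt->dst);
 *	}
 *
 * rt6_lookup() returns a held entry on success, so the reference must be
 * dropped with dst_release() once the caller is done with it.
 */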
725
726 /* ip6_ins_rt is called with FREE table->tb6_lock.
727 It takes the new route entry; if the addition fails for any reason,
728 the route is freed. In any case, if the caller does not hold a
729 reference, it may be destroyed.
730 */
731
732 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
733 {
734 int err;
735 struct fib6_table *table;
736
737 table = rt->rt6i_table;
738 write_lock_bh(&table->tb6_lock);
739 err = fib6_add(&table->tb6_root, rt, info);
740 write_unlock_bh(&table->tb6_lock);
741
742 return err;
743 }
744
745 int ip6_ins_rt(struct rt6_info *rt)
746 {
747 struct nl_info info = {
748 .nl_net = dev_net(rt->dst.dev),
749 };
750 return __ip6_ins_rt(rt, &info);
751 }
752
753 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
754 const struct in6_addr *daddr,
755 const struct in6_addr *saddr)
756 {
757 struct rt6_info *rt;
758
759 /*
760 * Clone the route.
761 */
762
763 rt = ip6_rt_copy(ort, daddr);
764
765 if (rt) {
766 int attempts = !in_softirq();
767
768 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
769 if (ort->rt6i_dst.plen != 128 &&
770 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
771 rt->rt6i_flags |= RTF_ANYCAST;
772 rt->rt6i_gateway = *daddr;
773 }
774
775 rt->rt6i_flags |= RTF_CACHE;
776
777 #ifdef CONFIG_IPV6_SUBTREES
778 if (rt->rt6i_src.plen && saddr) {
779 rt->rt6i_src.addr = *saddr;
780 rt->rt6i_src.plen = 128;
781 }
782 #endif
783
784 retry:
785 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
786 struct net *net = dev_net(rt->dst.dev);
787 int saved_rt_min_interval =
788 net->ipv6.sysctl.ip6_rt_gc_min_interval;
789 int saved_rt_elasticity =
790 net->ipv6.sysctl.ip6_rt_gc_elasticity;
791
792 if (attempts-- > 0) {
793 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
794 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
795
796 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
797
798 net->ipv6.sysctl.ip6_rt_gc_elasticity =
799 saved_rt_elasticity;
800 net->ipv6.sysctl.ip6_rt_gc_min_interval =
801 saved_rt_min_interval;
802 goto retry;
803 }
804
805 net_warn_ratelimited("Neighbour table overflow\n");
806 dst_free(&rt->dst);
807 return NULL;
808 }
809 }
810
811 return rt;
812 }
813
814 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
815 const struct in6_addr *daddr)
816 {
817 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
818
819 if (rt) {
820 rt->rt6i_flags |= RTF_CACHE;
821 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
822 }
823 return rt;
824 }
825
826 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
827 struct flowi6 *fl6, int flags)
828 {
829 struct fib6_node *fn;
830 struct rt6_info *rt, *nrt;
831 int strict = 0;
832 int attempts = 3;
833 int err;
834 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
835
836 strict |= flags & RT6_LOOKUP_F_IFACE;
837
838 relookup:
839 read_lock_bh(&table->tb6_lock);
840
841 restart_2:
842 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
843
844 restart:
845 rt = rt6_select(fn, oif, strict | reachable);
846
847 BACKTRACK(net, &fl6->saddr);
848 if (rt == net->ipv6.ip6_null_entry ||
849 rt->rt6i_flags & RTF_CACHE)
850 goto out;
851
852 dst_hold(&rt->dst);
853 read_unlock_bh(&table->tb6_lock);
854
855 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
856 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
857 else if (!(rt->dst.flags & DST_HOST))
858 nrt = rt6_alloc_clone(rt, &fl6->daddr);
859 else
860 goto out2;
861
862 dst_release(&rt->dst);
863 rt = nrt ? : net->ipv6.ip6_null_entry;
864
865 dst_hold(&rt->dst);
866 if (nrt) {
867 err = ip6_ins_rt(nrt);
868 if (!err)
869 goto out2;
870 }
871
872 if (--attempts <= 0)
873 goto out2;
874
875 /*
876 * Race condition! In the gap, when table->tb6_lock was
877 * released someone could insert this route. Relookup.
878 */
879 dst_release(&rt->dst);
880 goto relookup;
881
882 out:
883 if (reachable) {
884 reachable = 0;
885 goto restart_2;
886 }
887 dst_hold(&rt->dst);
888 read_unlock_bh(&table->tb6_lock);
889 out2:
890 rt->dst.lastuse = jiffies;
891 rt->dst.__use++;
892
893 return rt;
894 }
895
896 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
897 struct flowi6 *fl6, int flags)
898 {
899 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
900 }
901
902 static struct dst_entry *ip6_route_input_lookup(struct net *net,
903 struct net_device *dev,
904 struct flowi6 *fl6, int flags)
905 {
906 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
907 flags |= RT6_LOOKUP_F_IFACE;
908
909 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
910 }
911
912 void ip6_route_input(struct sk_buff *skb)
913 {
914 const struct ipv6hdr *iph = ipv6_hdr(skb);
915 struct net *net = dev_net(skb->dev);
916 int flags = RT6_LOOKUP_F_HAS_SADDR;
917 struct flowi6 fl6 = {
918 .flowi6_iif = skb->dev->ifindex,
919 .daddr = iph->daddr,
920 .saddr = iph->saddr,
921 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
922 .flowi6_mark = skb->mark,
923 .flowi6_proto = iph->nexthdr,
924 };
925
926 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
927 }
928
929 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
930 struct flowi6 *fl6, int flags)
931 {
932 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
933 }
934
935 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
936 struct flowi6 *fl6)
937 {
938 int flags = 0;
939
940 fl6->flowi6_iif = net->loopback_dev->ifindex;
941
942 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
943 flags |= RT6_LOOKUP_F_IFACE;
944
945 if (!ipv6_addr_any(&fl6->saddr))
946 flags |= RT6_LOOKUP_F_HAS_SADDR;
947 else if (sk)
948 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
949
950 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
951 }
952
953 EXPORT_SYMBOL(ip6_route_output);
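/*
 * Usage sketch (illustrative, with a hypothetical socket and destination):
 * output routing is driven by a flowi6 key, and the result must be checked
 * for dst->error before use, since this function never returns NULL:
 *
 *	struct flowi6 fl6 = {
 *		.flowi6_oif = sk->sk_bound_dev_if,
 *		.daddr = *daddr,
 *		.flowi6_mark = sk->sk_mark,
 *	};
 *	struct dst_entry *dst = ip6_route_output(net, sk, &fl6);
 *	int err = dst->error;
 *
 *	if (err) {
 *		dst_release(dst);
 *		return err;
 *	}
 *	... attach dst to the socket or skb, release it later ...
 */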
954
955 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
956 {
957 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
958 struct dst_entry *new = NULL;
959
960 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
961 if (rt) {
962 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
963 rt6_init_peer(rt, net->ipv6.peers);
964
965 new = &rt->dst;
966
967 new->__use = 1;
968 new->input = dst_discard;
969 new->output = dst_discard;
970
971 if (dst_metrics_read_only(&ort->dst))
972 new->_metrics = ort->dst._metrics;
973 else
974 dst_copy_metrics(new, &ort->dst);
975 rt->rt6i_idev = ort->rt6i_idev;
976 if (rt->rt6i_idev)
977 in6_dev_hold(rt->rt6i_idev);
978
979 rt->rt6i_gateway = ort->rt6i_gateway;
980 rt->rt6i_flags = ort->rt6i_flags;
981 rt6_clean_expires(rt);
982 rt->rt6i_metric = 0;
983
984 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
985 #ifdef CONFIG_IPV6_SUBTREES
986 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
987 #endif
988
989 dst_free(new);
990 }
991
992 dst_release(dst_orig);
993 return new ? new : ERR_PTR(-ENOMEM);
994 }
995
996 /*
997 * Destination cache support functions
998 */
999
1000 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1001 {
1002 struct rt6_info *rt;
1003
1004 rt = (struct rt6_info *) dst;
1005
1006 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1007 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1008 if (!rt6_has_peer(rt))
1009 rt6_bind_peer(rt, 0);
1010 rt->rt6i_peer_genid = rt6_peer_genid();
1011 }
1012 return dst;
1013 }
1014 return NULL;
1015 }
1016
1017 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1018 {
1019 struct rt6_info *rt = (struct rt6_info *) dst;
1020
1021 if (rt) {
1022 if (rt->rt6i_flags & RTF_CACHE) {
1023 if (rt6_check_expired(rt)) {
1024 ip6_del_rt(rt);
1025 dst = NULL;
1026 }
1027 } else {
1028 dst_release(dst);
1029 dst = NULL;
1030 }
1031 }
1032 return dst;
1033 }
1034
1035 static void ip6_link_failure(struct sk_buff *skb)
1036 {
1037 struct rt6_info *rt;
1038
1039 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1040
1041 rt = (struct rt6_info *) skb_dst(skb);
1042 if (rt) {
1043 if (rt->rt6i_flags & RTF_CACHE)
1044 rt6_update_expires(rt, 0);
1045 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1046 rt->rt6i_node->fn_sernum = -1;
1047 }
1048 }
1049
1050 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1051 {
1052 struct rt6_info *rt6 = (struct rt6_info*)dst;
1053
1054 dst_confirm(dst);
1055 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1056 struct net *net = dev_net(dst->dev);
1057
1058 rt6->rt6i_flags |= RTF_MODIFIED;
1059 if (mtu < IPV6_MIN_MTU) {
1060 u32 features = dst_metric(dst, RTAX_FEATURES);
1061 mtu = IPV6_MIN_MTU;
1062 features |= RTAX_FEATURE_ALLFRAG;
1063 dst_metric_set(dst, RTAX_FEATURES, features);
1064 }
1065 dst_metric_set(dst, RTAX_MTU, mtu);
1066 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1067 }
1068 }
1069
1070 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1071 int oif, u32 mark)
1072 {
1073 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1074 struct dst_entry *dst;
1075 struct flowi6 fl6;
1076
1077 memset(&fl6, 0, sizeof(fl6));
1078 fl6.flowi6_oif = oif;
1079 fl6.flowi6_mark = mark;
1080 fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS;
1081 fl6.daddr = iph->daddr;
1082 fl6.saddr = iph->saddr;
1083 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1084
1085 dst = ip6_route_output(net, NULL, &fl6);
1086 if (!dst->error)
1087 ip6_rt_update_pmtu(dst, ntohl(mtu));
1088 dst_release(dst);
1089 }
1090 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1091
1092 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1093 {
1094 ip6_update_pmtu(skb, sock_net(sk), mtu,
1095 sk->sk_bound_dev_if, sk->sk_mark);
1096 }
1097 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
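/*
 * Usage sketch (illustrative): these helpers are meant for protocol error
 * handlers that have just parsed an ICMPV6_PKT_TOOBIG message.  The MTU is
 * passed still in network byte order (the helpers ntohl() it themselves):
 *
 *	ip6_update_pmtu(skb, net, info, 0, 0);		no socket context
 *	ip6_sk_update_pmtu(skb, sk, info);		socket is known
 *
 * where "info" is the MTU field taken from the ICMPv6 header.
 */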
1098
1099 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1100 {
1101 struct net_device *dev = dst->dev;
1102 unsigned int mtu = dst_mtu(dst);
1103 struct net *net = dev_net(dev);
1104
1105 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1106
1107 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1108 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1109
1110 /*
1111 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1112 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1113 * IPV6_MAXPLEN is also valid and means: "any MSS,
1114 * rely only on pmtu discovery"
1115 */
1116 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1117 mtu = IPV6_MAXPLEN;
1118 return mtu;
1119 }
1120
1121 static unsigned int ip6_mtu(const struct dst_entry *dst)
1122 {
1123 struct inet6_dev *idev;
1124 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1125
1126 if (mtu)
1127 return mtu;
1128
1129 mtu = IPV6_MIN_MTU;
1130
1131 rcu_read_lock();
1132 idev = __in6_dev_get(dst->dev);
1133 if (idev)
1134 mtu = idev->cnf.mtu6;
1135 rcu_read_unlock();
1136
1137 return mtu;
1138 }
1139
1140 static struct dst_entry *icmp6_dst_gc_list;
1141 static DEFINE_SPINLOCK(icmp6_dst_lock);
1142
1143 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1144 struct neighbour *neigh,
1145 struct flowi6 *fl6)
1146 {
1147 struct dst_entry *dst;
1148 struct rt6_info *rt;
1149 struct inet6_dev *idev = in6_dev_get(dev);
1150 struct net *net = dev_net(dev);
1151
1152 if (unlikely(!idev))
1153 return ERR_PTR(-ENODEV);
1154
1155 rt = ip6_dst_alloc(net, dev, 0, NULL);
1156 if (unlikely(!rt)) {
1157 in6_dev_put(idev);
1158 dst = ERR_PTR(-ENOMEM);
1159 goto out;
1160 }
1161
1162 if (neigh)
1163 neigh_hold(neigh);
1164 else {
1165 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
1166 if (IS_ERR(neigh)) {
1167 in6_dev_put(idev);
1168 dst_free(&rt->dst);
1169 return ERR_CAST(neigh);
1170 }
1171 }
1172
1173 rt->dst.flags |= DST_HOST;
1174 rt->dst.output = ip6_output;
1175 dst_set_neighbour(&rt->dst, neigh);
1176 atomic_set(&rt->dst.__refcnt, 1);
1177 rt->rt6i_dst.addr = fl6->daddr;
1178 rt->rt6i_dst.plen = 128;
1179 rt->rt6i_idev = idev;
1180 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1181
1182 spin_lock_bh(&icmp6_dst_lock);
1183 rt->dst.next = icmp6_dst_gc_list;
1184 icmp6_dst_gc_list = &rt->dst;
1185 spin_unlock_bh(&icmp6_dst_lock);
1186
1187 fib6_force_start_gc(net);
1188
1189 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1190
1191 out:
1192 return dst;
1193 }
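/*
 * Usage sketch (illustrative): ndisc uses this helper to build a
 * throw-away dst for outgoing neighbour discovery packets, roughly
 *
 *	dst = icmp6_dst_alloc(dev, neigh, &fl6);
 *	if (IS_ERR(dst))
 *		goto out;
 *	skb_dst_set(skb, dst);
 *
 * These entries never enter the FIB; they sit on icmp6_dst_gc_list and are
 * reaped by icmp6_dst_gc() once their refcount drops to zero.
 */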
1194
1195 int icmp6_dst_gc(void)
1196 {
1197 struct dst_entry *dst, **pprev;
1198 int more = 0;
1199
1200 spin_lock_bh(&icmp6_dst_lock);
1201 pprev = &icmp6_dst_gc_list;
1202
1203 while ((dst = *pprev) != NULL) {
1204 if (!atomic_read(&dst->__refcnt)) {
1205 *pprev = dst->next;
1206 dst_free(dst);
1207 } else {
1208 pprev = &dst->next;
1209 ++more;
1210 }
1211 }
1212
1213 spin_unlock_bh(&icmp6_dst_lock);
1214
1215 return more;
1216 }
1217
1218 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1219 void *arg)
1220 {
1221 struct dst_entry *dst, **pprev;
1222
1223 spin_lock_bh(&icmp6_dst_lock);
1224 pprev = &icmp6_dst_gc_list;
1225 while ((dst = *pprev) != NULL) {
1226 struct rt6_info *rt = (struct rt6_info *) dst;
1227 if (func(rt, arg)) {
1228 *pprev = dst->next;
1229 dst_free(dst);
1230 } else {
1231 pprev = &dst->next;
1232 }
1233 }
1234 spin_unlock_bh(&icmp6_dst_lock);
1235 }
1236
1237 static int ip6_dst_gc(struct dst_ops *ops)
1238 {
1239 unsigned long now = jiffies;
1240 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1241 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1242 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1243 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1244 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1245 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1246 int entries;
1247
1248 entries = dst_entries_get_fast(ops);
1249 if (time_after(rt_last_gc + rt_min_interval, now) &&
1250 entries <= rt_max_size)
1251 goto out;
1252
1253 net->ipv6.ip6_rt_gc_expire++;
1254 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1255 net->ipv6.ip6_rt_last_gc = now;
1256 entries = dst_entries_get_slow(ops);
1257 if (entries < ops->gc_thresh)
1258 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1259 out:
1260 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1261 return entries > rt_max_size;
1262 }
1263
1264 /* Clean host part of a prefix. Not necessary in radix tree,
1265 but results in cleaner routing tables.
1266
1267 Remove it only once everything else is known to work!
1268 */
1269
1270 int ip6_dst_hoplimit(struct dst_entry *dst)
1271 {
1272 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1273 if (hoplimit == 0) {
1274 struct net_device *dev = dst->dev;
1275 struct inet6_dev *idev;
1276
1277 rcu_read_lock();
1278 idev = __in6_dev_get(dev);
1279 if (idev)
1280 hoplimit = idev->cnf.hop_limit;
1281 else
1282 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1283 rcu_read_unlock();
1284 }
1285 return hoplimit;
1286 }
1287 EXPORT_SYMBOL(ip6_dst_hoplimit);
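/*
 * Usage sketch (illustrative): transmit paths use this to pick a hop limit
 * when the socket did not pin one explicitly, e.g.
 *
 *	hlimit = np->hop_limit;
 *	if (hlimit < 0)
 *		hlimit = ip6_dst_hoplimit(dst);
 *
 * so a per-route RTAX_HOPLIMIT metric wins, then the interface setting,
 * then the namespace-wide default.
 */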
1288
1289 /*
1290 *
1291 */
1292
1293 int ip6_route_add(struct fib6_config *cfg)
1294 {
1295 int err;
1296 struct net *net = cfg->fc_nlinfo.nl_net;
1297 struct rt6_info *rt = NULL;
1298 struct net_device *dev = NULL;
1299 struct inet6_dev *idev = NULL;
1300 struct fib6_table *table;
1301 int addr_type;
1302
1303 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1304 return -EINVAL;
1305 #ifndef CONFIG_IPV6_SUBTREES
1306 if (cfg->fc_src_len)
1307 return -EINVAL;
1308 #endif
1309 if (cfg->fc_ifindex) {
1310 err = -ENODEV;
1311 dev = dev_get_by_index(net, cfg->fc_ifindex);
1312 if (!dev)
1313 goto out;
1314 idev = in6_dev_get(dev);
1315 if (!idev)
1316 goto out;
1317 }
1318
1319 if (cfg->fc_metric == 0)
1320 cfg->fc_metric = IP6_RT_PRIO_USER;
1321
1322 err = -ENOBUFS;
1323 if (cfg->fc_nlinfo.nlh &&
1324 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1325 table = fib6_get_table(net, cfg->fc_table);
1326 if (!table) {
1327 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1328 table = fib6_new_table(net, cfg->fc_table);
1329 }
1330 } else {
1331 table = fib6_new_table(net, cfg->fc_table);
1332 }
1333
1334 if (!table)
1335 goto out;
1336
1337 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1338
1339 if (!rt) {
1340 err = -ENOMEM;
1341 goto out;
1342 }
1343
1344 rt->dst.obsolete = -1;
1345
1346 if (cfg->fc_flags & RTF_EXPIRES)
1347 rt6_set_expires(rt, jiffies +
1348 clock_t_to_jiffies(cfg->fc_expires));
1349 else
1350 rt6_clean_expires(rt);
1351
1352 if (cfg->fc_protocol == RTPROT_UNSPEC)
1353 cfg->fc_protocol = RTPROT_BOOT;
1354 rt->rt6i_protocol = cfg->fc_protocol;
1355
1356 addr_type = ipv6_addr_type(&cfg->fc_dst);
1357
1358 if (addr_type & IPV6_ADDR_MULTICAST)
1359 rt->dst.input = ip6_mc_input;
1360 else if (cfg->fc_flags & RTF_LOCAL)
1361 rt->dst.input = ip6_input;
1362 else
1363 rt->dst.input = ip6_forward;
1364
1365 rt->dst.output = ip6_output;
1366
1367 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1368 rt->rt6i_dst.plen = cfg->fc_dst_len;
1369 if (rt->rt6i_dst.plen == 128)
1370 rt->dst.flags |= DST_HOST;
1371
1372 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1373 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1374 if (!metrics) {
1375 err = -ENOMEM;
1376 goto out;
1377 }
1378 dst_init_metrics(&rt->dst, metrics, 0);
1379 }
1380 #ifdef CONFIG_IPV6_SUBTREES
1381 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1382 rt->rt6i_src.plen = cfg->fc_src_len;
1383 #endif
1384
1385 rt->rt6i_metric = cfg->fc_metric;
1386
1387 /* We cannot add true routes via loopback here,
1388 they would result in kernel looping; promote them to reject routes
1389 */
1390 if ((cfg->fc_flags & RTF_REJECT) ||
1391 (dev && (dev->flags & IFF_LOOPBACK) &&
1392 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1393 !(cfg->fc_flags & RTF_LOCAL))) {
1394 /* hold loopback dev/idev if we haven't done so. */
1395 if (dev != net->loopback_dev) {
1396 if (dev) {
1397 dev_put(dev);
1398 in6_dev_put(idev);
1399 }
1400 dev = net->loopback_dev;
1401 dev_hold(dev);
1402 idev = in6_dev_get(dev);
1403 if (!idev) {
1404 err = -ENODEV;
1405 goto out;
1406 }
1407 }
1408 rt->dst.output = ip6_pkt_discard_out;
1409 rt->dst.input = ip6_pkt_discard;
1410 rt->dst.error = -ENETUNREACH;
1411 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1412 goto install_route;
1413 }
1414
1415 if (cfg->fc_flags & RTF_GATEWAY) {
1416 const struct in6_addr *gw_addr;
1417 int gwa_type;
1418
1419 gw_addr = &cfg->fc_gateway;
1420 rt->rt6i_gateway = *gw_addr;
1421 gwa_type = ipv6_addr_type(gw_addr);
1422
1423 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1424 struct rt6_info *grt;
1425
1426 /* IPv6 strictly forbids using non-link-local
1427 addresses as nexthop addresses.
1428 Otherwise, the router will not be able to send redirects.
1429 It is very good, but in some (rare!) circumstances
1430 (SIT, PtP, NBMA NOARP links) it is handy to allow
1431 some exceptions. --ANK
1432 */
1433 err = -EINVAL;
1434 if (!(gwa_type & IPV6_ADDR_UNICAST))
1435 goto out;
1436
1437 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1438
1439 err = -EHOSTUNREACH;
1440 if (!grt)
1441 goto out;
1442 if (dev) {
1443 if (dev != grt->dst.dev) {
1444 dst_release(&grt->dst);
1445 goto out;
1446 }
1447 } else {
1448 dev = grt->dst.dev;
1449 idev = grt->rt6i_idev;
1450 dev_hold(dev);
1451 in6_dev_hold(grt->rt6i_idev);
1452 }
1453 if (!(grt->rt6i_flags & RTF_GATEWAY))
1454 err = 0;
1455 dst_release(&grt->dst);
1456
1457 if (err)
1458 goto out;
1459 }
1460 err = -EINVAL;
1461 if (!dev || (dev->flags & IFF_LOOPBACK))
1462 goto out;
1463 }
1464
1465 err = -ENODEV;
1466 if (!dev)
1467 goto out;
1468
1469 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1470 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1471 err = -EINVAL;
1472 goto out;
1473 }
1474 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1475 rt->rt6i_prefsrc.plen = 128;
1476 } else
1477 rt->rt6i_prefsrc.plen = 0;
1478
1479 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1480 err = rt6_bind_neighbour(rt, dev);
1481 if (err)
1482 goto out;
1483 }
1484
1485 rt->rt6i_flags = cfg->fc_flags;
1486
1487 install_route:
1488 if (cfg->fc_mx) {
1489 struct nlattr *nla;
1490 int remaining;
1491
1492 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1493 int type = nla_type(nla);
1494
1495 if (type) {
1496 if (type > RTAX_MAX) {
1497 err = -EINVAL;
1498 goto out;
1499 }
1500
1501 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1502 }
1503 }
1504 }
1505
1506 rt->dst.dev = dev;
1507 rt->rt6i_idev = idev;
1508 rt->rt6i_table = table;
1509
1510 cfg->fc_nlinfo.nl_net = dev_net(dev);
1511
1512 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1513
1514 out:
1515 if (dev)
1516 dev_put(dev);
1517 if (idev)
1518 in6_dev_put(idev);
1519 if (rt)
1520 dst_free(&rt->dst);
1521 return err;
1522 }
1523
1524 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1525 {
1526 int err;
1527 struct fib6_table *table;
1528 struct net *net = dev_net(rt->dst.dev);
1529
1530 if (rt == net->ipv6.ip6_null_entry)
1531 return -ENOENT;
1532
1533 table = rt->rt6i_table;
1534 write_lock_bh(&table->tb6_lock);
1535
1536 err = fib6_del(rt, info);
1537 dst_release(&rt->dst);
1538
1539 write_unlock_bh(&table->tb6_lock);
1540
1541 return err;
1542 }
1543
1544 int ip6_del_rt(struct rt6_info *rt)
1545 {
1546 struct nl_info info = {
1547 .nl_net = dev_net(rt->dst.dev),
1548 };
1549 return __ip6_del_rt(rt, &info);
1550 }
1551
1552 static int ip6_route_del(struct fib6_config *cfg)
1553 {
1554 struct fib6_table *table;
1555 struct fib6_node *fn;
1556 struct rt6_info *rt;
1557 int err = -ESRCH;
1558
1559 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1560 if (!table)
1561 return err;
1562
1563 read_lock_bh(&table->tb6_lock);
1564
1565 fn = fib6_locate(&table->tb6_root,
1566 &cfg->fc_dst, cfg->fc_dst_len,
1567 &cfg->fc_src, cfg->fc_src_len);
1568
1569 if (fn) {
1570 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1571 if (cfg->fc_ifindex &&
1572 (!rt->dst.dev ||
1573 rt->dst.dev->ifindex != cfg->fc_ifindex))
1574 continue;
1575 if (cfg->fc_flags & RTF_GATEWAY &&
1576 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1577 continue;
1578 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1579 continue;
1580 dst_hold(&rt->dst);
1581 read_unlock_bh(&table->tb6_lock);
1582
1583 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1584 }
1585 }
1586 read_unlock_bh(&table->tb6_lock);
1587
1588 return err;
1589 }
1590
1591 /*
1592 * Handle redirects
1593 */
1594 struct ip6rd_flowi {
1595 struct flowi6 fl6;
1596 struct in6_addr gateway;
1597 };
1598
1599 static struct rt6_info *__ip6_route_redirect(struct net *net,
1600 struct fib6_table *table,
1601 struct flowi6 *fl6,
1602 int flags)
1603 {
1604 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1605 struct rt6_info *rt;
1606 struct fib6_node *fn;
1607
1608 /*
1609 * Get the "current" route for this destination and
1610 * check if the redirect has come from the appropriate router.
1611 *
1612 * RFC 2461 specifies that redirects should only be
1613 * accepted if they come from the nexthop to the target.
1614 * Due to the way the routes are chosen, this notion
1615 * is a bit fuzzy and one might need to check all possible
1616 * routes.
1617 */
1618
1619 read_lock_bh(&table->tb6_lock);
1620 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1621 restart:
1622 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1623 /*
1624 * Current route is on-link; redirect is always invalid.
1625 *
1626 * Seems the previous statement is not true. It could
1627 * be a node which treats us as on-link (e.g. proxy ndisc),
1628 * but then the router serving it might decide that we should
1629 * know the truth 8)8) --ANK (980726).
1630 */
1631 if (rt6_check_expired(rt))
1632 continue;
1633 if (!(rt->rt6i_flags & RTF_GATEWAY))
1634 continue;
1635 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1636 continue;
1637 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1638 continue;
1639 break;
1640 }
1641
1642 if (!rt)
1643 rt = net->ipv6.ip6_null_entry;
1644 BACKTRACK(net, &fl6->saddr);
1645 out:
1646 dst_hold(&rt->dst);
1647
1648 read_unlock_bh(&table->tb6_lock);
1649
1650 return rt;
1651 };
1652
1653 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1654 const struct in6_addr *src,
1655 const struct in6_addr *gateway,
1656 struct net_device *dev)
1657 {
1658 int flags = RT6_LOOKUP_F_HAS_SADDR;
1659 struct net *net = dev_net(dev);
1660 struct ip6rd_flowi rdfl = {
1661 .fl6 = {
1662 .flowi6_oif = dev->ifindex,
1663 .daddr = *dest,
1664 .saddr = *src,
1665 },
1666 };
1667
1668 rdfl.gateway = *gateway;
1669
1670 if (rt6_need_strict(dest))
1671 flags |= RT6_LOOKUP_F_IFACE;
1672
1673 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1674 flags, __ip6_route_redirect);
1675 }
1676
1677 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1678 const struct in6_addr *saddr,
1679 struct neighbour *neigh, u8 *lladdr, int on_link)
1680 {
1681 struct rt6_info *rt, *nrt = NULL;
1682 struct netevent_redirect netevent;
1683 struct net *net = dev_net(neigh->dev);
1684
1685 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1686
1687 if (rt == net->ipv6.ip6_null_entry) {
1688 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1689 goto out;
1690 }
1691
1692 /*
1693 * We have finally decided to accept it.
1694 */
1695
1696 neigh_update(neigh, lladdr, NUD_STALE,
1697 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1698 NEIGH_UPDATE_F_OVERRIDE|
1699 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1700 NEIGH_UPDATE_F_ISROUTER))
1701 );
1702
1703 /*
1704 * Redirect received -> path was valid.
1705 * Look, redirects are sent only in response to data packets,
1706 * so that this nexthop apparently is reachable. --ANK
1707 */
1708 dst_confirm(&rt->dst);
1709
1710 /* Duplicate redirect: silently ignore. */
1711 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1712 goto out;
1713
1714 nrt = ip6_rt_copy(rt, dest);
1715 if (!nrt)
1716 goto out;
1717
1718 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1719 if (on_link)
1720 nrt->rt6i_flags &= ~RTF_GATEWAY;
1721
1722 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1723 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1724
1725 if (ip6_ins_rt(nrt))
1726 goto out;
1727
1728 netevent.old = &rt->dst;
1729 netevent.new = &nrt->dst;
1730 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1731
1732 if (rt->rt6i_flags & RTF_CACHE) {
1733 ip6_del_rt(rt);
1734 return;
1735 }
1736
1737 out:
1738 dst_release(&rt->dst);
1739 }
1740
1741 /*
1742 * Misc support functions
1743 */
1744
1745 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1746 const struct in6_addr *dest)
1747 {
1748 struct net *net = dev_net(ort->dst.dev);
1749 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1750 ort->rt6i_table);
1751
1752 if (rt) {
1753 rt->dst.input = ort->dst.input;
1754 rt->dst.output = ort->dst.output;
1755 rt->dst.flags |= DST_HOST;
1756
1757 rt->rt6i_dst.addr = *dest;
1758 rt->rt6i_dst.plen = 128;
1759 dst_copy_metrics(&rt->dst, &ort->dst);
1760 rt->dst.error = ort->dst.error;
1761 rt->rt6i_idev = ort->rt6i_idev;
1762 if (rt->rt6i_idev)
1763 in6_dev_hold(rt->rt6i_idev);
1764 rt->dst.lastuse = jiffies;
1765
1766 rt->rt6i_gateway = ort->rt6i_gateway;
1767 rt->rt6i_flags = ort->rt6i_flags;
1768 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1769 (RTF_DEFAULT | RTF_ADDRCONF))
1770 rt6_set_from(rt, ort);
1771 else
1772 rt6_clean_expires(rt);
1773 rt->rt6i_metric = 0;
1774
1775 #ifdef CONFIG_IPV6_SUBTREES
1776 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1777 #endif
1778 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1779 rt->rt6i_table = ort->rt6i_table;
1780 }
1781 return rt;
1782 }
1783
1784 #ifdef CONFIG_IPV6_ROUTE_INFO
1785 static struct rt6_info *rt6_get_route_info(struct net *net,
1786 const struct in6_addr *prefix, int prefixlen,
1787 const struct in6_addr *gwaddr, int ifindex)
1788 {
1789 struct fib6_node *fn;
1790 struct rt6_info *rt = NULL;
1791 struct fib6_table *table;
1792
1793 table = fib6_get_table(net, RT6_TABLE_INFO);
1794 if (!table)
1795 return NULL;
1796
1797 write_lock_bh(&table->tb6_lock);
1798 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1799 if (!fn)
1800 goto out;
1801
1802 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1803 if (rt->dst.dev->ifindex != ifindex)
1804 continue;
1805 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1806 continue;
1807 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1808 continue;
1809 dst_hold(&rt->dst);
1810 break;
1811 }
1812 out:
1813 write_unlock_bh(&table->tb6_lock);
1814 return rt;
1815 }
1816
1817 static struct rt6_info *rt6_add_route_info(struct net *net,
1818 const struct in6_addr *prefix, int prefixlen,
1819 const struct in6_addr *gwaddr, int ifindex,
1820 unsigned int pref)
1821 {
1822 struct fib6_config cfg = {
1823 .fc_table = RT6_TABLE_INFO,
1824 .fc_metric = IP6_RT_PRIO_USER,
1825 .fc_ifindex = ifindex,
1826 .fc_dst_len = prefixlen,
1827 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1828 RTF_UP | RTF_PREF(pref),
1829 .fc_nlinfo.pid = 0,
1830 .fc_nlinfo.nlh = NULL,
1831 .fc_nlinfo.nl_net = net,
1832 };
1833
1834 cfg.fc_dst = *prefix;
1835 cfg.fc_gateway = *gwaddr;
1836
1837 /* We should treat it as a default route if prefix length is 0. */
1838 if (!prefixlen)
1839 cfg.fc_flags |= RTF_DEFAULT;
1840
1841 ip6_route_add(&cfg);
1842
1843 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1844 }
1845 #endif
1846
1847 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1848 {
1849 struct rt6_info *rt;
1850 struct fib6_table *table;
1851
1852 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1853 if (!table)
1854 return NULL;
1855
1856 write_lock_bh(&table->tb6_lock);
1857 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1858 if (dev == rt->dst.dev &&
1859 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1860 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1861 break;
1862 }
1863 if (rt)
1864 dst_hold(&rt->dst);
1865 write_unlock_bh(&table->tb6_lock);
1866 return rt;
1867 }
1868
1869 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1870 struct net_device *dev,
1871 unsigned int pref)
1872 {
1873 struct fib6_config cfg = {
1874 .fc_table = RT6_TABLE_DFLT,
1875 .fc_metric = IP6_RT_PRIO_USER,
1876 .fc_ifindex = dev->ifindex,
1877 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1878 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1879 .fc_nlinfo.pid = 0,
1880 .fc_nlinfo.nlh = NULL,
1881 .fc_nlinfo.nl_net = dev_net(dev),
1882 };
1883
1884 cfg.fc_gateway = *gwaddr;
1885
1886 ip6_route_add(&cfg);
1887
1888 return rt6_get_dflt_router(gwaddr, dev);
1889 }
1890
1891 void rt6_purge_dflt_routers(struct net *net)
1892 {
1893 struct rt6_info *rt;
1894 struct fib6_table *table;
1895
1896 /* NOTE: Keep consistent with rt6_get_dflt_router */
1897 table = fib6_get_table(net, RT6_TABLE_DFLT);
1898 if (!table)
1899 return;
1900
1901 restart:
1902 read_lock_bh(&table->tb6_lock);
1903 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1904 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1905 dst_hold(&rt->dst);
1906 read_unlock_bh(&table->tb6_lock);
1907 ip6_del_rt(rt);
1908 goto restart;
1909 }
1910 }
1911 read_unlock_bh(&table->tb6_lock);
1912 }
1913
1914 static void rtmsg_to_fib6_config(struct net *net,
1915 struct in6_rtmsg *rtmsg,
1916 struct fib6_config *cfg)
1917 {
1918 memset(cfg, 0, sizeof(*cfg));
1919
1920 cfg->fc_table = RT6_TABLE_MAIN;
1921 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1922 cfg->fc_metric = rtmsg->rtmsg_metric;
1923 cfg->fc_expires = rtmsg->rtmsg_info;
1924 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1925 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1926 cfg->fc_flags = rtmsg->rtmsg_flags;
1927
1928 cfg->fc_nlinfo.nl_net = net;
1929
1930 cfg->fc_dst = rtmsg->rtmsg_dst;
1931 cfg->fc_src = rtmsg->rtmsg_src;
1932 cfg->fc_gateway = rtmsg->rtmsg_gateway;
1933 }
1934
1935 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1936 {
1937 struct fib6_config cfg;
1938 struct in6_rtmsg rtmsg;
1939 int err;
1940
1941 switch(cmd) {
1942 case SIOCADDRT: /* Add a route */
1943 case SIOCDELRT: /* Delete a route */
1944 if (!capable(CAP_NET_ADMIN))
1945 return -EPERM;
1946 err = copy_from_user(&rtmsg, arg,
1947 sizeof(struct in6_rtmsg));
1948 if (err)
1949 return -EFAULT;
1950
1951 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1952
1953 rtnl_lock();
1954 switch (cmd) {
1955 case SIOCADDRT:
1956 err = ip6_route_add(&cfg);
1957 break;
1958 case SIOCDELRT:
1959 err = ip6_route_del(&cfg);
1960 break;
1961 default:
1962 err = -EINVAL;
1963 }
1964 rtnl_unlock();
1965
1966 return err;
1967 }
1968
1969 return -EINVAL;
1970 }
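/*
 * Usage sketch (illustrative, from userspace): the legacy route ioctls
 * operate on a struct in6_rtmsg passed over any AF_INET6 socket, roughly
 *
 *	struct in6_rtmsg rt = {
 *		.rtmsg_dst	= ...,			destination prefix
 *		.rtmsg_dst_len	= 64,
 *		.rtmsg_gateway	= ...,			next hop
 *		.rtmsg_flags	= RTF_UP | RTF_GATEWAY,
 *		.rtmsg_metric	= 1,
 *		.rtmsg_ifindex	= if_nametoindex("eth0"),
 *	};
 *	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *	ioctl(fd, SIOCADDRT, &rt);			needs CAP_NET_ADMIN
 *
 * New code should prefer rtnetlink (RTM_NEWROUTE/RTM_DELROUTE), which is
 * what rtm_to_fib6_config() below handles.
 */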
1971
1972 /*
1973 * Drop the packet on the floor
1974 */
1975
1976 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1977 {
1978 int type;
1979 struct dst_entry *dst = skb_dst(skb);
1980 switch (ipstats_mib_noroutes) {
1981 case IPSTATS_MIB_INNOROUTES:
1982 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1983 if (type == IPV6_ADDR_ANY) {
1984 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1985 IPSTATS_MIB_INADDRERRORS);
1986 break;
1987 }
1988 /* FALLTHROUGH */
1989 case IPSTATS_MIB_OUTNOROUTES:
1990 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1991 ipstats_mib_noroutes);
1992 break;
1993 }
1994 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1995 kfree_skb(skb);
1996 return 0;
1997 }
1998
1999 static int ip6_pkt_discard(struct sk_buff *skb)
2000 {
2001 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2002 }
2003
2004 static int ip6_pkt_discard_out(struct sk_buff *skb)
2005 {
2006 skb->dev = skb_dst(skb)->dev;
2007 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2008 }
2009
2010 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2011
2012 static int ip6_pkt_prohibit(struct sk_buff *skb)
2013 {
2014 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2015 }
2016
2017 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2018 {
2019 skb->dev = skb_dst(skb)->dev;
2020 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2021 }
2022
2023 #endif
2024
2025 /*
2026 * Allocate a dst for local (unicast / anycast) address.
2027 */
2028
2029 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2030 const struct in6_addr *addr,
2031 bool anycast)
2032 {
2033 struct net *net = dev_net(idev->dev);
2034 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2035 int err;
2036
2037 if (!rt) {
2038 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2039 return ERR_PTR(-ENOMEM);
2040 }
2041
2042 in6_dev_hold(idev);
2043
2044 rt->dst.flags |= DST_HOST;
2045 rt->dst.input = ip6_input;
2046 rt->dst.output = ip6_output;
2047 rt->rt6i_idev = idev;
2048 rt->dst.obsolete = -1;
2049
2050 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2051 if (anycast)
2052 rt->rt6i_flags |= RTF_ANYCAST;
2053 else
2054 rt->rt6i_flags |= RTF_LOCAL;
2055 err = rt6_bind_neighbour(rt, rt->dst.dev);
2056 if (err) {
2057 dst_free(&rt->dst);
2058 return ERR_PTR(err);
2059 }
2060
2061 rt->rt6i_dst.addr = *addr;
2062 rt->rt6i_dst.plen = 128;
2063 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2064
2065 atomic_set(&rt->dst.__refcnt, 1);
2066
2067 return rt;
2068 }
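/*
 * Usage sketch (illustrative): addrconf creates one of these for every
 * address configured on an interface, roughly
 *
 *	rt = addrconf_dst_alloc(idev, &ifp->addr, false);	unicast
 *	rt = addrconf_dst_alloc(idev, &acaddr, true);		anycast
 *
 * and later inserts it into the local table with ip6_ins_rt(), so traffic
 * to the address is delivered through ip6_input via the loopback device.
 */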
2069
2070 int ip6_route_get_saddr(struct net *net,
2071 struct rt6_info *rt,
2072 const struct in6_addr *daddr,
2073 unsigned int prefs,
2074 struct in6_addr *saddr)
2075 {
2076 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2077 int err = 0;
2078 if (rt->rt6i_prefsrc.plen)
2079 *saddr = rt->rt6i_prefsrc.addr;
2080 else
2081 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2082 daddr, prefs, saddr);
2083 return err;
2084 }
2085
2086 /* remove deleted ip from prefsrc entries */
2087 struct arg_dev_net_ip {
2088 struct net_device *dev;
2089 struct net *net;
2090 struct in6_addr *addr;
2091 };
2092
2093 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2094 {
2095 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2096 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2097 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2098
2099 if (((void *)rt->dst.dev == dev || !dev) &&
2100 rt != net->ipv6.ip6_null_entry &&
2101 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2102 /* remove prefsrc entry */
2103 rt->rt6i_prefsrc.plen = 0;
2104 }
2105 return 0;
2106 }
2107
2108 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2109 {
2110 struct net *net = dev_net(ifp->idev->dev);
2111 struct arg_dev_net_ip adni = {
2112 .dev = ifp->idev->dev,
2113 .net = net,
2114 .addr = &ifp->addr,
2115 };
2116 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2117 }
2118
2119 struct arg_dev_net {
2120 struct net_device *dev;
2121 struct net *net;
2122 };
2123
2124 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2125 {
2126 const struct arg_dev_net *adn = arg;
2127 const struct net_device *dev = adn->dev;
2128
2129 if ((rt->dst.dev == dev || !dev) &&
2130 rt != adn->net->ipv6.ip6_null_entry)
2131 return -1;
2132
2133 return 0;
2134 }
2135
2136 void rt6_ifdown(struct net *net, struct net_device *dev)
2137 {
2138 struct arg_dev_net adn = {
2139 .dev = dev,
2140 .net = net,
2141 };
2142
2143 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2144 icmp6_clean_all(fib6_ifdown, &adn);
2145 }
2146
2147 struct rt6_mtu_change_arg {
2148 struct net_device *dev;
2149 unsigned int mtu;
2150 };
2151
2152 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2153 {
2154 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2155 struct inet6_dev *idev;
2156
2157 /* In IPv6 PMTU discovery is not optional,
2158 so the RTAX_MTU lock cannot disable it.
2159 We still use this lock to block changes
2160 caused by addrconf/ndisc.
2161 */
2162
2163 idev = __in6_dev_get(arg->dev);
2164 if (!idev)
2165 return 0;
2166
2167 /* For an administrative MTU increase there is no way to discover
2168 an IPv6 PMTU increase, so the PMTU increase must be applied here.
2169 Since RFC 1981 does not cover administrative MTU increases,
2170 updating the PMTU on increase is a MUST (e.g. jumbo frames).
2171 */
2172 /*
2173 If the new MTU is less than the route PMTU, the new MTU will be the
2174 lowest MTU in the path; update the route PMTU to reflect the
2175 decrease. If the new MTU is greater than the route PMTU, and the
2176 old MTU was the lowest MTU in the path, update the route PMTU
2177 to reflect the increase. In that case, if another node along the
2178 path still has the lowest MTU, its Packet Too Big message will
2179 trigger PMTU discovery again.
2180 */
2181 if (rt->dst.dev == arg->dev &&
2182 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2183 (dst_mtu(&rt->dst) >= arg->mtu ||
2184 (dst_mtu(&rt->dst) < arg->mtu &&
2185 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2186 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2187 }
2188 return 0;
2189 }
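/* Worked example (assuming the caller updates idev->cnf.mtu6 only after
 * this walk, so cnf.mtu6 still holds the old device MTU): lowering the
 * device MTU 1500 -> 1280 clamps every unlocked route on the device whose
 * PMTU is >= 1280 down to 1280; raising it 1500 -> 9000 bumps only routes
 * whose PMTU equals the old 1500, while a PMTU of 1280 learned from a
 * remote Packet Too Big is left untouched.
 */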
2190
2191 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2192 {
2193 struct rt6_mtu_change_arg arg = {
2194 .dev = dev,
2195 .mtu = mtu,
2196 };
2197
2198 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2199 }
2200
2201 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2202 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2203 [RTA_OIF] = { .type = NLA_U32 },
2204 [RTA_IIF] = { .type = NLA_U32 },
2205 [RTA_PRIORITY] = { .type = NLA_U32 },
2206 [RTA_METRICS] = { .type = NLA_NESTED },
2207 };
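/* Note: attributes missing from this policy (e.g. RTA_DST, RTA_SRC,
 * RTA_PREFSRC, RTA_TABLE) get no automatic type/length validation from
 * nlmsg_parse(); rtm_to_fib6_config() below checks the RTA_DST/RTA_SRC
 * lengths against rtm_dst_len/rtm_src_len itself, and nla_memcpy() bounds
 * the remaining copies to the attribute length.
 */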
2208
2209 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2210 struct fib6_config *cfg)
2211 {
2212 struct rtmsg *rtm;
2213 struct nlattr *tb[RTA_MAX+1];
2214 int err;
2215
2216 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2217 if (err < 0)
2218 goto errout;
2219
2220 err = -EINVAL;
2221 rtm = nlmsg_data(nlh);
2222 memset(cfg, 0, sizeof(*cfg));
2223
2224 cfg->fc_table = rtm->rtm_table;
2225 cfg->fc_dst_len = rtm->rtm_dst_len;
2226 cfg->fc_src_len = rtm->rtm_src_len;
2227 cfg->fc_flags = RTF_UP;
2228 cfg->fc_protocol = rtm->rtm_protocol;
2229
2230 if (rtm->rtm_type == RTN_UNREACHABLE)
2231 cfg->fc_flags |= RTF_REJECT;
2232
2233 if (rtm->rtm_type == RTN_LOCAL)
2234 cfg->fc_flags |= RTF_LOCAL;
2235
2236 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2237 cfg->fc_nlinfo.nlh = nlh;
2238 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2239
2240 if (tb[RTA_GATEWAY]) {
2241 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2242 cfg->fc_flags |= RTF_GATEWAY;
2243 }
2244
2245 if (tb[RTA_DST]) {
2246 int plen = (rtm->rtm_dst_len + 7) >> 3;
2247
2248 if (nla_len(tb[RTA_DST]) < plen)
2249 goto errout;
2250
2251 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2252 }
2253
2254 if (tb[RTA_SRC]) {
2255 int plen = (rtm->rtm_src_len + 7) >> 3;
2256
2257 if (nla_len(tb[RTA_SRC]) < plen)
2258 goto errout;
2259
2260 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2261 }
2262
2263 if (tb[RTA_PREFSRC])
2264 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2265
2266 if (tb[RTA_OIF])
2267 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2268
2269 if (tb[RTA_PRIORITY])
2270 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2271
2272 if (tb[RTA_METRICS]) {
2273 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2274 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2275 }
2276
2277 if (tb[RTA_TABLE])
2278 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2279
2280 err = 0;
2281 errout:
2282 return err;
2283 }
2284
2285 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2286 {
2287 struct fib6_config cfg;
2288 int err;
2289
2290 err = rtm_to_fib6_config(skb, nlh, &cfg);
2291 if (err < 0)
2292 return err;
2293
2294 return ip6_route_del(&cfg);
2295 }
2296
2297 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2298 {
2299 struct fib6_config cfg;
2300 int err;
2301
2302 err = rtm_to_fib6_config(skb, nlh, &cfg);
2303 if (err < 0)
2304 return err;
2305
2306 return ip6_route_add(&cfg);
2307 }
2308
2309 static inline size_t rt6_nlmsg_size(void)
2310 {
2311 return NLMSG_ALIGN(sizeof(struct rtmsg))
2312 + nla_total_size(16) /* RTA_SRC */
2313 + nla_total_size(16) /* RTA_DST */
2314 + nla_total_size(16) /* RTA_GATEWAY */
2315 + nla_total_size(16) /* RTA_PREFSRC */
2316 + nla_total_size(4) /* RTA_TABLE */
2317 + nla_total_size(4) /* RTA_IIF */
2318 + nla_total_size(4) /* RTA_OIF */
2319 + nla_total_size(4) /* RTA_PRIORITY */
2320 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2321 + nla_total_size(sizeof(struct rta_cacheinfo));
2322 }
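/* Note: this is the worst-case message size used by inet6_rt_notify() when
 * allocating the notification skb; if rt6_fill_node() still overflows it,
 * the -EMSGSIZE case there is treated as a bug (see the WARN_ON below).
 */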
2323
2324 static int rt6_fill_node(struct net *net,
2325 struct sk_buff *skb, struct rt6_info *rt,
2326 struct in6_addr *dst, struct in6_addr *src,
2327 int iif, int type, u32 pid, u32 seq,
2328 int prefix, int nowait, unsigned int flags)
2329 {
2330 const struct inet_peer *peer;
2331 struct rtmsg *rtm;
2332 struct nlmsghdr *nlh;
2333 long expires;
2334 u32 table;
2335 struct neighbour *n;
2336 u32 ts, tsage;
2337
2338 if (prefix) { /* user wants prefix routes only */
2339 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2340 /* success since this is not a prefix route */
2341 return 1;
2342 }
2343 }
2344
2345 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2346 if (!nlh)
2347 return -EMSGSIZE;
2348
2349 rtm = nlmsg_data(nlh);
2350 rtm->rtm_family = AF_INET6;
2351 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2352 rtm->rtm_src_len = rt->rt6i_src.plen;
2353 rtm->rtm_tos = 0;
2354 if (rt->rt6i_table)
2355 table = rt->rt6i_table->tb6_id;
2356 else
2357 table = RT6_TABLE_UNSPEC;
2358 rtm->rtm_table = table;
2359 if (nla_put_u32(skb, RTA_TABLE, table))
2360 goto nla_put_failure;
2361 if (rt->rt6i_flags & RTF_REJECT)
2362 rtm->rtm_type = RTN_UNREACHABLE;
2363 else if (rt->rt6i_flags & RTF_LOCAL)
2364 rtm->rtm_type = RTN_LOCAL;
2365 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2366 rtm->rtm_type = RTN_LOCAL;
2367 else
2368 rtm->rtm_type = RTN_UNICAST;
2369 rtm->rtm_flags = 0;
2370 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2371 rtm->rtm_protocol = rt->rt6i_protocol;
2372 if (rt->rt6i_flags & RTF_DYNAMIC)
2373 rtm->rtm_protocol = RTPROT_REDIRECT;
2374 else if (rt->rt6i_flags & RTF_ADDRCONF)
2375 rtm->rtm_protocol = RTPROT_KERNEL;
2376 else if (rt->rt6i_flags & RTF_DEFAULT)
2377 rtm->rtm_protocol = RTPROT_RA;
2378
2379 if (rt->rt6i_flags & RTF_CACHE)
2380 rtm->rtm_flags |= RTM_F_CLONED;
2381
2382 if (dst) {
2383 if (nla_put(skb, RTA_DST, 16, dst))
2384 goto nla_put_failure;
2385 rtm->rtm_dst_len = 128;
2386 } else if (rtm->rtm_dst_len)
2387 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2388 goto nla_put_failure;
2389 #ifdef CONFIG_IPV6_SUBTREES
2390 if (src) {
2391 if (nla_put(skb, RTA_SRC, 16, src))
2392 goto nla_put_failure;
2393 rtm->rtm_src_len = 128;
2394 } else if (rtm->rtm_src_len &&
2395 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2396 goto nla_put_failure;
2397 #endif
2398 if (iif) {
2399 #ifdef CONFIG_IPV6_MROUTE
2400 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2401 int err = ip6mr_get_route(net, skb, rtm, nowait);
2402 if (err <= 0) {
2403 if (!nowait) {
2404 if (err == 0)
2405 return 0;
2406 goto nla_put_failure;
2407 } else {
2408 if (err == -EMSGSIZE)
2409 goto nla_put_failure;
2410 }
2411 }
2412 } else
2413 #endif
2414 if (nla_put_u32(skb, RTA_IIF, iif))
2415 goto nla_put_failure;
2416 } else if (dst) {
2417 struct in6_addr saddr_buf;
2418 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2419 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2420 goto nla_put_failure;
2421 }
2422
2423 if (rt->rt6i_prefsrc.plen) {
2424 struct in6_addr saddr_buf;
2425 saddr_buf = rt->rt6i_prefsrc.addr;
2426 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2427 goto nla_put_failure;
2428 }
2429
2430 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2431 goto nla_put_failure;
2432
2433 rcu_read_lock();
2434 n = dst_get_neighbour_noref(&rt->dst);
2435 if (n) {
2436 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2437 rcu_read_unlock();
2438 goto nla_put_failure;
2439 }
2440 }
2441 rcu_read_unlock();
2442
2443 if (rt->dst.dev &&
2444 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2445 goto nla_put_failure;
2446 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2447 goto nla_put_failure;
2448 if (!(rt->rt6i_flags & RTF_EXPIRES))
2449 expires = 0;
2450 else if (rt->dst.expires - jiffies < INT_MAX)
2451 expires = rt->dst.expires - jiffies;
2452 else
2453 expires = INT_MAX;
2454
2455 peer = NULL;
2456 if (rt6_has_peer(rt))
2457 peer = rt6_peer_ptr(rt);
2458 ts = tsage = 0;
2459 if (peer && peer->tcp_ts_stamp) {
2460 ts = peer->tcp_ts;
2461 tsage = get_seconds() - peer->tcp_ts_stamp;
2462 }
2463
2464 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
2465 expires, rt->dst.error) < 0)
2466 goto nla_put_failure;
2467
2468 return nlmsg_end(skb, nlh);
2469
2470 nla_put_failure:
2471 nlmsg_cancel(skb, nlh);
2472 return -EMSGSIZE;
2473 }
2474
2475 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2476 {
2477 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2478 int prefix;
2479
2480 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2481 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2482 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2483 } else
2484 prefix = 0;
2485
2486 return rt6_fill_node(arg->net,
2487 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2488 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2489 prefix, 0, NLM_F_MULTI);
2490 }
2491
2492 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2493 {
2494 struct net *net = sock_net(in_skb->sk);
2495 struct nlattr *tb[RTA_MAX+1];
2496 struct rt6_info *rt;
2497 struct sk_buff *skb;
2498 struct rtmsg *rtm;
2499 struct flowi6 fl6;
2500 int err, iif = 0, oif = 0;
2501
2502 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2503 if (err < 0)
2504 goto errout;
2505
2506 err = -EINVAL;
2507 memset(&fl6, 0, sizeof(fl6));
2508
2509 if (tb[RTA_SRC]) {
2510 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2511 goto errout;
2512
2513 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2514 }
2515
2516 if (tb[RTA_DST]) {
2517 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2518 goto errout;
2519
2520 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2521 }
2522
2523 if (tb[RTA_IIF])
2524 iif = nla_get_u32(tb[RTA_IIF]);
2525
2526 if (tb[RTA_OIF])
2527 oif = nla_get_u32(tb[RTA_OIF]);
2528
2529 if (iif) {
2530 struct net_device *dev;
2531 int flags = 0;
2532
2533 dev = __dev_get_by_index(net, iif);
2534 if (!dev) {
2535 err = -ENODEV;
2536 goto errout;
2537 }
2538
2539 fl6.flowi6_iif = iif;
2540
2541 if (!ipv6_addr_any(&fl6.saddr))
2542 flags |= RT6_LOOKUP_F_HAS_SADDR;
2543
2544 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2545 flags);
2546 } else {
2547 fl6.flowi6_oif = oif;
2548
2549 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2550 }
2551
2552 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2553 if (!skb) {
2554 dst_release(&rt->dst);
2555 err = -ENOBUFS;
2556 goto errout;
2557 }
2558
2559 /* Reserve room for dummy headers; this skb can pass
2560 through a good chunk of the routing engine.
2561 */
2562 skb_reset_mac_header(skb);
2563 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2564
2565 skb_dst_set(skb, &rt->dst);
2566
2567 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2568 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2569 nlh->nlmsg_seq, 0, 0, 0);
2570 if (err < 0) {
2571 kfree_skb(skb);
2572 goto errout;
2573 }
2574
2575 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2576 errout:
2577 return err;
2578 }
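/* Note: this is roughly the kernel side of "ip -6 route get": it builds a
 * flow from RTA_SRC/RTA_DST/RTA_IIF/RTA_OIF, performs an input-style lookup
 * (ip6_route_input_lookup) when an input interface is given and an output
 * lookup (ip6_route_output) otherwise, then unicasts a single RTM_NEWROUTE
 * answer back to the requesting socket.
 */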
2579
2580 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2581 {
2582 struct sk_buff *skb;
2583 struct net *net = info->nl_net;
2584 u32 seq;
2585 int err;
2586
2587 err = -ENOBUFS;
2588 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2589
2590 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2591 if (!skb)
2592 goto errout;
2593
2594 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2595 event, info->pid, seq, 0, 0, 0);
2596 if (err < 0) {
2597 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2598 WARN_ON(err == -EMSGSIZE);
2599 kfree_skb(skb);
2600 goto errout;
2601 }
2602 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2603 info->nlh, gfp_any());
2604 return;
2605 errout:
2606 if (err < 0)
2607 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2608 }
2609
2610 static int ip6_route_dev_notify(struct notifier_block *this,
2611 unsigned long event, void *data)
2612 {
2613 struct net_device *dev = (struct net_device *)data;
2614 struct net *net = dev_net(dev);
2615
2616 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2617 net->ipv6.ip6_null_entry->dst.dev = dev;
2618 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2619 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2620 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2621 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2622 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2623 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2624 #endif
2625 }
2626
2627 return NOTIFY_OK;
2628 }
2629
2630 /*
2631 * /proc
2632 */
2633
2634 #ifdef CONFIG_PROC_FS
2635
2636 struct rt6_proc_arg
2637 {
2638 char *buffer;
2639 int offset;
2640 int length;
2641 int skip;
2642 int len;
2643 };
2644
2645 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2646 {
2647 struct seq_file *m = p_arg;
2648 struct neighbour *n;
2649
2650 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2651
2652 #ifdef CONFIG_IPV6_SUBTREES
2653 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2654 #else
2655 seq_puts(m, "00000000000000000000000000000000 00 ");
2656 #endif
2657 rcu_read_lock();
2658 n = dst_get_neighbour_noref(&rt->dst);
2659 if (n) {
2660 seq_printf(m, "%pi6", n->primary_key);
2661 } else {
2662 seq_puts(m, "00000000000000000000000000000000");
2663 }
2664 rcu_read_unlock();
2665 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2666 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2667 rt->dst.__use, rt->rt6i_flags,
2668 rt->dst.dev ? rt->dst.dev->name : "");
2669 return 0;
2670 }
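/* Each /proc/net/ipv6_route line printed above therefore reads:
 * dst-addr dst-plen src-addr src-plen next-hop metric refcnt use flags dev
 * with the source columns fixed at all zeroes unless CONFIG_IPV6_SUBTREES
 * is enabled.
 */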
2671
2672 static int ipv6_route_show(struct seq_file *m, void *v)
2673 {
2674 struct net *net = (struct net *)m->private;
2675 fib6_clean_all_ro(net, rt6_info_route, 0, m);
2676 return 0;
2677 }
2678
2679 static int ipv6_route_open(struct inode *inode, struct file *file)
2680 {
2681 return single_open_net(inode, file, ipv6_route_show);
2682 }
2683
2684 static const struct file_operations ipv6_route_proc_fops = {
2685 .owner = THIS_MODULE,
2686 .open = ipv6_route_open,
2687 .read = seq_read,
2688 .llseek = seq_lseek,
2689 .release = single_release_net,
2690 };
2691
2692 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2693 {
2694 struct net *net = (struct net *)seq->private;
2695 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2696 net->ipv6.rt6_stats->fib_nodes,
2697 net->ipv6.rt6_stats->fib_route_nodes,
2698 net->ipv6.rt6_stats->fib_rt_alloc,
2699 net->ipv6.rt6_stats->fib_rt_entries,
2700 net->ipv6.rt6_stats->fib_rt_cache,
2701 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2702 net->ipv6.rt6_stats->fib_discarded_routes);
2703
2704 return 0;
2705 }
2706
2707 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2708 {
2709 return single_open_net(inode, file, rt6_stats_seq_show);
2710 }
2711
2712 static const struct file_operations rt6_stats_seq_fops = {
2713 .owner = THIS_MODULE,
2714 .open = rt6_stats_seq_open,
2715 .read = seq_read,
2716 .llseek = seq_lseek,
2717 .release = single_release_net,
2718 };
2719 #endif /* CONFIG_PROC_FS */
2720
2721 #ifdef CONFIG_SYSCTL
2722
2723 static
2724 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2725 void __user *buffer, size_t *lenp, loff_t *ppos)
2726 {
2727 struct net *net;
2728 int delay;
2729 if (!write)
2730 return -EINVAL;
2731
2732 net = (struct net *)ctl->extra1;
2733 delay = net->ipv6.sysctl.flush_delay;
2734 proc_dointvec(ctl, write, buffer, lenp, ppos);
2735 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2736 return 0;
2737 }
2738
2739 ctl_table ipv6_route_table_template[] = {
2740 {
2741 .procname = "flush",
2742 .data = &init_net.ipv6.sysctl.flush_delay,
2743 .maxlen = sizeof(int),
2744 .mode = 0200,
2745 .proc_handler = ipv6_sysctl_rtcache_flush
2746 },
2747 {
2748 .procname = "gc_thresh",
2749 .data = &ip6_dst_ops_template.gc_thresh,
2750 .maxlen = sizeof(int),
2751 .mode = 0644,
2752 .proc_handler = proc_dointvec,
2753 },
2754 {
2755 .procname = "max_size",
2756 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2757 .maxlen = sizeof(int),
2758 .mode = 0644,
2759 .proc_handler = proc_dointvec,
2760 },
2761 {
2762 .procname = "gc_min_interval",
2763 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2764 .maxlen = sizeof(int),
2765 .mode = 0644,
2766 .proc_handler = proc_dointvec_jiffies,
2767 },
2768 {
2769 .procname = "gc_timeout",
2770 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2771 .maxlen = sizeof(int),
2772 .mode = 0644,
2773 .proc_handler = proc_dointvec_jiffies,
2774 },
2775 {
2776 .procname = "gc_interval",
2777 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2778 .maxlen = sizeof(int),
2779 .mode = 0644,
2780 .proc_handler = proc_dointvec_jiffies,
2781 },
2782 {
2783 .procname = "gc_elasticity",
2784 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2785 .maxlen = sizeof(int),
2786 .mode = 0644,
2787 .proc_handler = proc_dointvec,
2788 },
2789 {
2790 .procname = "mtu_expires",
2791 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2792 .maxlen = sizeof(int),
2793 .mode = 0644,
2794 .proc_handler = proc_dointvec_jiffies,
2795 },
2796 {
2797 .procname = "min_adv_mss",
2798 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2799 .maxlen = sizeof(int),
2800 .mode = 0644,
2801 .proc_handler = proc_dointvec,
2802 },
2803 {
2804 .procname = "gc_min_interval_ms",
2805 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2806 .maxlen = sizeof(int),
2807 .mode = 0644,
2808 .proc_handler = proc_dointvec_ms_jiffies,
2809 },
2810 { }
2811 };
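/* These knobs are registered per network namespace and are typically
 * visible under /proc/sys/net/ipv6/route/ (net.ipv6.route.*); writing any
 * value to the write-only "flush" entry triggers fib6_run_gc() via
 * ipv6_sysctl_rtcache_flush() above. "gc_min_interval_ms" deliberately
 * shares its data pointer with "gc_min_interval", exposing the same
 * jiffies value in milliseconds.
 */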
2812
2813 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2814 {
2815 struct ctl_table *table;
2816
2817 table = kmemdup(ipv6_route_table_template,
2818 sizeof(ipv6_route_table_template),
2819 GFP_KERNEL);
2820
2821 if (table) {
2822 table[0].data = &net->ipv6.sysctl.flush_delay;
2823 table[0].extra1 = net;
2824 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2825 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2826 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2827 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2828 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2829 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2830 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2831 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2832 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2833 }
2834
2835 return table;
2836 }
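/* The indices into the kmemdup()'ed copy above must track the template's
 * ordering exactly: each per-netns table simply re-points .data (and
 * .extra1 for "flush") at that namespace's own counters instead of
 * init_net's.
 */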
2837 #endif
2838
2839 static int __net_init ip6_route_net_init(struct net *net)
2840 {
2841 int ret = -ENOMEM;
2842
2843 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2844 sizeof(net->ipv6.ip6_dst_ops));
2845
2846 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2847 goto out_ip6_dst_ops;
2848
2849 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2850 sizeof(*net->ipv6.ip6_null_entry),
2851 GFP_KERNEL);
2852 if (!net->ipv6.ip6_null_entry)
2853 goto out_ip6_dst_entries;
2854 net->ipv6.ip6_null_entry->dst.path =
2855 (struct dst_entry *)net->ipv6.ip6_null_entry;
2856 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2857 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2858 ip6_template_metrics, true);
2859
2860 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2861 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2862 sizeof(*net->ipv6.ip6_prohibit_entry),
2863 GFP_KERNEL);
2864 if (!net->ipv6.ip6_prohibit_entry)
2865 goto out_ip6_null_entry;
2866 net->ipv6.ip6_prohibit_entry->dst.path =
2867 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2868 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2869 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2870 ip6_template_metrics, true);
2871
2872 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2873 sizeof(*net->ipv6.ip6_blk_hole_entry),
2874 GFP_KERNEL);
2875 if (!net->ipv6.ip6_blk_hole_entry)
2876 goto out_ip6_prohibit_entry;
2877 net->ipv6.ip6_blk_hole_entry->dst.path =
2878 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2879 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2880 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2881 ip6_template_metrics, true);
2882 #endif
2883
2884 net->ipv6.sysctl.flush_delay = 0;
2885 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2886 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2887 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2888 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2889 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2890 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2891 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2892
2893 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2894
2895 ret = 0;
2896 out:
2897 return ret;
2898
2899 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2900 out_ip6_prohibit_entry:
2901 kfree(net->ipv6.ip6_prohibit_entry);
2902 out_ip6_null_entry:
2903 kfree(net->ipv6.ip6_null_entry);
2904 #endif
2905 out_ip6_dst_entries:
2906 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2907 out_ip6_dst_ops:
2908 goto out;
2909 }
2910
2911 static void __net_exit ip6_route_net_exit(struct net *net)
2912 {
2913 kfree(net->ipv6.ip6_null_entry);
2914 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2915 kfree(net->ipv6.ip6_prohibit_entry);
2916 kfree(net->ipv6.ip6_blk_hole_entry);
2917 #endif
2918 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2919 }
2920
2921 static int __net_init ip6_route_net_init_late(struct net *net)
2922 {
2923 #ifdef CONFIG_PROC_FS
2924 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2925 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2926 #endif
2927 return 0;
2928 }
2929
2930 static void __net_exit ip6_route_net_exit_late(struct net *net)
2931 {
2932 #ifdef CONFIG_PROC_FS
2933 proc_net_remove(net, "ipv6_route");
2934 proc_net_remove(net, "rt6_stats");
2935 #endif
2936 }
2937
2938 static struct pernet_operations ip6_route_net_ops = {
2939 .init = ip6_route_net_init,
2940 .exit = ip6_route_net_exit,
2941 };
2942
2943 static int __net_init ipv6_inetpeer_init(struct net *net)
2944 {
2945 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2946
2947 if (!bp)
2948 return -ENOMEM;
2949 inet_peer_base_init(bp);
2950 net->ipv6.peers = bp;
2951 return 0;
2952 }
2953
2954 static void __net_exit ipv6_inetpeer_exit(struct net *net)
2955 {
2956 struct inet_peer_base *bp = net->ipv6.peers;
2957
2958 net->ipv6.peers = NULL;
2959 inetpeer_invalidate_tree(bp);
2960 kfree(bp);
2961 }
2962
2963 static struct pernet_operations ipv6_inetpeer_ops = {
2964 .init = ipv6_inetpeer_init,
2965 .exit = ipv6_inetpeer_exit,
2966 };
2967
2968 static struct pernet_operations ip6_route_net_late_ops = {
2969 .init = ip6_route_net_init_late,
2970 .exit = ip6_route_net_exit_late,
2971 };
2972
2973 static struct notifier_block ip6_route_dev_notifier = {
2974 .notifier_call = ip6_route_dev_notify,
2975 .priority = 0,
2976 };
2977
2978 int __init ip6_route_init(void)
2979 {
2980 int ret;
2981
2982 ret = -ENOMEM;
2983 ip6_dst_ops_template.kmem_cachep =
2984 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2985 SLAB_HWCACHE_ALIGN, NULL);
2986 if (!ip6_dst_ops_template.kmem_cachep)
2987 goto out;
2988
2989 ret = dst_entries_init(&ip6_dst_blackhole_ops);
2990 if (ret)
2991 goto out_kmem_cache;
2992
2993 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
2994 if (ret)
2995 goto out_dst_entries;
2996
2997 ret = register_pernet_subsys(&ip6_route_net_ops);
2998 if (ret)
2999 goto out_register_inetpeer;
3000
3001 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3002
3003 /* The loopback device is registered before this portion of code runs,
3004 * so the loopback reference in the rt6_info templates has not been taken;
3005 * do it manually for init_net */
3006 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3007 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3008 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3009 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3010 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3011 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3012 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3013 #endif
3014 ret = fib6_init();
3015 if (ret)
3016 goto out_register_subsys;
3017
3018 ret = xfrm6_init();
3019 if (ret)
3020 goto out_fib6_init;
3021
3022 ret = fib6_rules_init();
3023 if (ret)
3024 goto xfrm6_init;
3025
3026 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3027 if (ret)
3028 goto fib6_rules_init;
3029
3030 ret = -ENOBUFS;
3031 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3032 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3033 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3034 goto out_register_late_subsys;
3035
3036 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3037 if (ret)
3038 goto out_register_late_subsys;
3039
3040 out:
3041 return ret;
3042
3043 out_register_late_subsys:
3044 unregister_pernet_subsys(&ip6_route_net_late_ops);
3045 fib6_rules_init:
3046 fib6_rules_cleanup();
3047 xfrm6_init:
3048 xfrm6_fini();
3049 out_fib6_init:
3050 fib6_gc_cleanup();
3051 out_register_subsys:
3052 unregister_pernet_subsys(&ip6_route_net_ops);
3053 out_register_inetpeer:
3054 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3055 out_dst_entries:
3056 dst_entries_destroy(&ip6_dst_blackhole_ops);
3057 out_kmem_cache:
3058 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3059 goto out;
3060 }
3061
3062 void ip6_route_cleanup(void)
3063 {
3064 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3065 unregister_pernet_subsys(&ip6_route_net_late_ops);
3066 fib6_rules_cleanup();
3067 xfrm6_fini();
3068 fib6_gc_cleanup();
3069 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3070 unregister_pernet_subsys(&ip6_route_net_ops);
3071 dst_entries_destroy(&ip6_dst_blackhole_ops);
3072 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3073 }