ipv6: Do not mark ipv6_inetpeer_ops as __net_initdata.
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60
61 #include <asm/uaccess.h>
62
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68 const struct in6_addr *dest);
69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void ip6_dst_destroy(struct dst_entry *);
74 static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
76 static int ip6_dst_gc(struct dst_ops *ops);
77
78 static int ip6_pkt_discard(struct sk_buff *skb);
79 static int ip6_pkt_discard_out(struct sk_buff *skb);
80 static void ip6_link_failure(struct sk_buff *skb);
81 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
82
83 #ifdef CONFIG_IPV6_ROUTE_INFO
84 static struct rt6_info *rt6_add_route_info(struct net *net,
85 const struct in6_addr *prefix, int prefixlen,
86 const struct in6_addr *gwaddr, int ifindex,
87 unsigned int pref);
88 static struct rt6_info *rt6_get_route_info(struct net *net,
89 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex);
91 #endif
92
/*
 * Copy-on-write a route's metrics into its inet_peer so they become
 * writable.  Returns a writable metrics array, or NULL when the route
 * is not a host route, no peer is available, or another CPU raced us
 * and installed a read-only metrics block.
 */
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct inet_peer *peer;
	u32 *p = NULL;

	/* Only host (cached /128) routes carry per-peer metrics. */
	if (!(rt->dst.flags & DST_HOST))
		return NULL;

	peer = rt6_get_peer_create(rt);
	if (peer) {
		u32 *old_p = __DST_METRICS_PTR(old);
		unsigned long prev, new;

		p = peer->metrics;
		/* First user of this peer: seed from the previous metrics. */
		if (inet_metrics_new(peer))
			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);

		new = (unsigned long) p;
		/* Atomically install the peer metrics pointer. */
		prev = cmpxchg(&dst->_metrics, old, new);

		if (prev != old) {
			/* Lost the race: use whatever the winner installed, */
			p = __DST_METRICS_PTR(prev);
			/* unless it is read-only — then nothing is writable. */
			if (prev & DST_METRICS_READ_ONLY)
				p = NULL;
		}
	}
	return p;
}
122
123 static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
124 {
125 struct in6_addr *p = &rt->rt6i_gateway;
126
127 if (!ipv6_addr_any(p))
128 return (const void *) p;
129 return daddr;
130 }
131
/*
 * dst_ops->neigh_lookup: find (or create) the ndisc neighbour entry for
 * this route, keyed on the gateway when present, else on @daddr.
 */
static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct neighbour *n;

	daddr = choose_neigh_daddr(rt, daddr);
	n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
	if (n)
		return n;
	/* Not cached yet: may return ERR_PTR, callers must check. */
	return neigh_create(&nd_tbl, daddr, dst->dev);
}
143
/*
 * Attach a neighbour entry for the route's gateway to rt->dst.
 * Returns 0 on success or a negative errno from neigh_create().
 */
static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
{
	struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
	if (!n) {
		n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	/* Transfers the reference taken above to the dst. */
	dst_set_neighbour(&rt->dst, n);

	return 0;
}
156
/*
 * Template for the per-namespace IPv6 dst_ops; copied into each
 * struct net at namespace init.
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};
174
175 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
176 {
177 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
178
179 return mtu ? : dst->dev->mtu;
180 }
181
/* Blackhole routes never adjust PMTU — intentionally empty. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
{
}

/* Blackhole routes have no writable metrics. */
static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
					 unsigned long old)
{
	return NULL;
}

/*
 * dst_ops for routes returned by ip6_blackhole_route(): immutable
 * copies used when xfrm must neutralize a dst.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
	.neigh_lookup		=	ip6_neigh_lookup,
};
203
/* Default metrics for the template routes below (max hop limit only). */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 255,
};

/*
 * Template for the per-namespace "null" route: matches nothing usable
 * and rejects traffic with -ENETUNREACH.
 */
static struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
222
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template "prohibit" route: rejects with -EACCES and sends an
 * admin-prohibited ICMP error (via ip6_pkt_prohibit*).
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template "blackhole" route: silently drops traffic (dst_discard,
 * no ICMP error), failing with -EINVAL.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
259
/* allocate dst with ip6_dst_ops */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
					     struct net_device *dev,
					     int flags)
{
	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);

	/*
	 * Zero everything after the embedded dst_entry; rt6i_table is
	 * the first member past it, so this wipes the rt6-specific part
	 * without touching the already-initialized dst.
	 */
	if (rt)
		memset(&rt->rt6i_table, 0,
		       sizeof(*rt) - sizeof(struct dst_entry));

	return rt;
}
273
/*
 * dst_ops->destroy: drop everything the route holds — shared metrics
 * (non-host routes only), the inet6_dev, the parent "from" dst, and
 * the inet_peer.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct inet_peer *peer = rt->rt6i_peer;

	/* Host routes keep metrics in the peer, freed with the peer. */
	if (!(rt->dst.flags & DST_HOST))
		dst_destroy_metrics_generic(dst);

	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	/* dst->from is only held while not using our own expiry. */
	if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
		dst_release(dst->from);

	if (peer) {
		rt->rt6i_peer = NULL;
		inet_putpeer(peer);
	}
}
296
/*
 * Generation counter for peer bindings; bumped elsewhere to force
 * routes to re-validate their inet_peer in ip6_dst_check().
 */
static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);

static u32 rt6_peer_genid(void)
{
	return atomic_read(&__rt6_peer_genid);
}

/*
 * Bind an inet_peer for the route's destination.  Safe against
 * concurrent binders: only one cmpxchg wins, the loser drops its ref.
 */
void rt6_bind_peer(struct rt6_info *rt, int create)
{
	struct net *net = dev_net(rt->dst.dev);
	struct inet_peer *peer;

	peer = inet_getpeer_v6(net, &rt->rt6i_dst.addr, create);
	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
		inet_putpeer(peer);
	else
		rt->rt6i_peer_genid = rt6_peer_genid();
}
315
/*
 * dst_ops->ifdown: the device is going away — repoint the route's
 * inet6_dev at the namespace loopback so the dst stays valid until
 * it is garbage collected.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev && idev && idev->dev == dev) {
		struct inet6_dev *loopback_idev =
			in6_dev_get(loopback_dev);
		if (loopback_idev) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}
333
/*
 * Has this route expired?  Routes with RTF_EXPIRES carry their own
 * deadline; otherwise a cloned route inherits expiry from the route
 * it was copied from (dst.from).
 */
static bool rt6_check_expired(const struct rt6_info *rt)
{
	struct rt6_info *ort = NULL;

	if (rt->rt6i_flags & RTF_EXPIRES) {
		if (time_after(jiffies, rt->dst.expires))
			return true;
	} else if (rt->dst.from) {
		ort = (struct rt6_info *) rt->dst.from;
		return (ort->rt6i_flags & RTF_EXPIRES) &&
			time_after(jiffies, ort->dst.expires);
	}
	return false;
}
348
349 static bool rt6_need_strict(const struct in6_addr *daddr)
350 {
351 return ipv6_addr_type(daddr) &
352 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
353 }
354
355 /*
356 * Route lookup. Any table->tb6_lock is implied.
357 */
358
/*
 * Walk the sibling list starting at @rt and pick the entry matching the
 * requested output interface (@oif) or, when no oif is given, the one
 * whose device owns @saddr.  Falls back to a loopback "local" match,
 * the null entry (strict lookup), or @rt itself.
 * Caller holds table->tb6_lock.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						struct rt6_info *rt,
						const struct in6_addr *saddr,
						int oif,
						int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* Nothing to constrain on: first entry wins. */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE && oif)
						continue;
					/* Prefer a local match on the right oif. */
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		/* Strict lookup with no device match: hard failure. */
		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
405
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing (RFC 4191): send a unicast NS towards a
 * router whose neighbour entry is not currently valid, rate-limited by
 * the per-device rtr_probe_interval.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;
	read_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/* Stamp first so concurrent probers back off. */
		neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		read_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* No router preference support: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
445
446 /*
447 * Default Router Selection (RFC 2461 6.3.6)
448 */
449 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
450 {
451 struct net_device *dev = rt->dst.dev;
452 if (!oif || dev->ifindex == oif)
453 return 2;
454 if ((dev->flags & IFF_LOOPBACK) &&
455 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
456 return 1;
457 return 0;
458 }
459
/*
 * Score a route's next-hop reachability for router selection:
 *   2 — neighbour is in a VALID NUD state,
 *   1 — no next hop needed, or state unknown,
 *   0 — no neighbour, or (with router-pref) NUD_FAILED.
 */
static inline int rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	int m;

	rcu_read_lock();
	neigh = dst_get_neighbour_noref(&rt->dst);
	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		m = 1;
	else if (neigh) {
		read_lock_bh(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			m = 2;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (neigh->nud_state & NUD_FAILED)
			m = 0;
#endif
		else
			m = 1;
		read_unlock_bh(&neigh->lock);
	} else
		m = 0;
	rcu_read_unlock();
	return m;
}
486
/*
 * Combined route score: device match (low bits), RA preference
 * (shifted above it), gated by next-hop reachability.  Returns -1 when
 * the route is unusable under @strict.
 */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m, n;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return -1;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	n = rt6_check_neigh(rt);
	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
		return -1;
	return m;
}

/*
 * Compare @rt against the best route seen so far (*mpri/match) and
 * return the new best.  When reachability matters, the loser is probed
 * so it may become eligible next time.
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match)
{
	int m;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m < 0)
		goto out;

	if (m > *mpri) {
		if (strict & RT6_LOOKUP_F_REACHABLE)
			rt6_probe(match);
		*mpri = m;
		match = rt;
	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
		rt6_probe(rt);
	}

out:
	return match;
}
528
/*
 * Scan all siblings with the given metric, starting at the round-robin
 * head and wrapping around from the leaf, and return the best-scoring
 * one (or NULL).
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict)
{
	struct rt6_info *rt, *match;
	int mpri = -1;

	match = NULL;
	for (rt = rr_head; rt && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);
	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);

	return match;
}

/*
 * Default router selection for a fib6 node: pick the best route among
 * the equal-metric set; when nothing qualifies, advance the
 * round-robin pointer so the next lookup tries a different router.
 * Caller holds table->tb6_lock.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);

	if (!match &&
	    (strict & RT6_LOOKUP_F_REACHABLE)) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
573
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option (RFC 4191) received in a Router
 * Advertisement from @gwaddr on @dev: validate it, then add, update or
 * delete the corresponding RTF_ROUTEINFO route.
 * Returns 0 on success or -EINVAL on a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		/* >64-bit prefix needs at least 16 bytes of prefix data. */
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* Zero lifetime means "withdraw this route". */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		/* Drop the reference taken by the get/add above. */
		dst_release(&rt->dst);
	}
	return 0;
}
#endif
646
/*
 * BACKTRACK - retry a fib6 lookup in ancestor nodes.
 * When the current lookup resolved to the null entry, walk up the tree
 * (consulting source-routed subtrees along the way) and jump back to
 * the caller's "restart" label at the first node carrying route info;
 * jump to "out" when the tree root is reached.  Expects local variables
 * "rt" and "fn" and labels "restart"/"out" in the calling scope.
 */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
664
/*
 * Simple (non-cloning) policy lookup in one table: find the node,
 * match the device, backtrack if needed, and return the route with
 * its use counters bumped.  Never returns NULL (falls back to the
 * null entry).
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	BACKTRACK(net, &fl6->saddr);
out:
	/* Take a reference and update lastuse while the lock is held. */
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}

/* Policy-routing entry point wrapping ip6_pol_route_lookup(). */
struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				    int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
691
/*
 * Convenience lookup by address pair: builds a flowi6 and performs a
 * policy lookup.  Returns a referenced rt6_info, or NULL when the
 * lookup resolved to an error route.  @strict forces an interface
 * match on @oif.
 */
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
{
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}

EXPORT_SYMBOL(rt6_lookup);
717
/* ip6_ins_rt is called with FREE table->tb6_lock.
   It takes new route entry, the addition fails by any reason the
   route is freed. In any case, if caller does not hold it, it may
   be destroyed.
 */

/* Insert @rt into its table under the write lock; returns fib6_add()'s
 * error code. */
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info);
	write_unlock_bh(&table->tb6_lock);

	return err;
}

/* Insert a route with default netlink info (namespace only). */
int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->dst.dev),
	};
	return __ip6_ins_rt(rt, &info);
}
744
/*
 * Clone @ort into a host (RTF_CACHE) route for @daddr and bind a
 * neighbour entry to it.  On neighbour-table overflow the route cache
 * is garbage collected once (with relaxed GC limits) and the bind is
 * retried; returns NULL on final failure.
 */
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
				      const struct in6_addr *daddr,
				      const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort, daddr);

	if (rt) {
		/* Only retry GC when not in softirq context. */
		int attempts = !in_softirq();

		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
			if (ort->rt6i_dst.plen != 128 &&
			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
				rt->rt6i_flags |= RTF_ANYCAST;
			rt->rt6i_gateway = *daddr;
		}

		rt->rt6i_flags |= RTF_CACHE;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif

	retry:
		if (rt6_bind_neighbour(rt, rt->dst.dev)) {
			struct net *net = dev_net(rt->dst.dev);
			int saved_rt_min_interval =
				net->ipv6.sysctl.ip6_rt_gc_min_interval;
			int saved_rt_elasticity =
				net->ipv6.sysctl.ip6_rt_gc_elasticity;

			if (attempts-- > 0) {
				/* Temporarily force aggressive GC, then restore. */
				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;

				ip6_dst_gc(&net->ipv6.ip6_dst_ops);

				net->ipv6.sysctl.ip6_rt_gc_elasticity =
					saved_rt_elasticity;
				net->ipv6.sysctl.ip6_rt_gc_min_interval =
					saved_rt_min_interval;
				goto retry;
			}

			net_warn_ratelimited("Neighbour table overflow\n");
			dst_free(&rt->dst);
			return NULL;
		}
	}

	return rt;
}
805
/*
 * Clone @ort into a cached route for @daddr, sharing the original's
 * neighbour entry (no new binding needed, unlike rt6_alloc_cow()).
 */
static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
					const struct in6_addr *daddr)
{
	struct rt6_info *rt = ip6_rt_copy(ort, daddr);

	if (rt) {
		rt->rt6i_flags |= RTF_CACHE;
		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
	}
	return rt;
}
817
/*
 * Main policy-routing lookup with caching: select the best route, and
 * unless it is already a cached/null entry, clone it (cow with a new
 * neighbour binding, or a plain clone for non-host routes) and insert
 * the clone.  Retries on races with concurrent inserters, and retries
 * the whole lookup without the reachability requirement when nothing
 * reachable was found.  Always returns a referenced route.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;
	int err;
	/* Routers don't require next-hop reachability for forwarding. */
	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

	strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
	read_lock_bh(&table->tb6_lock);

restart_2:
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);

restart:
	rt = rt6_select(fn, oif, strict | reachable);

	BACKTRACK(net, &fl6->saddr);
	if (rt == net->ipv6.ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	/* Hold the route across the unlocked clone/insert section. */
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);

	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
	else if (!(rt->dst.flags & DST_HOST))
		nrt = rt6_alloc_clone(rt, &fl6->daddr);
	else
		goto out2;

	dst_release(&rt->dst);
	rt = nrt ? : net->ipv6.ip6_null_entry;

	dst_hold(&rt->dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->dst);
	goto relookup;

out:
	/* Nothing reachable: retry once accepting unreachable routers. */
	if (reachable) {
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
out2:
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;

	return rt;
}

/* Input-path wrapper: lookup keyed on the incoming interface. */
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}
893
/*
 * Input-path route lookup for @dev, forcing a strict interface match
 * for scoped destinations (except on PIM register devices).
 */
static struct dst_entry *ip6_route_input_lookup(struct net *net,
						struct net_device *dev,
						struct flowi6 *fl6, int flags)
{
	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}

/*
 * Attach a routing decision to an incoming skb, keyed on its IPv6
 * header, incoming interface and mark.
 */
void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
920
/* Output-path wrapper: lookup keyed on the outgoing interface. */
static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}

/*
 * Output route lookup for locally generated traffic.  Interface match
 * is forced for bound sockets and scoped destinations; source-address
 * preferences come from the socket when no source is given.
 */
struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
				    struct flowi6 *fl6)
{
	int flags = 0;

	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!ipv6_addr_any(&fl6->saddr))
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}

EXPORT_SYMBOL(ip6_route_output);
944
/*
 * Build an immutable "blackhole" copy of @dst_orig (used by xfrm to
 * neutralize a route): same addressing/metrics, but input/output just
 * discard.  Consumes the reference on @dst_orig.  Returns the new dst
 * or ERR_PTR(-ENOMEM).
 */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
	if (rt) {
		/* Zero the rt6-specific tail, as in ip6_dst_alloc(). */
		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));

		new = &rt->dst;

		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard;

		if (dst_metrics_read_only(&ort->dst))
			new->_metrics = ort->dst._metrics;
		else
			dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags;
		rt6_clean_expires(rt);
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		/* Not in any table: arrange for RCU-deferred freeing. */
		dst_free(new);
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
984
985 /*
986 * Destination cache support functions
987 */
988
/*
 * dst_ops->check: a cached route is still valid while its fib6 node
 * exists and the node's serial number matches the cookie; refresh the
 * peer binding if the peer generation moved on.  Returns NULL when the
 * route must be relooked up.
 */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
			if (!rt->rt6i_peer)
				rt6_bind_peer(rt, 0);
			rt->rt6i_peer_genid = rt6_peer_genid();
		}
		return dst;
	}
	return NULL;
}

/*
 * dst_ops->negative_advice: the caller thinks this route is bad.
 * Expired cache entries are deleted from the tree; other routes are
 * simply released so a fresh lookup happens.
 */
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}
1023
/*
 * dst_ops->link_failure: report unreachability to the sender and make
 * the offending route expire (cache entries) or invalidate its node's
 * serial number (default routes) so lookups stop returning it.
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE)
			rt6_update_expires(rt, 0);
		else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
			rt->rt6i_node->fn_sernum = -1;
	}
}

/*
 * dst_ops->update_pmtu: record a smaller path MTU on a host route.
 * MTUs below IPV6_MIN_MTU are clamped and ALLFRAG is set so every
 * packet carries a fragment header (RFC 2460 §5).
 */
static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info*)dst;

	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
		rt6->rt6i_flags |= RTF_MODIFIED;
		if (mtu < IPV6_MIN_MTU) {
			u32 features = dst_metric(dst, RTAX_FEATURES);
			mtu = IPV6_MIN_MTU;
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(dst, RTAX_FEATURES, features);
		}
		dst_metric_set(dst, RTAX_MTU, mtu);
	}
}
1054
/*
 * dst_ops->default_advmss: advertised MSS derived from the path MTU,
 * clamped between the namespace minimum and IPV6_MAXPLEN.
 */
static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	unsigned int mtu = dst_mtu(dst);
	struct net *net = dev_net(dev);

	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;

	/*
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
	 * IPV6_MAXPLEN is also valid and means: "any MSS,
	 * rely only on pmtu discovery"
	 */
	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
		mtu = IPV6_MAXPLEN;
	return mtu;
}

/*
 * dst_ops->mtu: the stored MTU metric when set, else the device's
 * configured IPv6 MTU, else IPV6_MIN_MTU.
 */
static unsigned int ip6_mtu(const struct dst_entry *dst)
{
	struct inet6_dev *idev;
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu)
		return mtu;

	mtu = IPV6_MIN_MTU;

	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

	return mtu;
}
1095
1096 static struct dst_entry *icmp6_dst_gc_list;
1097 static DEFINE_SPINLOCK(icmp6_dst_lock);
1098
/*
 * Allocate a throw-away host route for an outgoing ICMPv6 packet.
 * The route is not inserted in any fib table; it is chained on
 * icmp6_dst_gc_list and reclaimed by icmp6_dst_gc().  Returns the
 * (possibly xfrm-transformed) dst or an ERR_PTR.
 */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct neighbour *neigh,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	if (neigh)
		neigh_hold(neigh);
	else {
		neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
		if (IS_ERR(neigh)) {
			/* Undo both references taken above. */
			in6_dev_put(idev);
			dst_free(&rt->dst);
			return ERR_CAST(neigh);
		}
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output  = ip6_output;
	dst_set_neighbour(&rt->dst, neigh);
	atomic_set(&rt->dst.__refcnt, 1);
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);

	/* Chain onto the ICMP dst GC list instead of a fib table. */
	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	fib6_force_start_gc(net);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}
1150
/*
 * Reap unreferenced entries from the ICMP dst list.  Returns nonzero
 * while referenced entries remain (so the caller keeps GC scheduled).
 */
int icmp6_dst_gc(void)
{
	struct dst_entry *dst, **pprev;
	int more = 0;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;

	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			/* Unlink and free; *pprev now points past it. */
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
			++more;
		}
	}

	spin_unlock_bh(&icmp6_dst_lock);

	return more;
}

/*
 * Remove every ICMP dst for which @func returns true (e.g. entries on
 * a device being unregistered).
 */
static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
			    void *arg)
{
	struct dst_entry *dst, **pprev;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;
	while ((dst = *pprev) != NULL) {
		struct rt6_info *rt = (struct rt6_info *) dst;
		if (func(rt, arg)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
		}
	}
	spin_unlock_bh(&icmp6_dst_lock);
}
1192
/*
 * dst_ops->gc: run fib6 garbage collection when the GC interval has
 * elapsed or the cache exceeds max_size.  ip6_rt_gc_expire grows on
 *每 pressure and decays by 1/2^elasticity after each run.  Returns
 * nonzero when the cache is still over max_size (allocation should
 * fail).
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
	unsigned long now = jiffies;
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	if (time_after(rt_last_gc + rt_min_interval, now) &&
	    entries <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
	net->ipv6.ip6_rt_last_gc = now;
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}
1219
1220 /* Clean host part of a prefix. Not necessary in radix tree,
1221 but results in cleaner routing tables.
1222
1223 Remove it only when all the things will work!
1224 */
1225
1226 int ip6_dst_hoplimit(struct dst_entry *dst)
1227 {
1228 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1229 if (hoplimit == 0) {
1230 struct net_device *dev = dst->dev;
1231 struct inet6_dev *idev;
1232
1233 rcu_read_lock();
1234 idev = __in6_dev_get(dev);
1235 if (idev)
1236 hoplimit = idev->cnf.hop_limit;
1237 else
1238 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1239 rcu_read_unlock();
1240 }
1241 return hoplimit;
1242 }
1243 EXPORT_SYMBOL(ip6_dst_hoplimit);
1244
1245 /*
1246 *
1247 */
1248
/*
 * Insert a new IPv6 route described by @cfg into the appropriate FIB table.
 *
 * Resolves the output device (explicit ifindex, gateway lookup, or the
 * loopback device for reject routes), allocates and fills an rt6_info,
 * applies any netlink metrics, and hands the route to __ip6_ins_rt(),
 * which consumes the rt on success or failure.
 *
 * Returns 0 on success or a negative errno.  On the error path every
 * reference taken here (dev, idev, rt) is dropped again.
 */
int ip6_route_add(struct fib6_config *cfg)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-routed entries need subtree support compiled in. */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		/* Without NLM_F_CREATE only an existing table should be
		 * used; warn but fall back to creating one for
		 * compatibility with old userspace.
		 */
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	rt->dst.obsolete = -1;

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the destination class. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

	/* Non-host routes carrying metrics need their own metrics array
	 * (host routes set them via dst_metric_set below).
	 */
	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
		if (!metrics) {
			err = -ENOMEM;
			goto out;
		}
		dst_init_metrics(&rt->dst, metrics, 0);
	}
#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->dst.output = ip6_pkt_discard_out;
		rt->dst.input = ip6_pkt_discard;
		rt->dst.error = -ENETUNREACH;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		rt->rt6i_gateway = *gw_addr;
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not be able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			/* The gateway must itself be reachable via an
			 * existing non-gateway (on-link) route.
			 */
			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					dst_release(&grt->dst);
					goto out;
				}
			} else {
				/* Inherit device and idev from the route
				 * that reaches the gateway.
				 */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			dst_release(&grt->dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		/* Preferred source must be an address on the output dev. */
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
		err = rt6_bind_neighbour(rt, dev);
		if (err)
			goto out;
	}

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla_type(nla);

			if (type) {
				if (type > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}

				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
			}
		}
	}

	/* Ownership of the dev/idev references passes to the route here. */
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);
	return err;
}
1479
/*
 * Core route deletion: remove @rt from its FIB table under the table lock
 * and drop the caller's reference.  The per-namespace null entry can never
 * be deleted.  @info carries the netlink notification context.
 */
static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->dst.dev);

	if (rt == net->ipv6.ip6_null_entry)
		return -ENOENT;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);

	err = fib6_del(rt, info);
	/* Consumes the reference the caller held on rt. */
	dst_release(&rt->dst);

	write_unlock_bh(&table->tb6_lock);

	return err;
}
1499
1500 int ip6_del_rt(struct rt6_info *rt)
1501 {
1502 struct nl_info info = {
1503 .nl_net = dev_net(rt->dst.dev),
1504 };
1505 return __ip6_del_rt(rt, &info);
1506 }
1507
/*
 * Delete the first route in @cfg's table that matches the configured
 * destination/source prefixes and, when given, the ifindex, gateway and
 * metric.  Returns -ESRCH when nothing matches.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			/* Take a reference before dropping the read lock;
			 * __ip6_del_rt retakes the lock for writing and
			 * consumes this reference.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
1546
1547 /*
1548 * Handle redirects
1549 */
/* Flow key for redirect lookups: a plain flowi6 extended with the
 * redirecting gateway address.  __ip6_route_redirect() downcasts the
 * flowi6 pointer back to this struct, so fl6 must stay the first member.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
1554
/*
 * Table-lookup callback used when validating a received redirect: find the
 * route currently used for the destination and check that the redirect
 * came from that route's gateway on the expected interface.  Returns the
 * matching route (held), or the held null entry when none matches.
 */
static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	/* fl6 is really the embedded first member of an ip6rd_flowi. */
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/*
	 * Get the "current" route for this destination and
	 * check if the redirect has come from appropriate router.
	 *
	 * RFC 2461 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		/*
		 * Current route is on-link; redirect is always invalid.
		 *
		 * Seems, previous statement is not true. It could
		 * be node, which looks for us as on-link (f.e. proxy ndisc)
		 * But then router serving it might decide, that we should
		 * know truth 8)8) --ANK (980726).
		 */
		if (rt6_check_expired(rt))
			continue;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	/* BACKTRACK may jump back to "restart" in a parent subtree node. */
	BACKTRACK(net, &fl6->saddr);
out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	return rt;
};
1608
1609 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1610 const struct in6_addr *src,
1611 const struct in6_addr *gateway,
1612 struct net_device *dev)
1613 {
1614 int flags = RT6_LOOKUP_F_HAS_SADDR;
1615 struct net *net = dev_net(dev);
1616 struct ip6rd_flowi rdfl = {
1617 .fl6 = {
1618 .flowi6_oif = dev->ifindex,
1619 .daddr = *dest,
1620 .saddr = *src,
1621 },
1622 };
1623
1624 rdfl.gateway = *gateway;
1625
1626 if (rt6_need_strict(dest))
1627 flags |= RT6_LOOKUP_F_IFACE;
1628
1629 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1630 flags, __ip6_route_redirect);
1631 }
1632
/*
 * Process a validated ICMPv6 redirect for @dest received from @neigh:
 * update the neighbour cache from the redirect's link-layer option, clone
 * the current route into a RTF_CACHE entry pointing at the new gateway
 * (or marked on-link), insert it, and retire the old cached route.
 */
void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
		  const struct in6_addr *saddr,
		  struct neighbour *neigh, u8 *lladdr, int on_link)
{
	struct rt6_info *rt, *nrt = NULL;
	struct netevent_redirect netevent;
	struct net *net = dev_net(neigh->dev);

	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);

	if (rt == net->ipv6.ip6_null_entry) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		goto out;
	}

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/*
	 * Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Duplicate redirect: silently ignore. */
	if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
		goto out;

	nrt = ip6_rt_copy(rt, dest);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));

	/* ip6_ins_rt consumes nrt whether or not it succeeds. */
	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	if (rt->rt6i_flags & RTF_CACHE) {
		/* ip6_del_rt drops the reference we hold on rt. */
		ip6_del_rt(rt);
		return;
	}

out:
	dst_release(&rt->dst);
}
1696
1697 /*
1698 * Handle ICMP "packet too big" messages
1699 * i.e. Path MTU discovery
1700 */
1701
/*
 * Apply a Packet Too Big notification (@pmtu) for @daddr/@saddr, scoped to
 * @ifindex (0 = any interface).  Updates the MTU metric on the cached
 * host route, creating one by COW/clone from the covering network route
 * if necessary, and arms the PMTU expiry timer so increases can be
 * rediscovered later.
 */
static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
			     struct net *net, u32 pmtu, int ifindex)
{
	struct rt6_info *rt, *nrt;
	int allfrag = 0;
again:
	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
	if (!rt)
		return;

	if (rt6_check_expired(rt)) {
		/* Stale cached route: delete it and look up again. */
		ip6_del_rt(rt);
		goto again;
	}

	/* Only ever shrink the path MTU here. */
	if (pmtu >= dst_mtu(&rt->dst))
		goto out;

	if (pmtu < IPV6_MIN_MTU) {
		/*
		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
		 * MTU (1280) and a fragment header should always be included
		 * after a node receiving Too Big message reporting PMTU is
		 * less than the IPv6 Minimum Link MTU.
		 */
		pmtu = IPV6_MIN_MTU;
		allfrag = 1;
	}

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Host route. If it is static, it would be better
	   not to override it, but add new one, so that
	   when cache entry will expire old pmtu
	   would return automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
		}
		rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is connected route. Action: COW
	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
	 */
	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, daddr, saddr);
	else
		nrt = rt6_alloc_clone(rt, daddr);

	if (nrt) {
		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
		}

		/* According to RFC 1981, detecting PMTU increase shouldn't be
		 * happened within 5 mins, the recommended timer is 10 mins.
		 * Here this route expiration time is set to ip6_rt_mtu_expires
		 * which is 10 mins. After 10 mins the decreased pmtu is expired
		 * and detecting PMTU increase will be automatically happened.
		 */
		rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC;
		ip6_ins_rt(nrt);
	}
out:
	dst_release(&rt->dst);
}
1785
/*
 * Entry point for handling a Packet Too Big report: apply the new @pmtu
 * both globally (ifindex 0) and on the receiving device.
 */
void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
			struct net_device *dev, u32 pmtu)
{
	struct net *net = dev_net(dev);

	/*
	 * RFC 1981 states that a node "MUST reduce the size of the packets it
	 * is sending along the path" that caused the Packet Too Big message.
	 * Since it's not possible in the general case to determine which
	 * interface was used to send the original packet, we update the MTU
	 * on the interface that will be used to send future packets. We also
	 * update the MTU on the interface that received the Packet Too Big in
	 * case the original packet was forced out that interface with
	 * SO_BINDTODEVICE or similar. This is the next best thing to the
	 * correct behaviour, which would be to update the MTU on all
	 * interfaces.
	 */
	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
}
1806
1807 /*
1808 * Misc support functions
1809 */
1810
/*
 * Allocate a new host route for @dest cloned from @ort: handlers, metrics,
 * idev (held), gateway and flags are copied; the result is always a /128
 * DST_HOST entry with metric 0.  Returns NULL on allocation failure.
 */
static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
				    const struct in6_addr *dest)
{
	struct net *net = dev_net(ort->dst.dev);
	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
					    ort->dst.dev, 0);

	if (rt) {
		rt->dst.input = ort->dst.input;
		rt->dst.output = ort->dst.output;
		rt->dst.flags |= DST_HOST;

		rt->rt6i_dst.addr = *dest;
		rt->rt6i_dst.plen = 128;
		dst_copy_metrics(&rt->dst, &ort->dst);
		rt->dst.error = ort->dst.error;
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
		rt->dst.lastuse = jiffies;

		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags;
		/* Only RA-learned default routes track the parent's
		 * expiry ("from"); everything else starts non-expiring.
		 */
		if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
		    (RTF_DEFAULT | RTF_ADDRCONF))
			rt6_set_from(rt, ort);
		else
			rt6_clean_expires(rt);
		rt->rt6i_metric = 0;

#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
		rt->rt6i_table = ort->rt6i_table;
	}
	return rt;
}
1849
1850 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Find an existing RA route-information entry in RT6_TABLE_INFO that
 * matches @prefix/@prefixlen, @gwaddr and @ifindex.  Returns the route
 * with a reference held, or NULL.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (!table)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		/* Caller gets a held reference. */
		dst_hold(&rt->dst);
		break;
	}
out:
	write_unlock_bh(&table->tb6_lock);
	return rt;
}
1882
/*
 * Install a route learned from an RA Route Information option and return
 * it (held) by re-looking it up; returns NULL if insertion failed.
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table = RT6_TABLE_INFO,
		.fc_metric = IP6_RT_PRIO_USER,
		.fc_ifindex = ifindex,
		.fc_dst_len = prefixlen,
		.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
			    RTF_UP | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	cfg.fc_dst = *prefix;
	cfg.fc_gateway = *gwaddr;

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	/* Failure is detected by the lookup below, not by the return value. */
	ip6_route_add(&cfg);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
}
1911 #endif
1912
/*
 * Find the RA-learned default route via gateway @addr on @dev in the
 * default-router table.  Returns the route with a reference held, or NULL.
 */
struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
	if (!table)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (dev == rt->dst.dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);
	write_unlock_bh(&table->tb6_lock);
	return rt;
}
1934
/*
 * Install an RA-learned default route via @gwaddr on @dev with router
 * preference @pref, then return it (held) via re-lookup; NULL on failure.
 */
struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table = RT6_TABLE_DFLT,
		.fc_metric = IP6_RT_PRIO_USER,
		.fc_ifindex = dev->ifindex,
		.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
			    RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	cfg.fc_gateway = *gwaddr;

	/* Failure is detected by the lookup below, not by the return value. */
	ip6_route_add(&cfg);

	return rt6_get_dflt_router(gwaddr, dev);
}
1956
/*
 * Delete every RA-learned (RTF_DEFAULT or RTF_ADDRCONF) route from the
 * default-router table of @net.  Each deletion drops the read lock, so
 * the scan restarts from the head after every removal.
 */
void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (!table)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
			/* Hold the route, drop the lock, delete, rescan. */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}
1979
1980 static void rtmsg_to_fib6_config(struct net *net,
1981 struct in6_rtmsg *rtmsg,
1982 struct fib6_config *cfg)
1983 {
1984 memset(cfg, 0, sizeof(*cfg));
1985
1986 cfg->fc_table = RT6_TABLE_MAIN;
1987 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1988 cfg->fc_metric = rtmsg->rtmsg_metric;
1989 cfg->fc_expires = rtmsg->rtmsg_info;
1990 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1991 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1992 cfg->fc_flags = rtmsg->rtmsg_flags;
1993
1994 cfg->fc_nlinfo.nl_net = net;
1995
1996 cfg->fc_dst = rtmsg->rtmsg_dst;
1997 cfg->fc_src = rtmsg->rtmsg_src;
1998 cfg->fc_gateway = rtmsg->rtmsg_gateway;
1999 }
2000
/*
 * Legacy SIOCADDRT/SIOCDELRT ioctl handler: copy the in6_rtmsg from
 * userspace, convert it to a fib6_config and add/delete the route under
 * the RTNL lock.  Requires CAP_NET_ADMIN.
 * Returns 0 on success or a negative errno (-EINVAL for unknown cmds).
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	switch(cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		/* copy_from_user returns the number of bytes NOT copied. */
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return -EFAULT;

		rtmsg_to_fib6_config(net, &rtmsg, &cfg);

		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&cfg);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&cfg);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}
2037
2038 /*
2039 * Drop the packet on the floor
2040 */
2041
/*
 * Common drop path for reject/blackhole routes: bump the appropriate
 * SNMP counter, send an ICMPv6 Destination Unreachable with @code (except
 * for packets to the unspecified address), and free the skb.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY) {
			/* Unspecified destination: count as address error
			 * and skip the ICMP error below.
			 */
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
2064
/* dst.input handler for reject routes: drop with "no route" ICMP error. */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2069
/* dst.output handler for reject routes: as ip6_pkt_discard, but sets
 * skb->dev first so the ICMP error is attributed to the output device.
 */
static int ip6_pkt_discard_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
2075
2076 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2077
/* dst.input handler for prohibit routes: drop with "administratively
 * prohibited" ICMP error.
 */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2082
/* dst.output counterpart of ip6_pkt_prohibit. */
static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
2088
2089 #endif
2090
2091 /*
2092 * Allocate a dst for local (unicast / anycast) address.
2093 */
2094
/*
 * Allocate the local-table host route backing a unicast or anycast
 * address on @idev.  The route is bound to the loopback device, flagged
 * RTF_LOCAL or RTF_ANYCAST accordingly, has its neighbour entry bound,
 * and is returned with refcount 1.  Returns ERR_PTR on failure.
 */
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    bool anycast)
{
	struct net *net = dev_net(idev->dev);
	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
					    net->loopback_dev, 0);
	int err;

	if (!rt) {
		net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
		return ERR_PTR(-ENOMEM);
	}

	/* The route keeps a reference on the idev of the real interface. */
	in6_dev_hold(idev);

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_idev = idev;
	rt->dst.obsolete = -1;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;
	err = rt6_bind_neighbour(rt, rt->dst.dev);
	if (err) {
		dst_free(&rt->dst);
		return ERR_PTR(err);
	}

	rt->rt6i_dst.addr = *addr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);

	atomic_set(&rt->dst.__refcnt, 1);

	return rt;
}
2136
2137 int ip6_route_get_saddr(struct net *net,
2138 struct rt6_info *rt,
2139 const struct in6_addr *daddr,
2140 unsigned int prefs,
2141 struct in6_addr *saddr)
2142 {
2143 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2144 int err = 0;
2145 if (rt->rt6i_prefsrc.plen)
2146 *saddr = rt->rt6i_prefsrc.addr;
2147 else
2148 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2149 daddr, prefs, saddr);
2150 return err;
2151 }
2152
2153 /* remove deleted ip from prefsrc entries */
/* Callback argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict to this device; NULL = any */
	struct net *net;
	struct in6_addr *addr;	/* the address being removed */
};
2159
2160 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2161 {
2162 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2163 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2164 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2165
2166 if (((void *)rt->dst.dev == dev || !dev) &&
2167 rt != net->ipv6.ip6_null_entry &&
2168 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2169 /* remove prefsrc entry */
2170 rt->rt6i_prefsrc.plen = 0;
2171 }
2172 return 0;
2173 }
2174
2175 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2176 {
2177 struct net *net = dev_net(ifp->idev->dev);
2178 struct arg_dev_net_ip adni = {
2179 .dev = ifp->idev->dev,
2180 .net = net,
2181 .addr = &ifp->addr,
2182 };
2183 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2184 }
2185
/* Callback argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL = match all */
	struct net *net;
};
2190
2191 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2192 {
2193 const struct arg_dev_net *adn = arg;
2194 const struct net_device *dev = adn->dev;
2195
2196 if ((rt->dst.dev == dev || !dev) &&
2197 rt != adn->net->ipv6.ip6_null_entry)
2198 return -1;
2199
2200 return 0;
2201 }
2202
2203 void rt6_ifdown(struct net *net, struct net_device *dev)
2204 {
2205 struct arg_dev_net adn = {
2206 .dev = dev,
2207 .net = net,
2208 };
2209
2210 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2211 icmp6_clean_all(fib6_ifdown, &adn);
2212 }
2213
/* Callback argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new link MTU */
};
2218
/*
 * fib6_clean_all callback applied after a device MTU change: update the
 * RTAX_MTU metric of routes on that device where appropriate (see the
 * inline rationale below).  Always returns 0 (never deletes routes).
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discovery.
	 */
	if (rt->dst.dev == arg->dev &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
	    (dst_mtu(&rt->dst) >= arg->mtu ||
	     (dst_mtu(&rt->dst) < arg->mtu &&
	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
	}
	return 0;
}
2257
2258 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2259 {
2260 struct rt6_mtu_change_arg arg = {
2261 .dev = dev,
2262 .mtu = mtu,
2263 };
2264
2265 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2266 }
2267
/* Netlink attribute policy for RTM_NEWROUTE/RTM_DELROUTE/RTM_GETROUTE.
 * RTA_DST/RTA_SRC/RTA_PREFSRC are not listed here; their lengths are
 * checked by hand in rtm_to_fib6_config().
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]               = { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
};
2275
/*
 * Parse an rtnetlink route message (header + attributes) into a
 * fib6_config.  Returns 0 on success or a negative errno; on failure
 * *cfg may be partially filled and must not be used.
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;

	if (rtm->rtm_type == RTN_UNREACHABLE)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		/* Attribute must carry at least the prefix-length bytes. */
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		/* Kept as a pointer into the message; validated later by
		 * nla_for_each_attr in ip6_route_add().
		 */
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	err = 0;
errout:
	return err;
}
2351
2352 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2353 {
2354 struct fib6_config cfg;
2355 int err;
2356
2357 err = rtm_to_fib6_config(skb, nlh, &cfg);
2358 if (err < 0)
2359 return err;
2360
2361 return ip6_route_del(&cfg);
2362 }
2363
2364 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2365 {
2366 struct fib6_config cfg;
2367 int err;
2368
2369 err = rtm_to_fib6_config(skb, nlh, &cfg);
2370 if (err < 0)
2371 return err;
2372
2373 return ip6_route_add(&cfg);
2374 }
2375
/* Worst-case payload size of one RTM_NEWROUTE dump message, used to size
 * notification skbs.
 */
static inline size_t rt6_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
	       + nla_total_size(sizeof(struct rta_cacheinfo));
}
2390
/*
 * Build one RTM_* netlink message describing @rt into @skb.
 *
 * @dst/@src: when non-NULL (the RTM_GETROUTE path), the addresses the
 *	lookup was performed with; they are reported as full /128 entries.
 * @iif:   input interface index of the request, 0 for dumps/notifications.
 * @prefix: when non-zero, skip routes lacking RTF_PREFIX_RT (RTM_F_PREFIX
 *	dump filtering); returning 1 counts as success-but-skipped.
 * @nowait: passed through to ip6mr_get_route() for the multicast case.
 *
 * Returns the result of nlmsg_end() on success, 0/1 for the skip cases
 * noted above, or -EMSGSIZE when the skb ran out of room (the partially
 * built message is cancelled first).
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	const struct inet_peer *peer;
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;
	struct neighbour *n;
	u32 ts, tsage;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* Route type: reject/local take precedence; a route through a
	 * loopback device is also reported as local.
	 */
	if (rt->rt6i_flags & RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	/* Map internal route-origin flags onto the rtnetlink protocol. */
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags & RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		if (nla_put(skb, RTA_DST, 16, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put(skb, RTA_SRC, 16, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		/* Multicast destinations are resolved through ip6mr;
		 * err == 0 with !nowait means "queued, reply comes later".
		 */
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		/* Output lookup: report the source address that would be
		 * selected for this destination.
		 */
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	/* Neighbour is RCU-protected; must drop the read lock on the
	 * error path before jumping out.
	 */
	rcu_read_lock();
	n = dst_get_neighbour_noref(&rt->dst);
	if (n) {
		if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
			rcu_read_unlock();
			goto nla_put_failure;
		}
	}
	rcu_read_unlock();

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;
	/* Remaining lifetime in jiffies, clamped to INT_MAX; 0 when the
	 * route does not expire.
	 */
	if (!(rt->rt6i_flags & RTF_EXPIRES))
		expires = 0;
	else if (rt->dst.expires - jiffies < INT_MAX)
		expires = rt->dst.expires - jiffies;
	else
		expires = INT_MAX;

	/* TCP timestamp data cached on the inetpeer, if any. */
	peer = rt->rt6i_peer;
	ts = tsage = 0;
	if (peer && peer->tcp_ts_stamp) {
		ts = peer->tcp_ts;
		tsage = get_seconds() - peer->tcp_ts_stamp;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
			       expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2539
2540 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2541 {
2542 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2543 int prefix;
2544
2545 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2546 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2547 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2548 } else
2549 prefix = 0;
2550
2551 return rt6_fill_node(arg->net,
2552 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2553 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2554 prefix, 0, NLM_F_MULTI);
2555 }
2556
/*
 * RTM_GETROUTE handler: perform a one-off route lookup for the
 * src/dst/iif/oif given in the request and unicast the resulting
 * route description back to the requester.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;	/* only used for sizeof() in nlmsg_parse() */
	struct flowi6 fl6;
	int err, iif = 0, oif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	/* An input interface selects the input lookup path; otherwise an
	 * output lookup is performed.
	 */
	if (iif) {
		struct net_device *dev;
		int flags = 0;

		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
							       flags);
	} else {
		fl6.flowi6_oif = oif;

		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
	}

	/* NOTE(review): rt->dst.error is not examined here; a failed
	 * lookup is reported via the cacheinfo error field in
	 * rt6_fill_node() rather than as a request error — confirm this
	 * is the intended behavior.
	 */
	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		dst_release(&rt->dst);
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	/* The skb now owns the dst reference; it is dropped when the
	 * skb is freed.
	 */
	skb_dst_set(skb, &rt->dst);

	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
errout:
	return err;
}
2644
/*
 * Broadcast a route change (@event is RTM_NEWROUTE/RTM_DELROUTE) for
 * @rt to the RTNLGRP_IPV6_ROUTE multicast group.  Errors are reported
 * to interested listeners via rtnl_set_sk_err() rather than returned.
 */
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	seq = info->nlh ? info->nlh->nlmsg_seq : 0;

	/* gfp_any(): this can be called from softirq context. */
	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
	if (!skb)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
			    event, info->pid, seq, 0, 0, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
2674
/*
 * Netdevice notifier: when a namespace's loopback device registers,
 * attach it (device pointer and idev reference) to the namespace's
 * special route entries, which need a device to be usable.
 */
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *data)
{
	struct net_device *dev = (struct net_device *)data;
	struct net *net = dev_net(dev);

	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	}

	return NOTIFY_OK;
}
2694
2695 /*
2696 * /proc
2697 */
2698
2699 #ifdef CONFIG_PROC_FS
2700
/* NOTE(review): not referenced anywhere in the visible portion of this
 * file (the /proc path below uses seq_file directly) — looks like a
 * leftover of the old read_proc interface; confirm before removing.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2709
/*
 * fib6 walk callback for /proc/net/ipv6_route: print one route as
 * "dst plen src plen gateway metric refcnt use flags devname".
 * Always returns 0 so the walk continues.
 */
static int rt6_info_route(struct rt6_info *rt, void *p_arg)
{
	struct seq_file *m = p_arg;
	struct neighbour *n;

	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);

#ifdef CONFIG_IPV6_SUBTREES
	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
#else
	/* Without subtrees there is no source prefix; print all zeros. */
	seq_puts(m, "00000000000000000000000000000000 00 ");
#endif
	/* Neighbour is RCU-protected; an all-zero address is printed
	 * when none is attached.
	 */
	rcu_read_lock();
	n = dst_get_neighbour_noref(&rt->dst);
	if (n) {
		seq_printf(m, "%pi6", n->primary_key);
	} else {
		seq_puts(m, "00000000000000000000000000000000");
	}
	rcu_read_unlock();
	seq_printf(m, " %08x %08x %08x %08x %8s\n",
		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
		   rt->dst.__use, rt->rt6i_flags,
		   rt->dst.dev ? rt->dst.dev->name : "");
	return 0;
}
2736
/* seq_file show: walk the whole fib6 tree (read-only) and print every
 * route via rt6_info_route().
 */
static int ipv6_route_show(struct seq_file *m, void *v)
{
	struct net *net = (struct net *)m->private;
	fib6_clean_all_ro(net, rt6_info_route, 0, m);
	return 0;
}
2743
/* open() for /proc/net/ipv6_route (per-netns single_open wrapper). */
static int ipv6_route_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ipv6_route_show);
}
2748
/* File operations for /proc/net/ipv6_route. */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release_net,
};
2756
/* seq_file show for /proc/net/rt6_stats: one line of hex-formatted
 * fib6 statistics for this namespace.
 */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}
2771
/* open() for /proc/net/rt6_stats (per-netns single_open wrapper). */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
2776
/* File operations for /proc/net/rt6_stats. */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
2784 #endif /* CONFIG_PROC_FS */
2785
2786 #ifdef CONFIG_SYSCTL
2787
/*
 * Handler for the write-only net.ipv6.route.flush sysctl: writing a
 * delay (in seconds) triggers an immediate garbage collection of the
 * routing cache for the owning namespace.
 */
static
int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
			      void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net;
	int delay;
	if (!write)
		return -EINVAL;

	/* ctl->extra1 carries the owning namespace (set up in
	 * ipv6_route_sysctl_init()).
	 */
	net = (struct net *)ctl->extra1;
	delay = net->ipv6.sysctl.flush_delay;
	/* NOTE(review): the return value of proc_dointvec() is ignored,
	 * and @delay is the value *before* this write took effect —
	 * confirm this ordering is intentional.
	 */
	proc_dointvec(ctl, write, buffer, lenp, ppos);
	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
	return 0;
}
2803
/*
 * Template for the per-namespace net.ipv6.route.* sysctl table.
 * ipv6_route_sysctl_init() kmemdup()s this and re-points each entry's
 * .data at the corresponding per-namespace field, so entry order here
 * must match the table[N] indices used there.
 */
ctl_table ipv6_route_table_template[] = {
	{
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,	/* write-only trigger */
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* Millisecond view of the same gc_min_interval value. */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
2877
/*
 * Duplicate the sysctl template for namespace @net and point each
 * entry's .data at the namespace's own fields.  Indices must stay in
 * sync with the entry order of ipv6_route_table_template[].
 * Returns NULL on allocation failure (caller must handle it).
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		/* extra1 lets the flush handler find its namespace. */
		table[0].extra1 = net;
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		/* gc_min_interval_ms shares storage with gc_min_interval. */
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
	}

	return table;
}
2902 #endif
2903
/*
 * Per-namespace initialization of the IPv6 routing subsystem: dst_ops,
 * the always-present special route entries (null, plus prohibit and
 * blackhole when multiple tables are enabled), default sysctl values
 * and /proc entries.  Returns 0 or -ENOMEM; on failure the goto chain
 * at the bottom unwinds the allocations in reverse order.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	/* Each special entry is a template copy whose dst.path points
	 * back at itself and whose ops belong to this namespace.
	 */
	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* Default values for the net.ipv6.route.* sysctls. */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

#ifdef CONFIG_PROC_FS
	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif
	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

	/* Error unwind: free in reverse order of allocation. */
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}
2979
/* Per-namespace teardown: remove /proc entries, free the special route
 * entries and destroy the dst entry counter (reverse of
 * ip6_route_net_init()).
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ipv6_route");
	proc_net_remove(net, "rt6_stats");
#endif
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
2993
/* Pernet operations for the routing subsystem proper. */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
2998
2999 static int __net_init ipv6_inetpeer_init(struct net *net)
3000 {
3001 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3002
3003 if (!bp)
3004 return -ENOMEM;
3005 inet_peer_base_init(bp);
3006 net->ipv6.peers = bp;
3007 return 0;
3008 }
3009
/* Per-namespace teardown of the inetpeer base.  The pointer is cleared
 * before the tree is invalidated and freed.
 */
static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv6.peers;

	net->ipv6.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}
3018
/* Deliberately not __net_initdata: these pernet ops are referenced
 * whenever a namespace is created or destroyed, which can happen long
 * after boot-time init memory has been discarded.
 */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
3023
/* Netdevice notifier block; see ip6_route_dev_notify(). */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
3028
/*
 * Boot-time initialization of the IPv6 routing subsystem.  Each
 * registration step is undone in reverse order by the goto chain at
 * the bottom if a later step fails; ip6_route_cleanup() performs the
 * same teardown at module exit.
 */
int __init ip6_route_init(void)
{
	int ret;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_register_subsys;

	/* Blackhole dsts share the same slab cache as regular ones. */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
	ret = fib6_init();
	if (ret)
		goto out_register_inetpeer;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	/* Register the rtnetlink route message handlers. */
	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto fib6_rules_init;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto fib6_rules_init;

out:
	return ret;

	/* Error unwind: reverse order of the registrations above. */
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
3106
/* Module exit: tear everything down in the reverse order of
 * ip6_route_init().
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}