net/ipv6/route.c (android_kernel_motorola_exynos9610.git, commit 56624b974d3128cac0e839f592e21d5168511f98)
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65 #include <trace/events/fib6.h>
66
67 #include <linux/uaccess.h>
68
69 #ifdef CONFIG_SYSCTL
70 #include <linux/sysctl.h>
71 #endif
72
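/* Verdicts from neighbour (NUD) checks used when scoring routes:
 * negative values are failures of increasing severity, and
 * RT6_NUD_FAIL_DO_RR additionally asks the caller to advance the
 * round-robin pointer.
 */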
73 enum rt6_nud_state {
74 RT6_NUD_FAIL_HARD = -3,
75 RT6_NUD_FAIL_PROBE = -2,
76 RT6_NUD_FAIL_DO_RR = -1,
77 RT6_NUD_SUCCEED = 1
78 };
79
80 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
81 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
82 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
83 static unsigned int ip6_mtu(const struct dst_entry *dst);
84 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85 static void ip6_dst_destroy(struct dst_entry *);
86 static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
88 static int ip6_dst_gc(struct dst_ops *ops);
89
90 static int ip6_pkt_discard(struct sk_buff *skb);
91 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
92 static int ip6_pkt_prohibit(struct sk_buff *skb);
93 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
94 static void ip6_link_failure(struct sk_buff *skb);
95 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
99 static void rt6_dst_from_metrics_check(struct rt6_info *rt);
100 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
101 static size_t rt6_nlmsg_size(struct rt6_info *rt);
102 static int rt6_fill_node(struct net *net,
103 struct sk_buff *skb, struct rt6_info *rt,
104 struct in6_addr *dst, struct in6_addr *src,
105 int iif, int type, u32 portid, u32 seq,
106 unsigned int flags);
107
108 #ifdef CONFIG_IPV6_ROUTE_INFO
109 static struct rt6_info *rt6_add_route_info(struct net *net,
110 const struct in6_addr *prefix, int prefixlen,
111 const struct in6_addr *gwaddr,
112 struct net_device *dev,
113 unsigned int pref);
114 static struct rt6_info *rt6_get_route_info(struct net *net,
115 const struct in6_addr *prefix, int prefixlen,
116 const struct in6_addr *gwaddr,
117 struct net_device *dev);
118 #endif
119
120 struct uncached_list {
121 spinlock_t lock;
122 struct list_head head;
123 };
124
125 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
126
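/* Per-cpu lists of cached routes that are not owned by the fib6 tree.
 * They are tracked here so rt6_uncached_list_flush_dev() can re-point
 * them at the loopback device when their device goes away.
 */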
127 static void rt6_uncached_list_add(struct rt6_info *rt)
128 {
129 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
130
131 rt->rt6i_uncached_list = ul;
132
133 spin_lock_bh(&ul->lock);
134 list_add_tail(&rt->rt6i_uncached, &ul->head);
135 spin_unlock_bh(&ul->lock);
136 }
137
138 static void rt6_uncached_list_del(struct rt6_info *rt)
139 {
140 if (!list_empty(&rt->rt6i_uncached)) {
141 struct uncached_list *ul = rt->rt6i_uncached_list;
142
143 spin_lock_bh(&ul->lock);
144 list_del(&rt->rt6i_uncached);
145 spin_unlock_bh(&ul->lock);
146 }
147 }
148
149 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
150 {
151 struct net_device *loopback_dev = net->loopback_dev;
152 int cpu;
153
154 if (dev == loopback_dev)
155 return;
156
157 for_each_possible_cpu(cpu) {
158 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
159 struct rt6_info *rt;
160
161 spin_lock_bh(&ul->lock);
162 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
163 struct inet6_dev *rt_idev = rt->rt6i_idev;
164 struct net_device *rt_dev = rt->dst.dev;
165
166 if (rt_idev->dev == dev) {
167 rt->rt6i_idev = in6_dev_get(loopback_dev);
168 in6_dev_put(rt_idev);
169 }
170
171 if (rt_dev == dev) {
172 rt->dst.dev = loopback_dev;
173 dev_hold(rt->dst.dev);
174 dev_put(rt_dev);
175 }
176 }
177 spin_unlock_bh(&ul->lock);
178 }
179 }
180
181 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
182 {
183 return dst_metrics_write_ptr(rt->dst.from);
184 }
185
186 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
187 {
188 struct rt6_info *rt = (struct rt6_info *)dst;
189
190 if (rt->rt6i_flags & RTF_PCPU)
191 return rt6_pcpu_cow_metrics(rt);
192 else if (rt->rt6i_flags & RTF_CACHE)
193 return NULL;
194 else
195 return dst_cow_metrics_generic(dst, old);
196 }
197
198 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
199 struct sk_buff *skb,
200 const void *daddr)
201 {
202 struct in6_addr *p = &rt->rt6i_gateway;
203
204 if (!ipv6_addr_any(p))
205 return (const void *) p;
206 else if (skb)
207 return &ipv6_hdr(skb)->daddr;
208 return daddr;
209 }
210
211 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
212 struct sk_buff *skb,
213 const void *daddr)
214 {
215 struct rt6_info *rt = (struct rt6_info *) dst;
216 struct neighbour *n;
217
218 daddr = choose_neigh_daddr(rt, skb, daddr);
219 n = __ipv6_neigh_lookup(dst->dev, daddr);
220 if (n)
221 return n;
222 return neigh_create(&nd_tbl, daddr, dst->dev);
223 }
224
225 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
226 {
227 struct net_device *dev = dst->dev;
228 struct rt6_info *rt = (struct rt6_info *)dst;
229
230 daddr = choose_neigh_daddr(rt, NULL, daddr);
231 if (!daddr)
232 return;
233 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
234 return;
235 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
236 return;
237 __ipv6_confirm_neigh(dev, daddr);
238 }
239
240 static struct dst_ops ip6_dst_ops_template = {
241 .family = AF_INET6,
242 .gc = ip6_dst_gc,
243 .gc_thresh = 1024,
244 .check = ip6_dst_check,
245 .default_advmss = ip6_default_advmss,
246 .mtu = ip6_mtu,
247 .cow_metrics = ipv6_cow_metrics,
248 .destroy = ip6_dst_destroy,
249 .ifdown = ip6_dst_ifdown,
250 .negative_advice = ip6_negative_advice,
251 .link_failure = ip6_link_failure,
252 .update_pmtu = ip6_rt_update_pmtu,
253 .redirect = rt6_do_redirect,
254 .local_out = __ip6_local_out,
255 .neigh_lookup = ip6_neigh_lookup,
256 .confirm_neigh = ip6_confirm_neigh,
257 };
258
259 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
260 {
261 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
262
263 return mtu ? : dst->dev->mtu;
264 }
265
266 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
267 struct sk_buff *skb, u32 mtu)
268 {
269 }
270
271 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
272 struct sk_buff *skb)
273 {
274 }
275
276 static struct dst_ops ip6_dst_blackhole_ops = {
277 .family = AF_INET6,
278 .destroy = ip6_dst_destroy,
279 .check = ip6_dst_check,
280 .mtu = ip6_blackhole_mtu,
281 .default_advmss = ip6_default_advmss,
282 .update_pmtu = ip6_rt_blackhole_update_pmtu,
283 .redirect = ip6_rt_blackhole_redirect,
284 .cow_metrics = dst_cow_metrics_generic,
285 .neigh_lookup = ip6_neigh_lookup,
286 };
287
288 static const u32 ip6_template_metrics[RTAX_MAX] = {
289 [RTAX_HOPLIMIT - 1] = 0,
290 };
291
292 static const struct rt6_info ip6_null_entry_template = {
293 .dst = {
294 .__refcnt = ATOMIC_INIT(1),
295 .__use = 1,
296 .obsolete = DST_OBSOLETE_FORCE_CHK,
297 .error = -ENETUNREACH,
298 .input = ip6_pkt_discard,
299 .output = ip6_pkt_discard_out,
300 },
301 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
302 .rt6i_protocol = RTPROT_KERNEL,
303 .rt6i_metric = ~(u32) 0,
304 .rt6i_ref = ATOMIC_INIT(1),
305 };
306
307 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
308
309 static const struct rt6_info ip6_prohibit_entry_template = {
310 .dst = {
311 .__refcnt = ATOMIC_INIT(1),
312 .__use = 1,
313 .obsolete = DST_OBSOLETE_FORCE_CHK,
314 .error = -EACCES,
315 .input = ip6_pkt_prohibit,
316 .output = ip6_pkt_prohibit_out,
317 },
318 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
319 .rt6i_protocol = RTPROT_KERNEL,
320 .rt6i_metric = ~(u32) 0,
321 .rt6i_ref = ATOMIC_INIT(1),
322 };
323
324 static const struct rt6_info ip6_blk_hole_entry_template = {
325 .dst = {
326 .__refcnt = ATOMIC_INIT(1),
327 .__use = 1,
328 .obsolete = DST_OBSOLETE_FORCE_CHK,
329 .error = -EINVAL,
330 .input = dst_discard,
331 .output = dst_discard_out,
332 },
333 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
334 .rt6i_protocol = RTPROT_KERNEL,
335 .rt6i_metric = ~(u32) 0,
336 .rt6i_ref = ATOMIC_INIT(1),
337 };
338
339 #endif
340
341 static void rt6_info_init(struct rt6_info *rt)
342 {
343 struct dst_entry *dst = &rt->dst;
344
345 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
346 INIT_LIST_HEAD(&rt->rt6i_siblings);
347 INIT_LIST_HEAD(&rt->rt6i_uncached);
348 }
349
350 /* allocate dst with ip6_dst_ops */
351 static struct rt6_info *__ip6_dst_alloc(struct net *net,
352 struct net_device *dev,
353 int flags)
354 {
355 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
356 1, DST_OBSOLETE_FORCE_CHK, flags);
357
358 if (rt)
359 rt6_info_init(rt);
360
361 return rt;
362 }
363
364 struct rt6_info *ip6_dst_alloc(struct net *net,
365 struct net_device *dev,
366 int flags)
367 {
368 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
369
370 if (rt) {
371 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
372 if (rt->rt6i_pcpu) {
373 int cpu;
374
375 for_each_possible_cpu(cpu) {
376 struct rt6_info **p;
377
378 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
379 /* no one shares rt */
380 *p = NULL;
381 }
382 } else {
383 dst_release_immediate(&rt->dst);
384 return NULL;
385 }
386 }
387
388 return rt;
389 }
390 EXPORT_SYMBOL(ip6_dst_alloc);
391
392 static void ip6_dst_destroy(struct dst_entry *dst)
393 {
394 struct rt6_info *rt = (struct rt6_info *)dst;
395 struct dst_entry *from = dst->from;
396 struct inet6_dev *idev;
397
398 dst_destroy_metrics_generic(dst);
399 free_percpu(rt->rt6i_pcpu);
400 rt6_uncached_list_del(rt);
401
402 idev = rt->rt6i_idev;
403 if (idev) {
404 rt->rt6i_idev = NULL;
405 in6_dev_put(idev);
406 }
407
408 dst->from = NULL;
409 dst_release(from);
410 }
411
412 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
413 int how)
414 {
415 struct rt6_info *rt = (struct rt6_info *)dst;
416 struct inet6_dev *idev = rt->rt6i_idev;
417 struct net_device *loopback_dev =
418 dev_net(dev)->loopback_dev;
419
420 if (idev && idev->dev != loopback_dev) {
421 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
422 if (loopback_idev) {
423 rt->rt6i_idev = loopback_idev;
424 in6_dev_put(idev);
425 }
426 }
427 }
428
429 static bool __rt6_check_expired(const struct rt6_info *rt)
430 {
431 if (rt->rt6i_flags & RTF_EXPIRES)
432 return time_after(jiffies, rt->dst.expires);
433 else
434 return false;
435 }
436
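/* Like __rt6_check_expired(), but a route copied from another one
 * (dst.from set) is also considered expired when its parent is, or when
 * its obsolete value no longer forces validation checks.
 */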
437 static bool rt6_check_expired(const struct rt6_info *rt)
438 {
439 if (rt->rt6i_flags & RTF_EXPIRES) {
440 if (time_after(jiffies, rt->dst.expires))
441 return true;
442 } else if (rt->dst.from) {
443 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
444 rt6_check_expired((struct rt6_info *)rt->dst.from);
445 }
446 return false;
447 }
448
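/* Pick one route among the ECMP siblings of @match based on the flow
 * hash; if the chosen sibling fails rt6_score_route(), @match is kept.
 */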
449 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
450 struct flowi6 *fl6, int oif,
451 int strict)
452 {
453 struct rt6_info *sibling, *next_sibling;
454 int route_chosen;
455
456 /* We might have already computed the hash for ICMPv6 errors. In such
457 * a case it will always be non-zero. Otherwise now is the time to do it.
458 */
459 if (!fl6->mp_hash)
460 fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
461
462 route_chosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
463 /* Don't change the route if route_chosen == 0
464 * (the siblings list does not include ourself)
465 */
466 if (route_chosen)
467 list_for_each_entry_safe(sibling, next_sibling,
468 &match->rt6i_siblings, rt6i_siblings) {
469 route_chosen--;
470 if (route_chosen == 0) {
471 if (rt6_score_route(sibling, oif, strict) < 0)
472 break;
473 match = sibling;
474 break;
475 }
476 }
477 return match;
478 }
479
480 /*
481 * Route lookup. Any table->tb6_lock is implied.
482 */
483
484 static inline struct rt6_info *rt6_device_match(struct net *net,
485 struct rt6_info *rt,
486 const struct in6_addr *saddr,
487 int oif,
488 int flags)
489 {
490 struct rt6_info *local = NULL;
491 struct rt6_info *sprt;
492
493 if (!oif && ipv6_addr_any(saddr))
494 goto out;
495
496 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
497 struct net_device *dev = sprt->dst.dev;
498
499 if (oif) {
500 if (dev->ifindex == oif)
501 return sprt;
502 if (dev->flags & IFF_LOOPBACK) {
503 if (!sprt->rt6i_idev ||
504 sprt->rt6i_idev->dev->ifindex != oif) {
505 if (flags & RT6_LOOKUP_F_IFACE)
506 continue;
507 if (local &&
508 local->rt6i_idev->dev->ifindex == oif)
509 continue;
510 }
511 local = sprt;
512 }
513 } else {
514 if (ipv6_chk_addr(net, saddr, dev,
515 flags & RT6_LOOKUP_F_IFACE))
516 return sprt;
517 }
518 }
519
520 if (oif) {
521 if (local)
522 return local;
523
524 if (flags & RT6_LOOKUP_F_IFACE)
525 return net->ipv6.ip6_null_entry;
526 }
527 out:
528 return rt;
529 }
530
531 #ifdef CONFIG_IPV6_ROUTER_PREF
532 struct __rt6_probe_work {
533 struct work_struct work;
534 struct in6_addr target;
535 struct net_device *dev;
536 };
537
538 static void rt6_probe_deferred(struct work_struct *w)
539 {
540 struct in6_addr mcaddr;
541 struct __rt6_probe_work *work =
542 container_of(w, struct __rt6_probe_work, work);
543
544 addrconf_addr_solict_mult(&work->target, &mcaddr);
545 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
546 dev_put(work->dev);
547 kfree(work);
548 }
549
550 static void rt6_probe(struct rt6_info *rt)
551 {
552 struct __rt6_probe_work *work;
553 struct neighbour *neigh;
554 /*
555 * Okay, this does not seem to be appropriate
556 * for now, however, we need to check if it
557 * is really so; aka Router Reachability Probing.
558 *
559 * Router Reachability Probe MUST be rate-limited
560 * to no more than one per minute.
561 */
562 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
563 return;
564 rcu_read_lock_bh();
565 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
566 if (neigh) {
567 if (neigh->nud_state & NUD_VALID)
568 goto out;
569
570 work = NULL;
571 write_lock(&neigh->lock);
572 if (!(neigh->nud_state & NUD_VALID) &&
573 time_after(jiffies,
574 neigh->updated +
575 rt->rt6i_idev->cnf.rtr_probe_interval)) {
576 work = kmalloc(sizeof(*work), GFP_ATOMIC);
577 if (work)
578 __neigh_set_probe_once(neigh);
579 }
580 write_unlock(&neigh->lock);
581 } else {
582 work = kmalloc(sizeof(*work), GFP_ATOMIC);
583 }
584
585 if (work) {
586 INIT_WORK(&work->work, rt6_probe_deferred);
587 work->target = rt->rt6i_gateway;
588 dev_hold(rt->dst.dev);
589 work->dev = rt->dst.dev;
590 schedule_work(&work->work);
591 }
592
593 out:
594 rcu_read_unlock_bh();
595 }
596 #else
597 static inline void rt6_probe(struct rt6_info *rt)
598 {
599 }
600 #endif
601
602 /*
603 * Default Router Selection (RFC 2461 6.3.6)
604 */
605 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
606 {
607 struct net_device *dev = rt->dst.dev;
608 if (!oif || dev->ifindex == oif)
609 return 2;
610 if ((dev->flags & IFF_LOOPBACK) &&
611 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
612 return 1;
613 return 0;
614 }
615
616 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
617 {
618 struct neighbour *neigh;
619 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
620
621 if (rt->rt6i_flags & RTF_NONEXTHOP ||
622 !(rt->rt6i_flags & RTF_GATEWAY))
623 return RT6_NUD_SUCCEED;
624
625 rcu_read_lock_bh();
626 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
627 if (neigh) {
628 read_lock(&neigh->lock);
629 if (neigh->nud_state & NUD_VALID)
630 ret = RT6_NUD_SUCCEED;
631 #ifdef CONFIG_IPV6_ROUTER_PREF
632 else if (!(neigh->nud_state & NUD_FAILED))
633 ret = RT6_NUD_SUCCEED;
634 else
635 ret = RT6_NUD_FAIL_PROBE;
636 #endif
637 read_unlock(&neigh->lock);
638 } else {
639 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
640 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
641 }
642 rcu_read_unlock_bh();
643
644 return ret;
645 }
646
647 static int rt6_score_route(struct rt6_info *rt, int oif,
648 int strict)
649 {
650 int m;
651
652 m = rt6_check_dev(rt, oif);
653 if (!m && (strict & RT6_LOOKUP_F_IFACE))
654 return RT6_NUD_FAIL_HARD;
655 #ifdef CONFIG_IPV6_ROUTER_PREF
656 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
657 #endif
658 if (strict & RT6_LOOKUP_F_REACHABLE) {
659 int n = rt6_check_neigh(rt);
660 if (n < 0)
661 return n;
662 }
663 return m;
664 }
665
666 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
667 int *mpri, struct rt6_info *match,
668 bool *do_rr)
669 {
670 int m;
671 bool match_do_rr = false;
672 struct inet6_dev *idev = rt->rt6i_idev;
673 struct net_device *dev = rt->dst.dev;
674
675 if (dev && !netif_carrier_ok(dev) &&
676 idev->cnf.ignore_routes_with_linkdown &&
677 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
678 goto out;
679
680 if (rt6_check_expired(rt))
681 goto out;
682
683 m = rt6_score_route(rt, oif, strict);
684 if (m == RT6_NUD_FAIL_DO_RR) {
685 match_do_rr = true;
686 m = 0; /* lowest valid score */
687 } else if (m == RT6_NUD_FAIL_HARD) {
688 goto out;
689 }
690
691 if (strict & RT6_LOOKUP_F_REACHABLE)
692 rt6_probe(rt);
693
694 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
695 if (m > *mpri) {
696 *do_rr = match_do_rr;
697 *mpri = m;
698 match = rt;
699 }
700 out:
701 return match;
702 }
703
704 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
705 struct rt6_info *rr_head,
706 u32 metric, int oif, int strict,
707 bool *do_rr)
708 {
709 struct rt6_info *rt, *match, *cont;
710 int mpri = -1;
711
712 match = NULL;
713 cont = NULL;
714 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
715 if (rt->rt6i_metric != metric) {
716 cont = rt;
717 break;
718 }
719
720 match = find_match(rt, oif, strict, &mpri, match, do_rr);
721 }
722
723 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
724 if (rt->rt6i_metric != metric) {
725 cont = rt;
726 break;
727 }
728
729 match = find_match(rt, oif, strict, &mpri, match, do_rr);
730 }
731
732 if (match || !cont)
733 return match;
734
735 for (rt = cont; rt; rt = rt->dst.rt6_next)
736 match = find_match(rt, oif, strict, &mpri, match, do_rr);
737
738 return match;
739 }
740
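/* Pick the best route at this fib6 node, starting the scan at the
 * round-robin pointer and advancing that pointer when the scoring in
 * find_match() asked for round-robin.
 */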
741 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
742 {
743 struct rt6_info *match, *rt0;
744 struct net *net;
745 bool do_rr = false;
746
747 rt0 = fn->rr_ptr;
748 if (!rt0)
749 fn->rr_ptr = rt0 = fn->leaf;
750
751 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
752 &do_rr);
753
754 if (do_rr) {
755 struct rt6_info *next = rt0->dst.rt6_next;
756
757 /* no entries matched; do round-robin */
758 if (!next || next->rt6i_metric != rt0->rt6i_metric)
759 next = fn->leaf;
760
761 if (next != rt0)
762 fn->rr_ptr = next;
763 }
764
765 net = dev_net(rt0->dst.dev);
766 return match ? match : net->ipv6.ip6_null_entry;
767 }
768
769 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
770 {
771 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
772 }
773
774 #ifdef CONFIG_IPV6_ROUTE_INFO
775 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
776 const struct in6_addr *gwaddr)
777 {
778 struct net *net = dev_net(dev);
779 struct route_info *rinfo = (struct route_info *) opt;
780 struct in6_addr prefix_buf, *prefix;
781 unsigned int pref;
782 unsigned long lifetime;
783 struct rt6_info *rt;
784
785 if (len < sizeof(struct route_info)) {
786 return -EINVAL;
787 }
788
789 /* Sanity check for prefix_len and length */
790 if (rinfo->length > 3) {
791 return -EINVAL;
792 } else if (rinfo->prefix_len > 128) {
793 return -EINVAL;
794 } else if (rinfo->prefix_len > 64) {
795 if (rinfo->length < 2) {
796 return -EINVAL;
797 }
798 } else if (rinfo->prefix_len > 0) {
799 if (rinfo->length < 1) {
800 return -EINVAL;
801 }
802 }
803
804 pref = rinfo->route_pref;
805 if (pref == ICMPV6_ROUTER_PREF_INVALID)
806 return -EINVAL;
807
808 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
809
810 if (rinfo->length == 3)
811 prefix = (struct in6_addr *)rinfo->prefix;
812 else {
813 /* this function is safe */
814 ipv6_addr_prefix(&prefix_buf,
815 (struct in6_addr *)rinfo->prefix,
816 rinfo->prefix_len);
817 prefix = &prefix_buf;
818 }
819
820 if (rinfo->prefix_len == 0)
821 rt = rt6_get_dflt_router(gwaddr, dev);
822 else
823 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
824 gwaddr, dev);
825
826 if (rt && !lifetime) {
827 ip6_del_rt(rt);
828 rt = NULL;
829 }
830
831 if (!rt && lifetime)
832 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
833 dev, pref);
834 else if (rt)
835 rt->rt6i_flags = RTF_ROUTEINFO |
836 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
837
838 if (rt) {
839 if (!addrconf_finite_timeout(lifetime))
840 rt6_clean_expires(rt);
841 else
842 rt6_set_expires(rt, jiffies + HZ * lifetime);
843
844 ip6_rt_put(rt);
845 }
846 return 0;
847 }
848 #endif
849
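/* Walk back up the fib6 tree (descending into source-address subtrees
 * where present) until a node carrying route info is found, or return
 * NULL at the tree root.
 */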
850 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
851 struct in6_addr *saddr)
852 {
853 struct fib6_node *pn;
854 while (1) {
855 if (fn->fn_flags & RTN_TL_ROOT)
856 return NULL;
857 pn = fn->parent;
858 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
859 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
860 else
861 fn = pn;
862 if (fn->fn_flags & RTN_RTINFO)
863 return fn;
864 }
865 }
866
867 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
868 struct fib6_table *table,
869 struct flowi6 *fl6, int flags)
870 {
871 struct fib6_node *fn;
872 struct rt6_info *rt;
873
874 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
875 flags &= ~RT6_LOOKUP_F_IFACE;
876
877 read_lock_bh(&table->tb6_lock);
878 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
879 restart:
880 rt = fn->leaf;
881 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
882 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
883 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
884 if (rt == net->ipv6.ip6_null_entry) {
885 fn = fib6_backtrack(fn, &fl6->saddr);
886 if (fn)
887 goto restart;
888 }
889 dst_use(&rt->dst, jiffies);
890 read_unlock_bh(&table->tb6_lock);
891
892 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
893
894 return rt;
895
896 }
897
898 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
899 int flags)
900 {
901 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
902 }
903 EXPORT_SYMBOL_GPL(ip6_route_lookup);
904
905 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
906 const struct in6_addr *saddr, int oif, int strict)
907 {
908 struct flowi6 fl6 = {
909 .flowi6_oif = oif,
910 .daddr = *daddr,
911 };
912 struct dst_entry *dst;
913 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
914
915 if (saddr) {
916 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
917 flags |= RT6_LOOKUP_F_HAS_SADDR;
918 }
919
920 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
921 if (dst->error == 0)
922 return (struct rt6_info *) dst;
923
924 dst_release(dst);
925
926 return NULL;
927 }
928 EXPORT_SYMBOL(rt6_lookup);
929
930 /* ip6_ins_rt is called with FREE table->tb6_lock.
931 * It takes a new route entry; if the addition fails for any reason,
932 * the route is released.
933 * The caller must hold dst before calling it.
934 */
935
936 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
937 struct mx6_config *mxc,
938 struct netlink_ext_ack *extack)
939 {
940 int err;
941 struct fib6_table *table;
942
943 table = rt->rt6i_table;
944 write_lock_bh(&table->tb6_lock);
945 err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
946 write_unlock_bh(&table->tb6_lock);
947
948 return err;
949 }
950
951 int ip6_ins_rt(struct rt6_info *rt)
952 {
953 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
954 struct mx6_config mxc = { .mx = NULL, };
955
956 /* Hold dst to account for the reference from the fib6 tree */
957 dst_hold(&rt->dst);
958 return __ip6_ins_rt(rt, &info, &mxc, NULL);
959 }
960
961 /* called with rcu_read_lock held */
962 static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
963 {
964 struct net_device *dev = rt->dst.dev;
965
966 if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
967 /* for copies of local routes, dst->dev needs to be the device
968 * itself if it is a master device, the master device if the
969 * device is enslaved, and the loopback device by default
970 */
971 if (netif_is_l3_slave(dev) &&
972 !rt6_need_strict(&rt->rt6i_dst.addr))
973 dev = l3mdev_master_dev_rcu(dev);
974 else if (!netif_is_l3_master(dev))
975 dev = dev_net(dev)->loopback_dev;
976 /* the last case is netif_is_l3_master(dev) being true, in
977 * which case we want dev itself to be returned
978 */
979 }
980
981 return dev;
982 }
983
984 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
985 const struct in6_addr *daddr,
986 const struct in6_addr *saddr)
987 {
988 struct net_device *dev;
989 struct rt6_info *rt;
990
991 /*
992 * Clone the route.
993 */
994
995 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
996 ort = (struct rt6_info *)ort->dst.from;
997
998 rcu_read_lock();
999 dev = ip6_rt_get_dev_rcu(ort);
1000 rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1001 rcu_read_unlock();
1002 if (!rt)
1003 return NULL;
1004
1005 ip6_rt_copy_init(rt, ort);
1006 rt->rt6i_flags |= RTF_CACHE;
1007 rt->rt6i_metric = 0;
1008 rt->dst.flags |= DST_HOST;
1009 rt->rt6i_dst.addr = *daddr;
1010 rt->rt6i_dst.plen = 128;
1011
1012 if (!rt6_is_gw_or_nonexthop(ort)) {
1013 if (ort->rt6i_dst.plen != 128 &&
1014 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1015 rt->rt6i_flags |= RTF_ANYCAST;
1016 #ifdef CONFIG_IPV6_SUBTREES
1017 if (rt->rt6i_src.plen && saddr) {
1018 rt->rt6i_src.addr = *saddr;
1019 rt->rt6i_src.plen = 128;
1020 }
1021 #endif
1022 }
1023
1024 return rt;
1025 }
1026
1027 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1028 {
1029 struct net_device *dev;
1030 struct rt6_info *pcpu_rt;
1031
1032 rcu_read_lock();
1033 dev = ip6_rt_get_dev_rcu(rt);
1034 pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
1035 rcu_read_unlock();
1036 if (!pcpu_rt)
1037 return NULL;
1038 ip6_rt_copy_init(pcpu_rt, rt);
1039 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1040 pcpu_rt->rt6i_flags |= RTF_PCPU;
1041 return pcpu_rt;
1042 }
1043
1044 /* It should be called with read_lock_bh(&tb6_lock) acquired */
1045 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1046 {
1047 struct rt6_info *pcpu_rt, **p;
1048
1049 p = this_cpu_ptr(rt->rt6i_pcpu);
1050 pcpu_rt = *p;
1051
1052 if (pcpu_rt) {
1053 dst_hold(&pcpu_rt->dst);
1054 rt6_dst_from_metrics_check(pcpu_rt);
1055 }
1056 return pcpu_rt;
1057 }
1058
1059 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1060 {
1061 struct fib6_table *table = rt->rt6i_table;
1062 struct rt6_info *pcpu_rt, *prev, **p;
1063
1064 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1065 if (!pcpu_rt) {
1066 struct net *net = dev_net(rt->dst.dev);
1067
1068 dst_hold(&net->ipv6.ip6_null_entry->dst);
1069 return net->ipv6.ip6_null_entry;
1070 }
1071
1072 read_lock_bh(&table->tb6_lock);
1073 if (rt->rt6i_pcpu) {
1074 p = this_cpu_ptr(rt->rt6i_pcpu);
1075 prev = cmpxchg(p, NULL, pcpu_rt);
1076 if (prev) {
1077 /* If someone did it before us, return prev instead */
1078 dst_release_immediate(&pcpu_rt->dst);
1079 pcpu_rt = prev;
1080 }
1081 } else {
1082 /* rt has been removed from the fib6 tree
1083 * before we have a chance to acquire the read_lock.
1084 * In this case, don't bother to create a pcpu rt
1085 * since rt is going away anyway. The next
1086 * dst_check() will trigger a re-lookup.
1087 */
1088 dst_release_immediate(&pcpu_rt->dst);
1089 pcpu_rt = rt;
1090 }
1091 dst_hold(&pcpu_rt->dst);
1092 rt6_dst_from_metrics_check(pcpu_rt);
1093 read_unlock_bh(&table->tb6_lock);
1094 return pcpu_rt;
1095 }
1096
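/* Core policy lookup. Depending on what the fib6 tree returns, the
 * result is the null entry or an RTF_CACHE entry used directly, an
 * uncached RTF_CACHE clone (FLOWI_FLAG_KNOWN_NH without a gateway),
 * or a per-cpu copy of the tree route.
 */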
1097 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1098 int oif, struct flowi6 *fl6, int flags)
1099 {
1100 struct fib6_node *fn, *saved_fn;
1101 struct rt6_info *rt;
1102 int strict = 0;
1103
1104 strict |= flags & RT6_LOOKUP_F_IFACE;
1105 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1106 if (net->ipv6.devconf_all->forwarding == 0)
1107 strict |= RT6_LOOKUP_F_REACHABLE;
1108
1109 read_lock_bh(&table->tb6_lock);
1110
1111 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1112 saved_fn = fn;
1113
1114 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1115 oif = 0;
1116
1117 redo_rt6_select:
1118 rt = rt6_select(fn, oif, strict);
1119 if (rt->rt6i_nsiblings)
1120 rt = rt6_multipath_select(rt, fl6, oif, strict);
1121 if (rt == net->ipv6.ip6_null_entry) {
1122 fn = fib6_backtrack(fn, &fl6->saddr);
1123 if (fn)
1124 goto redo_rt6_select;
1125 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1126 /* also consider unreachable route */
1127 strict &= ~RT6_LOOKUP_F_REACHABLE;
1128 fn = saved_fn;
1129 goto redo_rt6_select;
1130 }
1131 }
1132
1133
1134 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1135 dst_use(&rt->dst, jiffies);
1136 read_unlock_bh(&table->tb6_lock);
1137
1138 rt6_dst_from_metrics_check(rt);
1139
1140 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1141 return rt;
1142 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1143 !(rt->rt6i_flags & RTF_GATEWAY))) {
1144 /* Create a RTF_CACHE clone which will not be
1145 * owned by the fib6 tree. It is for the special case where
1146 * the daddr in the skb during the neighbor look-up is different
1147 * from the fl6->daddr used to look up the route here.
1148 */
1149
1150 struct rt6_info *uncached_rt;
1151
1152 dst_use(&rt->dst, jiffies);
1153 read_unlock_bh(&table->tb6_lock);
1154
1155 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1156 dst_release(&rt->dst);
1157
1158 if (uncached_rt) {
1159 /* uncached_rt's refcnt is taken during ip6_rt_cache_alloc();
1160 * no need for another dst_hold().
1161 */
1162 rt6_uncached_list_add(uncached_rt);
1163 } else {
1164 uncached_rt = net->ipv6.ip6_null_entry;
1165 dst_hold(&uncached_rt->dst);
1166 }
1167
1168 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
1169 return uncached_rt;
1170
1171 } else {
1172 /* Get a percpu copy */
1173
1174 struct rt6_info *pcpu_rt;
1175
1176 rt->dst.lastuse = jiffies;
1177 rt->dst.__use++;
1178 pcpu_rt = rt6_get_pcpu_route(rt);
1179
1180 if (pcpu_rt) {
1181 read_unlock_bh(&table->tb6_lock);
1182 } else {
1183 /* We have to do the read_unlock first
1184 * because rt6_make_pcpu_route() may trigger
1185 * ip6_dst_gc() which will take the write_lock.
1186 */
1187 dst_hold(&rt->dst);
1188 read_unlock_bh(&table->tb6_lock);
1189 pcpu_rt = rt6_make_pcpu_route(rt);
1190 dst_release(&rt->dst);
1191 }
1192
1193 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
1194 return pcpu_rt;
1195
1196 }
1197 }
1198 EXPORT_SYMBOL_GPL(ip6_pol_route);
1199
1200 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1201 struct flowi6 *fl6, int flags)
1202 {
1203 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1204 }
1205
1206 struct dst_entry *ip6_route_input_lookup(struct net *net,
1207 struct net_device *dev,
1208 struct flowi6 *fl6, int flags)
1209 {
1210 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1211 flags |= RT6_LOOKUP_F_IFACE;
1212
1213 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1214 }
1215 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1216
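/* Extract the L3 flow keys for multipath hashing. For ICMPv6 errors
 * the keys are taken from the inner (offending) header so the error
 * follows the same path as the flow that triggered it.
 */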
1217 static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1218 struct flow_keys *keys)
1219 {
1220 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1221 const struct ipv6hdr *key_iph = outer_iph;
1222 const struct ipv6hdr *inner_iph;
1223 const struct icmp6hdr *icmph;
1224 struct ipv6hdr _inner_iph;
1225 struct icmp6hdr _icmph;
1226
1227 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1228 goto out;
1229
1230 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1231 sizeof(_icmph), &_icmph);
1232 if (!icmph)
1233 goto out;
1234
1235 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1236 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1237 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1238 icmph->icmp6_type != ICMPV6_PARAMPROB)
1239 goto out;
1240
1241 inner_iph = skb_header_pointer(skb,
1242 skb_transport_offset(skb) + sizeof(*icmph),
1243 sizeof(_inner_iph), &_inner_iph);
1244 if (!inner_iph)
1245 goto out;
1246
1247 key_iph = inner_iph;
1248 out:
1249 memset(keys, 0, sizeof(*keys));
1250 keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1251 keys->addrs.v6addrs.src = key_iph->saddr;
1252 keys->addrs.v6addrs.dst = key_iph->daddr;
1253 keys->tags.flow_label = ip6_flowlabel(key_iph);
1254 keys->basic.ip_proto = key_iph->nexthdr;
1255 }
1256
1257 /* if skb is set it will be used and fl6 can be NULL */
1258 u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
1259 {
1260 struct flow_keys hash_keys;
1261
1262 if (skb) {
1263 ip6_multipath_l3_keys(skb, &hash_keys);
1264 return flow_hash_from_keys(&hash_keys);
1265 }
1266
1267 return get_hash_from_flowi6(fl6);
1268 }
1269
1270 void ip6_route_input(struct sk_buff *skb)
1271 {
1272 const struct ipv6hdr *iph = ipv6_hdr(skb);
1273 struct net *net = dev_net(skb->dev);
1274 int flags = RT6_LOOKUP_F_HAS_SADDR;
1275 struct ip_tunnel_info *tun_info;
1276 struct flowi6 fl6 = {
1277 .flowi6_iif = skb->dev->ifindex,
1278 .daddr = iph->daddr,
1279 .saddr = iph->saddr,
1280 .flowlabel = ip6_flowinfo(iph),
1281 .flowi6_mark = skb->mark,
1282 .flowi6_proto = iph->nexthdr,
1283 };
1284
1285 tun_info = skb_tunnel_info(skb);
1286 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1287 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1288 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
1289 fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
1290 skb_dst_drop(skb);
1291 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1292 }
1293
1294 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1295 struct flowi6 *fl6, int flags)
1296 {
1297 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1298 }
1299
1300 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1301 struct flowi6 *fl6, int flags)
1302 {
1303 bool any_src;
1304
1305 if (rt6_need_strict(&fl6->daddr)) {
1306 struct dst_entry *dst;
1307
1308 dst = l3mdev_link_scope_lookup(net, fl6);
1309 if (dst)
1310 return dst;
1311 }
1312
1313 fl6->flowi6_iif = LOOPBACK_IFINDEX;
1314
1315 any_src = ipv6_addr_any(&fl6->saddr);
1316 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1317 (fl6->flowi6_oif && any_src))
1318 flags |= RT6_LOOKUP_F_IFACE;
1319
1320 if (!any_src)
1321 flags |= RT6_LOOKUP_F_HAS_SADDR;
1322 else if (sk)
1323 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1324
1325 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1326 }
1327 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1328
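/* Replace @dst_orig with a standalone copy whose input/output handlers
 * discard packets; the copy keeps the original's metrics, gateway and
 * addresses but is never inserted into the fib6 tree.
 */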
1329 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1330 {
1331 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1332 struct net_device *loopback_dev = net->loopback_dev;
1333 struct dst_entry *new = NULL;
1334
1335 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
1336 DST_OBSOLETE_DEAD, 0);
1337 if (rt) {
1338 rt6_info_init(rt);
1339
1340 new = &rt->dst;
1341 new->__use = 1;
1342 new->input = dst_discard;
1343 new->output = dst_discard_out;
1344
1345 dst_copy_metrics(new, &ort->dst);
1346
1347 rt->rt6i_idev = in6_dev_get(loopback_dev);
1348 rt->rt6i_gateway = ort->rt6i_gateway;
1349 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1350 rt->rt6i_metric = 0;
1351
1352 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1353 #ifdef CONFIG_IPV6_SUBTREES
1354 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1355 #endif
1356 }
1357
1358 dst_release(dst_orig);
1359 return new ? new : ERR_PTR(-ENOMEM);
1360 }
1361
1362 /*
1363 * Destination cache support functions
1364 */
1365
1366 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1367 {
1368 if (rt->dst.from &&
1369 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1370 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1371 }
1372
1373 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1374 {
1375 u32 rt_cookie = 0;
1376
1377 if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
1378 return NULL;
1379
1380 if (rt6_check_expired(rt))
1381 return NULL;
1382
1383 return &rt->dst;
1384 }
1385
1386 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1387 {
1388 if (!__rt6_check_expired(rt) &&
1389 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1390 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1391 return &rt->dst;
1392 else
1393 return NULL;
1394 }
1395
1396 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1397 {
1398 struct rt6_info *rt;
1399
1400 rt = (struct rt6_info *) dst;
1401
1402 /* All IPv6 dsts are created with ->obsolete set to the value
1403 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1404 * into this function always.
1405 */
1406
1407 rt6_dst_from_metrics_check(rt);
1408
1409 if (rt->rt6i_flags & RTF_PCPU ||
1410 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
1411 return rt6_dst_from_check(rt, cookie);
1412 else
1413 return rt6_check(rt, cookie);
1414 }
1415
1416 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1417 {
1418 struct rt6_info *rt = (struct rt6_info *) dst;
1419
1420 if (rt) {
1421 if (rt->rt6i_flags & RTF_CACHE) {
1422 if (rt6_check_expired(rt)) {
1423 ip6_del_rt(rt);
1424 dst = NULL;
1425 }
1426 } else {
1427 dst_release(dst);
1428 dst = NULL;
1429 }
1430 }
1431 return dst;
1432 }
1433
1434 static void ip6_link_failure(struct sk_buff *skb)
1435 {
1436 struct rt6_info *rt;
1437
1438 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1439
1440 rt = (struct rt6_info *) skb_dst(skb);
1441 if (rt) {
1442 if (rt->rt6i_flags & RTF_CACHE) {
1443 if (dst_hold_safe(&rt->dst))
1444 ip6_del_rt(rt);
1445 } else {
1446 struct fib6_node *fn;
1447
1448 rcu_read_lock();
1449 fn = rcu_dereference(rt->rt6i_node);
1450 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
1451 fn->fn_sernum = -1;
1452 rcu_read_unlock();
1453 }
1454 }
1455 }
1456
1457 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1458 {
1459 struct net *net = dev_net(rt->dst.dev);
1460
1461 rt->rt6i_flags |= RTF_MODIFIED;
1462 rt->rt6i_pmtu = mtu;
1463 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1464 }
1465
1466 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1467 {
1468 return !(rt->rt6i_flags & RTF_CACHE) &&
1469 (rt->rt6i_flags & RTF_PCPU ||
1470 rcu_access_pointer(rt->rt6i_node));
1471 }
1472
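/* Record a new path MTU: an RTF_CACHE entry is updated in place, while
 * for routes still shared via the fib6 tree (or per-cpu copies) an
 * RTF_CACHE clone carrying the reduced MTU is created and inserted.
 */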
1473 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1474 const struct ipv6hdr *iph, u32 mtu)
1475 {
1476 const struct in6_addr *daddr, *saddr;
1477 struct rt6_info *rt6 = (struct rt6_info *)dst;
1478
1479 if (dst_metric_locked(dst, RTAX_MTU))
1480 return;
1481
1482 if (iph) {
1483 daddr = &iph->daddr;
1484 saddr = &iph->saddr;
1485 } else if (sk) {
1486 daddr = &sk->sk_v6_daddr;
1487 saddr = &inet6_sk(sk)->saddr;
1488 } else {
1489 daddr = NULL;
1490 saddr = NULL;
1491 }
1492 dst_confirm_neigh(dst, daddr);
1493 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1494 if (mtu >= dst_mtu(dst))
1495 return;
1496
1497 if (!rt6_cache_allowed_for_pmtu(rt6)) {
1498 rt6_do_update_pmtu(rt6, mtu);
1499 } else if (daddr) {
1500 struct rt6_info *nrt6;
1501
1502 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1503 if (nrt6) {
1504 rt6_do_update_pmtu(nrt6, mtu);
1505
1506 /* ip6_ins_rt(nrt6) will bump the
1507 * rt6->rt6i_node->fn_sernum
1508 * which will fail the next rt6_check() and
1509 * invalidate the sk->sk_dst_cache.
1510 */
1511 ip6_ins_rt(nrt6);
1512 /* Release the reference taken in
1513 * ip6_rt_cache_alloc()
1514 */
1515 dst_release(&nrt6->dst);
1516 }
1517 }
1518 }
1519
1520 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1521 struct sk_buff *skb, u32 mtu)
1522 {
1523 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1524 }
1525
1526 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1527 int oif, u32 mark, kuid_t uid)
1528 {
1529 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1530 struct dst_entry *dst;
1531 struct flowi6 fl6;
1532
1533 memset(&fl6, 0, sizeof(fl6));
1534 fl6.flowi6_oif = oif;
1535 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1536 fl6.daddr = iph->daddr;
1537 fl6.saddr = iph->saddr;
1538 fl6.flowlabel = ip6_flowinfo(iph);
1539 fl6.flowi6_uid = uid;
1540
1541 dst = ip6_route_output(net, NULL, &fl6);
1542 if (!dst->error)
1543 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1544 dst_release(dst);
1545 }
1546 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1547
1548 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1549 {
1550 int oif = sk->sk_bound_dev_if;
1551 struct dst_entry *dst;
1552
1553 if (!oif && skb->dev)
1554 oif = l3mdev_master_ifindex(skb->dev);
1555
1556 ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
1557
1558 dst = __sk_dst_get(sk);
1559 if (!dst || !dst->obsolete ||
1560 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1561 return;
1562
1563 bh_lock_sock(sk);
1564 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1565 ip6_datagram_dst_update(sk, false);
1566 bh_unlock_sock(sk);
1567 }
1568 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1569
1570 /* Handle redirects */
1571 struct ip6rd_flowi {
1572 struct flowi6 fl6;
1573 struct in6_addr gateway;
1574 };
1575
1576 static struct rt6_info *__ip6_route_redirect(struct net *net,
1577 struct fib6_table *table,
1578 struct flowi6 *fl6,
1579 int flags)
1580 {
1581 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1582 struct rt6_info *rt;
1583 struct fib6_node *fn;
1584
1585 /* Get the "current" route for this destination and
1586 * check if the redirect has come from the appropriate router.
1587 *
1588 * RFC 4861 specifies that redirects should only be
1589 * accepted if they come from the nexthop to the target.
1590 * Due to the way the routes are chosen, this notion
1591 * is a bit fuzzy and one might need to check all possible
1592 * routes.
1593 */
1594
1595 read_lock_bh(&table->tb6_lock);
1596 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1597 restart:
1598 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1599 if (rt6_check_expired(rt))
1600 continue;
1601 if (rt->dst.error)
1602 break;
1603 if (!(rt->rt6i_flags & RTF_GATEWAY))
1604 continue;
1605 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1606 continue;
1607 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1608 continue;
1609 break;
1610 }
1611
1612 if (!rt)
1613 rt = net->ipv6.ip6_null_entry;
1614 else if (rt->dst.error) {
1615 rt = net->ipv6.ip6_null_entry;
1616 goto out;
1617 }
1618
1619 if (rt == net->ipv6.ip6_null_entry) {
1620 fn = fib6_backtrack(fn, &fl6->saddr);
1621 if (fn)
1622 goto restart;
1623 }
1624
1625 out:
1626 dst_hold(&rt->dst);
1627
1628 read_unlock_bh(&table->tb6_lock);
1629
1630 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1631 return rt;
1632 };
1633
1634 static struct dst_entry *ip6_route_redirect(struct net *net,
1635 const struct flowi6 *fl6,
1636 const struct in6_addr *gateway)
1637 {
1638 int flags = RT6_LOOKUP_F_HAS_SADDR;
1639 struct ip6rd_flowi rdfl;
1640
1641 rdfl.fl6 = *fl6;
1642 rdfl.gateway = *gateway;
1643
1644 return fib6_rule_lookup(net, &rdfl.fl6,
1645 flags, __ip6_route_redirect);
1646 }
1647
1648 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1649 kuid_t uid)
1650 {
1651 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1652 struct dst_entry *dst;
1653 struct flowi6 fl6;
1654
1655 memset(&fl6, 0, sizeof(fl6));
1656 fl6.flowi6_iif = LOOPBACK_IFINDEX;
1657 fl6.flowi6_oif = oif;
1658 fl6.flowi6_mark = mark;
1659 fl6.daddr = iph->daddr;
1660 fl6.saddr = iph->saddr;
1661 fl6.flowlabel = ip6_flowinfo(iph);
1662 fl6.flowi6_uid = uid;
1663
1664 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1665 rt6_do_redirect(dst, NULL, skb);
1666 dst_release(dst);
1667 }
1668 EXPORT_SYMBOL_GPL(ip6_redirect);
1669
1670 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1671 u32 mark)
1672 {
1673 const struct ipv6hdr *iph = ipv6_hdr(skb);
1674 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1675 struct dst_entry *dst;
1676 struct flowi6 fl6;
1677
1678 memset(&fl6, 0, sizeof(fl6));
1679 fl6.flowi6_iif = LOOPBACK_IFINDEX;
1680 fl6.flowi6_oif = oif;
1681 fl6.flowi6_mark = mark;
1682 fl6.daddr = msg->dest;
1683 fl6.saddr = iph->daddr;
1684 fl6.flowi6_uid = sock_net_uid(net, NULL);
1685
1686 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1687 rt6_do_redirect(dst, NULL, skb);
1688 dst_release(dst);
1689 }
1690
1691 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1692 {
1693 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1694 sk->sk_uid);
1695 }
1696 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1697
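/* Advertised MSS: the path MTU minus the IPv6 and TCP headers, clamped
 * below by ip6_rt_min_advmss and above by IPV6_MAXPLEN (the latter
 * meaning "any MSS, rely only on path MTU discovery").
 */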
1698 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1699 {
1700 struct net_device *dev = dst->dev;
1701 unsigned int mtu = dst_mtu(dst);
1702 struct net *net = dev_net(dev);
1703
1704 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1705
1706 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1707 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1708
1709 /*
1710 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1711 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1712 * IPV6_MAXPLEN is also valid and means: "any MSS,
1713 * rely only on pmtu discovery"
1714 */
1715 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1716 mtu = IPV6_MAXPLEN;
1717 return mtu;
1718 }
1719
1720 static unsigned int ip6_mtu(const struct dst_entry *dst)
1721 {
1722 const struct rt6_info *rt = (const struct rt6_info *)dst;
1723 unsigned int mtu = rt->rt6i_pmtu;
1724 struct inet6_dev *idev;
1725
1726 if (mtu)
1727 goto out;
1728
1729 mtu = dst_metric_raw(dst, RTAX_MTU);
1730 if (mtu)
1731 goto out;
1732
1733 mtu = IPV6_MIN_MTU;
1734
1735 rcu_read_lock();
1736 idev = __in6_dev_get(dst->dev);
1737 if (idev)
1738 mtu = idev->cnf.mtu6;
1739 rcu_read_unlock();
1740
1741 out:
1742 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1743
1744 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
1745 }
1746
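/* Build a standalone host route toward fl6->daddr for ICMPv6 output.
 * The dst is never inserted into the fib6 tree; it goes on the uncached
 * list so device teardown can release it properly.
 */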
1747 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1748 struct flowi6 *fl6)
1749 {
1750 struct dst_entry *dst;
1751 struct rt6_info *rt;
1752 struct inet6_dev *idev = in6_dev_get(dev);
1753 struct net *net = dev_net(dev);
1754
1755 if (unlikely(!idev))
1756 return ERR_PTR(-ENODEV);
1757
1758 rt = ip6_dst_alloc(net, dev, 0);
1759 if (unlikely(!rt)) {
1760 in6_dev_put(idev);
1761 dst = ERR_PTR(-ENOMEM);
1762 goto out;
1763 }
1764
1765 rt->dst.flags |= DST_HOST;
1766 rt->dst.input = ip6_input;
1767 rt->dst.output = ip6_output;
1768 rt->rt6i_gateway = fl6->daddr;
1769 rt->rt6i_dst.addr = fl6->daddr;
1770 rt->rt6i_dst.plen = 128;
1771 rt->rt6i_idev = idev;
1772 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1773
1774 /* Add this dst into uncached_list so that rt6_ifdown() can
1775 * do proper release of the net_device
1776 */
1777 rt6_uncached_list_add(rt);
1778
1779 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1780
1781 out:
1782 return dst;
1783 }
1784
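/* dst garbage collection: run fib6 GC only once the minimum interval
 * has passed or the entry count exceeds ip6_rt_max_size, growing or
 * decaying ip6_rt_gc_expire to adapt how aggressively entries expire.
 */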
1785 static int ip6_dst_gc(struct dst_ops *ops)
1786 {
1787 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1788 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1789 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1790 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1791 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1792 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1793 int entries;
1794
1795 entries = dst_entries_get_fast(ops);
1796 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1797 entries <= rt_max_size)
1798 goto out;
1799
1800 net->ipv6.ip6_rt_gc_expire++;
1801 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1802 entries = dst_entries_get_slow(ops);
1803 if (entries < ops->gc_thresh)
1804 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1805 out:
1806 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1807 return entries > rt_max_size;
1808 }
1809
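/* Convert the RTAX_* netlink attributes in @cfg into the metrics array
 * and validity bitmap of @mxc; RTAX_CC_ALGO names are translated into
 * congestion-control keys.
 */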
1810 static int ip6_convert_metrics(struct mx6_config *mxc,
1811 const struct fib6_config *cfg)
1812 {
1813 bool ecn_ca = false;
1814 struct nlattr *nla;
1815 int remaining;
1816 u32 *mp;
1817
1818 if (!cfg->fc_mx)
1819 return 0;
1820
1821 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1822 if (unlikely(!mp))
1823 return -ENOMEM;
1824
1825 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1826 int type = nla_type(nla);
1827 u32 val;
1828
1829 if (!type)
1830 continue;
1831 if (unlikely(type > RTAX_MAX))
1832 goto err;
1833
1834 if (type == RTAX_CC_ALGO) {
1835 char tmp[TCP_CA_NAME_MAX];
1836
1837 nla_strlcpy(tmp, nla, sizeof(tmp));
1838 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1839 if (val == TCP_CA_UNSPEC)
1840 goto err;
1841 } else {
1842 val = nla_get_u32(nla);
1843 }
1844 if (type == RTAX_HOPLIMIT && val > 255)
1845 val = 255;
1846 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1847 goto err;
1848
1849 mp[type - 1] = val;
1850 __set_bit(type - 1, mxc->mx_valid);
1851 }
1852
1853 if (ecn_ca) {
1854 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1855 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1856 }
1857
1858 mxc->mx = mp;
1859 return 0;
1860 err:
1861 kfree(mp);
1862 return -EINVAL;
1863 }
1864
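/* Resolve a configured gateway by looking it up only in the table the
 * route is being added to; returns NULL if that lookup hits the null
 * entry, so the caller can fall back to a full lookup.
 */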
1865 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1866 struct fib6_config *cfg,
1867 const struct in6_addr *gw_addr)
1868 {
1869 struct flowi6 fl6 = {
1870 .flowi6_oif = cfg->fc_ifindex,
1871 .daddr = *gw_addr,
1872 .saddr = cfg->fc_prefsrc,
1873 };
1874 struct fib6_table *table;
1875 struct rt6_info *rt;
1876 int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
1877
1878 table = fib6_get_table(net, cfg->fc_table);
1879 if (!table)
1880 return NULL;
1881
1882 if (!ipv6_addr_any(&cfg->fc_prefsrc))
1883 flags |= RT6_LOOKUP_F_HAS_SADDR;
1884
1885 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1886
1887 /* if table lookup failed, fall back to full lookup */
1888 if (rt == net->ipv6.ip6_null_entry) {
1889 ip6_rt_put(rt);
1890 rt = NULL;
1891 }
1892
1893 return rt;
1894 }
1895
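/* Validate @cfg and build the corresponding rt6_info without inserting
 * it into any table; the caller is responsible for the actual insert
 * (see ip6_route_add()).
 */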
1896 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
1897 struct netlink_ext_ack *extack)
1898 {
1899 struct net *net = cfg->fc_nlinfo.nl_net;
1900 struct rt6_info *rt = NULL;
1901 struct net_device *dev = NULL;
1902 struct inet6_dev *idev = NULL;
1903 struct fib6_table *table;
1904 int addr_type;
1905 int err = -EINVAL;
1906
1907 /* RTF_PCPU is an internal flag; cannot be set by userspace */
1908 if (cfg->fc_flags & RTF_PCPU) {
1909 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
1910 goto out;
1911 }
1912
1913 if (cfg->fc_dst_len > 128) {
1914 NL_SET_ERR_MSG(extack, "Invalid prefix length");
1915 goto out;
1916 }
1917 if (cfg->fc_src_len > 128) {
1918 NL_SET_ERR_MSG(extack, "Invalid source address length");
1919 goto out;
1920 }
1921 #ifndef CONFIG_IPV6_SUBTREES
1922 if (cfg->fc_src_len) {
1923 NL_SET_ERR_MSG(extack,
1924 "Specifying source address requires IPV6_SUBTREES to be enabled");
1925 goto out;
1926 }
1927 #endif
1928 if (cfg->fc_ifindex) {
1929 err = -ENODEV;
1930 dev = dev_get_by_index(net, cfg->fc_ifindex);
1931 if (!dev)
1932 goto out;
1933 idev = in6_dev_get(dev);
1934 if (!idev)
1935 goto out;
1936 }
1937
1938 if (cfg->fc_metric == 0)
1939 cfg->fc_metric = IP6_RT_PRIO_USER;
1940
1941 err = -ENOBUFS;
1942 if (cfg->fc_nlinfo.nlh &&
1943 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1944 table = fib6_get_table(net, cfg->fc_table);
1945 if (!table) {
1946 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1947 table = fib6_new_table(net, cfg->fc_table);
1948 }
1949 } else {
1950 table = fib6_new_table(net, cfg->fc_table);
1951 }
1952
1953 if (!table)
1954 goto out;
1955
1956 rt = ip6_dst_alloc(net, NULL,
1957 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1958
1959 if (!rt) {
1960 err = -ENOMEM;
1961 goto out;
1962 }
1963
1964 if (cfg->fc_flags & RTF_EXPIRES)
1965 rt6_set_expires(rt, jiffies +
1966 clock_t_to_jiffies(cfg->fc_expires));
1967 else
1968 rt6_clean_expires(rt);
1969
1970 if (cfg->fc_protocol == RTPROT_UNSPEC)
1971 cfg->fc_protocol = RTPROT_BOOT;
1972 rt->rt6i_protocol = cfg->fc_protocol;
1973
1974 addr_type = ipv6_addr_type(&cfg->fc_dst);
1975
1976 if (addr_type & IPV6_ADDR_MULTICAST)
1977 rt->dst.input = ip6_mc_input;
1978 else if (cfg->fc_flags & RTF_LOCAL)
1979 rt->dst.input = ip6_input;
1980 else
1981 rt->dst.input = ip6_forward;
1982
1983 rt->dst.output = ip6_output;
1984
1985 if (cfg->fc_encap) {
1986 struct lwtunnel_state *lwtstate;
1987
1988 err = lwtunnel_build_state(cfg->fc_encap_type,
1989 cfg->fc_encap, AF_INET6, cfg,
1990 &lwtstate, extack);
1991 if (err)
1992 goto out;
1993 rt->dst.lwtstate = lwtstate_get(lwtstate);
1994 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1995 rt->dst.lwtstate->orig_output = rt->dst.output;
1996 rt->dst.output = lwtunnel_output;
1997 }
1998 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1999 rt->dst.lwtstate->orig_input = rt->dst.input;
2000 rt->dst.input = lwtunnel_input;
2001 }
2002 }
2003
2004 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2005 rt->rt6i_dst.plen = cfg->fc_dst_len;
2006 if (rt->rt6i_dst.plen == 128)
2007 rt->dst.flags |= DST_HOST;
2008
2009 #ifdef CONFIG_IPV6_SUBTREES
2010 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
2011 rt->rt6i_src.plen = cfg->fc_src_len;
2012 #endif
2013
2014 rt->rt6i_metric = cfg->fc_metric;
2015
2016 /* We cannot add true routes via loopback here, as they would
2017 result in kernel looping; promote them to reject routes instead
2018 */
2019 if ((cfg->fc_flags & RTF_REJECT) ||
2020 (dev && (dev->flags & IFF_LOOPBACK) &&
2021 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2022 !(cfg->fc_flags & RTF_LOCAL))) {
2023 /* hold loopback dev/idev if we haven't done so. */
2024 if (dev != net->loopback_dev) {
2025 if (dev) {
2026 dev_put(dev);
2027 in6_dev_put(idev);
2028 }
2029 dev = net->loopback_dev;
2030 dev_hold(dev);
2031 idev = in6_dev_get(dev);
2032 if (!idev) {
2033 err = -ENODEV;
2034 goto out;
2035 }
2036 }
2037 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
2038 switch (cfg->fc_type) {
2039 case RTN_BLACKHOLE:
2040 rt->dst.error = -EINVAL;
2041 rt->dst.output = dst_discard_out;
2042 rt->dst.input = dst_discard;
2043 break;
2044 case RTN_PROHIBIT:
2045 rt->dst.error = -EACCES;
2046 rt->dst.output = ip6_pkt_prohibit_out;
2047 rt->dst.input = ip6_pkt_prohibit;
2048 break;
2049 case RTN_THROW:
2050 case RTN_UNREACHABLE:
2051 default:
2052 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
2053 : (cfg->fc_type == RTN_UNREACHABLE)
2054 ? -EHOSTUNREACH : -ENETUNREACH;
2055 rt->dst.output = ip6_pkt_discard_out;
2056 rt->dst.input = ip6_pkt_discard;
2057 break;
2058 }
2059 goto install_route;
2060 }
2061
2062 if (cfg->fc_flags & RTF_GATEWAY) {
2063 const struct in6_addr *gw_addr;
2064 int gwa_type;
2065
2066 gw_addr = &cfg->fc_gateway;
2067 gwa_type = ipv6_addr_type(gw_addr);
2068
2069 /* if gw_addr is local we will fail to detect this in case the
2070 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2071 * will return the already-added prefix route via the interface
2072 * the prefix route was assigned to, which might be non-loopback.
2073 */
2074 err = -EINVAL;
2075 if (ipv6_chk_addr_and_flags(net, gw_addr,
2076 gwa_type & IPV6_ADDR_LINKLOCAL ?
2077 dev : NULL, 0, 0)) {
2078 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2079 goto out;
2080 }
2081 rt->rt6i_gateway = *gw_addr;
2082
2083 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
2084 struct rt6_info *grt = NULL;
2085
2086 /* IPv6 strictly inhibits using non-link-local
2087 addresses as nexthop addresses.
2088 Otherwise, a router will not be able to send redirects.
2089 That is usually desirable, but in some (rare!) circumstances
2090 (SIT, PtP, NBMA NOARP links) it is handy to allow
2091 some exceptions. --ANK
2092 We allow IPv4-mapped nexthops to support RFC 4798-style
2093 addressing.
2094 */
2095 if (!(gwa_type & (IPV6_ADDR_UNICAST |
2096 IPV6_ADDR_MAPPED))) {
2097 NL_SET_ERR_MSG(extack,
2098 "Invalid gateway address");
2099 goto out;
2100 }
2101
2102 if (cfg->fc_table) {
2103 grt = ip6_nh_lookup_table(net, cfg, gw_addr);
2104
2105 if (grt) {
2106 if (grt->rt6i_flags & RTF_GATEWAY ||
2107 (dev && dev != grt->dst.dev)) {
2108 ip6_rt_put(grt);
2109 grt = NULL;
2110 }
2111 }
2112 }
2113
2114 if (!grt)
2115 grt = rt6_lookup(net, gw_addr, NULL,
2116 cfg->fc_ifindex, 1);
2117
2118 err = -EHOSTUNREACH;
2119 if (!grt)
2120 goto out;
2121 if (dev) {
2122 if (dev != grt->dst.dev) {
2123 ip6_rt_put(grt);
2124 goto out;
2125 }
2126 } else {
2127 dev = grt->dst.dev;
2128 idev = grt->rt6i_idev;
2129 dev_hold(dev);
2130 in6_dev_hold(grt->rt6i_idev);
2131 }
2132 if (!(grt->rt6i_flags & RTF_GATEWAY))
2133 err = 0;
2134 ip6_rt_put(grt);
2135
2136 if (err)
2137 goto out;
2138 }
2139 err = -EINVAL;
2140 if (!dev) {
2141 NL_SET_ERR_MSG(extack, "Egress device not specified");
2142 goto out;
2143 } else if (dev->flags & IFF_LOOPBACK) {
2144 NL_SET_ERR_MSG(extack,
2145 "Egress device can not be loopback device for this route");
2146 goto out;
2147 }
2148 }
2149
2150 err = -ENODEV;
2151 if (!dev)
2152 goto out;
2153
2154 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2155 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
2156 NL_SET_ERR_MSG(extack, "Invalid source address");
2157 err = -EINVAL;
2158 goto out;
2159 }
2160 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
2161 rt->rt6i_prefsrc.plen = 128;
2162 } else
2163 rt->rt6i_prefsrc.plen = 0;
2164
2165 rt->rt6i_flags = cfg->fc_flags;
2166
2167 install_route:
2168 rt->dst.dev = dev;
2169 rt->rt6i_idev = idev;
2170 rt->rt6i_table = table;
2171
2172 cfg->fc_nlinfo.nl_net = dev_net(dev);
2173
2174 return rt;
2175 out:
2176 if (dev)
2177 dev_put(dev);
2178 if (idev)
2179 in6_dev_put(idev);
2180 if (rt)
2181 dst_release_immediate(&rt->dst);
2182
2183 return ERR_PTR(err);
2184 }
2185
2186 int ip6_route_add(struct fib6_config *cfg,
2187 struct netlink_ext_ack *extack)
2188 {
2189 struct mx6_config mxc = { .mx = NULL, };
2190 struct rt6_info *rt;
2191 int err;
2192
2193 rt = ip6_route_info_create(cfg, extack);
2194 if (IS_ERR(rt)) {
2195 err = PTR_ERR(rt);
2196 rt = NULL;
2197 goto out;
2198 }
2199
2200 err = ip6_convert_metrics(&mxc, cfg);
2201 if (err)
2202 goto out;
2203
2204 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
2205
2206 kfree(mxc.mx);
2207
2208 return err;
2209 out:
2210 if (rt)
2211 dst_release_immediate(&rt->dst);
2212
2213 return err;
2214 }
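/* A minimal sketch (illustrative only, not built) of how an in-kernel
 * caller could use ip6_route_add() to install a static unicast route.
 * The table and metric mirror the defaults used above; the ifindex and
 * destination are assumptions supplied by the caller, who is assumed
 * to hold the RTNL lock.
 */
#if 0
static int example_add_static_route(struct net *net, int ifindex,
				    const struct in6_addr *dst, int plen)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_MAIN,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= ifindex,
		.fc_dst		= *dst,
		.fc_dst_len	= plen,
		.fc_flags	= RTF_UP,
		.fc_protocol	= RTPROT_STATIC,
		.fc_nlinfo.nl_net = net,
	};

	return ip6_route_add(&cfg, NULL);
}
#endif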
2215
2216 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2217 {
2218 int err;
2219 struct fib6_table *table;
2220 struct net *net = dev_net(rt->dst.dev);
2221
2222 if (rt == net->ipv6.ip6_null_entry) {
2223 err = -ENOENT;
2224 goto out;
2225 }
2226
2227 table = rt->rt6i_table;
2228 write_lock_bh(&table->tb6_lock);
2229 err = fib6_del(rt, info);
2230 write_unlock_bh(&table->tb6_lock);
2231
2232 out:
2233 ip6_rt_put(rt);
2234 return err;
2235 }
2236
2237 int ip6_del_rt(struct rt6_info *rt)
2238 {
2239 struct nl_info info = {
2240 .nl_net = dev_net(rt->dst.dev),
2241 };
2242 return __ip6_del_rt(rt, &info);
2243 }
2244
2245 static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2246 {
2247 struct nl_info *info = &cfg->fc_nlinfo;
2248 struct net *net = info->nl_net;
2249 struct sk_buff *skb = NULL;
2250 struct fib6_table *table;
2251 int err = -ENOENT;
2252
2253 if (rt == net->ipv6.ip6_null_entry)
2254 goto out_put;
2255 table = rt->rt6i_table;
2256 write_lock_bh(&table->tb6_lock);
2257
2258 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2259 struct rt6_info *sibling, *next_sibling;
2260
2261 /* prefer to send a single notification with all hops */
2262 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
2263 if (skb) {
2264 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2265
2266 if (rt6_fill_node(net, skb, rt,
2267 NULL, NULL, 0, RTM_DELROUTE,
2268 info->portid, seq, 0) < 0) {
2269 kfree_skb(skb);
2270 skb = NULL;
2271 } else
2272 info->skip_notify = 1;
2273 }
2274
2275 list_for_each_entry_safe(sibling, next_sibling,
2276 &rt->rt6i_siblings,
2277 rt6i_siblings) {
2278 err = fib6_del(sibling, info);
2279 if (err)
2280 goto out_unlock;
2281 }
2282 }
2283
2284 err = fib6_del(rt, info);
2285 out_unlock:
2286 write_unlock_bh(&table->tb6_lock);
2287 out_put:
2288 ip6_rt_put(rt);
2289
2290 if (skb) {
2291 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2292 info->nlh, gfp_any());
2293 }
2294 return err;
2295 }
2296
2297 static int ip6_route_del(struct fib6_config *cfg,
2298 struct netlink_ext_ack *extack)
2299 {
2300 struct fib6_table *table;
2301 struct fib6_node *fn;
2302 struct rt6_info *rt;
2303 int err = -ESRCH;
2304
2305 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2306 if (!table) {
2307 NL_SET_ERR_MSG(extack, "FIB table does not exist");
2308 return err;
2309 }
2310
2311 read_lock_bh(&table->tb6_lock);
2312
2313 fn = fib6_locate(&table->tb6_root,
2314 &cfg->fc_dst, cfg->fc_dst_len,
2315 &cfg->fc_src, cfg->fc_src_len);
2316
2317 if (fn) {
2318 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2319 if ((rt->rt6i_flags & RTF_CACHE) &&
2320 !(cfg->fc_flags & RTF_CACHE))
2321 continue;
2322 if (cfg->fc_ifindex &&
2323 (!rt->dst.dev ||
2324 rt->dst.dev->ifindex != cfg->fc_ifindex))
2325 continue;
2326 if (cfg->fc_flags & RTF_GATEWAY &&
2327 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2328 continue;
2329 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2330 continue;
2331 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2332 continue;
2333 dst_hold(&rt->dst);
2334 read_unlock_bh(&table->tb6_lock);
2335
2336 /* if a gateway was specified, delete only that one hop */
2337 if (cfg->fc_flags & RTF_GATEWAY)
2338 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2339
2340 return __ip6_del_rt_siblings(rt, cfg);
2341 }
2342 }
2343 read_unlock_bh(&table->tb6_lock);
2344
2345 return err;
2346 }
2347
2348 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2349 {
2350 struct netevent_redirect netevent;
2351 struct rt6_info *rt, *nrt = NULL;
2352 struct ndisc_options ndopts;
2353 struct inet6_dev *in6_dev;
2354 struct neighbour *neigh;
2355 struct rd_msg *msg;
2356 int optlen, on_link;
2357 u8 *lladdr;
2358
2359 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2360 optlen -= sizeof(*msg);
2361
2362 if (optlen < 0) {
2363 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2364 return;
2365 }
2366
2367 msg = (struct rd_msg *)icmp6_hdr(skb);
2368
2369 if (ipv6_addr_is_multicast(&msg->dest)) {
2370 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2371 return;
2372 }
2373
2374 on_link = 0;
2375 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2376 on_link = 1;
2377 } else if (ipv6_addr_type(&msg->target) !=
2378 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2379 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2380 return;
2381 }
2382
2383 in6_dev = __in6_dev_get(skb->dev);
2384 if (!in6_dev)
2385 return;
2386 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2387 return;
2388
2389 /* RFC2461 8.1:
2390 * The IP source address of the Redirect MUST be the same as the current
2391 * first-hop router for the specified ICMP Destination Address.
2392 */
2393
2394 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
2395 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2396 return;
2397 }
2398
2399 lladdr = NULL;
2400 if (ndopts.nd_opts_tgt_lladdr) {
2401 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2402 skb->dev);
2403 if (!lladdr) {
2404 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2405 return;
2406 }
2407 }
2408
2409 rt = (struct rt6_info *) dst;
2410 if (rt->rt6i_flags & RTF_REJECT) {
2411 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2412 return;
2413 }
2414
2415 /* Redirect received -> path was valid.
2416 * Redirects are sent only in response to data packets,
2417 * so this nexthop is apparently reachable. --ANK
2418 */
2419 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
2420
2421 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2422 if (!neigh)
2423 return;
2424
2425 /*
2426 * We have finally decided to accept it.
2427 */
2428
2429 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
2430 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2431 NEIGH_UPDATE_F_OVERRIDE|
2432 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2433 NEIGH_UPDATE_F_ISROUTER)),
2434 NDISC_REDIRECT, &ndopts);
2435
2436 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2437 if (!nrt)
2438 goto out;
2439
2440 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2441 if (on_link)
2442 nrt->rt6i_flags &= ~RTF_GATEWAY;
2443
2444 nrt->rt6i_protocol = RTPROT_REDIRECT;
2445 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2446
2447 if (ip6_ins_rt(nrt))
2448 goto out_release;
2449
2450 netevent.old = &rt->dst;
2451 netevent.new = &nrt->dst;
2452 netevent.daddr = &msg->dest;
2453 netevent.neigh = neigh;
2454 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2455
2456 if (rt->rt6i_flags & RTF_CACHE) {
2457 rt = (struct rt6_info *) dst_clone(&rt->dst);
2458 ip6_del_rt(rt);
2459 }
2460
2461 out_release:
2462 /* Release the reference taken in
2463 * ip6_rt_cache_alloc()
2464 */
2465 dst_release(&nrt->dst);
2466
2467 out:
2468 neigh_release(neigh);
2469 }
2470
2471 /*
2472 * Misc support functions
2473 */
2474
2475 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2476 {
2477 BUG_ON(from->dst.from);
2478
2479 rt->rt6i_flags &= ~RTF_EXPIRES;
2480 dst_hold(&from->dst);
2481 rt->dst.from = &from->dst;
2482 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2483 }
2484
2485 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2486 {
2487 rt->dst.input = ort->dst.input;
2488 rt->dst.output = ort->dst.output;
2489 rt->rt6i_dst = ort->rt6i_dst;
2490 rt->dst.error = ort->dst.error;
2491 rt->rt6i_idev = ort->rt6i_idev;
2492 if (rt->rt6i_idev)
2493 in6_dev_hold(rt->rt6i_idev);
2494 rt->dst.lastuse = jiffies;
2495 rt->rt6i_gateway = ort->rt6i_gateway;
2496 rt->rt6i_flags = ort->rt6i_flags;
2497 rt6_set_from(rt, ort);
2498 rt->rt6i_metric = ort->rt6i_metric;
2499 #ifdef CONFIG_IPV6_SUBTREES
2500 rt->rt6i_src = ort->rt6i_src;
2501 #endif
2502 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2503 rt->rt6i_table = ort->rt6i_table;
2504 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2505 }
2506
2507 #ifdef CONFIG_IPV6_ROUTE_INFO
2508 static struct rt6_info *rt6_get_route_info(struct net *net,
2509 const struct in6_addr *prefix, int prefixlen,
2510 const struct in6_addr *gwaddr,
2511 struct net_device *dev)
2512 {
2513 u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO);
2514 struct fib6_node *fn;
2515 struct rt6_info *rt = NULL;
2516 struct fib6_table *table;
2517
2518 table = fib6_get_table(net, tb_id);
2519 if (!table)
2520 return NULL;
2521
2522 read_lock_bh(&table->tb6_lock);
2523 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2524 if (!fn)
2525 goto out;
2526
2527 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2528 if (rt->dst.dev->ifindex != dev->ifindex)
2529 continue;
2530 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2531 continue;
2532 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2533 continue;
2534 dst_hold(&rt->dst);
2535 break;
2536 }
2537 out:
2538 read_unlock_bh(&table->tb6_lock);
2539 return rt;
2540 }
2541
2542 static struct rt6_info *rt6_add_route_info(struct net *net,
2543 const struct in6_addr *prefix, int prefixlen,
2544 const struct in6_addr *gwaddr,
2545 struct net_device *dev,
2546 unsigned int pref)
2547 {
2548 struct fib6_config cfg = {
2549 .fc_metric = IP6_RT_PRIO_USER,
2550 .fc_ifindex = dev->ifindex,
2551 .fc_dst_len = prefixlen,
2552 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2553 RTF_UP | RTF_PREF(pref),
2554 .fc_protocol = RTPROT_RA,
2555 .fc_nlinfo.portid = 0,
2556 .fc_nlinfo.nlh = NULL,
2557 .fc_nlinfo.nl_net = net,
2558 };
2559
2560 cfg.fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO),
2561 cfg.fc_dst = *prefix;
2562 cfg.fc_gateway = *gwaddr;
2563
2564 /* We should treat it as a default route if the prefix length is 0. */
2565 if (!prefixlen)
2566 cfg.fc_flags |= RTF_DEFAULT;
2567
2568 ip6_route_add(&cfg, NULL);
2569
2570 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
2571 }
2572 #endif
2573
2574 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2575 {
2576 u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_MAIN);
2577 struct rt6_info *rt;
2578 struct fib6_table *table;
2579
2580 table = fib6_get_table(dev_net(dev), tb_id);
2581 if (!table)
2582 return NULL;
2583
2584 read_lock_bh(&table->tb6_lock);
2585 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2586 if (dev == rt->dst.dev &&
2587 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2588 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2589 break;
2590 }
2591 if (rt)
2592 dst_hold(&rt->dst);
2593 read_unlock_bh(&table->tb6_lock);
2594 return rt;
2595 }
2596
2597 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2598 struct net_device *dev,
2599 unsigned int pref)
2600 {
2601 struct fib6_config cfg = {
2602 .fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_DFLT),
2603 .fc_metric = IP6_RT_PRIO_USER,
2604 .fc_ifindex = dev->ifindex,
2605 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2606 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2607 .fc_protocol = RTPROT_RA,
2608 .fc_nlinfo.portid = 0,
2609 .fc_nlinfo.nlh = NULL,
2610 .fc_nlinfo.nl_net = dev_net(dev),
2611 };
2612
2613 cfg.fc_gateway = *gwaddr;
2614
2615 if (!ip6_route_add(&cfg, NULL)) {
2616 struct fib6_table *table;
2617
2618 table = fib6_get_table(dev_net(dev), cfg.fc_table);
2619 if (table)
2620 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
2621 }
2622
2623 return rt6_get_dflt_router(gwaddr, dev);
2624 }
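/* These helpers are driven by Router Advertisement processing in
 * ndisc/addrconf; a hedged sketch of the expected flow is
 *
 *	rt = rt6_get_dflt_router(&ra_saddr, dev);
 *	if (!rt && lifetime)
 *		rt = rt6_add_dflt_router(&ra_saddr, dev, pref);
 *	// then refresh the expiry from the RA router lifetime
 *
 * so a default route is created once per advertising router and its
 * expiry tracks subsequent RAs.
 */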
2625
2626 int rt6_addrconf_purge(struct rt6_info *rt, void *arg)
{
2627 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2628 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2))
2629 return -1;
2630 return 0;
2631 }
2632
2633 void rt6_purge_dflt_routers(struct net *net)
2634 {
2635 fib6_clean_all(net, rt6_addrconf_purge, NULL);
2636 }
2637
2638 static void rtmsg_to_fib6_config(struct net *net,
2639 struct in6_rtmsg *rtmsg,
2640 struct fib6_config *cfg)
2641 {
2642 memset(cfg, 0, sizeof(*cfg));
2643
2644 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2645 : RT6_TABLE_MAIN;
2646 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2647 cfg->fc_metric = rtmsg->rtmsg_metric;
2648 cfg->fc_expires = rtmsg->rtmsg_info;
2649 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2650 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2651 cfg->fc_flags = rtmsg->rtmsg_flags;
2652
2653 cfg->fc_nlinfo.nl_net = net;
2654
2655 cfg->fc_dst = rtmsg->rtmsg_dst;
2656 cfg->fc_src = rtmsg->rtmsg_src;
2657 cfg->fc_gateway = rtmsg->rtmsg_gateway;
2658 }
2659
2660 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2661 {
2662 struct fib6_config cfg;
2663 struct in6_rtmsg rtmsg;
2664 int err;
2665
2666 switch (cmd) {
2667 case SIOCADDRT: /* Add a route */
2668 case SIOCDELRT: /* Delete a route */
2669 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2670 return -EPERM;
2671 err = copy_from_user(&rtmsg, arg,
2672 sizeof(struct in6_rtmsg));
2673 if (err)
2674 return -EFAULT;
2675
2676 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2677
2678 rtnl_lock();
2679 switch (cmd) {
2680 case SIOCADDRT:
2681 err = ip6_route_add(&cfg, NULL);
2682 break;
2683 case SIOCDELRT:
2684 err = ip6_route_del(&cfg, NULL);
2685 break;
2686 default:
2687 err = -EINVAL;
2688 }
2689 rtnl_unlock();
2690
2691 return err;
2692 }
2693
2694 return -EINVAL;
2695 }
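/* For reference: this ioctl path is what legacy route(8)-style tools
 * use. A hedged userspace sketch (assumed device name, documentation
 * prefix, error handling elided):
 *
 *	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *	struct in6_rtmsg rtmsg = { 0 };
 *
 *	inet_pton(AF_INET6, "2001:db8::", &rtmsg.rtmsg_dst);
 *	rtmsg.rtmsg_dst_len = 32;
 *	rtmsg.rtmsg_metric  = 1;
 *	rtmsg.rtmsg_flags   = RTF_UP;
 *	rtmsg.rtmsg_ifindex = if_nametoindex("eth0");
 *	ioctl(fd, SIOCADDRT, &rtmsg);	 (dispatched to ipv6_route_ioctl())
 */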
2696
2697 /*
2698 * Drop the packet on the floor
2699 */
2700
2701 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2702 {
2703 int type;
2704 struct dst_entry *dst = skb_dst(skb);
2705 switch (ipstats_mib_noroutes) {
2706 case IPSTATS_MIB_INNOROUTES:
2707 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2708 if (type == IPV6_ADDR_ANY) {
2709 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2710 IPSTATS_MIB_INADDRERRORS);
2711 break;
2712 }
2713 /* FALLTHROUGH */
2714 case IPSTATS_MIB_OUTNOROUTES:
2715 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2716 ipstats_mib_noroutes);
2717 break;
2718 }
2719 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2720 kfree_skb(skb);
2721 return 0;
2722 }
2723
2724 static int ip6_pkt_discard(struct sk_buff *skb)
2725 {
2726 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2727 }
2728
2729 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2730 {
2731 skb->dev = skb_dst(skb)->dev;
2732 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2733 }
2734
2735 static int ip6_pkt_prohibit(struct sk_buff *skb)
2736 {
2737 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2738 }
2739
2740 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2741 {
2742 skb->dev = skb_dst(skb)->dev;
2743 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2744 }
2745
2746 /*
2747 * Allocate a dst for local (unicast / anycast) address.
2748 */
2749
2750 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2751 const struct in6_addr *addr,
2752 bool anycast)
2753 {
2754 u32 tb_id;
2755 struct net *net = dev_net(idev->dev);
2756 struct net_device *dev = idev->dev;
2757 struct rt6_info *rt;
2758
2759 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
2760 if (!rt)
2761 return ERR_PTR(-ENOMEM);
2762
2763 in6_dev_hold(idev);
2764
2765 rt->dst.flags |= DST_HOST;
2766 rt->dst.input = ip6_input;
2767 rt->dst.output = ip6_output;
2768 rt->rt6i_idev = idev;
2769
2770 rt->rt6i_protocol = RTPROT_KERNEL;
2771 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2772 if (anycast)
2773 rt->rt6i_flags |= RTF_ANYCAST;
2774 else
2775 rt->rt6i_flags |= RTF_LOCAL;
2776
2777 rt->rt6i_gateway = *addr;
2778 rt->rt6i_dst.addr = *addr;
2779 rt->rt6i_dst.plen = 128;
2780 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2781 rt->rt6i_table = fib6_get_table(net, tb_id);
2782
2783 return rt;
2784 }
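/* Illustrative only (not built): how addrconf-style code might install
 * the local route returned above. ip6_ins_rt() is the same insertion
 * helper used by rt6_do_redirect(); idev and addr are assumed valid
 * and the usual error handling is elided.
 */
#if 0
static void example_install_local_route(struct inet6_dev *idev,
					const struct in6_addr *addr)
{
	struct rt6_info *rt = addrconf_dst_alloc(idev, addr, false);

	if (!IS_ERR(rt))
		ip6_ins_rt(rt);
}
#endif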
2785
2786 /* remove a deleted IP from prefsrc entries */
2787 struct arg_dev_net_ip {
2788 struct net_device *dev;
2789 struct net *net;
2790 struct in6_addr *addr;
2791 };
2792
2793 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2794 {
2795 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2796 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2797 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2798
2799 if (((void *)rt->dst.dev == dev || !dev) &&
2800 rt != net->ipv6.ip6_null_entry &&
2801 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2802 /* remove prefsrc entry */
2803 rt->rt6i_prefsrc.plen = 0;
2804 }
2805 return 0;
2806 }
2807
2808 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2809 {
2810 struct net *net = dev_net(ifp->idev->dev);
2811 struct arg_dev_net_ip adni = {
2812 .dev = ifp->idev->dev,
2813 .net = net,
2814 .addr = &ifp->addr,
2815 };
2816 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2817 }
2818
2819 #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2820 #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2821
2822 /* Remove routers and update dst entries when a gateway turns into a host. */
2823 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2824 {
2825 struct in6_addr *gateway = (struct in6_addr *)arg;
2826
2827 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2828 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2829 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2830 return -1;
2831 }
2832 return 0;
2833 }
2834
2835 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2836 {
2837 fib6_clean_all(net, fib6_clean_tohost, gateway);
2838 }
2839
2840 struct arg_dev_net {
2841 struct net_device *dev;
2842 struct net *net;
2843 };
2844
2845 /* called with the write lock held for the table containing rt */
2846 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2847 {
2848 const struct arg_dev_net *adn = arg;
2849 const struct net_device *dev = adn->dev;
2850
2851 if ((rt->dst.dev == dev || !dev) &&
2852 rt != adn->net->ipv6.ip6_null_entry &&
2853 (rt->rt6i_nsiblings == 0 ||
2854 (dev && netdev_unregistering(dev)) ||
2855 !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
2856 return -1;
2857
2858 return 0;
2859 }
2860
2861 void rt6_ifdown(struct net *net, struct net_device *dev)
2862 {
2863 struct arg_dev_net adn = {
2864 .dev = dev,
2865 .net = net,
2866 };
2867
2868 fib6_clean_all(net, fib6_ifdown, &adn);
2869 if (dev)
2870 rt6_uncached_list_flush_dev(net, dev);
2871 }
2872
2873 struct rt6_mtu_change_arg {
2874 struct net_device *dev;
2875 unsigned int mtu;
2876 };
2877
2878 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2879 {
2880 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2881 struct inet6_dev *idev;
2882
2883 /* In IPv6 PMTU discovery is not optional,
2884 so locking RTAX_MTU cannot disable it.
2885 We still use this lock to block changes
2886 caused by addrconf/ndisc.
2887 */
2888
2889 idev = __in6_dev_get(arg->dev);
2890 if (!idev)
2891 return 0;
2892
2893 /* When an MTU is raised administratively, there is no way for
2894 PMTU discovery to learn of the increase, so the PMTU must be
2895 updated here. RFC 1981 does not cover administrative MTU
2896 increases, so handling them here is a MUST (e.g. jumbo frames).
2897 */
2898 /*
2899 If the new MTU is less than the route PMTU, the new MTU will be
2900 the lowest MTU in the path; update the route PMTU to reflect
2901 the decrease. If the new MTU is greater than the route PMTU,
2902 and the old MTU was the lowest MTU in the path, update the
2903 route PMTU to reflect the increase. If another node on the
2904 path then has the lowest MTU, its Packet Too Big messages
2905 drive PMTU discovery back down.
2906 */
2907 if (rt->dst.dev == arg->dev &&
2908 dst_metric_raw(&rt->dst, RTAX_MTU) &&
2909 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2910 if (rt->rt6i_flags & RTF_CACHE) {
2911 /* For RTF_CACHE with rt6i_pmtu == 0
2912 * (i.e. a redirected route),
2913 * the metrics of its rt->dst.from have already
2914 * been updated.
2915 */
2916 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2917 rt->rt6i_pmtu = arg->mtu;
2918 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2919 (dst_mtu(&rt->dst) < arg->mtu &&
2920 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2921 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2922 }
2923 }
2924 return 0;
2925 }
2926
2927 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2928 {
2929 struct rt6_mtu_change_arg arg = {
2930 .dev = dev,
2931 .mtu = mtu,
2932 };
2933
2934 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2935 }
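/* Worked example for rt6_mtu_change_route(), assuming a device whose
 * MTU drops from 1500 to 1280 (numbers are illustrative):
 *  - a route with MTU metric 1500 (>= new MTU) is clamped to 1280;
 *  - a route with MTU metric 1400 stays at 1400, since something else
 *    on the path is already the bottleneck (1400 != the old mtu6);
 *  - raising the MTU back to 1500 updates only routes whose MTU
 *    equalled the old idev->cnf.mtu6, i.e. those limited by this link.
 */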
2936
2937 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2938 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2939 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
2940 [RTA_OIF] = { .type = NLA_U32 },
2941 [RTA_IIF] = { .type = NLA_U32 },
2942 [RTA_PRIORITY] = { .type = NLA_U32 },
2943 [RTA_METRICS] = { .type = NLA_NESTED },
2944 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
2945 [RTA_PREF] = { .type = NLA_U8 },
2946 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
2947 [RTA_ENCAP] = { .type = NLA_NESTED },
2948 [RTA_EXPIRES] = { .type = NLA_U32 },
2949 [RTA_UID] = { .type = NLA_U32 },
2950 [RTA_MARK] = { .type = NLA_U32 },
2951 [RTA_TABLE] = { .type = NLA_U32 },
2952 };
2953
2954 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2955 struct fib6_config *cfg,
2956 struct netlink_ext_ack *extack)
2957 {
2958 struct rtmsg *rtm;
2959 struct nlattr *tb[RTA_MAX+1];
2960 unsigned int pref;
2961 int err;
2962
2963 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
2964 NULL);
2965 if (err < 0)
2966 goto errout;
2967
2968 err = -EINVAL;
2969 rtm = nlmsg_data(nlh);
2970 memset(cfg, 0, sizeof(*cfg));
2971
2972 cfg->fc_table = rtm->rtm_table;
2973 cfg->fc_dst_len = rtm->rtm_dst_len;
2974 cfg->fc_src_len = rtm->rtm_src_len;
2975 cfg->fc_flags = RTF_UP;
2976 cfg->fc_protocol = rtm->rtm_protocol;
2977 cfg->fc_type = rtm->rtm_type;
2978
2979 if (rtm->rtm_type == RTN_UNREACHABLE ||
2980 rtm->rtm_type == RTN_BLACKHOLE ||
2981 rtm->rtm_type == RTN_PROHIBIT ||
2982 rtm->rtm_type == RTN_THROW)
2983 cfg->fc_flags |= RTF_REJECT;
2984
2985 if (rtm->rtm_type == RTN_LOCAL)
2986 cfg->fc_flags |= RTF_LOCAL;
2987
2988 if (rtm->rtm_flags & RTM_F_CLONED)
2989 cfg->fc_flags |= RTF_CACHE;
2990
2991 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2992 cfg->fc_nlinfo.nlh = nlh;
2993 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2994
2995 if (tb[RTA_GATEWAY]) {
2996 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2997 cfg->fc_flags |= RTF_GATEWAY;
2998 }
2999 if (tb[RTA_VIA]) {
3000 NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
3001 goto errout;
3002 }
3003
3004 if (tb[RTA_DST]) {
3005 int plen = (rtm->rtm_dst_len + 7) >> 3;
3006
3007 if (nla_len(tb[RTA_DST]) < plen)
3008 goto errout;
3009
3010 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
3011 }
3012
3013 if (tb[RTA_SRC]) {
3014 int plen = (rtm->rtm_src_len + 7) >> 3;
3015
3016 if (nla_len(tb[RTA_SRC]) < plen)
3017 goto errout;
3018
3019 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
3020 }
3021
3022 if (tb[RTA_PREFSRC])
3023 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
3024
3025 if (tb[RTA_OIF])
3026 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
3027
3028 if (tb[RTA_PRIORITY])
3029 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
3030
3031 if (tb[RTA_METRICS]) {
3032 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
3033 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
3034 }
3035
3036 if (tb[RTA_TABLE])
3037 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
3038
3039 if (tb[RTA_MULTIPATH]) {
3040 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
3041 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
3042
3043 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
3044 cfg->fc_mp_len, extack);
3045 if (err < 0)
3046 goto errout;
3047 }
3048
3049 if (tb[RTA_PREF]) {
3050 pref = nla_get_u8(tb[RTA_PREF]);
3051 if (pref != ICMPV6_ROUTER_PREF_LOW &&
3052 pref != ICMPV6_ROUTER_PREF_HIGH)
3053 pref = ICMPV6_ROUTER_PREF_MEDIUM;
3054 cfg->fc_flags |= RTF_PREF(pref);
3055 }
3056
3057 if (tb[RTA_ENCAP])
3058 cfg->fc_encap = tb[RTA_ENCAP];
3059
3060 if (tb[RTA_ENCAP_TYPE]) {
3061 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
3062
3063 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
3064 if (err < 0)
3065 goto errout;
3066 }
3067
3068 if (tb[RTA_EXPIRES]) {
3069 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
3070
3071 if (addrconf_finite_timeout(timeout)) {
3072 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
3073 cfg->fc_flags |= RTF_EXPIRES;
3074 }
3075 }
3076
3077 err = 0;
3078 errout:
3079 return err;
3080 }
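/* A hedged sketch of the RTM_NEWROUTE message parsed above, roughly
 * what iproute2 emits for "ip -6 route add 2001:db8::/48 via fe80::1
 * dev eth0" (attribute order and exact flags may differ):
 *
 *	struct rtmsg { .rtm_family = AF_INET6, .rtm_dst_len = 48, ... }
 *	RTA_DST      2001:db8::	-> cfg->fc_dst / fc_dst_len
 *	RTA_GATEWAY  fe80::1	-> cfg->fc_gateway, sets RTF_GATEWAY
 *	RTA_OIF      <ifindex>	-> cfg->fc_ifindex
 */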
3081
3082 struct rt6_nh {
3083 struct rt6_info *rt6_info;
3084 struct fib6_config r_cfg;
3085 struct mx6_config mxc;
3086 struct list_head next;
3087 };
3088
3089 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3090 {
3091 struct rt6_nh *nh;
3092
3093 list_for_each_entry(nh, rt6_nh_list, next) {
3094 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
3095 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
3096 nh->r_cfg.fc_ifindex);
3097 }
3098 }
3099
3100 static int ip6_route_info_append(struct list_head *rt6_nh_list,
3101 struct rt6_info *rt, struct fib6_config *r_cfg)
3102 {
3103 struct rt6_nh *nh;
3104 int err = -EEXIST;
3105
3106 list_for_each_entry(nh, rt6_nh_list, next) {
3107 /* check if rt6_info already exists */
3108 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
3109 return err;
3110 }
3111
3112 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
3113 if (!nh)
3114 return -ENOMEM;
3115 nh->rt6_info = rt;
3116 err = ip6_convert_metrics(&nh->mxc, r_cfg);
3117 if (err) {
3118 kfree(nh);
3119 return err;
3120 }
3121 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3122 list_add_tail(&nh->next, rt6_nh_list);
3123
3124 return 0;
3125 }
3126
3127 static void ip6_route_mpath_notify(struct rt6_info *rt,
3128 struct rt6_info *rt_last,
3129 struct nl_info *info,
3130 __u16 nlflags)
3131 {
3132 /* if this is an APPEND route, then rt points to the first route
3133 * inserted and rt_last points to the last route inserted. Userspace
3134 * wants a consistent dump of the route which starts at the first
3135 * nexthop. Since sibling routes are always added at the end of
3136 * the list, find the first sibling of the last route appended.
3137 */
3138 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
3139 rt = list_first_entry(&rt_last->rt6i_siblings,
3140 struct rt6_info,
3141 rt6i_siblings);
3142 }
3143
3144 if (rt)
3145 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
3146 }
3147
3148 static int ip6_route_multipath_add(struct fib6_config *cfg,
3149 struct netlink_ext_ack *extack)
3150 {
3151 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
3152 struct nl_info *info = &cfg->fc_nlinfo;
3153 struct fib6_config r_cfg;
3154 struct rtnexthop *rtnh;
3155 struct rt6_info *rt;
3156 struct rt6_nh *err_nh;
3157 struct rt6_nh *nh, *nh_safe;
3158 __u16 nlflags;
3159 int remaining;
3160 int attrlen;
3161 int err = 1;
3162 int nhn = 0;
3163 int replace = (cfg->fc_nlinfo.nlh &&
3164 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3165 LIST_HEAD(rt6_nh_list);
3166
3167 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
3168 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
3169 nlflags |= NLM_F_APPEND;
3170
3171 remaining = cfg->fc_mp_len;
3172 rtnh = (struct rtnexthop *)cfg->fc_mp;
3173
3174 /* Parse a Multipath Entry and build a list (rt6_nh_list) with
3175 * one rt6_info struct per nexthop
3176 */
3177 while (rtnh_ok(rtnh, remaining)) {
3178 memcpy(&r_cfg, cfg, sizeof(*cfg));
3179 if (rtnh->rtnh_ifindex)
3180 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3181
3182 attrlen = rtnh_attrlen(rtnh);
3183 if (attrlen > 0) {
3184 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3185
3186 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3187 if (nla) {
3188 r_cfg.fc_gateway = nla_get_in6_addr(nla);
3189 r_cfg.fc_flags |= RTF_GATEWAY;
3190 }
3191 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3192 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3193 if (nla)
3194 r_cfg.fc_encap_type = nla_get_u16(nla);
3195 }
3196
3197 rt = ip6_route_info_create(&r_cfg, extack);
3198 if (IS_ERR(rt)) {
3199 err = PTR_ERR(rt);
3200 rt = NULL;
3201 goto cleanup;
3202 }
3203
3204 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
3205 if (err) {
3206 dst_release_immediate(&rt->dst);
3207 goto cleanup;
3208 }
3209
3210 rtnh = rtnh_next(rtnh, &remaining);
3211 }
3212
3213 /* For add and replace, send one notification with all nexthops.
3214 * Skip the notification in fib6_add_rt2node and send one with
3215 * the full route when done.
3216 */
3217 info->skip_notify = 1;
3218
3219 err_nh = NULL;
3220 list_for_each_entry(nh, &rt6_nh_list, next) {
3221 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
3222
3223 if (!err) {
3224 /* save reference to last route successfully inserted */
3225 rt_last = nh->rt6_info;
3226
3227 /* save reference to first route for notification */
3228 if (!rt_notif)
3229 rt_notif = nh->rt6_info;
3230 }
3231
3232 /* nh->rt6_info has been used or freed at this point; reset it to NULL */
3233 nh->rt6_info = NULL;
3234 if (err) {
3235 if (replace && nhn)
3236 ip6_print_replace_route_err(&rt6_nh_list);
3237 err_nh = nh;
3238 goto add_errout;
3239 }
3240
3241 /* Because each route is added as if it were a single route, we
3242 * remove these flags after the first nexthop: if there is a
3243 * collision, we have already failed to add the first nexthop,
3244 * since fib6_add_rt2node() rejected it; when replacing, the old
3245 * nexthops have been replaced by the first new one, and the rest
3246 * should be appended to it.
3247 */
3248 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3249 NLM_F_REPLACE);
3250 nhn++;
3251 }
3252
3253 /* success ... tell user about new route */
3254 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3255 goto cleanup;
3256
3257 add_errout:
3258 /* send notification for routes that were added so that
3259 * the delete notifications sent by ip6_route_del are
3260 * coherent
3261 */
3262 if (rt_notif)
3263 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3264
3265 /* Delete routes that were already added */
3266 list_for_each_entry(nh, &rt6_nh_list, next) {
3267 if (err_nh == nh)
3268 break;
3269 ip6_route_del(&nh->r_cfg, extack);
3270 }
3271
3272 cleanup:
3273 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
3274 if (nh->rt6_info)
3275 dst_release_immediate(&nh->rt6_info->dst);
3276 kfree(nh->mxc.mx);
3277 list_del(&nh->next);
3278 kfree(nh);
3279 }
3280
3281 return err;
3282 }
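/* The nexthop list handled above corresponds to userspace commands
 * such as (assumed devices and documentation addresses):
 *	ip -6 route add 2001:db8::/64 \
 *		nexthop via fe80::1 dev eth0 \
 *		nexthop via fe80::2 dev eth1
 * which arrive as a single RTM_NEWROUTE message carrying an
 * RTA_MULTIPATH attribute with one rtnexthop (plus RTA_GATEWAY)
 * per hop.
 */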
3283
3284 static int ip6_route_multipath_del(struct fib6_config *cfg,
3285 struct netlink_ext_ack *extack)
3286 {
3287 struct fib6_config r_cfg;
3288 struct rtnexthop *rtnh;
3289 int remaining;
3290 int attrlen;
3291 int err = 1, last_err = 0;
3292
3293 remaining = cfg->fc_mp_len;
3294 rtnh = (struct rtnexthop *)cfg->fc_mp;
3295
3296 /* Parse a Multipath Entry */
3297 while (rtnh_ok(rtnh, remaining)) {
3298 memcpy(&r_cfg, cfg, sizeof(*cfg));
3299 if (rtnh->rtnh_ifindex)
3300 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3301
3302 attrlen = rtnh_attrlen(rtnh);
3303 if (attrlen > 0) {
3304 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3305
3306 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3307 if (nla) {
3308 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3309 r_cfg.fc_flags |= RTF_GATEWAY;
3310 }
3311 }
3312 err = ip6_route_del(&r_cfg, extack);
3313 if (err)
3314 last_err = err;
3315
3316 rtnh = rtnh_next(rtnh, &remaining);
3317 }
3318
3319 return last_err;
3320 }
3321
3322 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3323 struct netlink_ext_ack *extack)
3324 {
3325 struct fib6_config cfg;
3326 int err;
3327
3328 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
3329 if (err < 0)
3330 return err;
3331
3332 if (cfg.fc_mp)
3333 return ip6_route_multipath_del(&cfg, extack);
3334 else {
3335 cfg.fc_delete_all_nh = 1;
3336 return ip6_route_del(&cfg, extack);
3337 }
3338 }
3339
3340 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3341 struct netlink_ext_ack *extack)
3342 {
3343 struct fib6_config cfg;
3344 int err;
3345
3346 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
3347 if (err < 0)
3348 return err;
3349
3350 if (cfg.fc_mp)
3351 return ip6_route_multipath_add(&cfg, extack);
3352 else
3353 return ip6_route_add(&cfg, extack);
3354 }
3355
3356 static size_t rt6_nlmsg_size(struct rt6_info *rt)
3357 {
3358 int nexthop_len = 0;
3359
3360 if (rt->rt6i_nsiblings) {
3361 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
3362 + NLA_ALIGN(sizeof(struct rtnexthop))
3363 + nla_total_size(16) /* RTA_GATEWAY */
3364 + lwtunnel_get_encap_size(rt->dst.lwtstate);
3365
3366 nexthop_len *= rt->rt6i_nsiblings;
3367 }
3368
3369 return NLMSG_ALIGN(sizeof(struct rtmsg))
3370 + nla_total_size(16) /* RTA_SRC */
3371 + nla_total_size(16) /* RTA_DST */
3372 + nla_total_size(16) /* RTA_GATEWAY */
3373 + nla_total_size(16) /* RTA_PREFSRC */
3374 + nla_total_size(4) /* RTA_TABLE */
3375 + nla_total_size(4) /* RTA_IIF */
3376 + nla_total_size(4) /* RTA_OIF */
3377 + nla_total_size(4) /* RTA_PRIORITY */
3378 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3379 + nla_total_size(sizeof(struct rta_cacheinfo))
3380 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3381 + nla_total_size(1) /* RTA_PREF */
3382 + lwtunnel_get_encap_size(rt->dst.lwtstate)
3383 + nexthop_len;
3384 }
3385
3386 static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
3387 unsigned int *flags, bool skip_oif)
3388 {
3389 if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
3390 *flags |= RTNH_F_LINKDOWN;
3391 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3392 *flags |= RTNH_F_DEAD;
3393 }
3394
3395 if (rt->rt6i_flags & RTF_GATEWAY) {
3396 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3397 goto nla_put_failure;
3398 }
3399
3400 if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
3401 *flags |= RTNH_F_OFFLOAD;
3402
3403 /* not needed for multipath encoding because it has a rtnexthop struct */
3404 if (!skip_oif && rt->dst.dev &&
3405 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3406 goto nla_put_failure;
3407
3408 if (rt->dst.lwtstate &&
3409 lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3410 goto nla_put_failure;
3411
3412 return 0;
3413
3414 nla_put_failure:
3415 return -EMSGSIZE;
3416 }
3417
3418 /* add multipath next hop */
3419 static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
3420 {
3421 struct rtnexthop *rtnh;
3422 unsigned int flags = 0;
3423
3424 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
3425 if (!rtnh)
3426 goto nla_put_failure;
3427
3428 rtnh->rtnh_hops = 0;
3429 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
3430
3431 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
3432 goto nla_put_failure;
3433
3434 rtnh->rtnh_flags = flags;
3435
3436 /* length of rtnetlink header + attributes */
3437 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
3438
3439 return 0;
3440
3441 nla_put_failure:
3442 return -EMSGSIZE;
3443 }
3444
3445 static int rt6_fill_node(struct net *net,
3446 struct sk_buff *skb, struct rt6_info *rt,
3447 struct in6_addr *dst, struct in6_addr *src,
3448 int iif, int type, u32 portid, u32 seq,
3449 unsigned int flags)
3450 {
3451 u32 metrics[RTAX_MAX];
3452 struct rtmsg *rtm;
3453 struct nlmsghdr *nlh;
3454 long expires;
3455 u32 table;
3456
3457 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3458 if (!nlh)
3459 return -EMSGSIZE;
3460
3461 rtm = nlmsg_data(nlh);
3462 rtm->rtm_family = AF_INET6;
3463 rtm->rtm_dst_len = rt->rt6i_dst.plen;
3464 rtm->rtm_src_len = rt->rt6i_src.plen;
3465 rtm->rtm_tos = 0;
3466 if (rt->rt6i_table)
3467 table = rt->rt6i_table->tb6_id;
3468 else
3469 table = RT6_TABLE_UNSPEC;
3470 rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
3471 if (nla_put_u32(skb, RTA_TABLE, table))
3472 goto nla_put_failure;
3473 if (rt->rt6i_flags & RTF_REJECT) {
3474 switch (rt->dst.error) {
3475 case -EINVAL:
3476 rtm->rtm_type = RTN_BLACKHOLE;
3477 break;
3478 case -EACCES:
3479 rtm->rtm_type = RTN_PROHIBIT;
3480 break;
3481 case -EAGAIN:
3482 rtm->rtm_type = RTN_THROW;
3483 break;
3484 default:
3485 rtm->rtm_type = RTN_UNREACHABLE;
3486 break;
3487 }
3488 } else if (rt->rt6i_flags & RTF_LOCAL)
3490 rtm->rtm_type = RTN_LOCAL;
3491 else if (rt->rt6i_flags & RTF_ANYCAST)
3492 rtm->rtm_type = RTN_ANYCAST;
3493 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3494 rtm->rtm_type = RTN_LOCAL;
3495 else
3496 rtm->rtm_type = RTN_UNICAST;
3497 rtm->rtm_flags = 0;
3498 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3499 rtm->rtm_protocol = rt->rt6i_protocol;
3500
3501 if (rt->rt6i_flags & RTF_CACHE)
3502 rtm->rtm_flags |= RTM_F_CLONED;
3503
3504 if (dst) {
3505 if (nla_put_in6_addr(skb, RTA_DST, dst))
3506 goto nla_put_failure;
3507 rtm->rtm_dst_len = 128;
3508 } else if (rtm->rtm_dst_len)
3509 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3510 goto nla_put_failure;
3511 #ifdef CONFIG_IPV6_SUBTREES
3512 if (src) {
3513 if (nla_put_in6_addr(skb, RTA_SRC, src))
3514 goto nla_put_failure;
3515 rtm->rtm_src_len = 128;
3516 } else if (rtm->rtm_src_len &&
3517 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3518 goto nla_put_failure;
3519 #endif
3520 if (iif) {
3521 #ifdef CONFIG_IPV6_MROUTE
3522 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3523 int err = ip6mr_get_route(net, skb, rtm, portid);
3524
3525 if (err == 0)
3526 return 0;
3527 if (err < 0)
3528 goto nla_put_failure;
3529 } else
3530 #endif
3531 if (nla_put_u32(skb, RTA_IIF, iif))
3532 goto nla_put_failure;
3533 } else if (dst) {
3534 struct in6_addr saddr_buf;
3535 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3536 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3537 goto nla_put_failure;
3538 }
3539
3540 if (rt->rt6i_prefsrc.plen) {
3541 struct in6_addr saddr_buf;
3542 saddr_buf = rt->rt6i_prefsrc.addr;
3543 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3544 goto nla_put_failure;
3545 }
3546
3547 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3548 if (rt->rt6i_pmtu)
3549 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3550 if (rtnetlink_put_metrics(skb, metrics) < 0)
3551 goto nla_put_failure;
3552
3553 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3554 goto nla_put_failure;
3555
3556 /* For multipath routes, walk the siblings list and add
3557 * each as a nexthop within RTA_MULTIPATH.
3558 */
3559 if (rt->rt6i_nsiblings) {
3560 struct rt6_info *sibling, *next_sibling;
3561 struct nlattr *mp;
3562
3563 mp = nla_nest_start(skb, RTA_MULTIPATH);
3564 if (!mp)
3565 goto nla_put_failure;
3566
3567 if (rt6_add_nexthop(skb, rt) < 0)
3568 goto nla_put_failure;
3569
3570 list_for_each_entry_safe(sibling, next_sibling,
3571 &rt->rt6i_siblings, rt6i_siblings) {
3572 if (rt6_add_nexthop(skb, sibling) < 0)
3573 goto nla_put_failure;
3574 }
3575
3576 nla_nest_end(skb, mp);
3577 } else {
3578 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
3579 goto nla_put_failure;
3580 }
3581
3582 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3583
3584 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3585 goto nla_put_failure;
3586
3587 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3588 goto nla_put_failure;
3589
3591 nlmsg_end(skb, nlh);
3592 return 0;
3593
3594 nla_put_failure:
3595 nlmsg_cancel(skb, nlh);
3596 return -EMSGSIZE;
3597 }
3598
3599 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3600 {
3601 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3602 struct net *net = arg->net;
3603
3604 if (rt == net->ipv6.ip6_null_entry)
3605 return 0;
3606
3607 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3608 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3609
3610 /* user wants prefix routes only */
3611 if (rtm->rtm_flags & RTM_F_PREFIX &&
3612 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
3613 /* success since this is not a prefix route */
3614 return 1;
3615 }
3616 }
3617
3618 return rt6_fill_node(net,
3619 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3620 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3621 NLM_F_MULTI);
3622 }
3623
3624 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3625 struct netlink_ext_ack *extack)
3626 {
3627 struct net *net = sock_net(in_skb->sk);
3628 struct nlattr *tb[RTA_MAX+1];
3629 int err, iif = 0, oif = 0;
3630 struct dst_entry *dst;
3631 struct rt6_info *rt;
3632 struct sk_buff *skb;
3633 struct rtmsg *rtm;
3634 struct flowi6 fl6;
3635 bool fibmatch;
3636
3637 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
3638 extack);
3639 if (err < 0)
3640 goto errout;
3641
3642 err = -EINVAL;
3643 memset(&fl6, 0, sizeof(fl6));
3644 rtm = nlmsg_data(nlh);
3645 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
3646 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
3647
3648 if (tb[RTA_SRC]) {
3649 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3650 goto errout;
3651
3652 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3653 }
3654
3655 if (tb[RTA_DST]) {
3656 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3657 goto errout;
3658
3659 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3660 }
3661
3662 if (tb[RTA_IIF])
3663 iif = nla_get_u32(tb[RTA_IIF]);
3664
3665 if (tb[RTA_OIF])
3666 oif = nla_get_u32(tb[RTA_OIF]);
3667
3668 if (tb[RTA_MARK])
3669 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3670
3671 if (tb[RTA_UID])
3672 fl6.flowi6_uid = make_kuid(current_user_ns(),
3673 nla_get_u32(tb[RTA_UID]));
3674 else
3675 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
3676
3677 if (iif) {
3678 struct net_device *dev;
3679 int flags = 0;
3680
3681 rcu_read_lock();
3682
3683 dev = dev_get_by_index_rcu(net, iif);
3684 if (!dev) {
3685 rcu_read_unlock();
3686 err = -ENODEV;
3687 goto errout;
3688 }
3689
3690 fl6.flowi6_iif = iif;
3691
3692 if (!ipv6_addr_any(&fl6.saddr))
3693 flags |= RT6_LOOKUP_F_HAS_SADDR;
3694
3695 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
3696
3697 rcu_read_unlock();
3698 } else {
3699 fl6.flowi6_oif = oif;
3700
3701 dst = ip6_route_output(net, NULL, &fl6);
3702 }
3703
3705 rt = container_of(dst, struct rt6_info, dst);
3706 if (rt->dst.error) {
3707 err = rt->dst.error;
3708 ip6_rt_put(rt);
3709 goto errout;
3710 }
3711
3712 if (rt == net->ipv6.ip6_null_entry) {
3713 err = rt->dst.error;
3714 ip6_rt_put(rt);
3715 goto errout;
3716 }
3717
3718 if (fibmatch && rt->dst.from) {
3719 struct rt6_info *ort = container_of(rt->dst.from,
3720 struct rt6_info, dst);
3721
3722 dst_hold(&ort->dst);
3723 ip6_rt_put(rt);
3724 rt = ort;
3725 }
3726
3727 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3728 if (!skb) {
3729 ip6_rt_put(rt);
3730 err = -ENOBUFS;
3731 goto errout;
3732 }
3733
3734 skb_dst_set(skb, &rt->dst);
3735 if (fibmatch)
3736 err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
3737 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3738 nlh->nlmsg_seq, 0);
3739 else
3740 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3741 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3742 nlh->nlmsg_seq, 0);
3743 if (err < 0) {
3744 kfree_skb(skb);
3745 goto errout;
3746 }
3747
3748 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3749 errout:
3750 return err;
3751 }
3752
3753 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3754 unsigned int nlm_flags)
3755 {
3756 struct sk_buff *skb;
3757 struct net *net = info->nl_net;
3758 u32 seq;
3759 int err;
3760
3761 err = -ENOBUFS;
3762 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3763
3764 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3765 if (!skb)
3766 goto errout;
3767
3768 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3769 event, info->portid, seq, nlm_flags);
3770 if (err < 0) {
3771 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3772 WARN_ON(err == -EMSGSIZE);
3773 kfree_skb(skb);
3774 goto errout;
3775 }
3776 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3777 info->nlh, gfp_any());
3778 return;
3779 errout:
3780 if (err < 0)
3781 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3782 }
3783
3784 static int ip6_route_dev_notify(struct notifier_block *this,
3785 unsigned long event, void *ptr)
3786 {
3787 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3788 struct net *net = dev_net(dev);
3789
3790 if (!(dev->flags & IFF_LOOPBACK))
3791 return NOTIFY_OK;
3792
3793 if (event == NETDEV_REGISTER) {
3794 net->ipv6.ip6_null_entry->dst.dev = dev;
3795 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3796 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3797 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3798 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3799 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3800 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3801 #endif
3802 } else if (event == NETDEV_UNREGISTER &&
3803 dev->reg_state != NETREG_UNREGISTERED) {
3804 /* NETDEV_UNREGISTER could be fired multiple times by
3805 * netdev_wait_allrefs(). Make sure we only call this once.
3806 */
3807 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
3808 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3809 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
3810 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
3811 #endif
3812 }
3813
3814 return NOTIFY_OK;
3815 }
3816
3817 /*
3818 * /proc
3819 */
3820
3821 #ifdef CONFIG_PROC_FS
3822
3823 static const struct file_operations ipv6_route_proc_fops = {
3824 .owner = THIS_MODULE,
3825 .open = ipv6_route_open,
3826 .read = seq_read,
3827 .llseek = seq_lseek,
3828 .release = seq_release_net,
3829 };
3830
3831 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3832 {
3833 struct net *net = (struct net *)seq->private;
3834 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3835 net->ipv6.rt6_stats->fib_nodes,
3836 net->ipv6.rt6_stats->fib_route_nodes,
3837 net->ipv6.rt6_stats->fib_rt_alloc,
3838 net->ipv6.rt6_stats->fib_rt_entries,
3839 net->ipv6.rt6_stats->fib_rt_cache,
3840 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3841 net->ipv6.rt6_stats->fib_discarded_routes);
3842
3843 return 0;
3844 }
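/* Example /proc/net/rt6_stats output (illustrative values), with the
 * seven hex fields in the order printed above:
 *	0012 0049 0000 02ff 0002 0014 0000
 * Note the sixth field is the current dst entry count taken from
 * dst_entries_get_slow(), not a cumulative counter.
 */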
3845
3846 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3847 {
3848 return single_open_net(inode, file, rt6_stats_seq_show);
3849 }
3850
3851 static const struct file_operations rt6_stats_seq_fops = {
3852 .owner = THIS_MODULE,
3853 .open = rt6_stats_seq_open,
3854 .read = seq_read,
3855 .llseek = seq_lseek,
3856 .release = single_release_net,
3857 };
3858 #endif /* CONFIG_PROC_FS */
3859
3860 #ifdef CONFIG_SYSCTL
3861
3862 static
3863 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3864 void __user *buffer, size_t *lenp, loff_t *ppos)
3865 {
3866 struct net *net;
3867 int delay;
3868 if (!write)
3869 return -EINVAL;
3870
3871 net = (struct net *)ctl->extra1;
3872 delay = net->ipv6.sysctl.flush_delay;
3873 proc_dointvec(ctl, write, buffer, lenp, ppos);
3874 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3875 return 0;
3876 }
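/* Exercised from userspace via, e.g.:
 *	echo 0 > /proc/sys/net/ipv6/route/flush
 * Any write triggers fib6_run_gc(); note the handler snapshots
 * flush_delay before proc_dointvec() stores the new value, so the
 * GC parameters come from the previously stored delay.
 */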
3877
3878 struct ctl_table ipv6_route_table_template[] = {
3879 {
3880 .procname = "flush",
3881 .data = &init_net.ipv6.sysctl.flush_delay,
3882 .maxlen = sizeof(int),
3883 .mode = 0200,
3884 .proc_handler = ipv6_sysctl_rtcache_flush
3885 },
3886 {
3887 .procname = "gc_thresh",
3888 .data = &ip6_dst_ops_template.gc_thresh,
3889 .maxlen = sizeof(int),
3890 .mode = 0644,
3891 .proc_handler = proc_dointvec,
3892 },
3893 {
3894 .procname = "max_size",
3895 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
3896 .maxlen = sizeof(int),
3897 .mode = 0644,
3898 .proc_handler = proc_dointvec,
3899 },
3900 {
3901 .procname = "gc_min_interval",
3902 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3903 .maxlen = sizeof(int),
3904 .mode = 0644,
3905 .proc_handler = proc_dointvec_jiffies,
3906 },
3907 {
3908 .procname = "gc_timeout",
3909 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3910 .maxlen = sizeof(int),
3911 .mode = 0644,
3912 .proc_handler = proc_dointvec_jiffies,
3913 },
3914 {
3915 .procname = "gc_interval",
3916 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3917 .maxlen = sizeof(int),
3918 .mode = 0644,
3919 .proc_handler = proc_dointvec_jiffies,
3920 },
3921 {
3922 .procname = "gc_elasticity",
3923 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3924 .maxlen = sizeof(int),
3925 .mode = 0644,
3926 .proc_handler = proc_dointvec,
3927 },
3928 {
3929 .procname = "mtu_expires",
3930 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3931 .maxlen = sizeof(int),
3932 .mode = 0644,
3933 .proc_handler = proc_dointvec_jiffies,
3934 },
3935 {
3936 .procname = "min_adv_mss",
3937 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3938 .maxlen = sizeof(int),
3939 .mode = 0644,
3940 .proc_handler = proc_dointvec,
3941 },
3942 {
3943 .procname = "gc_min_interval_ms",
3944 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3945 .maxlen = sizeof(int),
3946 .mode = 0644,
3947 .proc_handler = proc_dointvec_ms_jiffies,
3948 },
3949 { }
3950 };
3951
3952 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3953 {
3954 struct ctl_table *table;
3955
3956 table = kmemdup(ipv6_route_table_template,
3957 sizeof(ipv6_route_table_template),
3958 GFP_KERNEL);
3959
3960 if (table) {
3961 table[0].data = &net->ipv6.sysctl.flush_delay;
3962 table[0].extra1 = net;
3963 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3964 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3965 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3966 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3967 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3968 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3969 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3970 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3971 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3972
3973 /* Don't export sysctls to unprivileged users */
3974 if (net->user_ns != &init_user_ns)
3975 table[0].procname = NULL;
3976 }
3977
3978 return table;
3979 }
3980 #endif
3981
3982 static int __net_init ip6_route_net_init(struct net *net)
3983 {
3984 int ret = -ENOMEM;
3985
3986 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3987 sizeof(net->ipv6.ip6_dst_ops));
3988
3989 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3990 goto out_ip6_dst_ops;
3991
3992 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3993 sizeof(*net->ipv6.ip6_null_entry),
3994 GFP_KERNEL);
3995 if (!net->ipv6.ip6_null_entry)
3996 goto out_ip6_dst_entries;
3997 net->ipv6.ip6_null_entry->dst.path =
3998 (struct dst_entry *)net->ipv6.ip6_null_entry;
3999 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
4000 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
4001 ip6_template_metrics, true);
4002
4003 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4004 net->ipv6.fib6_has_custom_rules = false;
4005 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
4006 sizeof(*net->ipv6.ip6_prohibit_entry),
4007 GFP_KERNEL);
4008 if (!net->ipv6.ip6_prohibit_entry)
4009 goto out_ip6_null_entry;
4010 net->ipv6.ip6_prohibit_entry->dst.path =
4011 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
4012 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
4013 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
4014 ip6_template_metrics, true);
4015
4016 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
4017 sizeof(*net->ipv6.ip6_blk_hole_entry),
4018 GFP_KERNEL);
4019 if (!net->ipv6.ip6_blk_hole_entry)
4020 goto out_ip6_prohibit_entry;
4021 net->ipv6.ip6_blk_hole_entry->dst.path =
4022 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
4023 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
4024 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
4025 ip6_template_metrics, true);
4026 #endif
4027
4028 net->ipv6.sysctl.flush_delay = 0;
4029 net->ipv6.sysctl.ip6_rt_max_size = 4096;
4030 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
4031 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
4032 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
4033 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
4034 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
4035 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
4036
4037 net->ipv6.ip6_rt_gc_expire = 30*HZ;
4038
4039 ret = 0;
4040 out:
4041 return ret;
4042
4043 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4044 out_ip6_prohibit_entry:
4045 kfree(net->ipv6.ip6_prohibit_entry);
4046 out_ip6_null_entry:
4047 kfree(net->ipv6.ip6_null_entry);
4048 #endif
4049 out_ip6_dst_entries:
4050 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
4051 out_ip6_dst_ops:
4052 goto out;
4053 }
4054
4055 static void __net_exit ip6_route_net_exit(struct net *net)
4056 {
4057 kfree(net->ipv6.ip6_null_entry);
4058 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4059 kfree(net->ipv6.ip6_prohibit_entry);
4060 kfree(net->ipv6.ip6_blk_hole_entry);
4061 #endif
4062 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
4063 }
4064
4065 static int __net_init ip6_route_net_init_late(struct net *net)
4066 {
4067 #ifdef CONFIG_PROC_FS
4068 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
4069 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
4070 #endif
4071 return 0;
4072 }
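/* A mode of 0 above is not "no access": when a proc entry is created
 * with no permission bits set, the proc core falls back to S_IRUGO, so
 * both files end up world-readable despite the differing arguments. */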
4073
4074 static void __net_exit ip6_route_net_exit_late(struct net *net)
4075 {
4076 #ifdef CONFIG_PROC_FS
4077 remove_proc_entry("ipv6_route", net->proc_net);
4078 remove_proc_entry("rt6_stats", net->proc_net);
4079 #endif
4080 }
4081
4082 static struct pernet_operations ip6_route_net_ops = {
4083 .init = ip6_route_net_init,
4084 .exit = ip6_route_net_exit,
4085 };
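/* register_pernet_subsys() calls ->init() for init_net during boot and
 * again for every network namespace created afterwards; ->exit() runs
 * as each namespace is dismantled.  This pairing is what gives every
 * netns a private copy of the routing state built above. */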
4086
4087 static int __net_init ipv6_inetpeer_init(struct net *net)
4088 {
4089 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4090
4091 if (!bp)
4092 return -ENOMEM;
4093 inet_peer_base_init(bp);
4094 net->ipv6.peers = bp;
4095 return 0;
4096 }
4097
4098 static void __net_exit ipv6_inetpeer_exit(struct net *net)
4099 {
4100 struct inet_peer_base *bp = net->ipv6.peers;
4101
4102 net->ipv6.peers = NULL;
4103 inetpeer_invalidate_tree(bp);
4104 kfree(bp);
4105 }
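/* The per-netns inet_peer_base keeps long-lived per-destination state
 * (ICMPv6 rate limiting, for instance); the tree is invalidated before
 * the base is freed so no peer entry can outlive it. */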
4106
4107 static struct pernet_operations ipv6_inetpeer_ops = {
4108 .init = ipv6_inetpeer_init,
4109 .exit = ipv6_inetpeer_exit,
4110 };
4111
4112 static struct pernet_operations ip6_route_net_late_ops = {
4113 .init = ip6_route_net_init_late,
4114 .exit = ip6_route_net_exit_late,
4115 };
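/* These "late" ops only create the /proc files; ip6_route_init()
 * registers them after fib6, xfrm6 and the fib rules are up, so a new
 * namespace never exposes ipv6_route or rt6_stats before the state
 * they report on exists. */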
4116
4117 static struct notifier_block ip6_route_dev_notifier = {
4118 .notifier_call = ip6_route_dev_notify,
4119 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
4120 };
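/* Netdevice notifiers run in descending priority order, so a priority
 * of ADDRCONF_NOTIFY_PRIORITY - 10 guarantees this callback fires only
 * after addrconf's own notifier has handled the same event. */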
4121
4122 void __init ip6_route_init_special_entries(void)
4123 {
4124 /* The loopback device is registered before this code runs, so the
4125 * loopback reference in rt6_info is not taken automatically; take
4126 * it by hand for init_net. */
4127 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
4128 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4129 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4130 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
4131 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4132 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
4133 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4134 #endif
4135 }
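/* In this tree the function is invoked from addrconf_init(), once the
 * loopback device for init_net exists, which is what makes taking the
 * references above safe. */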
4136
4137 int __init ip6_route_init(void)
4138 {
4139 int ret;
4140 int cpu;
4141
4142 ret = -ENOMEM;
4143 ip6_dst_ops_template.kmem_cachep =
4144 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
4145 SLAB_HWCACHE_ALIGN, NULL);
4146 if (!ip6_dst_ops_template.kmem_cachep)
4147 goto out;
4148
4149 ret = dst_entries_init(&ip6_dst_blackhole_ops);
4150 if (ret)
4151 goto out_kmem_cache;
4152
4153 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
4154 if (ret)
4155 goto out_dst_entries;
4156
4157 ret = register_pernet_subsys(&ip6_route_net_ops);
4158 if (ret)
4159 goto out_register_inetpeer;
4160
4161 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
4162
4163 ret = fib6_init();
4164 if (ret)
4165 goto out_register_subsys;
4166
4167 ret = xfrm6_init();
4168 if (ret)
4169 goto out_fib6_init;
4170
4171 ret = fib6_rules_init();
4172 if (ret)
4173 goto xfrm6_init;
4174
4175 ret = register_pernet_subsys(&ip6_route_net_late_ops);
4176 if (ret)
4177 goto fib6_rules_init;
4178
4179 ret = -ENOBUFS;
4180 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
4181 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
4182 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
4183 RTNL_FLAG_DOIT_UNLOCKED))
4184 goto out_register_late_subsys;
4185
4186 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
4187 if (ret)
4188 goto out_register_late_subsys;
4189
4190 for_each_possible_cpu(cpu) {
4191 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
4192
4193 INIT_LIST_HEAD(&ul->head);
4194 spin_lock_init(&ul->lock);
4195 }
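/* rt6_uncached_list tracks dst entries that live outside the FIB tree
 * so device-unregister handling can still find and release them; one
 * list head and lock per cpu keeps additions contention-free. */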
4196
4197 out:
4198 return ret;
4199
4200 out_register_late_subsys:
4201 unregister_pernet_subsys(&ip6_route_net_late_ops);
4202 fib6_rules_init:
4203 fib6_rules_cleanup();
4204 xfrm6_init:
4205 xfrm6_fini();
4206 out_fib6_init:
4207 fib6_gc_cleanup();
4208 out_register_subsys:
4209 unregister_pernet_subsys(&ip6_route_net_ops);
4210 out_register_inetpeer:
4211 unregister_pernet_subsys(&ipv6_inetpeer_ops);
4212 out_dst_entries:
4213 dst_entries_destroy(&ip6_dst_blackhole_ops);
4214 out_kmem_cache:
4215 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
4216 goto out;
4217 }
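/* The unwind ladder above releases everything in reverse order of
 * registration.  Two labels (fib6_rules_init, xfrm6_init) reuse the
 * names of the functions whose effects they undo instead of the usual
 * out_* prefix, but behave the same way. */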
4218
4219 void ip6_route_cleanup(void)
4220 {
4221 unregister_netdevice_notifier(&ip6_route_dev_notifier);
4222 unregister_pernet_subsys(&ip6_route_net_late_ops);
4223 fib6_rules_cleanup();
4224 xfrm6_fini();
4225 fib6_gc_cleanup();
4226 unregister_pernet_subsys(&ipv6_inetpeer_ops);
4227 unregister_pernet_subsys(&ip6_route_net_ops);
4228 dst_entries_destroy(&ip6_dst_blackhole_ops);
4229 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
4230 }
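/* Teardown runs in roughly the reverse order of ip6_route_init().  One
 * small asymmetry: the two pernet subsystems are unregistered here in
 * their registration order (inetpeer before route), whereas the error
 * ladder in ip6_route_init() unwinds them in strict reverse. */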