2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
24 * Fixed routing subtrees.
27 #define pr_fmt(fmt) "IPv6: " fmt
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
55 #include <linux/rtnetlink.h>
57 #include <net/dst_metadata.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65 #include <trace/events/fib6.h>
67 #include <linux/uaccess.h>
70 #include <linux/sysctl.h>
74 RT6_NUD_FAIL_HARD
= -3,
75 RT6_NUD_FAIL_PROBE
= -2,
76 RT6_NUD_FAIL_DO_RR
= -1,
80 static void ip6_rt_copy_init(struct rt6_info
*rt
, struct rt6_info
*ort
);
81 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
);
82 static unsigned int ip6_default_advmss(const struct dst_entry
*dst
);
83 static unsigned int ip6_mtu(const struct dst_entry
*dst
);
84 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*);
85 static void ip6_dst_destroy(struct dst_entry
*);
86 static void ip6_dst_ifdown(struct dst_entry
*,
87 struct net_device
*dev
, int how
);
88 static int ip6_dst_gc(struct dst_ops
*ops
);
90 static int ip6_pkt_discard(struct sk_buff
*skb
);
91 static int ip6_pkt_discard_out(struct net
*net
, struct sock
*sk
, struct sk_buff
*skb
);
92 static int ip6_pkt_prohibit(struct sk_buff
*skb
);
93 static int ip6_pkt_prohibit_out(struct net
*net
, struct sock
*sk
, struct sk_buff
*skb
);
94 static void ip6_link_failure(struct sk_buff
*skb
);
95 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, struct sock
*sk
,
96 struct sk_buff
*skb
, u32 mtu
);
97 static void rt6_do_redirect(struct dst_entry
*dst
, struct sock
*sk
,
99 static void rt6_dst_from_metrics_check(struct rt6_info
*rt
);
100 static int rt6_score_route(struct rt6_info
*rt
, int oif
, int strict
);
101 static size_t rt6_nlmsg_size(struct rt6_info
*rt
);
102 static int rt6_fill_node(struct net
*net
,
103 struct sk_buff
*skb
, struct rt6_info
*rt
,
104 struct in6_addr
*dst
, struct in6_addr
*src
,
105 int iif
, int type
, u32 portid
, u32 seq
,
108 #ifdef CONFIG_IPV6_ROUTE_INFO
109 static struct rt6_info
*rt6_add_route_info(struct net
*net
,
110 const struct in6_addr
*prefix
, int prefixlen
,
111 const struct in6_addr
*gwaddr
,
112 struct net_device
*dev
,
114 static struct rt6_info
*rt6_get_route_info(struct net
*net
,
115 const struct in6_addr
*prefix
, int prefixlen
,
116 const struct in6_addr
*gwaddr
,
117 struct net_device
*dev
);
120 struct uncached_list
{
122 struct list_head head
;
125 static DEFINE_PER_CPU_ALIGNED(struct uncached_list
, rt6_uncached_list
);
127 static void rt6_uncached_list_add(struct rt6_info
*rt
)
129 struct uncached_list
*ul
= raw_cpu_ptr(&rt6_uncached_list
);
131 rt
->rt6i_uncached_list
= ul
;
133 spin_lock_bh(&ul
->lock
);
134 list_add_tail(&rt
->rt6i_uncached
, &ul
->head
);
135 spin_unlock_bh(&ul
->lock
);
138 static void rt6_uncached_list_del(struct rt6_info
*rt
)
140 if (!list_empty(&rt
->rt6i_uncached
)) {
141 struct uncached_list
*ul
= rt
->rt6i_uncached_list
;
143 spin_lock_bh(&ul
->lock
);
144 list_del(&rt
->rt6i_uncached
);
145 spin_unlock_bh(&ul
->lock
);
149 static void rt6_uncached_list_flush_dev(struct net
*net
, struct net_device
*dev
)
151 struct net_device
*loopback_dev
= net
->loopback_dev
;
154 if (dev
== loopback_dev
)
157 for_each_possible_cpu(cpu
) {
158 struct uncached_list
*ul
= per_cpu_ptr(&rt6_uncached_list
, cpu
);
161 spin_lock_bh(&ul
->lock
);
162 list_for_each_entry(rt
, &ul
->head
, rt6i_uncached
) {
163 struct inet6_dev
*rt_idev
= rt
->rt6i_idev
;
164 struct net_device
*rt_dev
= rt
->dst
.dev
;
166 if (rt_idev
->dev
== dev
) {
167 rt
->rt6i_idev
= in6_dev_get(loopback_dev
);
168 in6_dev_put(rt_idev
);
172 rt
->dst
.dev
= loopback_dev
;
173 dev_hold(rt
->dst
.dev
);
177 spin_unlock_bh(&ul
->lock
);
181 static u32
*rt6_pcpu_cow_metrics(struct rt6_info
*rt
)
183 return dst_metrics_write_ptr(rt
->dst
.from
);
186 static u32
*ipv6_cow_metrics(struct dst_entry
*dst
, unsigned long old
)
188 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
190 if (rt
->rt6i_flags
& RTF_PCPU
)
191 return rt6_pcpu_cow_metrics(rt
);
192 else if (rt
->rt6i_flags
& RTF_CACHE
)
195 return dst_cow_metrics_generic(dst
, old
);
198 static inline const void *choose_neigh_daddr(struct rt6_info
*rt
,
202 struct in6_addr
*p
= &rt
->rt6i_gateway
;
204 if (!ipv6_addr_any(p
))
205 return (const void *) p
;
207 return &ipv6_hdr(skb
)->daddr
;
211 static struct neighbour
*ip6_neigh_lookup(const struct dst_entry
*dst
,
215 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
218 daddr
= choose_neigh_daddr(rt
, skb
, daddr
);
219 n
= __ipv6_neigh_lookup(dst
->dev
, daddr
);
222 return neigh_create(&nd_tbl
, daddr
, dst
->dev
);
225 static void ip6_confirm_neigh(const struct dst_entry
*dst
, const void *daddr
)
227 struct net_device
*dev
= dst
->dev
;
228 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
230 daddr
= choose_neigh_daddr(rt
, NULL
, daddr
);
233 if (dev
->flags
& (IFF_NOARP
| IFF_LOOPBACK
))
235 if (ipv6_addr_is_multicast((const struct in6_addr
*)daddr
))
237 __ipv6_confirm_neigh(dev
, daddr
);
240 static struct dst_ops ip6_dst_ops_template
= {
244 .check
= ip6_dst_check
,
245 .default_advmss
= ip6_default_advmss
,
247 .cow_metrics
= ipv6_cow_metrics
,
248 .destroy
= ip6_dst_destroy
,
249 .ifdown
= ip6_dst_ifdown
,
250 .negative_advice
= ip6_negative_advice
,
251 .link_failure
= ip6_link_failure
,
252 .update_pmtu
= ip6_rt_update_pmtu
,
253 .redirect
= rt6_do_redirect
,
254 .local_out
= __ip6_local_out
,
255 .neigh_lookup
= ip6_neigh_lookup
,
256 .confirm_neigh
= ip6_confirm_neigh
,
259 static unsigned int ip6_blackhole_mtu(const struct dst_entry
*dst
)
261 unsigned int mtu
= dst_metric_raw(dst
, RTAX_MTU
);
263 return mtu
? : dst
->dev
->mtu
;
266 static void ip6_rt_blackhole_update_pmtu(struct dst_entry
*dst
, struct sock
*sk
,
267 struct sk_buff
*skb
, u32 mtu
)
271 static void ip6_rt_blackhole_redirect(struct dst_entry
*dst
, struct sock
*sk
,
276 static struct dst_ops ip6_dst_blackhole_ops
= {
278 .destroy
= ip6_dst_destroy
,
279 .check
= ip6_dst_check
,
280 .mtu
= ip6_blackhole_mtu
,
281 .default_advmss
= ip6_default_advmss
,
282 .update_pmtu
= ip6_rt_blackhole_update_pmtu
,
283 .redirect
= ip6_rt_blackhole_redirect
,
284 .cow_metrics
= dst_cow_metrics_generic
,
285 .neigh_lookup
= ip6_neigh_lookup
,
288 static const u32 ip6_template_metrics
[RTAX_MAX
] = {
289 [RTAX_HOPLIMIT
- 1] = 0,
292 static const struct rt6_info ip6_null_entry_template
= {
294 .__refcnt
= ATOMIC_INIT(1),
296 .obsolete
= DST_OBSOLETE_FORCE_CHK
,
297 .error
= -ENETUNREACH
,
298 .input
= ip6_pkt_discard
,
299 .output
= ip6_pkt_discard_out
,
301 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
302 .rt6i_protocol
= RTPROT_KERNEL
,
303 .rt6i_metric
= ~(u32
) 0,
304 .rt6i_ref
= ATOMIC_INIT(1),
307 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
309 static const struct rt6_info ip6_prohibit_entry_template
= {
311 .__refcnt
= ATOMIC_INIT(1),
313 .obsolete
= DST_OBSOLETE_FORCE_CHK
,
315 .input
= ip6_pkt_prohibit
,
316 .output
= ip6_pkt_prohibit_out
,
318 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
319 .rt6i_protocol
= RTPROT_KERNEL
,
320 .rt6i_metric
= ~(u32
) 0,
321 .rt6i_ref
= ATOMIC_INIT(1),
324 static const struct rt6_info ip6_blk_hole_entry_template
= {
326 .__refcnt
= ATOMIC_INIT(1),
328 .obsolete
= DST_OBSOLETE_FORCE_CHK
,
330 .input
= dst_discard
,
331 .output
= dst_discard_out
,
333 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
334 .rt6i_protocol
= RTPROT_KERNEL
,
335 .rt6i_metric
= ~(u32
) 0,
336 .rt6i_ref
= ATOMIC_INIT(1),
341 static void rt6_info_init(struct rt6_info
*rt
)
343 struct dst_entry
*dst
= &rt
->dst
;
345 memset(dst
+ 1, 0, sizeof(*rt
) - sizeof(*dst
));
346 INIT_LIST_HEAD(&rt
->rt6i_siblings
);
347 INIT_LIST_HEAD(&rt
->rt6i_uncached
);
350 /* allocate dst with ip6_dst_ops */
351 static struct rt6_info
*__ip6_dst_alloc(struct net
*net
,
352 struct net_device
*dev
,
355 struct rt6_info
*rt
= dst_alloc(&net
->ipv6
.ip6_dst_ops
, dev
,
356 1, DST_OBSOLETE_FORCE_CHK
, flags
);
364 struct rt6_info
*ip6_dst_alloc(struct net
*net
,
365 struct net_device
*dev
,
368 struct rt6_info
*rt
= __ip6_dst_alloc(net
, dev
, flags
);
371 rt
->rt6i_pcpu
= alloc_percpu_gfp(struct rt6_info
*, GFP_ATOMIC
);
375 for_each_possible_cpu(cpu
) {
378 p
= per_cpu_ptr(rt
->rt6i_pcpu
, cpu
);
379 /* no one shares rt */
383 dst_release_immediate(&rt
->dst
);
390 EXPORT_SYMBOL(ip6_dst_alloc
);
392 static void ip6_dst_destroy(struct dst_entry
*dst
)
394 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
395 struct dst_entry
*from
= dst
->from
;
396 struct inet6_dev
*idev
;
398 dst_destroy_metrics_generic(dst
);
399 free_percpu(rt
->rt6i_pcpu
);
400 rt6_uncached_list_del(rt
);
402 idev
= rt
->rt6i_idev
;
404 rt
->rt6i_idev
= NULL
;
412 static void ip6_dst_ifdown(struct dst_entry
*dst
, struct net_device
*dev
,
415 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
416 struct inet6_dev
*idev
= rt
->rt6i_idev
;
417 struct net_device
*loopback_dev
=
418 dev_net(dev
)->loopback_dev
;
420 if (idev
&& idev
->dev
!= loopback_dev
) {
421 struct inet6_dev
*loopback_idev
= in6_dev_get(loopback_dev
);
423 rt
->rt6i_idev
= loopback_idev
;
429 static bool __rt6_check_expired(const struct rt6_info
*rt
)
431 if (rt
->rt6i_flags
& RTF_EXPIRES
)
432 return time_after(jiffies
, rt
->dst
.expires
);
437 static bool rt6_check_expired(const struct rt6_info
*rt
)
439 if (rt
->rt6i_flags
& RTF_EXPIRES
) {
440 if (time_after(jiffies
, rt
->dst
.expires
))
442 } else if (rt
->dst
.from
) {
443 return rt
->dst
.obsolete
!= DST_OBSOLETE_FORCE_CHK
||
444 rt6_check_expired((struct rt6_info
*)rt
->dst
.from
);
449 static struct rt6_info
*rt6_multipath_select(struct rt6_info
*match
,
450 struct flowi6
*fl6
, int oif
,
453 struct rt6_info
*sibling
, *next_sibling
;
456 /* We might have already computed the hash for ICMPv6 errors. In such
457 * case it will always be non-zero. Otherwise now is the time to do it.
460 fl6
->mp_hash
= rt6_multipath_hash(fl6
, NULL
);
462 route_choosen
= fl6
->mp_hash
% (match
->rt6i_nsiblings
+ 1);
463 /* Don't change the route, if route_choosen == 0
464 * (siblings does not include ourself)
467 list_for_each_entry_safe(sibling
, next_sibling
,
468 &match
->rt6i_siblings
, rt6i_siblings
) {
470 if (route_choosen
== 0) {
471 if (rt6_score_route(sibling
, oif
, strict
) < 0)
481 * Route lookup. Any table->tb6_lock is implied.
484 static inline struct rt6_info
*rt6_device_match(struct net
*net
,
486 const struct in6_addr
*saddr
,
490 struct rt6_info
*local
= NULL
;
491 struct rt6_info
*sprt
;
493 if (!oif
&& ipv6_addr_any(saddr
))
496 for (sprt
= rt
; sprt
; sprt
= sprt
->dst
.rt6_next
) {
497 struct net_device
*dev
= sprt
->dst
.dev
;
500 if (dev
->ifindex
== oif
)
502 if (dev
->flags
& IFF_LOOPBACK
) {
503 if (!sprt
->rt6i_idev
||
504 sprt
->rt6i_idev
->dev
->ifindex
!= oif
) {
505 if (flags
& RT6_LOOKUP_F_IFACE
)
508 local
->rt6i_idev
->dev
->ifindex
== oif
)
514 if (ipv6_chk_addr(net
, saddr
, dev
,
515 flags
& RT6_LOOKUP_F_IFACE
))
524 if (flags
& RT6_LOOKUP_F_IFACE
)
525 return net
->ipv6
.ip6_null_entry
;
531 #ifdef CONFIG_IPV6_ROUTER_PREF
532 struct __rt6_probe_work
{
533 struct work_struct work
;
534 struct in6_addr target
;
535 struct net_device
*dev
;
538 static void rt6_probe_deferred(struct work_struct
*w
)
540 struct in6_addr mcaddr
;
541 struct __rt6_probe_work
*work
=
542 container_of(w
, struct __rt6_probe_work
, work
);
544 addrconf_addr_solict_mult(&work
->target
, &mcaddr
);
545 ndisc_send_ns(work
->dev
, &work
->target
, &mcaddr
, NULL
, 0);
550 static void rt6_probe(struct rt6_info
*rt
)
552 struct __rt6_probe_work
*work
;
553 struct neighbour
*neigh
;
555 * Okay, this does not seem to be appropriate
556 * for now, however, we need to check if it
557 * is really so; aka Router Reachability Probing.
559 * Router Reachability Probe MUST be rate-limited
560 * to no more than one per minute.
562 if (!rt
|| !(rt
->rt6i_flags
& RTF_GATEWAY
))
565 neigh
= __ipv6_neigh_lookup_noref(rt
->dst
.dev
, &rt
->rt6i_gateway
);
567 if (neigh
->nud_state
& NUD_VALID
)
571 write_lock(&neigh
->lock
);
572 if (!(neigh
->nud_state
& NUD_VALID
) &&
575 rt
->rt6i_idev
->cnf
.rtr_probe_interval
)) {
576 work
= kmalloc(sizeof(*work
), GFP_ATOMIC
);
578 __neigh_set_probe_once(neigh
);
580 write_unlock(&neigh
->lock
);
582 work
= kmalloc(sizeof(*work
), GFP_ATOMIC
);
586 INIT_WORK(&work
->work
, rt6_probe_deferred
);
587 work
->target
= rt
->rt6i_gateway
;
588 dev_hold(rt
->dst
.dev
);
589 work
->dev
= rt
->dst
.dev
;
590 schedule_work(&work
->work
);
594 rcu_read_unlock_bh();
597 static inline void rt6_probe(struct rt6_info
*rt
)
603 * Default Router Selection (RFC 2461 6.3.6)
605 static inline int rt6_check_dev(struct rt6_info
*rt
, int oif
)
607 struct net_device
*dev
= rt
->dst
.dev
;
608 if (!oif
|| dev
->ifindex
== oif
)
610 if ((dev
->flags
& IFF_LOOPBACK
) &&
611 rt
->rt6i_idev
&& rt
->rt6i_idev
->dev
->ifindex
== oif
)
616 static inline enum rt6_nud_state
rt6_check_neigh(struct rt6_info
*rt
)
618 struct neighbour
*neigh
;
619 enum rt6_nud_state ret
= RT6_NUD_FAIL_HARD
;
621 if (rt
->rt6i_flags
& RTF_NONEXTHOP
||
622 !(rt
->rt6i_flags
& RTF_GATEWAY
))
623 return RT6_NUD_SUCCEED
;
626 neigh
= __ipv6_neigh_lookup_noref(rt
->dst
.dev
, &rt
->rt6i_gateway
);
628 read_lock(&neigh
->lock
);
629 if (neigh
->nud_state
& NUD_VALID
)
630 ret
= RT6_NUD_SUCCEED
;
631 #ifdef CONFIG_IPV6_ROUTER_PREF
632 else if (!(neigh
->nud_state
& NUD_FAILED
))
633 ret
= RT6_NUD_SUCCEED
;
635 ret
= RT6_NUD_FAIL_PROBE
;
637 read_unlock(&neigh
->lock
);
639 ret
= IS_ENABLED(CONFIG_IPV6_ROUTER_PREF
) ?
640 RT6_NUD_SUCCEED
: RT6_NUD_FAIL_DO_RR
;
642 rcu_read_unlock_bh();
647 static int rt6_score_route(struct rt6_info
*rt
, int oif
,
652 m
= rt6_check_dev(rt
, oif
);
653 if (!m
&& (strict
& RT6_LOOKUP_F_IFACE
))
654 return RT6_NUD_FAIL_HARD
;
655 #ifdef CONFIG_IPV6_ROUTER_PREF
656 m
|= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt
->rt6i_flags
)) << 2;
658 if (strict
& RT6_LOOKUP_F_REACHABLE
) {
659 int n
= rt6_check_neigh(rt
);
666 static struct rt6_info
*find_match(struct rt6_info
*rt
, int oif
, int strict
,
667 int *mpri
, struct rt6_info
*match
,
671 bool match_do_rr
= false;
672 struct inet6_dev
*idev
= rt
->rt6i_idev
;
673 struct net_device
*dev
= rt
->dst
.dev
;
675 if (dev
&& !netif_carrier_ok(dev
) &&
676 idev
->cnf
.ignore_routes_with_linkdown
&&
677 !(strict
& RT6_LOOKUP_F_IGNORE_LINKSTATE
))
680 if (rt6_check_expired(rt
))
683 m
= rt6_score_route(rt
, oif
, strict
);
684 if (m
== RT6_NUD_FAIL_DO_RR
) {
686 m
= 0; /* lowest valid score */
687 } else if (m
== RT6_NUD_FAIL_HARD
) {
691 if (strict
& RT6_LOOKUP_F_REACHABLE
)
694 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
696 *do_rr
= match_do_rr
;
704 static struct rt6_info
*find_rr_leaf(struct fib6_node
*fn
,
705 struct rt6_info
*rr_head
,
706 u32 metric
, int oif
, int strict
,
709 struct rt6_info
*rt
, *match
, *cont
;
714 for (rt
= rr_head
; rt
; rt
= rt
->dst
.rt6_next
) {
715 if (rt
->rt6i_metric
!= metric
) {
720 match
= find_match(rt
, oif
, strict
, &mpri
, match
, do_rr
);
723 for (rt
= fn
->leaf
; rt
&& rt
!= rr_head
; rt
= rt
->dst
.rt6_next
) {
724 if (rt
->rt6i_metric
!= metric
) {
729 match
= find_match(rt
, oif
, strict
, &mpri
, match
, do_rr
);
735 for (rt
= cont
; rt
; rt
= rt
->dst
.rt6_next
)
736 match
= find_match(rt
, oif
, strict
, &mpri
, match
, do_rr
);
741 static struct rt6_info
*rt6_select(struct fib6_node
*fn
, int oif
, int strict
)
743 struct rt6_info
*match
, *rt0
;
749 fn
->rr_ptr
= rt0
= fn
->leaf
;
751 match
= find_rr_leaf(fn
, rt0
, rt0
->rt6i_metric
, oif
, strict
,
755 struct rt6_info
*next
= rt0
->dst
.rt6_next
;
757 /* no entries matched; do round-robin */
758 if (!next
|| next
->rt6i_metric
!= rt0
->rt6i_metric
)
765 net
= dev_net(rt0
->dst
.dev
);
766 return match
? match
: net
->ipv6
.ip6_null_entry
;
769 static bool rt6_is_gw_or_nonexthop(const struct rt6_info
*rt
)
771 return (rt
->rt6i_flags
& (RTF_NONEXTHOP
| RTF_GATEWAY
));
774 #ifdef CONFIG_IPV6_ROUTE_INFO
775 int rt6_route_rcv(struct net_device
*dev
, u8
*opt
, int len
,
776 const struct in6_addr
*gwaddr
)
778 struct net
*net
= dev_net(dev
);
779 struct route_info
*rinfo
= (struct route_info
*) opt
;
780 struct in6_addr prefix_buf
, *prefix
;
782 unsigned long lifetime
;
785 if (len
< sizeof(struct route_info
)) {
789 /* Sanity check for prefix_len and length */
790 if (rinfo
->length
> 3) {
792 } else if (rinfo
->prefix_len
> 128) {
794 } else if (rinfo
->prefix_len
> 64) {
795 if (rinfo
->length
< 2) {
798 } else if (rinfo
->prefix_len
> 0) {
799 if (rinfo
->length
< 1) {
804 pref
= rinfo
->route_pref
;
805 if (pref
== ICMPV6_ROUTER_PREF_INVALID
)
808 lifetime
= addrconf_timeout_fixup(ntohl(rinfo
->lifetime
), HZ
);
810 if (rinfo
->length
== 3)
811 prefix
= (struct in6_addr
*)rinfo
->prefix
;
813 /* this function is safe */
814 ipv6_addr_prefix(&prefix_buf
,
815 (struct in6_addr
*)rinfo
->prefix
,
817 prefix
= &prefix_buf
;
820 if (rinfo
->prefix_len
== 0)
821 rt
= rt6_get_dflt_router(gwaddr
, dev
);
823 rt
= rt6_get_route_info(net
, prefix
, rinfo
->prefix_len
,
826 if (rt
&& !lifetime
) {
832 rt
= rt6_add_route_info(net
, prefix
, rinfo
->prefix_len
, gwaddr
,
835 rt
->rt6i_flags
= RTF_ROUTEINFO
|
836 (rt
->rt6i_flags
& ~RTF_PREF_MASK
) | RTF_PREF(pref
);
839 if (!addrconf_finite_timeout(lifetime
))
840 rt6_clean_expires(rt
);
842 rt6_set_expires(rt
, jiffies
+ HZ
* lifetime
);
850 static struct fib6_node
* fib6_backtrack(struct fib6_node
*fn
,
851 struct in6_addr
*saddr
)
853 struct fib6_node
*pn
;
855 if (fn
->fn_flags
& RTN_TL_ROOT
)
858 if (FIB6_SUBTREE(pn
) && FIB6_SUBTREE(pn
) != fn
)
859 fn
= fib6_lookup(FIB6_SUBTREE(pn
), NULL
, saddr
);
862 if (fn
->fn_flags
& RTN_RTINFO
)
867 static struct rt6_info
*ip6_pol_route_lookup(struct net
*net
,
868 struct fib6_table
*table
,
869 struct flowi6
*fl6
, int flags
)
871 struct fib6_node
*fn
;
874 if (fl6
->flowi6_flags
& FLOWI_FLAG_SKIP_NH_OIF
)
875 flags
&= ~RT6_LOOKUP_F_IFACE
;
877 read_lock_bh(&table
->tb6_lock
);
878 fn
= fib6_lookup(&table
->tb6_root
, &fl6
->daddr
, &fl6
->saddr
);
881 rt
= rt6_device_match(net
, rt
, &fl6
->saddr
, fl6
->flowi6_oif
, flags
);
882 if (rt
->rt6i_nsiblings
&& fl6
->flowi6_oif
== 0)
883 rt
= rt6_multipath_select(rt
, fl6
, fl6
->flowi6_oif
, flags
);
884 if (rt
== net
->ipv6
.ip6_null_entry
) {
885 fn
= fib6_backtrack(fn
, &fl6
->saddr
);
889 dst_use(&rt
->dst
, jiffies
);
890 read_unlock_bh(&table
->tb6_lock
);
892 trace_fib6_table_lookup(net
, rt
, table
->tb6_id
, fl6
);
898 struct dst_entry
*ip6_route_lookup(struct net
*net
, struct flowi6
*fl6
,
901 return fib6_rule_lookup(net
, fl6
, flags
, ip6_pol_route_lookup
);
903 EXPORT_SYMBOL_GPL(ip6_route_lookup
);
905 struct rt6_info
*rt6_lookup(struct net
*net
, const struct in6_addr
*daddr
,
906 const struct in6_addr
*saddr
, int oif
, int strict
)
908 struct flowi6 fl6
= {
912 struct dst_entry
*dst
;
913 int flags
= strict
? RT6_LOOKUP_F_IFACE
: 0;
916 memcpy(&fl6
.saddr
, saddr
, sizeof(*saddr
));
917 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
920 dst
= fib6_rule_lookup(net
, &fl6
, flags
, ip6_pol_route_lookup
);
922 return (struct rt6_info
*) dst
;
928 EXPORT_SYMBOL(rt6_lookup
);
930 /* ip6_ins_rt is called with FREE table->tb6_lock.
931 * It takes new route entry, the addition fails by any reason the
933 * Caller must hold dst before calling it.
936 static int __ip6_ins_rt(struct rt6_info
*rt
, struct nl_info
*info
,
937 struct mx6_config
*mxc
,
938 struct netlink_ext_ack
*extack
)
941 struct fib6_table
*table
;
943 table
= rt
->rt6i_table
;
944 write_lock_bh(&table
->tb6_lock
);
945 err
= fib6_add(&table
->tb6_root
, rt
, info
, mxc
, extack
);
946 write_unlock_bh(&table
->tb6_lock
);
951 int ip6_ins_rt(struct rt6_info
*rt
)
953 struct nl_info info
= { .nl_net
= dev_net(rt
->dst
.dev
), };
954 struct mx6_config mxc
= { .mx
= NULL
, };
956 /* Hold dst to account for the reference from the fib6 tree */
958 return __ip6_ins_rt(rt
, &info
, &mxc
, NULL
);
961 /* called with rcu_lock held */
962 static struct net_device
*ip6_rt_get_dev_rcu(struct rt6_info
*rt
)
964 struct net_device
*dev
= rt
->dst
.dev
;
966 if (rt
->rt6i_flags
& (RTF_LOCAL
| RTF_ANYCAST
)) {
967 /* for copies of local routes, dst->dev needs to be the
968 * device if it is a master device, the master device if
969 * device is enslaved, and the loopback as the default
971 if (netif_is_l3_slave(dev
) &&
972 !rt6_need_strict(&rt
->rt6i_dst
.addr
))
973 dev
= l3mdev_master_dev_rcu(dev
);
974 else if (!netif_is_l3_master(dev
))
975 dev
= dev_net(dev
)->loopback_dev
;
976 /* last case is netif_is_l3_master(dev) is true in which
977 * case we want dev returned to be dev
984 static struct rt6_info
*ip6_rt_cache_alloc(struct rt6_info
*ort
,
985 const struct in6_addr
*daddr
,
986 const struct in6_addr
*saddr
)
988 struct net_device
*dev
;
995 if (ort
->rt6i_flags
& (RTF_CACHE
| RTF_PCPU
))
996 ort
= (struct rt6_info
*)ort
->dst
.from
;
999 dev
= ip6_rt_get_dev_rcu(ort
);
1000 rt
= __ip6_dst_alloc(dev_net(dev
), dev
, 0);
1005 ip6_rt_copy_init(rt
, ort
);
1006 rt
->rt6i_flags
|= RTF_CACHE
;
1007 rt
->rt6i_metric
= 0;
1008 rt
->dst
.flags
|= DST_HOST
;
1009 rt
->rt6i_dst
.addr
= *daddr
;
1010 rt
->rt6i_dst
.plen
= 128;
1012 if (!rt6_is_gw_or_nonexthop(ort
)) {
1013 if (ort
->rt6i_dst
.plen
!= 128 &&
1014 ipv6_addr_equal(&ort
->rt6i_dst
.addr
, daddr
))
1015 rt
->rt6i_flags
|= RTF_ANYCAST
;
1016 #ifdef CONFIG_IPV6_SUBTREES
1017 if (rt
->rt6i_src
.plen
&& saddr
) {
1018 rt
->rt6i_src
.addr
= *saddr
;
1019 rt
->rt6i_src
.plen
= 128;
1027 static struct rt6_info
*ip6_rt_pcpu_alloc(struct rt6_info
*rt
)
1029 struct net_device
*dev
;
1030 struct rt6_info
*pcpu_rt
;
1033 dev
= ip6_rt_get_dev_rcu(rt
);
1034 pcpu_rt
= __ip6_dst_alloc(dev_net(dev
), dev
, rt
->dst
.flags
);
1038 ip6_rt_copy_init(pcpu_rt
, rt
);
1039 pcpu_rt
->rt6i_protocol
= rt
->rt6i_protocol
;
1040 pcpu_rt
->rt6i_flags
|= RTF_PCPU
;
1044 /* It should be called with read_lock_bh(&tb6_lock) acquired */
1045 static struct rt6_info
*rt6_get_pcpu_route(struct rt6_info
*rt
)
1047 struct rt6_info
*pcpu_rt
, **p
;
1049 p
= this_cpu_ptr(rt
->rt6i_pcpu
);
1053 dst_hold(&pcpu_rt
->dst
);
1054 rt6_dst_from_metrics_check(pcpu_rt
);
1059 static struct rt6_info
*rt6_make_pcpu_route(struct rt6_info
*rt
)
1061 struct fib6_table
*table
= rt
->rt6i_table
;
1062 struct rt6_info
*pcpu_rt
, *prev
, **p
;
1064 pcpu_rt
= ip6_rt_pcpu_alloc(rt
);
1066 struct net
*net
= dev_net(rt
->dst
.dev
);
1068 dst_hold(&net
->ipv6
.ip6_null_entry
->dst
);
1069 return net
->ipv6
.ip6_null_entry
;
1072 read_lock_bh(&table
->tb6_lock
);
1073 if (rt
->rt6i_pcpu
) {
1074 p
= this_cpu_ptr(rt
->rt6i_pcpu
);
1075 prev
= cmpxchg(p
, NULL
, pcpu_rt
);
1077 /* If someone did it before us, return prev instead */
1078 dst_release_immediate(&pcpu_rt
->dst
);
1082 /* rt has been removed from the fib6 tree
1083 * before we have a chance to acquire the read_lock.
1084 * In this case, don't brother to create a pcpu rt
1085 * since rt is going away anyway. The next
1086 * dst_check() will trigger a re-lookup.
1088 dst_release_immediate(&pcpu_rt
->dst
);
1091 dst_hold(&pcpu_rt
->dst
);
1092 rt6_dst_from_metrics_check(pcpu_rt
);
1093 read_unlock_bh(&table
->tb6_lock
);
1097 struct rt6_info
*ip6_pol_route(struct net
*net
, struct fib6_table
*table
,
1098 int oif
, struct flowi6
*fl6
, int flags
)
1100 struct fib6_node
*fn
, *saved_fn
;
1101 struct rt6_info
*rt
;
1104 strict
|= flags
& RT6_LOOKUP_F_IFACE
;
1105 strict
|= flags
& RT6_LOOKUP_F_IGNORE_LINKSTATE
;
1106 if (net
->ipv6
.devconf_all
->forwarding
== 0)
1107 strict
|= RT6_LOOKUP_F_REACHABLE
;
1109 read_lock_bh(&table
->tb6_lock
);
1111 fn
= fib6_lookup(&table
->tb6_root
, &fl6
->daddr
, &fl6
->saddr
);
1114 if (fl6
->flowi6_flags
& FLOWI_FLAG_SKIP_NH_OIF
)
1118 rt
= rt6_select(fn
, oif
, strict
);
1119 if (rt
->rt6i_nsiblings
)
1120 rt
= rt6_multipath_select(rt
, fl6
, oif
, strict
);
1121 if (rt
== net
->ipv6
.ip6_null_entry
) {
1122 fn
= fib6_backtrack(fn
, &fl6
->saddr
);
1124 goto redo_rt6_select
;
1125 else if (strict
& RT6_LOOKUP_F_REACHABLE
) {
1126 /* also consider unreachable route */
1127 strict
&= ~RT6_LOOKUP_F_REACHABLE
;
1129 goto redo_rt6_select
;
1134 if (rt
== net
->ipv6
.ip6_null_entry
|| (rt
->rt6i_flags
& RTF_CACHE
)) {
1135 dst_use(&rt
->dst
, jiffies
);
1136 read_unlock_bh(&table
->tb6_lock
);
1138 rt6_dst_from_metrics_check(rt
);
1140 trace_fib6_table_lookup(net
, rt
, table
->tb6_id
, fl6
);
1142 } else if (unlikely((fl6
->flowi6_flags
& FLOWI_FLAG_KNOWN_NH
) &&
1143 !(rt
->rt6i_flags
& RTF_GATEWAY
))) {
1144 /* Create a RTF_CACHE clone which will not be
1145 * owned by the fib6 tree. It is for the special case where
1146 * the daddr in the skb during the neighbor look-up is different
1147 * from the fl6->daddr used to look-up route here.
1150 struct rt6_info
*uncached_rt
;
1152 dst_use(&rt
->dst
, jiffies
);
1153 read_unlock_bh(&table
->tb6_lock
);
1155 uncached_rt
= ip6_rt_cache_alloc(rt
, &fl6
->daddr
, NULL
);
1156 dst_release(&rt
->dst
);
1159 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1160 * No need for another dst_hold()
1162 rt6_uncached_list_add(uncached_rt
);
1164 uncached_rt
= net
->ipv6
.ip6_null_entry
;
1165 dst_hold(&uncached_rt
->dst
);
1168 trace_fib6_table_lookup(net
, uncached_rt
, table
->tb6_id
, fl6
);
1172 /* Get a percpu copy */
1174 struct rt6_info
*pcpu_rt
;
1176 rt
->dst
.lastuse
= jiffies
;
1178 pcpu_rt
= rt6_get_pcpu_route(rt
);
1181 read_unlock_bh(&table
->tb6_lock
);
1183 /* We have to do the read_unlock first
1184 * because rt6_make_pcpu_route() may trigger
1185 * ip6_dst_gc() which will take the write_lock.
1188 read_unlock_bh(&table
->tb6_lock
);
1189 pcpu_rt
= rt6_make_pcpu_route(rt
);
1190 dst_release(&rt
->dst
);
1193 trace_fib6_table_lookup(net
, pcpu_rt
, table
->tb6_id
, fl6
);
1198 EXPORT_SYMBOL_GPL(ip6_pol_route
);
1200 static struct rt6_info
*ip6_pol_route_input(struct net
*net
, struct fib6_table
*table
,
1201 struct flowi6
*fl6
, int flags
)
1203 return ip6_pol_route(net
, table
, fl6
->flowi6_iif
, fl6
, flags
);
1206 struct dst_entry
*ip6_route_input_lookup(struct net
*net
,
1207 struct net_device
*dev
,
1208 struct flowi6
*fl6
, int flags
)
1210 if (rt6_need_strict(&fl6
->daddr
) && dev
->type
!= ARPHRD_PIMREG
)
1211 flags
|= RT6_LOOKUP_F_IFACE
;
1213 return fib6_rule_lookup(net
, fl6
, flags
, ip6_pol_route_input
);
1215 EXPORT_SYMBOL_GPL(ip6_route_input_lookup
);
1217 static void ip6_multipath_l3_keys(const struct sk_buff
*skb
,
1218 struct flow_keys
*keys
)
1220 const struct ipv6hdr
*outer_iph
= ipv6_hdr(skb
);
1221 const struct ipv6hdr
*key_iph
= outer_iph
;
1222 const struct ipv6hdr
*inner_iph
;
1223 const struct icmp6hdr
*icmph
;
1224 struct ipv6hdr _inner_iph
;
1225 struct icmp6hdr _icmph
;
1227 if (likely(outer_iph
->nexthdr
!= IPPROTO_ICMPV6
))
1230 icmph
= skb_header_pointer(skb
, skb_transport_offset(skb
),
1231 sizeof(_icmph
), &_icmph
);
1235 if (icmph
->icmp6_type
!= ICMPV6_DEST_UNREACH
&&
1236 icmph
->icmp6_type
!= ICMPV6_PKT_TOOBIG
&&
1237 icmph
->icmp6_type
!= ICMPV6_TIME_EXCEED
&&
1238 icmph
->icmp6_type
!= ICMPV6_PARAMPROB
)
1241 inner_iph
= skb_header_pointer(skb
,
1242 skb_transport_offset(skb
) + sizeof(*icmph
),
1243 sizeof(_inner_iph
), &_inner_iph
);
1247 key_iph
= inner_iph
;
1249 memset(keys
, 0, sizeof(*keys
));
1250 keys
->control
.addr_type
= FLOW_DISSECTOR_KEY_IPV6_ADDRS
;
1251 keys
->addrs
.v6addrs
.src
= key_iph
->saddr
;
1252 keys
->addrs
.v6addrs
.dst
= key_iph
->daddr
;
1253 keys
->tags
.flow_label
= ip6_flowlabel(key_iph
);
1254 keys
->basic
.ip_proto
= key_iph
->nexthdr
;
1257 /* if skb is set it will be used and fl6 can be NULL */
1258 u32
rt6_multipath_hash(const struct flowi6
*fl6
, const struct sk_buff
*skb
)
1260 struct flow_keys hash_keys
;
1263 ip6_multipath_l3_keys(skb
, &hash_keys
);
1264 return flow_hash_from_keys(&hash_keys
);
1267 return get_hash_from_flowi6(fl6
);
1270 void ip6_route_input(struct sk_buff
*skb
)
1272 const struct ipv6hdr
*iph
= ipv6_hdr(skb
);
1273 struct net
*net
= dev_net(skb
->dev
);
1274 int flags
= RT6_LOOKUP_F_HAS_SADDR
;
1275 struct ip_tunnel_info
*tun_info
;
1276 struct flowi6 fl6
= {
1277 .flowi6_iif
= skb
->dev
->ifindex
,
1278 .daddr
= iph
->daddr
,
1279 .saddr
= iph
->saddr
,
1280 .flowlabel
= ip6_flowinfo(iph
),
1281 .flowi6_mark
= skb
->mark
,
1282 .flowi6_proto
= iph
->nexthdr
,
1285 tun_info
= skb_tunnel_info(skb
);
1286 if (tun_info
&& !(tun_info
->mode
& IP_TUNNEL_INFO_TX
))
1287 fl6
.flowi6_tun_key
.tun_id
= tun_info
->key
.tun_id
;
1288 if (unlikely(fl6
.flowi6_proto
== IPPROTO_ICMPV6
))
1289 fl6
.mp_hash
= rt6_multipath_hash(&fl6
, skb
);
1291 skb_dst_set(skb
, ip6_route_input_lookup(net
, skb
->dev
, &fl6
, flags
));
1294 static struct rt6_info
*ip6_pol_route_output(struct net
*net
, struct fib6_table
*table
,
1295 struct flowi6
*fl6
, int flags
)
1297 return ip6_pol_route(net
, table
, fl6
->flowi6_oif
, fl6
, flags
);
1300 struct dst_entry
*ip6_route_output_flags(struct net
*net
, const struct sock
*sk
,
1301 struct flowi6
*fl6
, int flags
)
1305 if (rt6_need_strict(&fl6
->daddr
)) {
1306 struct dst_entry
*dst
;
1308 dst
= l3mdev_link_scope_lookup(net
, fl6
);
1313 fl6
->flowi6_iif
= LOOPBACK_IFINDEX
;
1315 any_src
= ipv6_addr_any(&fl6
->saddr
);
1316 if ((sk
&& sk
->sk_bound_dev_if
) || rt6_need_strict(&fl6
->daddr
) ||
1317 (fl6
->flowi6_oif
&& any_src
))
1318 flags
|= RT6_LOOKUP_F_IFACE
;
1321 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
1323 flags
|= rt6_srcprefs2flags(inet6_sk(sk
)->srcprefs
);
1325 return fib6_rule_lookup(net
, fl6
, flags
, ip6_pol_route_output
);
1327 EXPORT_SYMBOL_GPL(ip6_route_output_flags
);
1329 struct dst_entry
*ip6_blackhole_route(struct net
*net
, struct dst_entry
*dst_orig
)
1331 struct rt6_info
*rt
, *ort
= (struct rt6_info
*) dst_orig
;
1332 struct net_device
*loopback_dev
= net
->loopback_dev
;
1333 struct dst_entry
*new = NULL
;
1335 rt
= dst_alloc(&ip6_dst_blackhole_ops
, loopback_dev
, 1,
1336 DST_OBSOLETE_DEAD
, 0);
1342 new->input
= dst_discard
;
1343 new->output
= dst_discard_out
;
1345 dst_copy_metrics(new, &ort
->dst
);
1347 rt
->rt6i_idev
= in6_dev_get(loopback_dev
);
1348 rt
->rt6i_gateway
= ort
->rt6i_gateway
;
1349 rt
->rt6i_flags
= ort
->rt6i_flags
& ~RTF_PCPU
;
1350 rt
->rt6i_metric
= 0;
1352 memcpy(&rt
->rt6i_dst
, &ort
->rt6i_dst
, sizeof(struct rt6key
));
1353 #ifdef CONFIG_IPV6_SUBTREES
1354 memcpy(&rt
->rt6i_src
, &ort
->rt6i_src
, sizeof(struct rt6key
));
1358 dst_release(dst_orig
);
1359 return new ? new : ERR_PTR(-ENOMEM
);
1363 * Destination cache support functions
1366 static void rt6_dst_from_metrics_check(struct rt6_info
*rt
)
1369 dst_metrics_ptr(&rt
->dst
) != dst_metrics_ptr(rt
->dst
.from
))
1370 dst_init_metrics(&rt
->dst
, dst_metrics_ptr(rt
->dst
.from
), true);
1373 static struct dst_entry
*rt6_check(struct rt6_info
*rt
, u32 cookie
)
1377 if (!rt6_get_cookie_safe(rt
, &rt_cookie
) || rt_cookie
!= cookie
)
1380 if (rt6_check_expired(rt
))
1386 static struct dst_entry
*rt6_dst_from_check(struct rt6_info
*rt
, u32 cookie
)
1388 if (!__rt6_check_expired(rt
) &&
1389 rt
->dst
.obsolete
== DST_OBSOLETE_FORCE_CHK
&&
1390 rt6_check((struct rt6_info
*)(rt
->dst
.from
), cookie
))
1396 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
)
1398 struct rt6_info
*rt
;
1400 rt
= (struct rt6_info
*) dst
;
1402 /* All IPV6 dsts are created with ->obsolete set to the value
1403 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1404 * into this function always.
1407 rt6_dst_from_metrics_check(rt
);
1409 if (rt
->rt6i_flags
& RTF_PCPU
||
1410 (unlikely(!list_empty(&rt
->rt6i_uncached
)) && rt
->dst
.from
))
1411 return rt6_dst_from_check(rt
, cookie
);
1413 return rt6_check(rt
, cookie
);
1416 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*dst
)
1418 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
1421 if (rt
->rt6i_flags
& RTF_CACHE
) {
1422 if (rt6_check_expired(rt
)) {
1434 static void ip6_link_failure(struct sk_buff
*skb
)
1436 struct rt6_info
*rt
;
1438 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, ICMPV6_ADDR_UNREACH
, 0);
1440 rt
= (struct rt6_info
*) skb_dst(skb
);
1442 if (rt
->rt6i_flags
& RTF_CACHE
) {
1443 if (dst_hold_safe(&rt
->dst
))
1446 struct fib6_node
*fn
;
1449 fn
= rcu_dereference(rt
->rt6i_node
);
1450 if (fn
&& (rt
->rt6i_flags
& RTF_DEFAULT
))
1457 static void rt6_do_update_pmtu(struct rt6_info
*rt
, u32 mtu
)
1459 struct net
*net
= dev_net(rt
->dst
.dev
);
1461 rt
->rt6i_flags
|= RTF_MODIFIED
;
1462 rt
->rt6i_pmtu
= mtu
;
1463 rt6_update_expires(rt
, net
->ipv6
.sysctl
.ip6_rt_mtu_expires
);
1466 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info
*rt
)
1468 return !(rt
->rt6i_flags
& RTF_CACHE
) &&
1469 (rt
->rt6i_flags
& RTF_PCPU
||
1470 rcu_access_pointer(rt
->rt6i_node
));
1473 static void __ip6_rt_update_pmtu(struct dst_entry
*dst
, const struct sock
*sk
,
1474 const struct ipv6hdr
*iph
, u32 mtu
)
1476 const struct in6_addr
*daddr
, *saddr
;
1477 struct rt6_info
*rt6
= (struct rt6_info
*)dst
;
1479 if (dst_metric_locked(dst
, RTAX_MTU
))
1483 daddr
= &iph
->daddr
;
1484 saddr
= &iph
->saddr
;
1486 daddr
= &sk
->sk_v6_daddr
;
1487 saddr
= &inet6_sk(sk
)->saddr
;
1492 dst_confirm_neigh(dst
, daddr
);
1493 mtu
= max_t(u32
, mtu
, IPV6_MIN_MTU
);
1494 if (mtu
>= dst_mtu(dst
))
1497 if (!rt6_cache_allowed_for_pmtu(rt6
)) {
1498 rt6_do_update_pmtu(rt6
, mtu
);
1500 struct rt6_info
*nrt6
;
1502 nrt6
= ip6_rt_cache_alloc(rt6
, daddr
, saddr
);
1504 rt6_do_update_pmtu(nrt6
, mtu
);
1506 /* ip6_ins_rt(nrt6) will bump the
1507 * rt6->rt6i_node->fn_sernum
1508 * which will fail the next rt6_check() and
1509 * invalidate the sk->sk_dst_cache.
1512 /* Release the reference taken in
1513 * ip6_rt_cache_alloc()
1515 dst_release(&nrt6
->dst
);
1520 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, struct sock
*sk
,
1521 struct sk_buff
*skb
, u32 mtu
)
1523 __ip6_rt_update_pmtu(dst
, sk
, skb
? ipv6_hdr(skb
) : NULL
, mtu
);
1526 void ip6_update_pmtu(struct sk_buff
*skb
, struct net
*net
, __be32 mtu
,
1527 int oif
, u32 mark
, kuid_t uid
)
1529 const struct ipv6hdr
*iph
= (struct ipv6hdr
*) skb
->data
;
1530 struct dst_entry
*dst
;
1533 memset(&fl6
, 0, sizeof(fl6
));
1534 fl6
.flowi6_oif
= oif
;
1535 fl6
.flowi6_mark
= mark
? mark
: IP6_REPLY_MARK(net
, skb
->mark
);
1536 fl6
.daddr
= iph
->daddr
;
1537 fl6
.saddr
= iph
->saddr
;
1538 fl6
.flowlabel
= ip6_flowinfo(iph
);
1539 fl6
.flowi6_uid
= uid
;
1541 dst
= ip6_route_output(net
, NULL
, &fl6
);
1543 __ip6_rt_update_pmtu(dst
, NULL
, iph
, ntohl(mtu
));
1546 EXPORT_SYMBOL_GPL(ip6_update_pmtu
);
1548 void ip6_sk_update_pmtu(struct sk_buff
*skb
, struct sock
*sk
, __be32 mtu
)
1550 int oif
= sk
->sk_bound_dev_if
;
1551 struct dst_entry
*dst
;
1553 if (!oif
&& skb
->dev
)
1554 oif
= l3mdev_master_ifindex(skb
->dev
);
1556 ip6_update_pmtu(skb
, sock_net(sk
), mtu
, oif
, sk
->sk_mark
, sk
->sk_uid
);
1558 dst
= __sk_dst_get(sk
);
1559 if (!dst
|| !dst
->obsolete
||
1560 dst
->ops
->check(dst
, inet6_sk(sk
)->dst_cookie
))
1564 if (!sock_owned_by_user(sk
) && !ipv6_addr_v4mapped(&sk
->sk_v6_daddr
))
1565 ip6_datagram_dst_update(sk
, false);
1568 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu
);
1570 /* Handle redirects */
1571 struct ip6rd_flowi
{
1573 struct in6_addr gateway
;
1576 static struct rt6_info
*__ip6_route_redirect(struct net
*net
,
1577 struct fib6_table
*table
,
1581 struct ip6rd_flowi
*rdfl
= (struct ip6rd_flowi
*)fl6
;
1582 struct rt6_info
*rt
;
1583 struct fib6_node
*fn
;
1585 /* Get the "current" route for this destination and
1586 * check if the redirect has come from appropriate router.
1588 * RFC 4861 specifies that redirects should only be
1589 * accepted if they come from the nexthop to the target.
1590 * Due to the way the routes are chosen, this notion
1591 * is a bit fuzzy and one might need to check all possible
1595 read_lock_bh(&table
->tb6_lock
);
1596 fn
= fib6_lookup(&table
->tb6_root
, &fl6
->daddr
, &fl6
->saddr
);
1598 for (rt
= fn
->leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1599 if (rt6_check_expired(rt
))
1603 if (!(rt
->rt6i_flags
& RTF_GATEWAY
))
1605 if (fl6
->flowi6_oif
!= rt
->dst
.dev
->ifindex
)
1607 if (!ipv6_addr_equal(&rdfl
->gateway
, &rt
->rt6i_gateway
))
1613 rt
= net
->ipv6
.ip6_null_entry
;
1614 else if (rt
->dst
.error
) {
1615 rt
= net
->ipv6
.ip6_null_entry
;
1619 if (rt
== net
->ipv6
.ip6_null_entry
) {
1620 fn
= fib6_backtrack(fn
, &fl6
->saddr
);
1628 read_unlock_bh(&table
->tb6_lock
);
1630 trace_fib6_table_lookup(net
, rt
, table
->tb6_id
, fl6
);
1634 static struct dst_entry
*ip6_route_redirect(struct net
*net
,
1635 const struct flowi6
*fl6
,
1636 const struct in6_addr
*gateway
)
1638 int flags
= RT6_LOOKUP_F_HAS_SADDR
;
1639 struct ip6rd_flowi rdfl
;
1642 rdfl
.gateway
= *gateway
;
1644 return fib6_rule_lookup(net
, &rdfl
.fl6
,
1645 flags
, __ip6_route_redirect
);
1648 void ip6_redirect(struct sk_buff
*skb
, struct net
*net
, int oif
, u32 mark
,
1651 const struct ipv6hdr
*iph
= (struct ipv6hdr
*) skb
->data
;
1652 struct dst_entry
*dst
;
1655 memset(&fl6
, 0, sizeof(fl6
));
1656 fl6
.flowi6_iif
= LOOPBACK_IFINDEX
;
1657 fl6
.flowi6_oif
= oif
;
1658 fl6
.flowi6_mark
= mark
;
1659 fl6
.daddr
= iph
->daddr
;
1660 fl6
.saddr
= iph
->saddr
;
1661 fl6
.flowlabel
= ip6_flowinfo(iph
);
1662 fl6
.flowi6_uid
= uid
;
1664 dst
= ip6_route_redirect(net
, &fl6
, &ipv6_hdr(skb
)->saddr
);
1665 rt6_do_redirect(dst
, NULL
, skb
);
1668 EXPORT_SYMBOL_GPL(ip6_redirect
);
1670 void ip6_redirect_no_header(struct sk_buff
*skb
, struct net
*net
, int oif
,
1673 const struct ipv6hdr
*iph
= ipv6_hdr(skb
);
1674 const struct rd_msg
*msg
= (struct rd_msg
*)icmp6_hdr(skb
);
1675 struct dst_entry
*dst
;
1678 memset(&fl6
, 0, sizeof(fl6
));
1679 fl6
.flowi6_iif
= LOOPBACK_IFINDEX
;
1680 fl6
.flowi6_oif
= oif
;
1681 fl6
.flowi6_mark
= mark
;
1682 fl6
.daddr
= msg
->dest
;
1683 fl6
.saddr
= iph
->daddr
;
1684 fl6
.flowi6_uid
= sock_net_uid(net
, NULL
);
1686 dst
= ip6_route_redirect(net
, &fl6
, &iph
->saddr
);
1687 rt6_do_redirect(dst
, NULL
, skb
);
1691 void ip6_sk_redirect(struct sk_buff
*skb
, struct sock
*sk
)
1693 ip6_redirect(skb
, sock_net(sk
), sk
->sk_bound_dev_if
, sk
->sk_mark
,
1696 EXPORT_SYMBOL_GPL(ip6_sk_redirect
);
1698 static unsigned int ip6_default_advmss(const struct dst_entry
*dst
)
1700 struct net_device
*dev
= dst
->dev
;
1701 unsigned int mtu
= dst_mtu(dst
);
1702 struct net
*net
= dev_net(dev
);
1704 mtu
-= sizeof(struct ipv6hdr
) + sizeof(struct tcphdr
);
1706 if (mtu
< net
->ipv6
.sysctl
.ip6_rt_min_advmss
)
1707 mtu
= net
->ipv6
.sysctl
.ip6_rt_min_advmss
;
1710 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1711 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1712 * IPV6_MAXPLEN is also valid and means: "any MSS,
1713 * rely only on pmtu discovery"
1715 if (mtu
> IPV6_MAXPLEN
- sizeof(struct tcphdr
))
1720 static unsigned int ip6_mtu(const struct dst_entry
*dst
)
1722 const struct rt6_info
*rt
= (const struct rt6_info
*)dst
;
1723 unsigned int mtu
= rt
->rt6i_pmtu
;
1724 struct inet6_dev
*idev
;
1729 mtu
= dst_metric_raw(dst
, RTAX_MTU
);
1736 idev
= __in6_dev_get(dst
->dev
);
1738 mtu
= idev
->cnf
.mtu6
;
1742 mtu
= min_t(unsigned int, mtu
, IP6_MAX_MTU
);
1744 return mtu
- lwtunnel_headroom(dst
->lwtstate
, mtu
);
1747 struct dst_entry
*icmp6_dst_alloc(struct net_device
*dev
,
1750 struct dst_entry
*dst
;
1751 struct rt6_info
*rt
;
1752 struct inet6_dev
*idev
= in6_dev_get(dev
);
1753 struct net
*net
= dev_net(dev
);
1755 if (unlikely(!idev
))
1756 return ERR_PTR(-ENODEV
);
1758 rt
= ip6_dst_alloc(net
, dev
, 0);
1759 if (unlikely(!rt
)) {
1761 dst
= ERR_PTR(-ENOMEM
);
1765 rt
->dst
.flags
|= DST_HOST
;
1766 rt
->dst
.input
= ip6_input
;
1767 rt
->dst
.output
= ip6_output
;
1768 rt
->rt6i_gateway
= fl6
->daddr
;
1769 rt
->rt6i_dst
.addr
= fl6
->daddr
;
1770 rt
->rt6i_dst
.plen
= 128;
1771 rt
->rt6i_idev
= idev
;
1772 dst_metric_set(&rt
->dst
, RTAX_HOPLIMIT
, 0);
1774 /* Add this dst into uncached_list so that rt6_ifdown() can
1775 * do proper release of the net_device
1777 rt6_uncached_list_add(rt
);
1779 dst
= xfrm_lookup(net
, &rt
->dst
, flowi6_to_flowi(fl6
), NULL
, 0);
1785 static int ip6_dst_gc(struct dst_ops
*ops
)
1787 struct net
*net
= container_of(ops
, struct net
, ipv6
.ip6_dst_ops
);
1788 int rt_min_interval
= net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
1789 int rt_max_size
= net
->ipv6
.sysctl
.ip6_rt_max_size
;
1790 int rt_elasticity
= net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
1791 int rt_gc_timeout
= net
->ipv6
.sysctl
.ip6_rt_gc_timeout
;
1792 unsigned long rt_last_gc
= net
->ipv6
.ip6_rt_last_gc
;
1795 entries
= dst_entries_get_fast(ops
);
1796 if (time_after(rt_last_gc
+ rt_min_interval
, jiffies
) &&
1797 entries
<= rt_max_size
)
1800 net
->ipv6
.ip6_rt_gc_expire
++;
1801 fib6_run_gc(net
->ipv6
.ip6_rt_gc_expire
, net
, true);
1802 entries
= dst_entries_get_slow(ops
);
1803 if (entries
< ops
->gc_thresh
)
1804 net
->ipv6
.ip6_rt_gc_expire
= rt_gc_timeout
>>1;
1806 net
->ipv6
.ip6_rt_gc_expire
-= net
->ipv6
.ip6_rt_gc_expire
>>rt_elasticity
;
1807 return entries
> rt_max_size
;
1810 static int ip6_convert_metrics(struct mx6_config
*mxc
,
1811 const struct fib6_config
*cfg
)
1813 bool ecn_ca
= false;
1821 mp
= kzalloc(sizeof(u32
) * RTAX_MAX
, GFP_KERNEL
);
1825 nla_for_each_attr(nla
, cfg
->fc_mx
, cfg
->fc_mx_len
, remaining
) {
1826 int type
= nla_type(nla
);
1831 if (unlikely(type
> RTAX_MAX
))
1834 if (type
== RTAX_CC_ALGO
) {
1835 char tmp
[TCP_CA_NAME_MAX
];
1837 nla_strlcpy(tmp
, nla
, sizeof(tmp
));
1838 val
= tcp_ca_get_key_by_name(tmp
, &ecn_ca
);
1839 if (val
== TCP_CA_UNSPEC
)
1842 val
= nla_get_u32(nla
);
1844 if (type
== RTAX_HOPLIMIT
&& val
> 255)
1846 if (type
== RTAX_FEATURES
&& (val
& ~RTAX_FEATURE_MASK
))
1850 __set_bit(type
- 1, mxc
->mx_valid
);
1854 __set_bit(RTAX_FEATURES
- 1, mxc
->mx_valid
);
1855 mp
[RTAX_FEATURES
- 1] |= DST_FEATURE_ECN_CA
;
1865 static struct rt6_info
*ip6_nh_lookup_table(struct net
*net
,
1866 struct fib6_config
*cfg
,
1867 const struct in6_addr
*gw_addr
)
1869 struct flowi6 fl6
= {
1870 .flowi6_oif
= cfg
->fc_ifindex
,
1872 .saddr
= cfg
->fc_prefsrc
,
1874 struct fib6_table
*table
;
1875 struct rt6_info
*rt
;
1876 int flags
= RT6_LOOKUP_F_IFACE
| RT6_LOOKUP_F_IGNORE_LINKSTATE
;
1878 table
= fib6_get_table(net
, cfg
->fc_table
);
1882 if (!ipv6_addr_any(&cfg
->fc_prefsrc
))
1883 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
1885 rt
= ip6_pol_route(net
, table
, cfg
->fc_ifindex
, &fl6
, flags
);
1887 /* if table lookup failed, fall back to full lookup */
1888 if (rt
== net
->ipv6
.ip6_null_entry
) {
1896 static struct rt6_info
*ip6_route_info_create(struct fib6_config
*cfg
,
1897 struct netlink_ext_ack
*extack
)
1899 struct net
*net
= cfg
->fc_nlinfo
.nl_net
;
1900 struct rt6_info
*rt
= NULL
;
1901 struct net_device
*dev
= NULL
;
1902 struct inet6_dev
*idev
= NULL
;
1903 struct fib6_table
*table
;
1907 /* RTF_PCPU is an internal flag; can not be set by userspace */
1908 if (cfg
->fc_flags
& RTF_PCPU
) {
1909 NL_SET_ERR_MSG(extack
, "Userspace can not set RTF_PCPU");
1913 if (cfg
->fc_dst_len
> 128) {
1914 NL_SET_ERR_MSG(extack
, "Invalid prefix length");
1917 if (cfg
->fc_src_len
> 128) {
1918 NL_SET_ERR_MSG(extack
, "Invalid source address length");
1921 #ifndef CONFIG_IPV6_SUBTREES
1922 if (cfg
->fc_src_len
) {
1923 NL_SET_ERR_MSG(extack
,
1924 "Specifying source address requires IPV6_SUBTREES to be enabled");
1928 if (cfg
->fc_ifindex
) {
1930 dev
= dev_get_by_index(net
, cfg
->fc_ifindex
);
1933 idev
= in6_dev_get(dev
);
1938 if (cfg
->fc_metric
== 0)
1939 cfg
->fc_metric
= IP6_RT_PRIO_USER
;
1942 if (cfg
->fc_nlinfo
.nlh
&&
1943 !(cfg
->fc_nlinfo
.nlh
->nlmsg_flags
& NLM_F_CREATE
)) {
1944 table
= fib6_get_table(net
, cfg
->fc_table
);
1946 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1947 table
= fib6_new_table(net
, cfg
->fc_table
);
1950 table
= fib6_new_table(net
, cfg
->fc_table
);
1956 rt
= ip6_dst_alloc(net
, NULL
,
1957 (cfg
->fc_flags
& RTF_ADDRCONF
) ? 0 : DST_NOCOUNT
);
1964 if (cfg
->fc_flags
& RTF_EXPIRES
)
1965 rt6_set_expires(rt
, jiffies
+
1966 clock_t_to_jiffies(cfg
->fc_expires
));
1968 rt6_clean_expires(rt
);
1970 if (cfg
->fc_protocol
== RTPROT_UNSPEC
)
1971 cfg
->fc_protocol
= RTPROT_BOOT
;
1972 rt
->rt6i_protocol
= cfg
->fc_protocol
;
1974 addr_type
= ipv6_addr_type(&cfg
->fc_dst
);
1976 if (addr_type
& IPV6_ADDR_MULTICAST
)
1977 rt
->dst
.input
= ip6_mc_input
;
1978 else if (cfg
->fc_flags
& RTF_LOCAL
)
1979 rt
->dst
.input
= ip6_input
;
1981 rt
->dst
.input
= ip6_forward
;
1983 rt
->dst
.output
= ip6_output
;
1985 if (cfg
->fc_encap
) {
1986 struct lwtunnel_state
*lwtstate
;
1988 err
= lwtunnel_build_state(cfg
->fc_encap_type
,
1989 cfg
->fc_encap
, AF_INET6
, cfg
,
1993 rt
->dst
.lwtstate
= lwtstate_get(lwtstate
);
1994 if (lwtunnel_output_redirect(rt
->dst
.lwtstate
)) {
1995 rt
->dst
.lwtstate
->orig_output
= rt
->dst
.output
;
1996 rt
->dst
.output
= lwtunnel_output
;
1998 if (lwtunnel_input_redirect(rt
->dst
.lwtstate
)) {
1999 rt
->dst
.lwtstate
->orig_input
= rt
->dst
.input
;
2000 rt
->dst
.input
= lwtunnel_input
;
2004 ipv6_addr_prefix(&rt
->rt6i_dst
.addr
, &cfg
->fc_dst
, cfg
->fc_dst_len
);
2005 rt
->rt6i_dst
.plen
= cfg
->fc_dst_len
;
2006 if (rt
->rt6i_dst
.plen
== 128)
2007 rt
->dst
.flags
|= DST_HOST
;
2009 #ifdef CONFIG_IPV6_SUBTREES
2010 ipv6_addr_prefix(&rt
->rt6i_src
.addr
, &cfg
->fc_src
, cfg
->fc_src_len
);
2011 rt
->rt6i_src
.plen
= cfg
->fc_src_len
;
2014 rt
->rt6i_metric
= cfg
->fc_metric
;
2016 /* We cannot add true routes via loopback here,
2017 they would result in kernel looping; promote them to reject routes
2019 if ((cfg
->fc_flags
& RTF_REJECT
) ||
2020 (dev
&& (dev
->flags
& IFF_LOOPBACK
) &&
2021 !(addr_type
& IPV6_ADDR_LOOPBACK
) &&
2022 !(cfg
->fc_flags
& RTF_LOCAL
))) {
2023 /* hold loopback dev/idev if we haven't done so. */
2024 if (dev
!= net
->loopback_dev
) {
2029 dev
= net
->loopback_dev
;
2031 idev
= in6_dev_get(dev
);
2037 rt
->rt6i_flags
= RTF_REJECT
|RTF_NONEXTHOP
;
2038 switch (cfg
->fc_type
) {
2040 rt
->dst
.error
= -EINVAL
;
2041 rt
->dst
.output
= dst_discard_out
;
2042 rt
->dst
.input
= dst_discard
;
2045 rt
->dst
.error
= -EACCES
;
2046 rt
->dst
.output
= ip6_pkt_prohibit_out
;
2047 rt
->dst
.input
= ip6_pkt_prohibit
;
2050 case RTN_UNREACHABLE
:
2052 rt
->dst
.error
= (cfg
->fc_type
== RTN_THROW
) ? -EAGAIN
2053 : (cfg
->fc_type
== RTN_UNREACHABLE
)
2054 ? -EHOSTUNREACH
: -ENETUNREACH
;
2055 rt
->dst
.output
= ip6_pkt_discard_out
;
2056 rt
->dst
.input
= ip6_pkt_discard
;
2062 if (cfg
->fc_flags
& RTF_GATEWAY
) {
2063 const struct in6_addr
*gw_addr
;
2066 gw_addr
= &cfg
->fc_gateway
;
2067 gwa_type
= ipv6_addr_type(gw_addr
);
2069 /* if gw_addr is local we will fail to detect this in case
2070 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2071 * will return already-added prefix route via interface that
2072 * prefix route was assigned to, which might be non-loopback.
2075 if (ipv6_chk_addr_and_flags(net
, gw_addr
,
2076 gwa_type
& IPV6_ADDR_LINKLOCAL
?
2077 dev
: NULL
, 0, 0)) {
2078 NL_SET_ERR_MSG(extack
, "Invalid gateway address");
2081 rt
->rt6i_gateway
= *gw_addr
;
2083 if (gwa_type
!= (IPV6_ADDR_LINKLOCAL
|IPV6_ADDR_UNICAST
)) {
2084 struct rt6_info
*grt
= NULL
;
2086 /* IPv6 strictly inhibits using not link-local
2087 addresses as nexthop address.
2088 Otherwise, router will not able to send redirects.
2089 It is very good, but in some (rare!) circumstances
2090 (SIT, PtP, NBMA NOARP links) it is handy to allow
2091 some exceptions. --ANK
2092 We allow IPv4-mapped nexthops to support RFC4798-type
2095 if (!(gwa_type
& (IPV6_ADDR_UNICAST
|
2096 IPV6_ADDR_MAPPED
))) {
2097 NL_SET_ERR_MSG(extack
,
2098 "Invalid gateway address");
2102 if (cfg
->fc_table
) {
2103 grt
= ip6_nh_lookup_table(net
, cfg
, gw_addr
);
2106 if (grt
->rt6i_flags
& RTF_GATEWAY
||
2107 (dev
&& dev
!= grt
->dst
.dev
)) {
2115 grt
= rt6_lookup(net
, gw_addr
, NULL
,
2116 cfg
->fc_ifindex
, 1);
2118 err
= -EHOSTUNREACH
;
2122 if (dev
!= grt
->dst
.dev
) {
2128 idev
= grt
->rt6i_idev
;
2130 in6_dev_hold(grt
->rt6i_idev
);
2132 if (!(grt
->rt6i_flags
& RTF_GATEWAY
))
2141 NL_SET_ERR_MSG(extack
, "Egress device not specified");
2143 } else if (dev
->flags
& IFF_LOOPBACK
) {
2144 NL_SET_ERR_MSG(extack
,
2145 "Egress device can not be loopback device for this route");
2154 if (!ipv6_addr_any(&cfg
->fc_prefsrc
)) {
2155 if (!ipv6_chk_addr(net
, &cfg
->fc_prefsrc
, dev
, 0)) {
2156 NL_SET_ERR_MSG(extack
, "Invalid source address");
2160 rt
->rt6i_prefsrc
.addr
= cfg
->fc_prefsrc
;
2161 rt
->rt6i_prefsrc
.plen
= 128;
2163 rt
->rt6i_prefsrc
.plen
= 0;
2165 rt
->rt6i_flags
= cfg
->fc_flags
;
2169 rt
->rt6i_idev
= idev
;
2170 rt
->rt6i_table
= table
;
2172 cfg
->fc_nlinfo
.nl_net
= dev_net(dev
);
2181 dst_release_immediate(&rt
->dst
);
2183 return ERR_PTR(err
);
2186 int ip6_route_add(struct fib6_config
*cfg
,
2187 struct netlink_ext_ack
*extack
)
2189 struct mx6_config mxc
= { .mx
= NULL
, };
2190 struct rt6_info
*rt
;
2193 rt
= ip6_route_info_create(cfg
, extack
);
2200 err
= ip6_convert_metrics(&mxc
, cfg
);
2204 err
= __ip6_ins_rt(rt
, &cfg
->fc_nlinfo
, &mxc
, extack
);
2211 dst_release_immediate(&rt
->dst
);
2216 static int __ip6_del_rt(struct rt6_info
*rt
, struct nl_info
*info
)
2219 struct fib6_table
*table
;
2220 struct net
*net
= dev_net(rt
->dst
.dev
);
2222 if (rt
== net
->ipv6
.ip6_null_entry
) {
2227 table
= rt
->rt6i_table
;
2228 write_lock_bh(&table
->tb6_lock
);
2229 err
= fib6_del(rt
, info
);
2230 write_unlock_bh(&table
->tb6_lock
);
2237 int ip6_del_rt(struct rt6_info
*rt
)
2239 struct nl_info info
= {
2240 .nl_net
= dev_net(rt
->dst
.dev
),
2242 return __ip6_del_rt(rt
, &info
);
2245 static int __ip6_del_rt_siblings(struct rt6_info
*rt
, struct fib6_config
*cfg
)
2247 struct nl_info
*info
= &cfg
->fc_nlinfo
;
2248 struct net
*net
= info
->nl_net
;
2249 struct sk_buff
*skb
= NULL
;
2250 struct fib6_table
*table
;
2253 if (rt
== net
->ipv6
.ip6_null_entry
)
2255 table
= rt
->rt6i_table
;
2256 write_lock_bh(&table
->tb6_lock
);
2258 if (rt
->rt6i_nsiblings
&& cfg
->fc_delete_all_nh
) {
2259 struct rt6_info
*sibling
, *next_sibling
;
2261 /* prefer to send a single notification with all hops */
2262 skb
= nlmsg_new(rt6_nlmsg_size(rt
), gfp_any());
2264 u32 seq
= info
->nlh
? info
->nlh
->nlmsg_seq
: 0;
2266 if (rt6_fill_node(net
, skb
, rt
,
2267 NULL
, NULL
, 0, RTM_DELROUTE
,
2268 info
->portid
, seq
, 0) < 0) {
2272 info
->skip_notify
= 1;
2275 list_for_each_entry_safe(sibling
, next_sibling
,
2278 err
= fib6_del(sibling
, info
);
2284 err
= fib6_del(rt
, info
);
2286 write_unlock_bh(&table
->tb6_lock
);
2291 rtnl_notify(skb
, net
, info
->portid
, RTNLGRP_IPV6_ROUTE
,
2292 info
->nlh
, gfp_any());
2297 static int ip6_route_del(struct fib6_config
*cfg
,
2298 struct netlink_ext_ack
*extack
)
2300 struct fib6_table
*table
;
2301 struct fib6_node
*fn
;
2302 struct rt6_info
*rt
;
2305 table
= fib6_get_table(cfg
->fc_nlinfo
.nl_net
, cfg
->fc_table
);
2307 NL_SET_ERR_MSG(extack
, "FIB table does not exist");
2311 read_lock_bh(&table
->tb6_lock
);
2313 fn
= fib6_locate(&table
->tb6_root
,
2314 &cfg
->fc_dst
, cfg
->fc_dst_len
,
2315 &cfg
->fc_src
, cfg
->fc_src_len
);
2318 for (rt
= fn
->leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
2319 if ((rt
->rt6i_flags
& RTF_CACHE
) &&
2320 !(cfg
->fc_flags
& RTF_CACHE
))
2322 if (cfg
->fc_ifindex
&&
2324 rt
->dst
.dev
->ifindex
!= cfg
->fc_ifindex
))
2326 if (cfg
->fc_flags
& RTF_GATEWAY
&&
2327 !ipv6_addr_equal(&cfg
->fc_gateway
, &rt
->rt6i_gateway
))
2329 if (cfg
->fc_metric
&& cfg
->fc_metric
!= rt
->rt6i_metric
)
2331 if (cfg
->fc_protocol
&& cfg
->fc_protocol
!= rt
->rt6i_protocol
)
2334 read_unlock_bh(&table
->tb6_lock
);
2336 /* if gateway was specified only delete the one hop */
2337 if (cfg
->fc_flags
& RTF_GATEWAY
)
2338 return __ip6_del_rt(rt
, &cfg
->fc_nlinfo
);
2340 return __ip6_del_rt_siblings(rt
, cfg
);
2343 read_unlock_bh(&table
->tb6_lock
);
2348 static void rt6_do_redirect(struct dst_entry
*dst
, struct sock
*sk
, struct sk_buff
*skb
)
2350 struct netevent_redirect netevent
;
2351 struct rt6_info
*rt
, *nrt
= NULL
;
2352 struct ndisc_options ndopts
;
2353 struct inet6_dev
*in6_dev
;
2354 struct neighbour
*neigh
;
2356 int optlen
, on_link
;
2359 optlen
= skb_tail_pointer(skb
) - skb_transport_header(skb
);
2360 optlen
-= sizeof(*msg
);
2363 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2367 msg
= (struct rd_msg
*)icmp6_hdr(skb
);
2369 if (ipv6_addr_is_multicast(&msg
->dest
)) {
2370 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2375 if (ipv6_addr_equal(&msg
->dest
, &msg
->target
)) {
2377 } else if (ipv6_addr_type(&msg
->target
) !=
2378 (IPV6_ADDR_UNICAST
|IPV6_ADDR_LINKLOCAL
)) {
2379 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2383 in6_dev
= __in6_dev_get(skb
->dev
);
2386 if (in6_dev
->cnf
.forwarding
|| !in6_dev
->cnf
.accept_redirects
)
2390 * The IP source address of the Redirect MUST be the same as the current
2391 * first-hop router for the specified ICMP Destination Address.
2394 if (!ndisc_parse_options(skb
->dev
, msg
->opt
, optlen
, &ndopts
)) {
2395 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2400 if (ndopts
.nd_opts_tgt_lladdr
) {
2401 lladdr
= ndisc_opt_addr_data(ndopts
.nd_opts_tgt_lladdr
,
2404 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2409 rt
= (struct rt6_info
*) dst
;
2410 if (rt
->rt6i_flags
& RTF_REJECT
) {
2411 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2415 /* Redirect received -> path was valid.
2416 * Look, redirects are sent only in response to data packets,
2417 * so that this nexthop apparently is reachable. --ANK
2419 dst_confirm_neigh(&rt
->dst
, &ipv6_hdr(skb
)->saddr
);
2421 neigh
= __neigh_lookup(&nd_tbl
, &msg
->target
, skb
->dev
, 1);
2426 * We have finally decided to accept it.
2429 ndisc_update(skb
->dev
, neigh
, lladdr
, NUD_STALE
,
2430 NEIGH_UPDATE_F_WEAK_OVERRIDE
|
2431 NEIGH_UPDATE_F_OVERRIDE
|
2432 (on_link
? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER
|
2433 NEIGH_UPDATE_F_ISROUTER
)),
2434 NDISC_REDIRECT
, &ndopts
);
2436 nrt
= ip6_rt_cache_alloc(rt
, &msg
->dest
, NULL
);
2440 nrt
->rt6i_flags
= RTF_GATEWAY
|RTF_UP
|RTF_DYNAMIC
|RTF_CACHE
;
2442 nrt
->rt6i_flags
&= ~RTF_GATEWAY
;
2444 nrt
->rt6i_protocol
= RTPROT_REDIRECT
;
2445 nrt
->rt6i_gateway
= *(struct in6_addr
*)neigh
->primary_key
;
2447 if (ip6_ins_rt(nrt
))
2450 netevent
.old
= &rt
->dst
;
2451 netevent
.new = &nrt
->dst
;
2452 netevent
.daddr
= &msg
->dest
;
2453 netevent
.neigh
= neigh
;
2454 call_netevent_notifiers(NETEVENT_REDIRECT
, &netevent
);
2456 if (rt
->rt6i_flags
& RTF_CACHE
) {
2457 rt
= (struct rt6_info
*) dst_clone(&rt
->dst
);
2462 /* Release the reference taken in
2463 * ip6_rt_cache_alloc()
2465 dst_release(&nrt
->dst
);
2468 neigh_release(neigh
);
2472 * Misc support functions
2475 static void rt6_set_from(struct rt6_info
*rt
, struct rt6_info
*from
)
2477 BUG_ON(from
->dst
.from
);
2479 rt
->rt6i_flags
&= ~RTF_EXPIRES
;
2480 dst_hold(&from
->dst
);
2481 rt
->dst
.from
= &from
->dst
;
2482 dst_init_metrics(&rt
->dst
, dst_metrics_ptr(&from
->dst
), true);
2485 static void ip6_rt_copy_init(struct rt6_info
*rt
, struct rt6_info
*ort
)
2487 rt
->dst
.input
= ort
->dst
.input
;
2488 rt
->dst
.output
= ort
->dst
.output
;
2489 rt
->rt6i_dst
= ort
->rt6i_dst
;
2490 rt
->dst
.error
= ort
->dst
.error
;
2491 rt
->rt6i_idev
= ort
->rt6i_idev
;
2493 in6_dev_hold(rt
->rt6i_idev
);
2494 rt
->dst
.lastuse
= jiffies
;
2495 rt
->rt6i_gateway
= ort
->rt6i_gateway
;
2496 rt
->rt6i_flags
= ort
->rt6i_flags
;
2497 rt6_set_from(rt
, ort
);
2498 rt
->rt6i_metric
= ort
->rt6i_metric
;
2499 #ifdef CONFIG_IPV6_SUBTREES
2500 rt
->rt6i_src
= ort
->rt6i_src
;
2502 rt
->rt6i_prefsrc
= ort
->rt6i_prefsrc
;
2503 rt
->rt6i_table
= ort
->rt6i_table
;
2504 rt
->dst
.lwtstate
= lwtstate_get(ort
->dst
.lwtstate
);
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Find an existing RA Route-Information route for @prefix via @gwaddr
 * on @dev; returns it held, or NULL.
 * NOTE(review): reconstructed from garbled extraction residue; this
 * tree carries the per-device addrconf_rt_table() routing patch.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev)
{
	u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO);
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, tb_id);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != dev->ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);
		break;
	}
out:
	read_unlock_bh(&table->tb6_lock);
	return rt;
}

/* Install (or refresh) an RA Route-Information route and return it.
 * A zero prefix length is treated as a default route.
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev,
					   unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= dev->ifindex,
		.fc_dst_len	= prefixlen,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
				  RTF_UP | RTF_PREF(pref),
		.fc_protocol = RTPROT_RA,
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	/* was terminated with a stray ',' (comma operator) in the
	 * original text — fixed to a proper statement.
	 */
	cfg.fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO);
	cfg.fc_dst = *prefix;
	cfg.fc_gateway = *gwaddr;

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	ip6_route_add(&cfg, NULL);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
}
#endif
2574 struct rt6_info
*rt6_get_dflt_router(const struct in6_addr
*addr
, struct net_device
*dev
)
2576 u32 tb_id
= l3mdev_fib_table(dev
) ? : addrconf_rt_table(dev
, RT6_TABLE_MAIN
);
2577 struct rt6_info
*rt
;
2578 struct fib6_table
*table
;
2580 table
= fib6_get_table(dev_net(dev
), tb_id
);
2584 read_lock_bh(&table
->tb6_lock
);
2585 for (rt
= table
->tb6_root
.leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
2586 if (dev
== rt
->dst
.dev
&&
2587 ((rt
->rt6i_flags
& (RTF_ADDRCONF
| RTF_DEFAULT
)) == (RTF_ADDRCONF
| RTF_DEFAULT
)) &&
2588 ipv6_addr_equal(&rt
->rt6i_gateway
, addr
))
2593 read_unlock_bh(&table
->tb6_lock
);
2597 struct rt6_info
*rt6_add_dflt_router(const struct in6_addr
*gwaddr
,
2598 struct net_device
*dev
,
2601 struct fib6_config cfg
= {
2602 .fc_table
= l3mdev_fib_table(dev
) ? : addrconf_rt_table(dev
, RT6_TABLE_DFLT
),
2603 .fc_metric
= IP6_RT_PRIO_USER
,
2604 .fc_ifindex
= dev
->ifindex
,
2605 .fc_flags
= RTF_GATEWAY
| RTF_ADDRCONF
| RTF_DEFAULT
|
2606 RTF_UP
| RTF_EXPIRES
| RTF_PREF(pref
),
2607 .fc_protocol
= RTPROT_RA
,
2608 .fc_nlinfo
.portid
= 0,
2609 .fc_nlinfo
.nlh
= NULL
,
2610 .fc_nlinfo
.nl_net
= dev_net(dev
),
2613 cfg
.fc_gateway
= *gwaddr
;
2615 if (!ip6_route_add(&cfg
, NULL
)) {
2616 struct fib6_table
*table
;
2618 table
= fib6_get_table(dev_net(dev
), cfg
.fc_table
);
2620 table
->flags
|= RT6_TABLE_HAS_DFLT_ROUTER
;
2623 return rt6_get_dflt_router(gwaddr
, dev
);
2626 int rt6_addrconf_purge(struct rt6_info
*rt
, void *arg
) {
2627 if (rt
->rt6i_flags
& (RTF_DEFAULT
| RTF_ADDRCONF
) &&
2628 (!rt
->rt6i_idev
|| rt
->rt6i_idev
->cnf
.accept_ra
!= 2))
2633 void rt6_purge_dflt_routers(struct net
*net
)
2635 fib6_clean_all(net
, rt6_addrconf_purge
, NULL
);
2638 static void rtmsg_to_fib6_config(struct net
*net
,
2639 struct in6_rtmsg
*rtmsg
,
2640 struct fib6_config
*cfg
)
2642 memset(cfg
, 0, sizeof(*cfg
));
2644 cfg
->fc_table
= l3mdev_fib_table_by_index(net
, rtmsg
->rtmsg_ifindex
) ?
2646 cfg
->fc_ifindex
= rtmsg
->rtmsg_ifindex
;
2647 cfg
->fc_metric
= rtmsg
->rtmsg_metric
;
2648 cfg
->fc_expires
= rtmsg
->rtmsg_info
;
2649 cfg
->fc_dst_len
= rtmsg
->rtmsg_dst_len
;
2650 cfg
->fc_src_len
= rtmsg
->rtmsg_src_len
;
2651 cfg
->fc_flags
= rtmsg
->rtmsg_flags
;
2653 cfg
->fc_nlinfo
.nl_net
= net
;
2655 cfg
->fc_dst
= rtmsg
->rtmsg_dst
;
2656 cfg
->fc_src
= rtmsg
->rtmsg_src
;
2657 cfg
->fc_gateway
= rtmsg
->rtmsg_gateway
;
2660 int ipv6_route_ioctl(struct net
*net
, unsigned int cmd
, void __user
*arg
)
2662 struct fib6_config cfg
;
2663 struct in6_rtmsg rtmsg
;
2667 case SIOCADDRT
: /* Add a route */
2668 case SIOCDELRT
: /* Delete a route */
2669 if (!ns_capable(net
->user_ns
, CAP_NET_ADMIN
))
2671 err
= copy_from_user(&rtmsg
, arg
,
2672 sizeof(struct in6_rtmsg
));
2676 rtmsg_to_fib6_config(net
, &rtmsg
, &cfg
);
2681 err
= ip6_route_add(&cfg
, NULL
);
2684 err
= ip6_route_del(&cfg
, NULL
);
2698 * Drop the packet on the floor
2701 static int ip6_pkt_drop(struct sk_buff
*skb
, u8 code
, int ipstats_mib_noroutes
)
2704 struct dst_entry
*dst
= skb_dst(skb
);
2705 switch (ipstats_mib_noroutes
) {
2706 case IPSTATS_MIB_INNOROUTES
:
2707 type
= ipv6_addr_type(&ipv6_hdr(skb
)->daddr
);
2708 if (type
== IPV6_ADDR_ANY
) {
2709 IP6_INC_STATS(dev_net(dst
->dev
), ip6_dst_idev(dst
),
2710 IPSTATS_MIB_INADDRERRORS
);
2714 case IPSTATS_MIB_OUTNOROUTES
:
2715 IP6_INC_STATS(dev_net(dst
->dev
), ip6_dst_idev(dst
),
2716 ipstats_mib_noroutes
);
2719 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, code
, 0);
2724 static int ip6_pkt_discard(struct sk_buff
*skb
)
2726 return ip6_pkt_drop(skb
, ICMPV6_NOROUTE
, IPSTATS_MIB_INNOROUTES
);
2729 static int ip6_pkt_discard_out(struct net
*net
, struct sock
*sk
, struct sk_buff
*skb
)
2731 skb
->dev
= skb_dst(skb
)->dev
;
2732 return ip6_pkt_drop(skb
, ICMPV6_NOROUTE
, IPSTATS_MIB_OUTNOROUTES
);
2735 static int ip6_pkt_prohibit(struct sk_buff
*skb
)
2737 return ip6_pkt_drop(skb
, ICMPV6_ADM_PROHIBITED
, IPSTATS_MIB_INNOROUTES
);
2740 static int ip6_pkt_prohibit_out(struct net
*net
, struct sock
*sk
, struct sk_buff
*skb
)
2742 skb
->dev
= skb_dst(skb
)->dev
;
2743 return ip6_pkt_drop(skb
, ICMPV6_ADM_PROHIBITED
, IPSTATS_MIB_OUTNOROUTES
);
2747 * Allocate a dst for local (unicast / anycast) address.
2750 struct rt6_info
*addrconf_dst_alloc(struct inet6_dev
*idev
,
2751 const struct in6_addr
*addr
,
2755 struct net
*net
= dev_net(idev
->dev
);
2756 struct net_device
*dev
= idev
->dev
;
2757 struct rt6_info
*rt
;
2759 rt
= ip6_dst_alloc(net
, dev
, DST_NOCOUNT
);
2761 return ERR_PTR(-ENOMEM
);
2765 rt
->dst
.flags
|= DST_HOST
;
2766 rt
->dst
.input
= ip6_input
;
2767 rt
->dst
.output
= ip6_output
;
2768 rt
->rt6i_idev
= idev
;
2770 rt
->rt6i_protocol
= RTPROT_KERNEL
;
2771 rt
->rt6i_flags
= RTF_UP
| RTF_NONEXTHOP
;
2773 rt
->rt6i_flags
|= RTF_ANYCAST
;
2775 rt
->rt6i_flags
|= RTF_LOCAL
;
2777 rt
->rt6i_gateway
= *addr
;
2778 rt
->rt6i_dst
.addr
= *addr
;
2779 rt
->rt6i_dst
.plen
= 128;
2780 tb_id
= l3mdev_fib_table(idev
->dev
) ? : RT6_TABLE_LOCAL
;
2781 rt
->rt6i_table
= fib6_get_table(net
, tb_id
);
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict to this device; NULL = any */
	struct net *net;	/* namespace whose null entry must be skipped */
	struct in6_addr *addr;	/* preferred source address being removed */
};
2793 static int fib6_remove_prefsrc(struct rt6_info
*rt
, void *arg
)
2795 struct net_device
*dev
= ((struct arg_dev_net_ip
*)arg
)->dev
;
2796 struct net
*net
= ((struct arg_dev_net_ip
*)arg
)->net
;
2797 struct in6_addr
*addr
= ((struct arg_dev_net_ip
*)arg
)->addr
;
2799 if (((void *)rt
->dst
.dev
== dev
|| !dev
) &&
2800 rt
!= net
->ipv6
.ip6_null_entry
&&
2801 ipv6_addr_equal(addr
, &rt
->rt6i_prefsrc
.addr
)) {
2802 /* remove prefsrc entry */
2803 rt
->rt6i_prefsrc
.plen
= 0;
2808 void rt6_remove_prefsrc(struct inet6_ifaddr
*ifp
)
2810 struct net
*net
= dev_net(ifp
->idev
->dev
);
2811 struct arg_dev_net_ip adni
= {
2812 .dev
= ifp
->idev
->dev
,
2816 fib6_clean_all(net
, fib6_remove_prefsrc
, &adni
);
2819 #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2820 #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2822 /* Remove routers and update dst entries when gateway turn into host. */
2823 static int fib6_clean_tohost(struct rt6_info
*rt
, void *arg
)
2825 struct in6_addr
*gateway
= (struct in6_addr
*)arg
;
2827 if ((((rt
->rt6i_flags
& RTF_RA_ROUTER
) == RTF_RA_ROUTER
) ||
2828 ((rt
->rt6i_flags
& RTF_CACHE_GATEWAY
) == RTF_CACHE_GATEWAY
)) &&
2829 ipv6_addr_equal(gateway
, &rt
->rt6i_gateway
)) {
2835 void rt6_clean_tohost(struct net
*net
, struct in6_addr
*gateway
)
2837 fib6_clean_all(net
, fib6_clean_tohost
, gateway
);
/* argument bundle for fib6_ifdown() */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL = all devices */
	struct net *net;	/* namespace whose null entry must survive */
};
2845 /* called with write lock held for table with rt */
2846 static int fib6_ifdown(struct rt6_info
*rt
, void *arg
)
2848 const struct arg_dev_net
*adn
= arg
;
2849 const struct net_device
*dev
= adn
->dev
;
2851 if ((rt
->dst
.dev
== dev
|| !dev
) &&
2852 rt
!= adn
->net
->ipv6
.ip6_null_entry
&&
2853 (rt
->rt6i_nsiblings
== 0 ||
2854 (dev
&& netdev_unregistering(dev
)) ||
2855 !rt
->rt6i_idev
->cnf
.ignore_routes_with_linkdown
))
2861 void rt6_ifdown(struct net
*net
, struct net_device
*dev
)
2863 struct arg_dev_net adn
= {
2868 fib6_clean_all(net
, fib6_ifdown
, &adn
);
2870 rt6_uncached_list_flush_dev(net
, dev
);
/* argument bundle for rt6_mtu_change_route() */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* new device MTU */
};
2878 static int rt6_mtu_change_route(struct rt6_info
*rt
, void *p_arg
)
2880 struct rt6_mtu_change_arg
*arg
= (struct rt6_mtu_change_arg
*) p_arg
;
2881 struct inet6_dev
*idev
;
2883 /* In IPv6 pmtu discovery is not optional,
2884 so that RTAX_MTU lock cannot disable it.
2885 We still use this lock to block changes
2886 caused by addrconf/ndisc.
2889 idev
= __in6_dev_get(arg
->dev
);
2893 /* For administrative MTU increase, there is no way to discover
2894 IPv6 PMTU increase, so PMTU increase should be updated here.
2895 Since RFC 1981 doesn't include administrative MTU increase
2896 update PMTU increase is a MUST. (i.e. jumbo frame)
2899 If new MTU is less than route PMTU, this new MTU will be the
2900 lowest MTU in the path, update the route PMTU to reflect PMTU
2901 decreases; if new MTU is greater than route PMTU, and the
2902 old MTU is the lowest MTU in the path, update the route PMTU
2903 to reflect the increase. In this case if the other nodes' MTU
2904 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2907 if (rt
->dst
.dev
== arg
->dev
&&
2908 dst_metric_raw(&rt
->dst
, RTAX_MTU
) &&
2909 !dst_metric_locked(&rt
->dst
, RTAX_MTU
)) {
2910 if (rt
->rt6i_flags
& RTF_CACHE
) {
2911 /* For RTF_CACHE with rt6i_pmtu == 0
2912 * (i.e. a redirected route),
2913 * the metrics of its rt->dst.from has already
2916 if (rt
->rt6i_pmtu
&& rt
->rt6i_pmtu
> arg
->mtu
)
2917 rt
->rt6i_pmtu
= arg
->mtu
;
2918 } else if (dst_mtu(&rt
->dst
) >= arg
->mtu
||
2919 (dst_mtu(&rt
->dst
) < arg
->mtu
&&
2920 dst_mtu(&rt
->dst
) == idev
->cnf
.mtu6
)) {
2921 dst_metric_set(&rt
->dst
, RTAX_MTU
, arg
->mtu
);
2927 void rt6_mtu_change(struct net_device
*dev
, unsigned int mtu
)
2929 struct rt6_mtu_change_arg arg
= {
2934 fib6_clean_all(dev_net(dev
), rt6_mtu_change_route
, &arg
);
2937 static const struct nla_policy rtm_ipv6_policy
[RTA_MAX
+1] = {
2938 [RTA_GATEWAY
] = { .len
= sizeof(struct in6_addr
) },
2939 [RTA_PREFSRC
] = { .len
= sizeof(struct in6_addr
) },
2940 [RTA_OIF
] = { .type
= NLA_U32
},
2941 [RTA_IIF
] = { .type
= NLA_U32
},
2942 [RTA_PRIORITY
] = { .type
= NLA_U32
},
2943 [RTA_METRICS
] = { .type
= NLA_NESTED
},
2944 [RTA_MULTIPATH
] = { .len
= sizeof(struct rtnexthop
) },
2945 [RTA_PREF
] = { .type
= NLA_U8
},
2946 [RTA_ENCAP_TYPE
] = { .type
= NLA_U16
},
2947 [RTA_ENCAP
] = { .type
= NLA_NESTED
},
2948 [RTA_EXPIRES
] = { .type
= NLA_U32
},
2949 [RTA_UID
] = { .type
= NLA_U32
},
2950 [RTA_MARK
] = { .type
= NLA_U32
},
2951 [RTA_TABLE
] = { .type
= NLA_U32
},
2954 static int rtm_to_fib6_config(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
2955 struct fib6_config
*cfg
,
2956 struct netlink_ext_ack
*extack
)
2959 struct nlattr
*tb
[RTA_MAX
+1];
2963 err
= nlmsg_parse(nlh
, sizeof(*rtm
), tb
, RTA_MAX
, rtm_ipv6_policy
,
2969 rtm
= nlmsg_data(nlh
);
2970 memset(cfg
, 0, sizeof(*cfg
));
2972 cfg
->fc_table
= rtm
->rtm_table
;
2973 cfg
->fc_dst_len
= rtm
->rtm_dst_len
;
2974 cfg
->fc_src_len
= rtm
->rtm_src_len
;
2975 cfg
->fc_flags
= RTF_UP
;
2976 cfg
->fc_protocol
= rtm
->rtm_protocol
;
2977 cfg
->fc_type
= rtm
->rtm_type
;
2979 if (rtm
->rtm_type
== RTN_UNREACHABLE
||
2980 rtm
->rtm_type
== RTN_BLACKHOLE
||
2981 rtm
->rtm_type
== RTN_PROHIBIT
||
2982 rtm
->rtm_type
== RTN_THROW
)
2983 cfg
->fc_flags
|= RTF_REJECT
;
2985 if (rtm
->rtm_type
== RTN_LOCAL
)
2986 cfg
->fc_flags
|= RTF_LOCAL
;
2988 if (rtm
->rtm_flags
& RTM_F_CLONED
)
2989 cfg
->fc_flags
|= RTF_CACHE
;
2991 cfg
->fc_nlinfo
.portid
= NETLINK_CB(skb
).portid
;
2992 cfg
->fc_nlinfo
.nlh
= nlh
;
2993 cfg
->fc_nlinfo
.nl_net
= sock_net(skb
->sk
);
2995 if (tb
[RTA_GATEWAY
]) {
2996 cfg
->fc_gateway
= nla_get_in6_addr(tb
[RTA_GATEWAY
]);
2997 cfg
->fc_flags
|= RTF_GATEWAY
;
3000 NL_SET_ERR_MSG(extack
, "IPv6 does not support RTA_VIA attribute");
3005 int plen
= (rtm
->rtm_dst_len
+ 7) >> 3;
3007 if (nla_len(tb
[RTA_DST
]) < plen
)
3010 nla_memcpy(&cfg
->fc_dst
, tb
[RTA_DST
], plen
);
3014 int plen
= (rtm
->rtm_src_len
+ 7) >> 3;
3016 if (nla_len(tb
[RTA_SRC
]) < plen
)
3019 nla_memcpy(&cfg
->fc_src
, tb
[RTA_SRC
], plen
);
3022 if (tb
[RTA_PREFSRC
])
3023 cfg
->fc_prefsrc
= nla_get_in6_addr(tb
[RTA_PREFSRC
]);
3026 cfg
->fc_ifindex
= nla_get_u32(tb
[RTA_OIF
]);
3028 if (tb
[RTA_PRIORITY
])
3029 cfg
->fc_metric
= nla_get_u32(tb
[RTA_PRIORITY
]);
3031 if (tb
[RTA_METRICS
]) {
3032 cfg
->fc_mx
= nla_data(tb
[RTA_METRICS
]);
3033 cfg
->fc_mx_len
= nla_len(tb
[RTA_METRICS
]);
3037 cfg
->fc_table
= nla_get_u32(tb
[RTA_TABLE
]);
3039 if (tb
[RTA_MULTIPATH
]) {
3040 cfg
->fc_mp
= nla_data(tb
[RTA_MULTIPATH
]);
3041 cfg
->fc_mp_len
= nla_len(tb
[RTA_MULTIPATH
]);
3043 err
= lwtunnel_valid_encap_type_attr(cfg
->fc_mp
,
3044 cfg
->fc_mp_len
, extack
);
3050 pref
= nla_get_u8(tb
[RTA_PREF
]);
3051 if (pref
!= ICMPV6_ROUTER_PREF_LOW
&&
3052 pref
!= ICMPV6_ROUTER_PREF_HIGH
)
3053 pref
= ICMPV6_ROUTER_PREF_MEDIUM
;
3054 cfg
->fc_flags
|= RTF_PREF(pref
);
3058 cfg
->fc_encap
= tb
[RTA_ENCAP
];
3060 if (tb
[RTA_ENCAP_TYPE
]) {
3061 cfg
->fc_encap_type
= nla_get_u16(tb
[RTA_ENCAP_TYPE
]);
3063 err
= lwtunnel_valid_encap_type(cfg
->fc_encap_type
, extack
);
3068 if (tb
[RTA_EXPIRES
]) {
3069 unsigned long timeout
= addrconf_timeout_fixup(nla_get_u32(tb
[RTA_EXPIRES
]), HZ
);
3071 if (addrconf_finite_timeout(timeout
)) {
3072 cfg
->fc_expires
= jiffies_to_clock_t(timeout
* HZ
);
3073 cfg
->fc_flags
|= RTF_EXPIRES
;
3083 struct rt6_info
*rt6_info
;
3084 struct fib6_config r_cfg
;
3085 struct mx6_config mxc
;
3086 struct list_head next
;
3089 static void ip6_print_replace_route_err(struct list_head
*rt6_nh_list
)
3093 list_for_each_entry(nh
, rt6_nh_list
, next
) {
3094 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
3095 &nh
->r_cfg
.fc_dst
, &nh
->r_cfg
.fc_gateway
,
3096 nh
->r_cfg
.fc_ifindex
);
3100 static int ip6_route_info_append(struct list_head
*rt6_nh_list
,
3101 struct rt6_info
*rt
, struct fib6_config
*r_cfg
)
3106 list_for_each_entry(nh
, rt6_nh_list
, next
) {
3107 /* check if rt6_info already exists */
3108 if (rt6_duplicate_nexthop(nh
->rt6_info
, rt
))
3112 nh
= kzalloc(sizeof(*nh
), GFP_KERNEL
);
3116 err
= ip6_convert_metrics(&nh
->mxc
, r_cfg
);
3121 memcpy(&nh
->r_cfg
, r_cfg
, sizeof(*r_cfg
));
3122 list_add_tail(&nh
->next
, rt6_nh_list
);
3127 static void ip6_route_mpath_notify(struct rt6_info
*rt
,
3128 struct rt6_info
*rt_last
,
3129 struct nl_info
*info
,
3132 /* if this is an APPEND route, then rt points to the first route
3133 * inserted and rt_last points to last route inserted. Userspace
3134 * wants a consistent dump of the route which starts at the first
3135 * nexthop. Since sibling routes are always added at the end of
3136 * the list, find the first sibling of the last route appended
3138 if ((nlflags
& NLM_F_APPEND
) && rt_last
&& rt_last
->rt6i_nsiblings
) {
3139 rt
= list_first_entry(&rt_last
->rt6i_siblings
,
3145 inet6_rt_notify(RTM_NEWROUTE
, rt
, info
, nlflags
);
3148 static int ip6_route_multipath_add(struct fib6_config
*cfg
,
3149 struct netlink_ext_ack
*extack
)
3151 struct rt6_info
*rt_notif
= NULL
, *rt_last
= NULL
;
3152 struct nl_info
*info
= &cfg
->fc_nlinfo
;
3153 struct fib6_config r_cfg
;
3154 struct rtnexthop
*rtnh
;
3155 struct rt6_info
*rt
;
3156 struct rt6_nh
*err_nh
;
3157 struct rt6_nh
*nh
, *nh_safe
;
3163 int replace
= (cfg
->fc_nlinfo
.nlh
&&
3164 (cfg
->fc_nlinfo
.nlh
->nlmsg_flags
& NLM_F_REPLACE
));
3165 LIST_HEAD(rt6_nh_list
);
3167 nlflags
= replace
? NLM_F_REPLACE
: NLM_F_CREATE
;
3168 if (info
->nlh
&& info
->nlh
->nlmsg_flags
& NLM_F_APPEND
)
3169 nlflags
|= NLM_F_APPEND
;
3171 remaining
= cfg
->fc_mp_len
;
3172 rtnh
= (struct rtnexthop
*)cfg
->fc_mp
;
3174 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
3175 * rt6_info structs per nexthop
3177 while (rtnh_ok(rtnh
, remaining
)) {
3178 memcpy(&r_cfg
, cfg
, sizeof(*cfg
));
3179 if (rtnh
->rtnh_ifindex
)
3180 r_cfg
.fc_ifindex
= rtnh
->rtnh_ifindex
;
3182 attrlen
= rtnh_attrlen(rtnh
);
3184 struct nlattr
*nla
, *attrs
= rtnh_attrs(rtnh
);
3186 nla
= nla_find(attrs
, attrlen
, RTA_GATEWAY
);
3188 r_cfg
.fc_gateway
= nla_get_in6_addr(nla
);
3189 r_cfg
.fc_flags
|= RTF_GATEWAY
;
3191 r_cfg
.fc_encap
= nla_find(attrs
, attrlen
, RTA_ENCAP
);
3192 nla
= nla_find(attrs
, attrlen
, RTA_ENCAP_TYPE
);
3194 r_cfg
.fc_encap_type
= nla_get_u16(nla
);
3197 rt
= ip6_route_info_create(&r_cfg
, extack
);
3204 err
= ip6_route_info_append(&rt6_nh_list
, rt
, &r_cfg
);
3206 dst_release_immediate(&rt
->dst
);
3210 rtnh
= rtnh_next(rtnh
, &remaining
);
3213 /* for add and replace send one notification with all nexthops.
3214 * Skip the notification in fib6_add_rt2node and send one with
3215 * the full route when done
3217 info
->skip_notify
= 1;
3220 list_for_each_entry(nh
, &rt6_nh_list
, next
) {
3221 err
= __ip6_ins_rt(nh
->rt6_info
, info
, &nh
->mxc
, extack
);
3224 /* save reference to last route successfully inserted */
3225 rt_last
= nh
->rt6_info
;
3227 /* save reference to first route for notification */
3229 rt_notif
= nh
->rt6_info
;
3232 /* nh->rt6_info is used or freed at this point, reset to NULL*/
3233 nh
->rt6_info
= NULL
;
3236 ip6_print_replace_route_err(&rt6_nh_list
);
3241 /* Because each route is added like a single route we remove
3242 * these flags after the first nexthop: if there is a collision,
3243 * we have already failed to add the first nexthop:
3244 * fib6_add_rt2node() has rejected it; when replacing, old
3245 * nexthops have been replaced by first new, the rest should
3248 cfg
->fc_nlinfo
.nlh
->nlmsg_flags
&= ~(NLM_F_EXCL
|
3253 /* success ... tell user about new route */
3254 ip6_route_mpath_notify(rt_notif
, rt_last
, info
, nlflags
);
3258 /* send notification for routes that were added so that
3259 * the delete notifications sent by ip6_route_del are
3263 ip6_route_mpath_notify(rt_notif
, rt_last
, info
, nlflags
);
3265 /* Delete routes that were already added */
3266 list_for_each_entry(nh
, &rt6_nh_list
, next
) {
3269 ip6_route_del(&nh
->r_cfg
, extack
);
3273 list_for_each_entry_safe(nh
, nh_safe
, &rt6_nh_list
, next
) {
3275 dst_release_immediate(&nh
->rt6_info
->dst
);
3277 list_del(&nh
->next
);
3284 static int ip6_route_multipath_del(struct fib6_config
*cfg
,
3285 struct netlink_ext_ack
*extack
)
3287 struct fib6_config r_cfg
;
3288 struct rtnexthop
*rtnh
;
3291 int err
= 1, last_err
= 0;
3293 remaining
= cfg
->fc_mp_len
;
3294 rtnh
= (struct rtnexthop
*)cfg
->fc_mp
;
3296 /* Parse a Multipath Entry */
3297 while (rtnh_ok(rtnh
, remaining
)) {
3298 memcpy(&r_cfg
, cfg
, sizeof(*cfg
));
3299 if (rtnh
->rtnh_ifindex
)
3300 r_cfg
.fc_ifindex
= rtnh
->rtnh_ifindex
;
3302 attrlen
= rtnh_attrlen(rtnh
);
3304 struct nlattr
*nla
, *attrs
= rtnh_attrs(rtnh
);
3306 nla
= nla_find(attrs
, attrlen
, RTA_GATEWAY
);
3308 nla_memcpy(&r_cfg
.fc_gateway
, nla
, 16);
3309 r_cfg
.fc_flags
|= RTF_GATEWAY
;
3312 err
= ip6_route_del(&r_cfg
, extack
);
3316 rtnh
= rtnh_next(rtnh
, &remaining
);
3322 static int inet6_rtm_delroute(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
3323 struct netlink_ext_ack
*extack
)
3325 struct fib6_config cfg
;
3328 err
= rtm_to_fib6_config(skb
, nlh
, &cfg
, extack
);
3333 return ip6_route_multipath_del(&cfg
, extack
);
3335 cfg
.fc_delete_all_nh
= 1;
3336 return ip6_route_del(&cfg
, extack
);
3340 static int inet6_rtm_newroute(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
3341 struct netlink_ext_ack
*extack
)
3343 struct fib6_config cfg
;
3346 err
= rtm_to_fib6_config(skb
, nlh
, &cfg
, extack
);
3351 return ip6_route_multipath_add(&cfg
, extack
);
3353 return ip6_route_add(&cfg
, extack
);
3356 static size_t rt6_nlmsg_size(struct rt6_info
*rt
)
3358 int nexthop_len
= 0;
3360 if (rt
->rt6i_nsiblings
) {
3361 nexthop_len
= nla_total_size(0) /* RTA_MULTIPATH */
3362 + NLA_ALIGN(sizeof(struct rtnexthop
))
3363 + nla_total_size(16) /* RTA_GATEWAY */
3364 + lwtunnel_get_encap_size(rt
->dst
.lwtstate
);
3366 nexthop_len
*= rt
->rt6i_nsiblings
;
3369 return NLMSG_ALIGN(sizeof(struct rtmsg
))
3370 + nla_total_size(16) /* RTA_SRC */
3371 + nla_total_size(16) /* RTA_DST */
3372 + nla_total_size(16) /* RTA_GATEWAY */
3373 + nla_total_size(16) /* RTA_PREFSRC */
3374 + nla_total_size(4) /* RTA_TABLE */
3375 + nla_total_size(4) /* RTA_IIF */
3376 + nla_total_size(4) /* RTA_OIF */
3377 + nla_total_size(4) /* RTA_PRIORITY */
3378 + RTAX_MAX
* nla_total_size(4) /* RTA_METRICS */
3379 + nla_total_size(sizeof(struct rta_cacheinfo
))
3380 + nla_total_size(TCP_CA_NAME_MAX
) /* RTAX_CC_ALGO */
3381 + nla_total_size(1) /* RTA_PREF */
3382 + lwtunnel_get_encap_size(rt
->dst
.lwtstate
)
3386 static int rt6_nexthop_info(struct sk_buff
*skb
, struct rt6_info
*rt
,
3387 unsigned int *flags
, bool skip_oif
)
3389 if (!netif_running(rt
->dst
.dev
) || !netif_carrier_ok(rt
->dst
.dev
)) {
3390 *flags
|= RTNH_F_LINKDOWN
;
3391 if (rt
->rt6i_idev
->cnf
.ignore_routes_with_linkdown
)
3392 *flags
|= RTNH_F_DEAD
;
3395 if (rt
->rt6i_flags
& RTF_GATEWAY
) {
3396 if (nla_put_in6_addr(skb
, RTA_GATEWAY
, &rt
->rt6i_gateway
) < 0)
3397 goto nla_put_failure
;
3400 if (rt
->rt6i_nh_flags
& RTNH_F_OFFLOAD
)
3401 *flags
|= RTNH_F_OFFLOAD
;
3403 /* not needed for multipath encoding b/c it has a rtnexthop struct */
3404 if (!skip_oif
&& rt
->dst
.dev
&&
3405 nla_put_u32(skb
, RTA_OIF
, rt
->dst
.dev
->ifindex
))
3406 goto nla_put_failure
;
3408 if (rt
->dst
.lwtstate
&&
3409 lwtunnel_fill_encap(skb
, rt
->dst
.lwtstate
) < 0)
3410 goto nla_put_failure
;
3418 /* add multipath next hop */
3419 static int rt6_add_nexthop(struct sk_buff
*skb
, struct rt6_info
*rt
)
3421 struct rtnexthop
*rtnh
;
3422 unsigned int flags
= 0;
3424 rtnh
= nla_reserve_nohdr(skb
, sizeof(*rtnh
));
3426 goto nla_put_failure
;
3428 rtnh
->rtnh_hops
= 0;
3429 rtnh
->rtnh_ifindex
= rt
->dst
.dev
? rt
->dst
.dev
->ifindex
: 0;
3431 if (rt6_nexthop_info(skb
, rt
, &flags
, true) < 0)
3432 goto nla_put_failure
;
3434 rtnh
->rtnh_flags
= flags
;
3436 /* length of rtnetlink header + attributes */
3437 rtnh
->rtnh_len
= nlmsg_get_pos(skb
) - (void *)rtnh
;
3445 static int rt6_fill_node(struct net
*net
,
3446 struct sk_buff
*skb
, struct rt6_info
*rt
,
3447 struct in6_addr
*dst
, struct in6_addr
*src
,
3448 int iif
, int type
, u32 portid
, u32 seq
,
3451 u32 metrics
[RTAX_MAX
];
3453 struct nlmsghdr
*nlh
;
3457 nlh
= nlmsg_put(skb
, portid
, seq
, type
, sizeof(*rtm
), flags
);
3461 rtm
= nlmsg_data(nlh
);
3462 rtm
->rtm_family
= AF_INET6
;
3463 rtm
->rtm_dst_len
= rt
->rt6i_dst
.plen
;
3464 rtm
->rtm_src_len
= rt
->rt6i_src
.plen
;
3467 table
= rt
->rt6i_table
->tb6_id
;
3469 table
= RT6_TABLE_UNSPEC
;
3470 rtm
->rtm_table
= table
< 256 ? table
: RT_TABLE_COMPAT
;
3471 if (nla_put_u32(skb
, RTA_TABLE
, table
))
3472 goto nla_put_failure
;
3473 if (rt
->rt6i_flags
& RTF_REJECT
) {
3474 switch (rt
->dst
.error
) {
3476 rtm
->rtm_type
= RTN_BLACKHOLE
;
3479 rtm
->rtm_type
= RTN_PROHIBIT
;
3482 rtm
->rtm_type
= RTN_THROW
;
3485 rtm
->rtm_type
= RTN_UNREACHABLE
;
3489 else if (rt
->rt6i_flags
& RTF_LOCAL
)
3490 rtm
->rtm_type
= RTN_LOCAL
;
3491 else if (rt
->rt6i_flags
& RTF_ANYCAST
)
3492 rtm
->rtm_type
= RTN_ANYCAST
;
3493 else if (rt
->dst
.dev
&& (rt
->dst
.dev
->flags
& IFF_LOOPBACK
))
3494 rtm
->rtm_type
= RTN_LOCAL
;
3496 rtm
->rtm_type
= RTN_UNICAST
;
3498 rtm
->rtm_scope
= RT_SCOPE_UNIVERSE
;
3499 rtm
->rtm_protocol
= rt
->rt6i_protocol
;
3501 if (rt
->rt6i_flags
& RTF_CACHE
)
3502 rtm
->rtm_flags
|= RTM_F_CLONED
;
3505 if (nla_put_in6_addr(skb
, RTA_DST
, dst
))
3506 goto nla_put_failure
;
3507 rtm
->rtm_dst_len
= 128;
3508 } else if (rtm
->rtm_dst_len
)
3509 if (nla_put_in6_addr(skb
, RTA_DST
, &rt
->rt6i_dst
.addr
))
3510 goto nla_put_failure
;
3511 #ifdef CONFIG_IPV6_SUBTREES
3513 if (nla_put_in6_addr(skb
, RTA_SRC
, src
))
3514 goto nla_put_failure
;
3515 rtm
->rtm_src_len
= 128;
3516 } else if (rtm
->rtm_src_len
&&
3517 nla_put_in6_addr(skb
, RTA_SRC
, &rt
->rt6i_src
.addr
))
3518 goto nla_put_failure
;
3521 #ifdef CONFIG_IPV6_MROUTE
3522 if (ipv6_addr_is_multicast(&rt
->rt6i_dst
.addr
)) {
3523 int err
= ip6mr_get_route(net
, skb
, rtm
, portid
);
3528 goto nla_put_failure
;
3531 if (nla_put_u32(skb
, RTA_IIF
, iif
))
3532 goto nla_put_failure
;
3534 struct in6_addr saddr_buf
;
3535 if (ip6_route_get_saddr(net
, rt
, dst
, 0, &saddr_buf
) == 0 &&
3536 nla_put_in6_addr(skb
, RTA_PREFSRC
, &saddr_buf
))
3537 goto nla_put_failure
;
3540 if (rt
->rt6i_prefsrc
.plen
) {
3541 struct in6_addr saddr_buf
;
3542 saddr_buf
= rt
->rt6i_prefsrc
.addr
;
3543 if (nla_put_in6_addr(skb
, RTA_PREFSRC
, &saddr_buf
))
3544 goto nla_put_failure
;
3547 memcpy(metrics
, dst_metrics_ptr(&rt
->dst
), sizeof(metrics
));
3549 metrics
[RTAX_MTU
- 1] = rt
->rt6i_pmtu
;
3550 if (rtnetlink_put_metrics(skb
, metrics
) < 0)
3551 goto nla_put_failure
;
3553 if (nla_put_u32(skb
, RTA_PRIORITY
, rt
->rt6i_metric
))
3554 goto nla_put_failure
;
3556 /* For multipath routes, walk the siblings list and add
3557 * each as a nexthop within RTA_MULTIPATH.
3559 if (rt
->rt6i_nsiblings
) {
3560 struct rt6_info
*sibling
, *next_sibling
;
3563 mp
= nla_nest_start(skb
, RTA_MULTIPATH
);
3565 goto nla_put_failure
;
3567 if (rt6_add_nexthop(skb
, rt
) < 0)
3568 goto nla_put_failure
;
3570 list_for_each_entry_safe(sibling
, next_sibling
,
3571 &rt
->rt6i_siblings
, rt6i_siblings
) {
3572 if (rt6_add_nexthop(skb
, sibling
) < 0)
3573 goto nla_put_failure
;
3576 nla_nest_end(skb
, mp
);
3578 if (rt6_nexthop_info(skb
, rt
, &rtm
->rtm_flags
, false) < 0)
3579 goto nla_put_failure
;
3582 expires
= (rt
->rt6i_flags
& RTF_EXPIRES
) ? rt
->dst
.expires
- jiffies
: 0;
3584 if (rtnl_put_cacheinfo(skb
, &rt
->dst
, 0, expires
, rt
->dst
.error
) < 0)
3585 goto nla_put_failure
;
3587 if (nla_put_u8(skb
, RTA_PREF
, IPV6_EXTRACT_PREF(rt
->rt6i_flags
)))
3588 goto nla_put_failure
;
3591 nlmsg_end(skb
, nlh
);
3595 nlmsg_cancel(skb
, nlh
);
3599 int rt6_dump_route(struct rt6_info
*rt
, void *p_arg
)
3601 struct rt6_rtnl_dump_arg
*arg
= (struct rt6_rtnl_dump_arg
*) p_arg
;
3602 struct net
*net
= arg
->net
;
3604 if (rt
== net
->ipv6
.ip6_null_entry
)
3607 if (nlmsg_len(arg
->cb
->nlh
) >= sizeof(struct rtmsg
)) {
3608 struct rtmsg
*rtm
= nlmsg_data(arg
->cb
->nlh
);
3610 /* user wants prefix routes only */
3611 if (rtm
->rtm_flags
& RTM_F_PREFIX
&&
3612 !(rt
->rt6i_flags
& RTF_PREFIX_RT
)) {
3613 /* success since this is not a prefix route */
3618 return rt6_fill_node(net
,
3619 arg
->skb
, rt
, NULL
, NULL
, 0, RTM_NEWROUTE
,
3620 NETLINK_CB(arg
->cb
->skb
).portid
, arg
->cb
->nlh
->nlmsg_seq
,
3624 static int inet6_rtm_getroute(struct sk_buff
*in_skb
, struct nlmsghdr
*nlh
,
3625 struct netlink_ext_ack
*extack
)
3627 struct net
*net
= sock_net(in_skb
->sk
);
3628 struct nlattr
*tb
[RTA_MAX
+1];
3629 int err
, iif
= 0, oif
= 0;
3630 struct dst_entry
*dst
;
3631 struct rt6_info
*rt
;
3632 struct sk_buff
*skb
;
3637 err
= nlmsg_parse(nlh
, sizeof(*rtm
), tb
, RTA_MAX
, rtm_ipv6_policy
,
3643 memset(&fl6
, 0, sizeof(fl6
));
3644 rtm
= nlmsg_data(nlh
);
3645 fl6
.flowlabel
= ip6_make_flowinfo(rtm
->rtm_tos
, 0);
3646 fibmatch
= !!(rtm
->rtm_flags
& RTM_F_FIB_MATCH
);
3649 if (nla_len(tb
[RTA_SRC
]) < sizeof(struct in6_addr
))
3652 fl6
.saddr
= *(struct in6_addr
*)nla_data(tb
[RTA_SRC
]);
3656 if (nla_len(tb
[RTA_DST
]) < sizeof(struct in6_addr
))
3659 fl6
.daddr
= *(struct in6_addr
*)nla_data(tb
[RTA_DST
]);
3663 iif
= nla_get_u32(tb
[RTA_IIF
]);
3666 oif
= nla_get_u32(tb
[RTA_OIF
]);
3669 fl6
.flowi6_mark
= nla_get_u32(tb
[RTA_MARK
]);
3672 fl6
.flowi6_uid
= make_kuid(current_user_ns(),
3673 nla_get_u32(tb
[RTA_UID
]));
3675 fl6
.flowi6_uid
= iif
? INVALID_UID
: current_uid();
3678 struct net_device
*dev
;
3683 dev
= dev_get_by_index_rcu(net
, iif
);
3690 fl6
.flowi6_iif
= iif
;
3692 if (!ipv6_addr_any(&fl6
.saddr
))
3693 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
3695 dst
= ip6_route_input_lookup(net
, dev
, &fl6
, flags
);
3699 fl6
.flowi6_oif
= oif
;
3701 dst
= ip6_route_output(net
, NULL
, &fl6
);
3705 rt
= container_of(dst
, struct rt6_info
, dst
);
3706 if (rt
->dst
.error
) {
3707 err
= rt
->dst
.error
;
3712 if (rt
== net
->ipv6
.ip6_null_entry
) {
3713 err
= rt
->dst
.error
;
3718 if (fibmatch
&& rt
->dst
.from
) {
3719 struct rt6_info
*ort
= container_of(rt
->dst
.from
,
3720 struct rt6_info
, dst
);
3722 dst_hold(&ort
->dst
);
3727 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
3734 skb_dst_set(skb
, &rt
->dst
);
3736 err
= rt6_fill_node(net
, skb
, rt
, NULL
, NULL
, iif
,
3737 RTM_NEWROUTE
, NETLINK_CB(in_skb
).portid
,
3740 err
= rt6_fill_node(net
, skb
, rt
, &fl6
.daddr
, &fl6
.saddr
, iif
,
3741 RTM_NEWROUTE
, NETLINK_CB(in_skb
).portid
,
3748 err
= rtnl_unicast(skb
, net
, NETLINK_CB(in_skb
).portid
);
3753 void inet6_rt_notify(int event
, struct rt6_info
*rt
, struct nl_info
*info
,
3754 unsigned int nlm_flags
)
3756 struct sk_buff
*skb
;
3757 struct net
*net
= info
->nl_net
;
3762 seq
= info
->nlh
? info
->nlh
->nlmsg_seq
: 0;
3764 skb
= nlmsg_new(rt6_nlmsg_size(rt
), gfp_any());
3768 err
= rt6_fill_node(net
, skb
, rt
, NULL
, NULL
, 0,
3769 event
, info
->portid
, seq
, nlm_flags
);
3771 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3772 WARN_ON(err
== -EMSGSIZE
);
3776 rtnl_notify(skb
, net
, info
->portid
, RTNLGRP_IPV6_ROUTE
,
3777 info
->nlh
, gfp_any());
3781 rtnl_set_sk_err(net
, RTNLGRP_IPV6_ROUTE
, err
);
3784 static int ip6_route_dev_notify(struct notifier_block
*this,
3785 unsigned long event
, void *ptr
)
3787 struct net_device
*dev
= netdev_notifier_info_to_dev(ptr
);
3788 struct net
*net
= dev_net(dev
);
3790 if (!(dev
->flags
& IFF_LOOPBACK
))
3793 if (event
== NETDEV_REGISTER
) {
3794 net
->ipv6
.ip6_null_entry
->dst
.dev
= dev
;
3795 net
->ipv6
.ip6_null_entry
->rt6i_idev
= in6_dev_get(dev
);
3796 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3797 net
->ipv6
.ip6_prohibit_entry
->dst
.dev
= dev
;
3798 net
->ipv6
.ip6_prohibit_entry
->rt6i_idev
= in6_dev_get(dev
);
3799 net
->ipv6
.ip6_blk_hole_entry
->dst
.dev
= dev
;
3800 net
->ipv6
.ip6_blk_hole_entry
->rt6i_idev
= in6_dev_get(dev
);
3802 } else if (event
== NETDEV_UNREGISTER
&&
3803 dev
->reg_state
!= NETREG_UNREGISTERED
) {
3804 /* NETDEV_UNREGISTER could be fired for multiple times by
3805 * netdev_wait_allrefs(). Make sure we only call this once.
3807 in6_dev_put_clear(&net
->ipv6
.ip6_null_entry
->rt6i_idev
);
3808 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3809 in6_dev_put_clear(&net
->ipv6
.ip6_prohibit_entry
->rt6i_idev
);
3810 in6_dev_put_clear(&net
->ipv6
.ip6_blk_hole_entry
->rt6i_idev
);
3821 #ifdef CONFIG_PROC_FS
3823 static const struct file_operations ipv6_route_proc_fops
= {
3824 .owner
= THIS_MODULE
,
3825 .open
= ipv6_route_open
,
3827 .llseek
= seq_lseek
,
3828 .release
= seq_release_net
,
3831 static int rt6_stats_seq_show(struct seq_file
*seq
, void *v
)
3833 struct net
*net
= (struct net
*)seq
->private;
3834 seq_printf(seq
, "%04x %04x %04x %04x %04x %04x %04x\n",
3835 net
->ipv6
.rt6_stats
->fib_nodes
,
3836 net
->ipv6
.rt6_stats
->fib_route_nodes
,
3837 net
->ipv6
.rt6_stats
->fib_rt_alloc
,
3838 net
->ipv6
.rt6_stats
->fib_rt_entries
,
3839 net
->ipv6
.rt6_stats
->fib_rt_cache
,
3840 dst_entries_get_slow(&net
->ipv6
.ip6_dst_ops
),
3841 net
->ipv6
.rt6_stats
->fib_discarded_routes
);
3846 static int rt6_stats_seq_open(struct inode
*inode
, struct file
*file
)
3848 return single_open_net(inode
, file
, rt6_stats_seq_show
);
3851 static const struct file_operations rt6_stats_seq_fops
= {
3852 .owner
= THIS_MODULE
,
3853 .open
= rt6_stats_seq_open
,
3855 .llseek
= seq_lseek
,
3856 .release
= single_release_net
,
3858 #endif /* CONFIG_PROC_FS */
3860 #ifdef CONFIG_SYSCTL
3863 int ipv6_sysctl_rtcache_flush(struct ctl_table
*ctl
, int write
,
3864 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
3871 net
= (struct net
*)ctl
->extra1
;
3872 delay
= net
->ipv6
.sysctl
.flush_delay
;
3873 proc_dointvec(ctl
, write
, buffer
, lenp
, ppos
);
3874 fib6_run_gc(delay
<= 0 ? 0 : (unsigned long)delay
, net
, delay
> 0);
3878 struct ctl_table ipv6_route_table_template
[] = {
3880 .procname
= "flush",
3881 .data
= &init_net
.ipv6
.sysctl
.flush_delay
,
3882 .maxlen
= sizeof(int),
3884 .proc_handler
= ipv6_sysctl_rtcache_flush
3887 .procname
= "gc_thresh",
3888 .data
= &ip6_dst_ops_template
.gc_thresh
,
3889 .maxlen
= sizeof(int),
3891 .proc_handler
= proc_dointvec
,
3894 .procname
= "max_size",
3895 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_max_size
,
3896 .maxlen
= sizeof(int),
3898 .proc_handler
= proc_dointvec
,
3901 .procname
= "gc_min_interval",
3902 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_min_interval
,
3903 .maxlen
= sizeof(int),
3905 .proc_handler
= proc_dointvec_jiffies
,
3908 .procname
= "gc_timeout",
3909 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_timeout
,
3910 .maxlen
= sizeof(int),
3912 .proc_handler
= proc_dointvec_jiffies
,
3915 .procname
= "gc_interval",
3916 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_interval
,
3917 .maxlen
= sizeof(int),
3919 .proc_handler
= proc_dointvec_jiffies
,
3922 .procname
= "gc_elasticity",
3923 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_elasticity
,
3924 .maxlen
= sizeof(int),
3926 .proc_handler
= proc_dointvec
,
3929 .procname
= "mtu_expires",
3930 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_mtu_expires
,
3931 .maxlen
= sizeof(int),
3933 .proc_handler
= proc_dointvec_jiffies
,
3936 .procname
= "min_adv_mss",
3937 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_min_advmss
,
3938 .maxlen
= sizeof(int),
3940 .proc_handler
= proc_dointvec
,
3943 .procname
= "gc_min_interval_ms",
3944 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_min_interval
,
3945 .maxlen
= sizeof(int),
3947 .proc_handler
= proc_dointvec_ms_jiffies
,
3952 struct ctl_table
* __net_init
ipv6_route_sysctl_init(struct net
*net
)
3954 struct ctl_table
*table
;
3956 table
= kmemdup(ipv6_route_table_template
,
3957 sizeof(ipv6_route_table_template
),
3961 table
[0].data
= &net
->ipv6
.sysctl
.flush_delay
;
3962 table
[0].extra1
= net
;
3963 table
[1].data
= &net
->ipv6
.ip6_dst_ops
.gc_thresh
;
3964 table
[2].data
= &net
->ipv6
.sysctl
.ip6_rt_max_size
;
3965 table
[3].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
3966 table
[4].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_timeout
;
3967 table
[5].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_interval
;
3968 table
[6].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
3969 table
[7].data
= &net
->ipv6
.sysctl
.ip6_rt_mtu_expires
;
3970 table
[8].data
= &net
->ipv6
.sysctl
.ip6_rt_min_advmss
;
3971 table
[9].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
3973 /* Don't export sysctls to unprivileged users */
3974 if (net
->user_ns
!= &init_user_ns
)
3975 table
[0].procname
= NULL
;
3982 static int __net_init
ip6_route_net_init(struct net
*net
)
3986 memcpy(&net
->ipv6
.ip6_dst_ops
, &ip6_dst_ops_template
,
3987 sizeof(net
->ipv6
.ip6_dst_ops
));
3989 if (dst_entries_init(&net
->ipv6
.ip6_dst_ops
) < 0)
3990 goto out_ip6_dst_ops
;
3992 net
->ipv6
.ip6_null_entry
= kmemdup(&ip6_null_entry_template
,
3993 sizeof(*net
->ipv6
.ip6_null_entry
),
3995 if (!net
->ipv6
.ip6_null_entry
)
3996 goto out_ip6_dst_entries
;
3997 net
->ipv6
.ip6_null_entry
->dst
.path
=
3998 (struct dst_entry
*)net
->ipv6
.ip6_null_entry
;
3999 net
->ipv6
.ip6_null_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
4000 dst_init_metrics(&net
->ipv6
.ip6_null_entry
->dst
,
4001 ip6_template_metrics
, true);
4003 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4004 net
->ipv6
.fib6_has_custom_rules
= false;
4005 net
->ipv6
.ip6_prohibit_entry
= kmemdup(&ip6_prohibit_entry_template
,
4006 sizeof(*net
->ipv6
.ip6_prohibit_entry
),
4008 if (!net
->ipv6
.ip6_prohibit_entry
)
4009 goto out_ip6_null_entry
;
4010 net
->ipv6
.ip6_prohibit_entry
->dst
.path
=
4011 (struct dst_entry
*)net
->ipv6
.ip6_prohibit_entry
;
4012 net
->ipv6
.ip6_prohibit_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
4013 dst_init_metrics(&net
->ipv6
.ip6_prohibit_entry
->dst
,
4014 ip6_template_metrics
, true);
4016 net
->ipv6
.ip6_blk_hole_entry
= kmemdup(&ip6_blk_hole_entry_template
,
4017 sizeof(*net
->ipv6
.ip6_blk_hole_entry
),
4019 if (!net
->ipv6
.ip6_blk_hole_entry
)
4020 goto out_ip6_prohibit_entry
;
4021 net
->ipv6
.ip6_blk_hole_entry
->dst
.path
=
4022 (struct dst_entry
*)net
->ipv6
.ip6_blk_hole_entry
;
4023 net
->ipv6
.ip6_blk_hole_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
4024 dst_init_metrics(&net
->ipv6
.ip6_blk_hole_entry
->dst
,
4025 ip6_template_metrics
, true);
4028 net
->ipv6
.sysctl
.flush_delay
= 0;
4029 net
->ipv6
.sysctl
.ip6_rt_max_size
= 4096;
4030 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
= HZ
/ 2;
4031 net
->ipv6
.sysctl
.ip6_rt_gc_timeout
= 60*HZ
;
4032 net
->ipv6
.sysctl
.ip6_rt_gc_interval
= 30*HZ
;
4033 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
= 9;
4034 net
->ipv6
.sysctl
.ip6_rt_mtu_expires
= 10*60*HZ
;
4035 net
->ipv6
.sysctl
.ip6_rt_min_advmss
= IPV6_MIN_MTU
- 20 - 40;
4037 net
->ipv6
.ip6_rt_gc_expire
= 30*HZ
;
4043 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4044 out_ip6_prohibit_entry
:
4045 kfree(net
->ipv6
.ip6_prohibit_entry
);
4047 kfree(net
->ipv6
.ip6_null_entry
);
4049 out_ip6_dst_entries
:
4050 dst_entries_destroy(&net
->ipv6
.ip6_dst_ops
);
4055 static void __net_exit
ip6_route_net_exit(struct net
*net
)
4057 kfree(net
->ipv6
.ip6_null_entry
);
4058 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4059 kfree(net
->ipv6
.ip6_prohibit_entry
);
4060 kfree(net
->ipv6
.ip6_blk_hole_entry
);
4062 dst_entries_destroy(&net
->ipv6
.ip6_dst_ops
);
4065 static int __net_init
ip6_route_net_init_late(struct net
*net
)
4067 #ifdef CONFIG_PROC_FS
4068 proc_create("ipv6_route", 0, net
->proc_net
, &ipv6_route_proc_fops
);
4069 proc_create("rt6_stats", S_IRUGO
, net
->proc_net
, &rt6_stats_seq_fops
);
4074 static void __net_exit
ip6_route_net_exit_late(struct net
*net
)
4076 #ifdef CONFIG_PROC_FS
4077 remove_proc_entry("ipv6_route", net
->proc_net
);
4078 remove_proc_entry("rt6_stats", net
->proc_net
);
/* Per-network-namespace lifetime hooks for the core IPv6 routing state. */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
4087 static int __net_init
ipv6_inetpeer_init(struct net
*net
)
4089 struct inet_peer_base
*bp
= kmalloc(sizeof(*bp
), GFP_KERNEL
);
4093 inet_peer_base_init(bp
);
4094 net
->ipv6
.peers
= bp
;
4098 static void __net_exit
ipv6_inetpeer_exit(struct net
*net
)
4100 struct inet_peer_base
*bp
= net
->ipv6
.peers
;
4102 net
->ipv6
.peers
= NULL
;
4103 inetpeer_invalidate_tree(bp
);
/* Per-network-namespace lifetime hooks for the IPv6 inetpeer cache. */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init = ipv6_inetpeer_init,
	.exit = ipv6_inetpeer_exit,
};
/* Late-stage per-netns hooks (procfs diagnostics), registered after the
 * core routing pernet ops so the state they expose already exists.
 */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
/* Netdevice event notifier; runs after addrconf's notifier (lower
 * priority value than ADDRCONF_NOTIFY_PRIORITY) so address state is
 * settled before route state reacts.
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};
/*
 * Wire init_net's special route templates (null / prohibit / blackhole)
 * to the loopback device. Called once at boot.
 */
void __init ip6_route_init_special_entries(void)
{
	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	/* same manual wiring for the policy-routing-only templates */
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#endif
}
4137 int __init
ip6_route_init(void)
4143 ip6_dst_ops_template
.kmem_cachep
=
4144 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info
), 0,
4145 SLAB_HWCACHE_ALIGN
, NULL
);
4146 if (!ip6_dst_ops_template
.kmem_cachep
)
4149 ret
= dst_entries_init(&ip6_dst_blackhole_ops
);
4151 goto out_kmem_cache
;
4153 ret
= register_pernet_subsys(&ipv6_inetpeer_ops
);
4155 goto out_dst_entries
;
4157 ret
= register_pernet_subsys(&ip6_route_net_ops
);
4159 goto out_register_inetpeer
;
4161 ip6_dst_blackhole_ops
.kmem_cachep
= ip6_dst_ops_template
.kmem_cachep
;
4165 goto out_register_subsys
;
4171 ret
= fib6_rules_init();
4175 ret
= register_pernet_subsys(&ip6_route_net_late_ops
);
4177 goto fib6_rules_init
;
4180 if (__rtnl_register(PF_INET6
, RTM_NEWROUTE
, inet6_rtm_newroute
, NULL
, 0) ||
4181 __rtnl_register(PF_INET6
, RTM_DELROUTE
, inet6_rtm_delroute
, NULL
, 0) ||
4182 __rtnl_register(PF_INET6
, RTM_GETROUTE
, inet6_rtm_getroute
, NULL
,
4183 RTNL_FLAG_DOIT_UNLOCKED
))
4184 goto out_register_late_subsys
;
4186 ret
= register_netdevice_notifier(&ip6_route_dev_notifier
);
4188 goto out_register_late_subsys
;
4190 for_each_possible_cpu(cpu
) {
4191 struct uncached_list
*ul
= per_cpu_ptr(&rt6_uncached_list
, cpu
);
4193 INIT_LIST_HEAD(&ul
->head
);
4194 spin_lock_init(&ul
->lock
);
4200 out_register_late_subsys
:
4201 unregister_pernet_subsys(&ip6_route_net_late_ops
);
4203 fib6_rules_cleanup();
4208 out_register_subsys
:
4209 unregister_pernet_subsys(&ip6_route_net_ops
);
4210 out_register_inetpeer
:
4211 unregister_pernet_subsys(&ipv6_inetpeer_ops
);
4213 dst_entries_destroy(&ip6_dst_blackhole_ops
);
4215 kmem_cache_destroy(ip6_dst_ops_template
.kmem_cachep
);
4219 void ip6_route_cleanup(void)
4221 unregister_netdevice_notifier(&ip6_route_dev_notifier
);
4222 unregister_pernet_subsys(&ip6_route_net_late_ops
);
4223 fib6_rules_cleanup();
4226 unregister_pernet_subsys(&ipv6_inetpeer_ops
);
4227 unregister_pernet_subsys(&ip6_route_net_ops
);
4228 dst_entries_destroy(&ip6_dst_blackhole_ops
);
4229 kmem_cache_destroy(ip6_dst_ops_template
.kmem_cachep
);