2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
24 * Fixed routing subtrees.
27 #define pr_fmt(fmt) "IPv6: " fmt
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
55 #include <linux/rtnetlink.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
61 #include <asm/uaccess.h>
64 #include <linux/sysctl.h>
67 static struct rt6_info
*ip6_rt_copy(struct rt6_info
*ort
,
68 const struct in6_addr
*dest
);
69 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
);
70 static unsigned int ip6_default_advmss(const struct dst_entry
*dst
);
71 static unsigned int ip6_mtu(const struct dst_entry
*dst
);
72 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*);
73 static void ip6_dst_destroy(struct dst_entry
*);
74 static void ip6_dst_ifdown(struct dst_entry
*,
75 struct net_device
*dev
, int how
);
76 static int ip6_dst_gc(struct dst_ops
*ops
);
78 static int ip6_pkt_discard(struct sk_buff
*skb
);
79 static int ip6_pkt_discard_out(struct sk_buff
*skb
);
80 static void ip6_link_failure(struct sk_buff
*skb
);
81 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, struct sock
*sk
,
82 struct sk_buff
*skb
, u32 mtu
);
83 static void rt6_do_redirect(struct dst_entry
*dst
, struct sock
*sk
,
86 #ifdef CONFIG_IPV6_ROUTE_INFO
87 static struct rt6_info
*rt6_add_route_info(struct net
*net
,
88 const struct in6_addr
*prefix
, int prefixlen
,
89 const struct in6_addr
*gwaddr
, int ifindex
,
91 static struct rt6_info
*rt6_get_route_info(struct net
*net
,
92 const struct in6_addr
*prefix
, int prefixlen
,
93 const struct in6_addr
*gwaddr
, int ifindex
);
96 static u32
*ipv6_cow_metrics(struct dst_entry
*dst
, unsigned long old
)
98 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
99 struct inet_peer
*peer
;
102 if (!(rt
->dst
.flags
& DST_HOST
))
105 peer
= rt6_get_peer_create(rt
);
107 u32
*old_p
= __DST_METRICS_PTR(old
);
108 unsigned long prev
, new;
111 if (inet_metrics_new(peer
))
112 memcpy(p
, old_p
, sizeof(u32
) * RTAX_MAX
);
114 new = (unsigned long) p
;
115 prev
= cmpxchg(&dst
->_metrics
, old
, new);
118 p
= __DST_METRICS_PTR(prev
);
119 if (prev
& DST_METRICS_READ_ONLY
)
126 static inline const void *choose_neigh_daddr(struct rt6_info
*rt
,
130 struct in6_addr
*p
= &rt
->rt6i_gateway
;
132 if (!ipv6_addr_any(p
))
133 return (const void *) p
;
135 return &ipv6_hdr(skb
)->daddr
;
139 static struct neighbour
*ip6_neigh_lookup(const struct dst_entry
*dst
,
143 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
146 daddr
= choose_neigh_daddr(rt
, skb
, daddr
);
147 n
= __ipv6_neigh_lookup(&nd_tbl
, dst
->dev
, daddr
);
150 return neigh_create(&nd_tbl
, daddr
, dst
->dev
);
153 static int rt6_bind_neighbour(struct rt6_info
*rt
, struct net_device
*dev
)
155 struct neighbour
*n
= __ipv6_neigh_lookup(&nd_tbl
, dev
, &rt
->rt6i_gateway
);
157 n
= neigh_create(&nd_tbl
, &rt
->rt6i_gateway
, dev
);
166 static struct dst_ops ip6_dst_ops_template
= {
168 .protocol
= cpu_to_be16(ETH_P_IPV6
),
171 .check
= ip6_dst_check
,
172 .default_advmss
= ip6_default_advmss
,
174 .cow_metrics
= ipv6_cow_metrics
,
175 .destroy
= ip6_dst_destroy
,
176 .ifdown
= ip6_dst_ifdown
,
177 .negative_advice
= ip6_negative_advice
,
178 .link_failure
= ip6_link_failure
,
179 .update_pmtu
= ip6_rt_update_pmtu
,
180 .redirect
= rt6_do_redirect
,
181 .local_out
= __ip6_local_out
,
182 .neigh_lookup
= ip6_neigh_lookup
,
185 static unsigned int ip6_blackhole_mtu(const struct dst_entry
*dst
)
187 unsigned int mtu
= dst_metric_raw(dst
, RTAX_MTU
);
189 return mtu
? : dst
->dev
->mtu
;
192 static void ip6_rt_blackhole_update_pmtu(struct dst_entry
*dst
, struct sock
*sk
,
193 struct sk_buff
*skb
, u32 mtu
)
197 static void ip6_rt_blackhole_redirect(struct dst_entry
*dst
, struct sock
*sk
,
202 static u32
*ip6_rt_blackhole_cow_metrics(struct dst_entry
*dst
,
208 static struct dst_ops ip6_dst_blackhole_ops
= {
210 .protocol
= cpu_to_be16(ETH_P_IPV6
),
211 .destroy
= ip6_dst_destroy
,
212 .check
= ip6_dst_check
,
213 .mtu
= ip6_blackhole_mtu
,
214 .default_advmss
= ip6_default_advmss
,
215 .update_pmtu
= ip6_rt_blackhole_update_pmtu
,
216 .redirect
= ip6_rt_blackhole_redirect
,
217 .cow_metrics
= ip6_rt_blackhole_cow_metrics
,
218 .neigh_lookup
= ip6_neigh_lookup
,
221 static const u32 ip6_template_metrics
[RTAX_MAX
] = {
222 [RTAX_HOPLIMIT
- 1] = 255,
225 static struct rt6_info ip6_null_entry_template
= {
227 .__refcnt
= ATOMIC_INIT(1),
230 .error
= -ENETUNREACH
,
231 .input
= ip6_pkt_discard
,
232 .output
= ip6_pkt_discard_out
,
234 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
235 .rt6i_protocol
= RTPROT_KERNEL
,
236 .rt6i_metric
= ~(u32
) 0,
237 .rt6i_ref
= ATOMIC_INIT(1),
240 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
242 static int ip6_pkt_prohibit(struct sk_buff
*skb
);
243 static int ip6_pkt_prohibit_out(struct sk_buff
*skb
);
245 static struct rt6_info ip6_prohibit_entry_template
= {
247 .__refcnt
= ATOMIC_INIT(1),
251 .input
= ip6_pkt_prohibit
,
252 .output
= ip6_pkt_prohibit_out
,
254 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
255 .rt6i_protocol
= RTPROT_KERNEL
,
256 .rt6i_metric
= ~(u32
) 0,
257 .rt6i_ref
= ATOMIC_INIT(1),
260 static struct rt6_info ip6_blk_hole_entry_template
= {
262 .__refcnt
= ATOMIC_INIT(1),
266 .input
= dst_discard
,
267 .output
= dst_discard
,
269 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
270 .rt6i_protocol
= RTPROT_KERNEL
,
271 .rt6i_metric
= ~(u32
) 0,
272 .rt6i_ref
= ATOMIC_INIT(1),
277 /* allocate dst with ip6_dst_ops */
278 static inline struct rt6_info
*ip6_dst_alloc(struct net
*net
,
279 struct net_device
*dev
,
281 struct fib6_table
*table
)
283 struct rt6_info
*rt
= dst_alloc(&net
->ipv6
.ip6_dst_ops
, dev
,
284 0, DST_OBSOLETE_NONE
, flags
);
287 struct dst_entry
*dst
= &rt
->dst
;
289 memset(dst
+ 1, 0, sizeof(*rt
) - sizeof(*dst
));
290 rt6_init_peer(rt
, table
? &table
->tb6_peers
: net
->ipv6
.peers
);
295 static void ip6_dst_destroy(struct dst_entry
*dst
)
297 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
298 struct inet6_dev
*idev
= rt
->rt6i_idev
;
301 neigh_release(rt
->n
);
303 if (!(rt
->dst
.flags
& DST_HOST
))
304 dst_destroy_metrics_generic(dst
);
307 rt
->rt6i_idev
= NULL
;
311 if (!(rt
->rt6i_flags
& RTF_EXPIRES
) && dst
->from
)
312 dst_release(dst
->from
);
314 if (rt6_has_peer(rt
)) {
315 struct inet_peer
*peer
= rt6_peer_ptr(rt
);
320 static atomic_t __rt6_peer_genid
= ATOMIC_INIT(0);
322 static u32
rt6_peer_genid(void)
324 return atomic_read(&__rt6_peer_genid
);
327 void rt6_bind_peer(struct rt6_info
*rt
, int create
)
329 struct inet_peer_base
*base
;
330 struct inet_peer
*peer
;
332 base
= inetpeer_base_ptr(rt
->_rt6i_peer
);
336 peer
= inet_getpeer_v6(base
, &rt
->rt6i_dst
.addr
, create
);
338 if (!rt6_set_peer(rt
, peer
))
341 rt
->rt6i_peer_genid
= rt6_peer_genid();
345 static void ip6_dst_ifdown(struct dst_entry
*dst
, struct net_device
*dev
,
348 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
349 struct inet6_dev
*idev
= rt
->rt6i_idev
;
350 struct net_device
*loopback_dev
=
351 dev_net(dev
)->loopback_dev
;
353 if (dev
!= loopback_dev
) {
354 if (idev
&& idev
->dev
== dev
) {
355 struct inet6_dev
*loopback_idev
=
356 in6_dev_get(loopback_dev
);
358 rt
->rt6i_idev
= loopback_idev
;
362 if (rt
->n
&& rt
->n
->dev
== dev
) {
363 rt
->n
->dev
= loopback_dev
;
364 dev_hold(loopback_dev
);
370 static bool rt6_check_expired(const struct rt6_info
*rt
)
372 struct rt6_info
*ort
= NULL
;
374 if (rt
->rt6i_flags
& RTF_EXPIRES
) {
375 if (time_after(jiffies
, rt
->dst
.expires
))
377 } else if (rt
->dst
.from
) {
378 ort
= (struct rt6_info
*) rt
->dst
.from
;
379 return (ort
->rt6i_flags
& RTF_EXPIRES
) &&
380 time_after(jiffies
, ort
->dst
.expires
);
385 static bool rt6_need_strict(const struct in6_addr
*daddr
)
387 return ipv6_addr_type(daddr
) &
388 (IPV6_ADDR_MULTICAST
| IPV6_ADDR_LINKLOCAL
| IPV6_ADDR_LOOPBACK
);
392 * Route lookup. Any table->tb6_lock is implied.
395 static inline struct rt6_info
*rt6_device_match(struct net
*net
,
397 const struct in6_addr
*saddr
,
401 struct rt6_info
*local
= NULL
;
402 struct rt6_info
*sprt
;
404 if (!oif
&& ipv6_addr_any(saddr
))
407 for (sprt
= rt
; sprt
; sprt
= sprt
->dst
.rt6_next
) {
408 struct net_device
*dev
= sprt
->dst
.dev
;
411 if (dev
->ifindex
== oif
)
413 if (dev
->flags
& IFF_LOOPBACK
) {
414 if (!sprt
->rt6i_idev
||
415 sprt
->rt6i_idev
->dev
->ifindex
!= oif
) {
416 if (flags
& RT6_LOOKUP_F_IFACE
&& oif
)
418 if (local
&& (!oif
||
419 local
->rt6i_idev
->dev
->ifindex
== oif
))
425 if (ipv6_chk_addr(net
, saddr
, dev
,
426 flags
& RT6_LOOKUP_F_IFACE
))
435 if (flags
& RT6_LOOKUP_F_IFACE
)
436 return net
->ipv6
.ip6_null_entry
;
442 #ifdef CONFIG_IPV6_ROUTER_PREF
443 static void rt6_probe(struct rt6_info
*rt
)
445 struct neighbour
*neigh
;
447 * Okay, this does not seem to be appropriate
448 * for now, however, we need to check if it
449 * is really so; aka Router Reachability Probing.
451 * Router Reachability Probe MUST be rate-limited
452 * to no more than one per minute.
455 neigh
= rt
? rt
->n
: NULL
;
456 if (!neigh
|| (neigh
->nud_state
& NUD_VALID
))
458 read_lock_bh(&neigh
->lock
);
459 if (!(neigh
->nud_state
& NUD_VALID
) &&
460 time_after(jiffies
, neigh
->updated
+ rt
->rt6i_idev
->cnf
.rtr_probe_interval
)) {
461 struct in6_addr mcaddr
;
462 struct in6_addr
*target
;
464 neigh
->updated
= jiffies
;
465 read_unlock_bh(&neigh
->lock
);
467 target
= (struct in6_addr
*)&neigh
->primary_key
;
468 addrconf_addr_solict_mult(target
, &mcaddr
);
469 ndisc_send_ns(rt
->dst
.dev
, NULL
, target
, &mcaddr
, NULL
);
471 read_unlock_bh(&neigh
->lock
);
477 static inline void rt6_probe(struct rt6_info
*rt
)
483 * Default Router Selection (RFC 2461 6.3.6)
485 static inline int rt6_check_dev(struct rt6_info
*rt
, int oif
)
487 struct net_device
*dev
= rt
->dst
.dev
;
488 if (!oif
|| dev
->ifindex
== oif
)
490 if ((dev
->flags
& IFF_LOOPBACK
) &&
491 rt
->rt6i_idev
&& rt
->rt6i_idev
->dev
->ifindex
== oif
)
496 static inline int rt6_check_neigh(struct rt6_info
*rt
)
498 struct neighbour
*neigh
;
503 if (rt
->rt6i_flags
& RTF_NONEXTHOP
||
504 !(rt
->rt6i_flags
& RTF_GATEWAY
))
507 read_lock_bh(&neigh
->lock
);
508 if (neigh
->nud_state
& NUD_VALID
)
510 #ifdef CONFIG_IPV6_ROUTER_PREF
511 else if (neigh
->nud_state
& NUD_FAILED
)
516 read_unlock_bh(&neigh
->lock
);
523 static int rt6_score_route(struct rt6_info
*rt
, int oif
,
528 m
= rt6_check_dev(rt
, oif
);
529 if (!m
&& (strict
& RT6_LOOKUP_F_IFACE
))
531 #ifdef CONFIG_IPV6_ROUTER_PREF
532 m
|= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt
->rt6i_flags
)) << 2;
534 n
= rt6_check_neigh(rt
);
535 if (!n
&& (strict
& RT6_LOOKUP_F_REACHABLE
))
540 static struct rt6_info
*find_match(struct rt6_info
*rt
, int oif
, int strict
,
541 int *mpri
, struct rt6_info
*match
)
545 if (rt6_check_expired(rt
))
548 m
= rt6_score_route(rt
, oif
, strict
);
553 if (strict
& RT6_LOOKUP_F_REACHABLE
)
557 } else if (strict
& RT6_LOOKUP_F_REACHABLE
) {
565 static struct rt6_info
*find_rr_leaf(struct fib6_node
*fn
,
566 struct rt6_info
*rr_head
,
567 u32 metric
, int oif
, int strict
)
569 struct rt6_info
*rt
, *match
;
573 for (rt
= rr_head
; rt
&& rt
->rt6i_metric
== metric
;
574 rt
= rt
->dst
.rt6_next
)
575 match
= find_match(rt
, oif
, strict
, &mpri
, match
);
576 for (rt
= fn
->leaf
; rt
&& rt
!= rr_head
&& rt
->rt6i_metric
== metric
;
577 rt
= rt
->dst
.rt6_next
)
578 match
= find_match(rt
, oif
, strict
, &mpri
, match
);
583 static struct rt6_info
*rt6_select(struct fib6_node
*fn
, int oif
, int strict
)
585 struct rt6_info
*match
, *rt0
;
590 fn
->rr_ptr
= rt0
= fn
->leaf
;
592 match
= find_rr_leaf(fn
, rt0
, rt0
->rt6i_metric
, oif
, strict
);
595 (strict
& RT6_LOOKUP_F_REACHABLE
)) {
596 struct rt6_info
*next
= rt0
->dst
.rt6_next
;
598 /* no entries matched; do round-robin */
599 if (!next
|| next
->rt6i_metric
!= rt0
->rt6i_metric
)
606 net
= dev_net(rt0
->dst
.dev
);
607 return match
? match
: net
->ipv6
.ip6_null_entry
;
610 #ifdef CONFIG_IPV6_ROUTE_INFO
611 int rt6_route_rcv(struct net_device
*dev
, u8
*opt
, int len
,
612 const struct in6_addr
*gwaddr
)
614 struct net
*net
= dev_net(dev
);
615 struct route_info
*rinfo
= (struct route_info
*) opt
;
616 struct in6_addr prefix_buf
, *prefix
;
618 unsigned long lifetime
;
621 if (len
< sizeof(struct route_info
)) {
625 /* Sanity check for prefix_len and length */
626 if (rinfo
->length
> 3) {
628 } else if (rinfo
->prefix_len
> 128) {
630 } else if (rinfo
->prefix_len
> 64) {
631 if (rinfo
->length
< 2) {
634 } else if (rinfo
->prefix_len
> 0) {
635 if (rinfo
->length
< 1) {
640 pref
= rinfo
->route_pref
;
641 if (pref
== ICMPV6_ROUTER_PREF_INVALID
)
644 lifetime
= addrconf_timeout_fixup(ntohl(rinfo
->lifetime
), HZ
);
646 if (rinfo
->length
== 3)
647 prefix
= (struct in6_addr
*)rinfo
->prefix
;
649 /* this function is safe */
650 ipv6_addr_prefix(&prefix_buf
,
651 (struct in6_addr
*)rinfo
->prefix
,
653 prefix
= &prefix_buf
;
656 rt
= rt6_get_route_info(net
, prefix
, rinfo
->prefix_len
, gwaddr
,
659 if (rt
&& !lifetime
) {
665 rt
= rt6_add_route_info(net
, prefix
, rinfo
->prefix_len
, gwaddr
, dev
->ifindex
,
668 rt
->rt6i_flags
= RTF_ROUTEINFO
|
669 (rt
->rt6i_flags
& ~RTF_PREF_MASK
) | RTF_PREF(pref
);
672 if (!addrconf_finite_timeout(lifetime
))
673 rt6_clean_expires(rt
);
675 rt6_set_expires(rt
, jiffies
+ HZ
* lifetime
);
677 dst_release(&rt
->dst
);
683 #define BACKTRACK(__net, saddr) \
685 if (rt == __net->ipv6.ip6_null_entry) { \
686 struct fib6_node *pn; \
688 if (fn->fn_flags & RTN_TL_ROOT) \
691 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
692 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
695 if (fn->fn_flags & RTN_RTINFO) \
701 static struct rt6_info
*ip6_pol_route_lookup(struct net
*net
,
702 struct fib6_table
*table
,
703 struct flowi6
*fl6
, int flags
)
705 struct fib6_node
*fn
;
708 read_lock_bh(&table
->tb6_lock
);
709 fn
= fib6_lookup(&table
->tb6_root
, &fl6
->daddr
, &fl6
->saddr
);
712 rt
= rt6_device_match(net
, rt
, &fl6
->saddr
, fl6
->flowi6_oif
, flags
);
713 BACKTRACK(net
, &fl6
->saddr
);
715 dst_use(&rt
->dst
, jiffies
);
716 read_unlock_bh(&table
->tb6_lock
);
721 struct dst_entry
* ip6_route_lookup(struct net
*net
, struct flowi6
*fl6
,
724 return fib6_rule_lookup(net
, fl6
, flags
, ip6_pol_route_lookup
);
726 EXPORT_SYMBOL_GPL(ip6_route_lookup
);
728 struct rt6_info
*rt6_lookup(struct net
*net
, const struct in6_addr
*daddr
,
729 const struct in6_addr
*saddr
, int oif
, int strict
)
731 struct flowi6 fl6
= {
735 struct dst_entry
*dst
;
736 int flags
= strict
? RT6_LOOKUP_F_IFACE
: 0;
739 memcpy(&fl6
.saddr
, saddr
, sizeof(*saddr
));
740 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
743 dst
= fib6_rule_lookup(net
, &fl6
, flags
, ip6_pol_route_lookup
);
745 return (struct rt6_info
*) dst
;
752 EXPORT_SYMBOL(rt6_lookup
);
754 /* ip6_ins_rt is called with FREE table->tb6_lock.
755 It takes new route entry, the addition fails by any reason the
756 route is freed. In any case, if caller does not hold it, it may
760 static int __ip6_ins_rt(struct rt6_info
*rt
, struct nl_info
*info
)
763 struct fib6_table
*table
;
765 table
= rt
->rt6i_table
;
766 write_lock_bh(&table
->tb6_lock
);
767 err
= fib6_add(&table
->tb6_root
, rt
, info
);
768 write_unlock_bh(&table
->tb6_lock
);
773 int ip6_ins_rt(struct rt6_info
*rt
)
775 struct nl_info info
= {
776 .nl_net
= dev_net(rt
->dst
.dev
),
778 return __ip6_ins_rt(rt
, &info
);
781 static struct rt6_info
*rt6_alloc_cow(struct rt6_info
*ort
,
782 const struct in6_addr
*daddr
,
783 const struct in6_addr
*saddr
)
791 rt
= ip6_rt_copy(ort
, daddr
);
794 int attempts
= !in_softirq();
796 if (!(rt
->rt6i_flags
& RTF_GATEWAY
)) {
797 if (ort
->rt6i_dst
.plen
!= 128 &&
798 ipv6_addr_equal(&ort
->rt6i_dst
.addr
, daddr
))
799 rt
->rt6i_flags
|= RTF_ANYCAST
;
800 rt
->rt6i_gateway
= *daddr
;
803 rt
->rt6i_flags
|= RTF_CACHE
;
805 #ifdef CONFIG_IPV6_SUBTREES
806 if (rt
->rt6i_src
.plen
&& saddr
) {
807 rt
->rt6i_src
.addr
= *saddr
;
808 rt
->rt6i_src
.plen
= 128;
813 if (rt6_bind_neighbour(rt
, rt
->dst
.dev
)) {
814 struct net
*net
= dev_net(rt
->dst
.dev
);
815 int saved_rt_min_interval
=
816 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
817 int saved_rt_elasticity
=
818 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
820 if (attempts
-- > 0) {
821 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
= 1;
822 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
= 0;
824 ip6_dst_gc(&net
->ipv6
.ip6_dst_ops
);
826 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
=
828 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
=
829 saved_rt_min_interval
;
833 net_warn_ratelimited("Neighbour table overflow\n");
842 static struct rt6_info
*rt6_alloc_clone(struct rt6_info
*ort
,
843 const struct in6_addr
*daddr
)
845 struct rt6_info
*rt
= ip6_rt_copy(ort
, daddr
);
848 rt
->rt6i_flags
|= RTF_CACHE
;
849 rt
->n
= neigh_clone(ort
->n
);
854 static struct rt6_info
*ip6_pol_route(struct net
*net
, struct fib6_table
*table
, int oif
,
855 struct flowi6
*fl6
, int flags
)
857 struct fib6_node
*fn
;
858 struct rt6_info
*rt
, *nrt
;
862 int reachable
= net
->ipv6
.devconf_all
->forwarding
? 0 : RT6_LOOKUP_F_REACHABLE
;
864 strict
|= flags
& RT6_LOOKUP_F_IFACE
;
867 read_lock_bh(&table
->tb6_lock
);
870 fn
= fib6_lookup(&table
->tb6_root
, &fl6
->daddr
, &fl6
->saddr
);
873 rt
= rt6_select(fn
, oif
, strict
| reachable
);
875 BACKTRACK(net
, &fl6
->saddr
);
876 if (rt
== net
->ipv6
.ip6_null_entry
||
877 rt
->rt6i_flags
& RTF_CACHE
)
881 read_unlock_bh(&table
->tb6_lock
);
883 if (!rt
->n
&& !(rt
->rt6i_flags
& RTF_NONEXTHOP
))
884 nrt
= rt6_alloc_cow(rt
, &fl6
->daddr
, &fl6
->saddr
);
885 else if (!(rt
->dst
.flags
& DST_HOST
))
886 nrt
= rt6_alloc_clone(rt
, &fl6
->daddr
);
890 dst_release(&rt
->dst
);
891 rt
= nrt
? : net
->ipv6
.ip6_null_entry
;
895 err
= ip6_ins_rt(nrt
);
904 * Race condition! In the gap, when table->tb6_lock was
905 * released someone could insert this route. Relookup.
907 dst_release(&rt
->dst
);
916 read_unlock_bh(&table
->tb6_lock
);
918 rt
->dst
.lastuse
= jiffies
;
924 static struct rt6_info
*ip6_pol_route_input(struct net
*net
, struct fib6_table
*table
,
925 struct flowi6
*fl6
, int flags
)
927 return ip6_pol_route(net
, table
, fl6
->flowi6_iif
, fl6
, flags
);
930 static struct dst_entry
*ip6_route_input_lookup(struct net
*net
,
931 struct net_device
*dev
,
932 struct flowi6
*fl6
, int flags
)
934 if (rt6_need_strict(&fl6
->daddr
) && dev
->type
!= ARPHRD_PIMREG
)
935 flags
|= RT6_LOOKUP_F_IFACE
;
937 return fib6_rule_lookup(net
, fl6
, flags
, ip6_pol_route_input
);
940 void ip6_route_input(struct sk_buff
*skb
)
942 const struct ipv6hdr
*iph
= ipv6_hdr(skb
);
943 struct net
*net
= dev_net(skb
->dev
);
944 int flags
= RT6_LOOKUP_F_HAS_SADDR
;
945 struct flowi6 fl6
= {
946 .flowi6_iif
= skb
->dev
->ifindex
,
949 .flowlabel
= (* (__be32
*) iph
) & IPV6_FLOWINFO_MASK
,
950 .flowi6_mark
= skb
->mark
,
951 .flowi6_proto
= iph
->nexthdr
,
954 skb_dst_set(skb
, ip6_route_input_lookup(net
, skb
->dev
, &fl6
, flags
));
957 static struct rt6_info
*ip6_pol_route_output(struct net
*net
, struct fib6_table
*table
,
958 struct flowi6
*fl6
, int flags
)
960 return ip6_pol_route(net
, table
, fl6
->flowi6_oif
, fl6
, flags
);
963 struct dst_entry
* ip6_route_output(struct net
*net
, const struct sock
*sk
,
968 fl6
->flowi6_iif
= LOOPBACK_IFINDEX
;
970 if ((sk
&& sk
->sk_bound_dev_if
) || rt6_need_strict(&fl6
->daddr
))
971 flags
|= RT6_LOOKUP_F_IFACE
;
973 if (!ipv6_addr_any(&fl6
->saddr
))
974 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
976 flags
|= rt6_srcprefs2flags(inet6_sk(sk
)->srcprefs
);
978 return fib6_rule_lookup(net
, fl6
, flags
, ip6_pol_route_output
);
981 EXPORT_SYMBOL(ip6_route_output
);
983 struct dst_entry
*ip6_blackhole_route(struct net
*net
, struct dst_entry
*dst_orig
)
985 struct rt6_info
*rt
, *ort
= (struct rt6_info
*) dst_orig
;
986 struct dst_entry
*new = NULL
;
988 rt
= dst_alloc(&ip6_dst_blackhole_ops
, ort
->dst
.dev
, 1, DST_OBSOLETE_NONE
, 0);
992 memset(new + 1, 0, sizeof(*rt
) - sizeof(*new));
993 rt6_init_peer(rt
, net
->ipv6
.peers
);
996 new->input
= dst_discard
;
997 new->output
= dst_discard
;
999 if (dst_metrics_read_only(&ort
->dst
))
1000 new->_metrics
= ort
->dst
._metrics
;
1002 dst_copy_metrics(new, &ort
->dst
);
1003 rt
->rt6i_idev
= ort
->rt6i_idev
;
1005 in6_dev_hold(rt
->rt6i_idev
);
1007 rt
->rt6i_gateway
= ort
->rt6i_gateway
;
1008 rt
->rt6i_flags
= ort
->rt6i_flags
;
1009 rt6_clean_expires(rt
);
1010 rt
->rt6i_metric
= 0;
1012 memcpy(&rt
->rt6i_dst
, &ort
->rt6i_dst
, sizeof(struct rt6key
));
1013 #ifdef CONFIG_IPV6_SUBTREES
1014 memcpy(&rt
->rt6i_src
, &ort
->rt6i_src
, sizeof(struct rt6key
));
1020 dst_release(dst_orig
);
1021 return new ? new : ERR_PTR(-ENOMEM
);
1025 * Destination cache support functions
1028 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
)
1030 struct rt6_info
*rt
;
1032 rt
= (struct rt6_info
*) dst
;
1034 if (rt
->rt6i_node
&& (rt
->rt6i_node
->fn_sernum
== cookie
)) {
1035 if (rt
->rt6i_peer_genid
!= rt6_peer_genid()) {
1036 if (!rt6_has_peer(rt
))
1037 rt6_bind_peer(rt
, 0);
1038 rt
->rt6i_peer_genid
= rt6_peer_genid();
1045 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*dst
)
1047 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
1050 if (rt
->rt6i_flags
& RTF_CACHE
) {
1051 if (rt6_check_expired(rt
)) {
1063 static void ip6_link_failure(struct sk_buff
*skb
)
1065 struct rt6_info
*rt
;
1067 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, ICMPV6_ADDR_UNREACH
, 0);
1069 rt
= (struct rt6_info
*) skb_dst(skb
);
1071 if (rt
->rt6i_flags
& RTF_CACHE
)
1072 rt6_update_expires(rt
, 0);
1073 else if (rt
->rt6i_node
&& (rt
->rt6i_flags
& RTF_DEFAULT
))
1074 rt
->rt6i_node
->fn_sernum
= -1;
1078 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, struct sock
*sk
,
1079 struct sk_buff
*skb
, u32 mtu
)
1081 struct rt6_info
*rt6
= (struct rt6_info
*)dst
;
1084 if (mtu
< dst_mtu(dst
) && rt6
->rt6i_dst
.plen
== 128) {
1085 struct net
*net
= dev_net(dst
->dev
);
1087 rt6
->rt6i_flags
|= RTF_MODIFIED
;
1088 if (mtu
< IPV6_MIN_MTU
) {
1089 u32 features
= dst_metric(dst
, RTAX_FEATURES
);
1091 features
|= RTAX_FEATURE_ALLFRAG
;
1092 dst_metric_set(dst
, RTAX_FEATURES
, features
);
1094 dst_metric_set(dst
, RTAX_MTU
, mtu
);
1095 rt6_update_expires(rt6
, net
->ipv6
.sysctl
.ip6_rt_mtu_expires
);
1099 void ip6_update_pmtu(struct sk_buff
*skb
, struct net
*net
, __be32 mtu
,
1102 const struct ipv6hdr
*iph
= (struct ipv6hdr
*) skb
->data
;
1103 struct dst_entry
*dst
;
1106 memset(&fl6
, 0, sizeof(fl6
));
1107 fl6
.flowi6_oif
= oif
;
1108 fl6
.flowi6_mark
= mark
;
1109 fl6
.flowi6_flags
= 0;
1110 fl6
.daddr
= iph
->daddr
;
1111 fl6
.saddr
= iph
->saddr
;
1112 fl6
.flowlabel
= (*(__be32
*) iph
) & IPV6_FLOWINFO_MASK
;
1114 dst
= ip6_route_output(net
, NULL
, &fl6
);
1116 ip6_rt_update_pmtu(dst
, NULL
, skb
, ntohl(mtu
));
1119 EXPORT_SYMBOL_GPL(ip6_update_pmtu
);
1121 void ip6_sk_update_pmtu(struct sk_buff
*skb
, struct sock
*sk
, __be32 mtu
)
1123 ip6_update_pmtu(skb
, sock_net(sk
), mtu
,
1124 sk
->sk_bound_dev_if
, sk
->sk_mark
);
1126 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu
);
1128 void ip6_redirect(struct sk_buff
*skb
, struct net
*net
, int oif
, u32 mark
)
1130 const struct ipv6hdr
*iph
= (struct ipv6hdr
*) skb
->data
;
1131 struct dst_entry
*dst
;
1134 memset(&fl6
, 0, sizeof(fl6
));
1135 fl6
.flowi6_oif
= oif
;
1136 fl6
.flowi6_mark
= mark
;
1137 fl6
.flowi6_flags
= 0;
1138 fl6
.daddr
= iph
->daddr
;
1139 fl6
.saddr
= iph
->saddr
;
1140 fl6
.flowlabel
= (*(__be32
*) iph
) & IPV6_FLOWINFO_MASK
;
1142 dst
= ip6_route_output(net
, NULL
, &fl6
);
1144 rt6_do_redirect(dst
, NULL
, skb
);
1147 EXPORT_SYMBOL_GPL(ip6_redirect
);
1149 void ip6_sk_redirect(struct sk_buff
*skb
, struct sock
*sk
)
1151 ip6_redirect(skb
, sock_net(sk
), sk
->sk_bound_dev_if
, sk
->sk_mark
);
1153 EXPORT_SYMBOL_GPL(ip6_sk_redirect
);
1155 static unsigned int ip6_default_advmss(const struct dst_entry
*dst
)
1157 struct net_device
*dev
= dst
->dev
;
1158 unsigned int mtu
= dst_mtu(dst
);
1159 struct net
*net
= dev_net(dev
);
1161 mtu
-= sizeof(struct ipv6hdr
) + sizeof(struct tcphdr
);
1163 if (mtu
< net
->ipv6
.sysctl
.ip6_rt_min_advmss
)
1164 mtu
= net
->ipv6
.sysctl
.ip6_rt_min_advmss
;
1167 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1168 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1169 * IPV6_MAXPLEN is also valid and means: "any MSS,
1170 * rely only on pmtu discovery"
1172 if (mtu
> IPV6_MAXPLEN
- sizeof(struct tcphdr
))
1177 static unsigned int ip6_mtu(const struct dst_entry
*dst
)
1179 struct inet6_dev
*idev
;
1180 unsigned int mtu
= dst_metric_raw(dst
, RTAX_MTU
);
1188 idev
= __in6_dev_get(dst
->dev
);
1190 mtu
= idev
->cnf
.mtu6
;
1196 static struct dst_entry
*icmp6_dst_gc_list
;
1197 static DEFINE_SPINLOCK(icmp6_dst_lock
);
1199 struct dst_entry
*icmp6_dst_alloc(struct net_device
*dev
,
1200 struct neighbour
*neigh
,
1203 struct dst_entry
*dst
;
1204 struct rt6_info
*rt
;
1205 struct inet6_dev
*idev
= in6_dev_get(dev
);
1206 struct net
*net
= dev_net(dev
);
1208 if (unlikely(!idev
))
1209 return ERR_PTR(-ENODEV
);
1211 rt
= ip6_dst_alloc(net
, dev
, 0, NULL
);
1212 if (unlikely(!rt
)) {
1214 dst
= ERR_PTR(-ENOMEM
);
1221 neigh
= ip6_neigh_lookup(&rt
->dst
, NULL
, &fl6
->daddr
);
1222 if (IS_ERR(neigh
)) {
1225 return ERR_CAST(neigh
);
1229 rt
->dst
.flags
|= DST_HOST
;
1230 rt
->dst
.output
= ip6_output
;
1232 atomic_set(&rt
->dst
.__refcnt
, 1);
1233 rt
->rt6i_dst
.addr
= fl6
->daddr
;
1234 rt
->rt6i_dst
.plen
= 128;
1235 rt
->rt6i_idev
= idev
;
1236 dst_metric_set(&rt
->dst
, RTAX_HOPLIMIT
, 255);
1238 spin_lock_bh(&icmp6_dst_lock
);
1239 rt
->dst
.next
= icmp6_dst_gc_list
;
1240 icmp6_dst_gc_list
= &rt
->dst
;
1241 spin_unlock_bh(&icmp6_dst_lock
);
1243 fib6_force_start_gc(net
);
1245 dst
= xfrm_lookup(net
, &rt
->dst
, flowi6_to_flowi(fl6
), NULL
, 0);
1251 int icmp6_dst_gc(void)
1253 struct dst_entry
*dst
, **pprev
;
1256 spin_lock_bh(&icmp6_dst_lock
);
1257 pprev
= &icmp6_dst_gc_list
;
1259 while ((dst
= *pprev
) != NULL
) {
1260 if (!atomic_read(&dst
->__refcnt
)) {
1269 spin_unlock_bh(&icmp6_dst_lock
);
1274 static void icmp6_clean_all(int (*func
)(struct rt6_info
*rt
, void *arg
),
1277 struct dst_entry
*dst
, **pprev
;
1279 spin_lock_bh(&icmp6_dst_lock
);
1280 pprev
= &icmp6_dst_gc_list
;
1281 while ((dst
= *pprev
) != NULL
) {
1282 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
1283 if (func(rt
, arg
)) {
1290 spin_unlock_bh(&icmp6_dst_lock
);
1293 static int ip6_dst_gc(struct dst_ops
*ops
)
1295 unsigned long now
= jiffies
;
1296 struct net
*net
= container_of(ops
, struct net
, ipv6
.ip6_dst_ops
);
1297 int rt_min_interval
= net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
1298 int rt_max_size
= net
->ipv6
.sysctl
.ip6_rt_max_size
;
1299 int rt_elasticity
= net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
1300 int rt_gc_timeout
= net
->ipv6
.sysctl
.ip6_rt_gc_timeout
;
1301 unsigned long rt_last_gc
= net
->ipv6
.ip6_rt_last_gc
;
1304 entries
= dst_entries_get_fast(ops
);
1305 if (time_after(rt_last_gc
+ rt_min_interval
, now
) &&
1306 entries
<= rt_max_size
)
1309 net
->ipv6
.ip6_rt_gc_expire
++;
1310 fib6_run_gc(net
->ipv6
.ip6_rt_gc_expire
, net
);
1311 net
->ipv6
.ip6_rt_last_gc
= now
;
1312 entries
= dst_entries_get_slow(ops
);
1313 if (entries
< ops
->gc_thresh
)
1314 net
->ipv6
.ip6_rt_gc_expire
= rt_gc_timeout
>>1;
1316 net
->ipv6
.ip6_rt_gc_expire
-= net
->ipv6
.ip6_rt_gc_expire
>>rt_elasticity
;
1317 return entries
> rt_max_size
;
1320 /* Clean host part of a prefix. Not necessary in radix tree,
1321 but results in cleaner routing tables.
1323 Remove it only when all the things will work!
1326 int ip6_dst_hoplimit(struct dst_entry
*dst
)
1328 int hoplimit
= dst_metric_raw(dst
, RTAX_HOPLIMIT
);
1329 if (hoplimit
== 0) {
1330 struct net_device
*dev
= dst
->dev
;
1331 struct inet6_dev
*idev
;
1334 idev
= __in6_dev_get(dev
);
1336 hoplimit
= idev
->cnf
.hop_limit
;
1338 hoplimit
= dev_net(dev
)->ipv6
.devconf_all
->hop_limit
;
1343 EXPORT_SYMBOL(ip6_dst_hoplimit
);
1349 int ip6_route_add(struct fib6_config
*cfg
)
1352 struct net
*net
= cfg
->fc_nlinfo
.nl_net
;
1353 struct rt6_info
*rt
= NULL
;
1354 struct net_device
*dev
= NULL
;
1355 struct inet6_dev
*idev
= NULL
;
1356 struct fib6_table
*table
;
1359 if (cfg
->fc_dst_len
> 128 || cfg
->fc_src_len
> 128)
1361 #ifndef CONFIG_IPV6_SUBTREES
1362 if (cfg
->fc_src_len
)
1365 if (cfg
->fc_ifindex
) {
1367 dev
= dev_get_by_index(net
, cfg
->fc_ifindex
);
1370 idev
= in6_dev_get(dev
);
1375 if (cfg
->fc_metric
== 0)
1376 cfg
->fc_metric
= IP6_RT_PRIO_USER
;
1379 if (cfg
->fc_nlinfo
.nlh
&&
1380 !(cfg
->fc_nlinfo
.nlh
->nlmsg_flags
& NLM_F_CREATE
)) {
1381 table
= fib6_get_table(net
, cfg
->fc_table
);
1383 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1384 table
= fib6_new_table(net
, cfg
->fc_table
);
1387 table
= fib6_new_table(net
, cfg
->fc_table
);
1393 rt
= ip6_dst_alloc(net
, NULL
, DST_NOCOUNT
, table
);
1400 rt
->dst
.obsolete
= -1;
1402 if (cfg
->fc_flags
& RTF_EXPIRES
)
1403 rt6_set_expires(rt
, jiffies
+
1404 clock_t_to_jiffies(cfg
->fc_expires
));
1406 rt6_clean_expires(rt
);
1408 if (cfg
->fc_protocol
== RTPROT_UNSPEC
)
1409 cfg
->fc_protocol
= RTPROT_BOOT
;
1410 rt
->rt6i_protocol
= cfg
->fc_protocol
;
1412 addr_type
= ipv6_addr_type(&cfg
->fc_dst
);
1414 if (addr_type
& IPV6_ADDR_MULTICAST
)
1415 rt
->dst
.input
= ip6_mc_input
;
1416 else if (cfg
->fc_flags
& RTF_LOCAL
)
1417 rt
->dst
.input
= ip6_input
;
1419 rt
->dst
.input
= ip6_forward
;
1421 rt
->dst
.output
= ip6_output
;
1423 ipv6_addr_prefix(&rt
->rt6i_dst
.addr
, &cfg
->fc_dst
, cfg
->fc_dst_len
);
1424 rt
->rt6i_dst
.plen
= cfg
->fc_dst_len
;
1425 if (rt
->rt6i_dst
.plen
== 128)
1426 rt
->dst
.flags
|= DST_HOST
;
1428 if (!(rt
->dst
.flags
& DST_HOST
) && cfg
->fc_mx
) {
1429 u32
*metrics
= kzalloc(sizeof(u32
) * RTAX_MAX
, GFP_KERNEL
);
1434 dst_init_metrics(&rt
->dst
, metrics
, 0);
1436 #ifdef CONFIG_IPV6_SUBTREES
1437 ipv6_addr_prefix(&rt
->rt6i_src
.addr
, &cfg
->fc_src
, cfg
->fc_src_len
);
1438 rt
->rt6i_src
.plen
= cfg
->fc_src_len
;
1441 rt
->rt6i_metric
= cfg
->fc_metric
;
1443 /* We cannot add true routes via loopback here,
1444 they would result in kernel looping; promote them to reject routes
1446 if ((cfg
->fc_flags
& RTF_REJECT
) ||
1447 (dev
&& (dev
->flags
& IFF_LOOPBACK
) &&
1448 !(addr_type
& IPV6_ADDR_LOOPBACK
) &&
1449 !(cfg
->fc_flags
& RTF_LOCAL
))) {
1450 /* hold loopback dev/idev if we haven't done so. */
1451 if (dev
!= net
->loopback_dev
) {
1456 dev
= net
->loopback_dev
;
1458 idev
= in6_dev_get(dev
);
1464 rt
->dst
.output
= ip6_pkt_discard_out
;
1465 rt
->dst
.input
= ip6_pkt_discard
;
1466 rt
->rt6i_flags
= RTF_REJECT
|RTF_NONEXTHOP
;
1467 switch (cfg
->fc_type
) {
1469 rt
->dst
.error
= -EINVAL
;
1472 rt
->dst
.error
= -EACCES
;
1475 rt
->dst
.error
= -EAGAIN
;
1478 rt
->dst
.error
= -ENETUNREACH
;
1484 if (cfg
->fc_flags
& RTF_GATEWAY
) {
1485 const struct in6_addr
*gw_addr
;
1488 gw_addr
= &cfg
->fc_gateway
;
1489 rt
->rt6i_gateway
= *gw_addr
;
1490 gwa_type
= ipv6_addr_type(gw_addr
);
1492 if (gwa_type
!= (IPV6_ADDR_LINKLOCAL
|IPV6_ADDR_UNICAST
)) {
1493 struct rt6_info
*grt
;
1495 /* IPv6 strictly forbids using non-link-local
1496 addresses as the nexthop address.
1497 Otherwise, the router will not be able to send redirects.
1498 It is very good, but in some (rare!) circumstances
1499 (SIT, PtP, NBMA NOARP links) it is handy to allow
1500 some exceptions. --ANK
1503 if (!(gwa_type
& IPV6_ADDR_UNICAST
))
1506 grt
= rt6_lookup(net
, gw_addr
, NULL
, cfg
->fc_ifindex
, 1);
1508 err
= -EHOSTUNREACH
;
1512 if (dev
!= grt
->dst
.dev
) {
1513 dst_release(&grt
->dst
);
1518 idev
= grt
->rt6i_idev
;
1520 in6_dev_hold(grt
->rt6i_idev
);
1522 if (!(grt
->rt6i_flags
& RTF_GATEWAY
))
1524 dst_release(&grt
->dst
);
1530 if (!dev
|| (dev
->flags
& IFF_LOOPBACK
))
1538 if (!ipv6_addr_any(&cfg
->fc_prefsrc
)) {
1539 if (!ipv6_chk_addr(net
, &cfg
->fc_prefsrc
, dev
, 0)) {
1543 rt
->rt6i_prefsrc
.addr
= cfg
->fc_prefsrc
;
1544 rt
->rt6i_prefsrc
.plen
= 128;
1546 rt
->rt6i_prefsrc
.plen
= 0;
1548 if (cfg
->fc_flags
& (RTF_GATEWAY
| RTF_NONEXTHOP
)) {
1549 err
= rt6_bind_neighbour(rt
, dev
);
1554 rt
->rt6i_flags
= cfg
->fc_flags
;
1561 nla_for_each_attr(nla
, cfg
->fc_mx
, cfg
->fc_mx_len
, remaining
) {
1562 int type
= nla_type(nla
);
1565 if (type
> RTAX_MAX
) {
1570 dst_metric_set(&rt
->dst
, type
, nla_get_u32(nla
));
1576 rt
->rt6i_idev
= idev
;
1577 rt
->rt6i_table
= table
;
1579 cfg
->fc_nlinfo
.nl_net
= dev_net(dev
);
1581 return __ip6_ins_rt(rt
, &cfg
->fc_nlinfo
);
/*
 * Unlink @rt from the FIB table it belongs to (rt->rt6i_table).
 *
 * @rt:   route to delete
 * @info: netlink context, handed through unchanged to fib6_del()
 *
 * fib6_del() is called with the table's tb6_lock held for writing, and a
 * reference on rt->dst is dropped with dst_release() before the lock is
 * released.  The namespace's ip6_null_entry is tested for up front; the
 * statement guarded by that test, and the final return, are not visible
 * in this excerpt (err receives fib6_del()'s result -- presumably that is
 * what is returned; confirm against the full source).
 */
1593 static int __ip6_del_rt(struct rt6_info
*rt
, struct nl_info
*info
)
1596 struct fib6_table
*table
;
1597 struct net
*net
= dev_net(rt
->dst
.dev
);
/* The per-namespace null entry is handled specially. */
1599 if (rt
== net
->ipv6
.ip6_null_entry
)
1602 table
= rt
->rt6i_table
;
1603 write_lock_bh(&table
->tb6_lock
);
1605 err
= fib6_del(rt
, info
);
/* Drop a dst reference on rt while still under the table lock. */
1606 dst_release(&rt
->dst
);
1608 write_unlock_bh(&table
->tb6_lock
);
/*
 * Delete @rt without a caller-supplied netlink context.
 *
 * Builds a struct nl_info whose nl_net is the namespace of the route's
 * device (dev_net(rt->dst.dev)) and delegates to __ip6_del_rt(), whose
 * result is returned.
 */
1613 int ip6_del_rt(struct rt6_info
*rt
)
1615 struct nl_info info
= {
1616 .nl_net
= dev_net(rt
->dst
.dev
),
1618 return __ip6_del_rt(rt
, &info
);
/*
 * Delete the route described by @cfg.
 *
 * The table cfg->fc_table is looked up in the namespace
 * cfg->fc_nlinfo.nl_net.  Under the table's read lock, fib6_locate() finds
 * the node for the destination prefix (fc_dst/fc_dst_len) and source
 * prefix (fc_src/fc_src_len), and the node's leaf list is walked via
 * dst.rt6_next.  Each candidate is tested against:
 *   - cfg->fc_ifindex, when non-zero (compared with rt->dst.dev->ifindex);
 *   - cfg->fc_gateway, when RTF_GATEWAY is set in cfg->fc_flags;
 *   - cfg->fc_metric, when non-zero.
 * The read lock is released before the selected route is passed to
 * __ip6_del_rt() together with cfg->fc_nlinfo.
 *
 * NOTE(review): the statements guarded by the three tests, any NULL checks
 * on the table/node, and the not-found return value are not visible in
 * this excerpt.
 */
1621 static int ip6_route_del(struct fib6_config
*cfg
)
1623 struct fib6_table
*table
;
1624 struct fib6_node
*fn
;
1625 struct rt6_info
*rt
;
1628 table
= fib6_get_table(cfg
->fc_nlinfo
.nl_net
, cfg
->fc_table
);
1632 read_lock_bh(&table
->tb6_lock
);
1634 fn
= fib6_locate(&table
->tb6_root
,
1635 &cfg
->fc_dst
, cfg
->fc_dst_len
,
1636 &cfg
->fc_src
, cfg
->fc_src_len
);
/* Walk every route hanging off the located node. */
1639 for (rt
= fn
->leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1640 if (cfg
->fc_ifindex
&&
1642 rt
->dst
.dev
->ifindex
!= cfg
->fc_ifindex
))
1644 if (cfg
->fc_flags
& RTF_GATEWAY
&&
1645 !ipv6_addr_equal(&cfg
->fc_gateway
, &rt
->rt6i_gateway
))
1647 if (cfg
->fc_metric
&& cfg
->fc_metric
!= rt
->rt6i_metric
)
/* Match found: leave the read-side lock before deleting. */
1650 read_unlock_bh(&table
->tb6_lock
);
1652 return __ip6_del_rt(rt
, &cfg
->fc_nlinfo
);
1655 read_unlock_bh(&table
->tb6_lock
);
1660 static void rt6_do_redirect(struct dst_entry
*dst
, struct sock
*sk
, struct sk_buff
*skb
)
1662 struct net
*net
= dev_net(skb
->dev
);
1663 struct netevent_redirect netevent
;
1664 struct rt6_info
*rt
, *nrt
= NULL
;
1665 const struct in6_addr
*target
;
1666 struct ndisc_options ndopts
;
1667 const struct in6_addr
*dest
;
1668 struct neighbour
*old_neigh
;
1669 struct inet6_dev
*in6_dev
;
1670 struct neighbour
*neigh
;
1671 struct icmp6hdr
*icmph
;
1672 int optlen
, on_link
;
1675 optlen
= skb
->tail
- skb
->transport_header
;
1676 optlen
-= sizeof(struct icmp6hdr
) + 2 * sizeof(struct in6_addr
);
1679 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1683 icmph
= icmp6_hdr(skb
);
1684 target
= (const struct in6_addr
*) (icmph
+ 1);
1687 if (ipv6_addr_is_multicast(dest
)) {
1688 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1693 if (ipv6_addr_equal(dest
, target
)) {
1695 } else if (ipv6_addr_type(target
) !=
1696 (IPV6_ADDR_UNICAST
|IPV6_ADDR_LINKLOCAL
)) {
1697 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1701 in6_dev
= __in6_dev_get(skb
->dev
);
1704 if (in6_dev
->cnf
.forwarding
|| !in6_dev
->cnf
.accept_redirects
)
1708 * The IP source address of the Redirect MUST be the same as the current
1709 * first-hop router for the specified ICMP Destination Address.
1712 if (!ndisc_parse_options((u8
*)(dest
+ 1), optlen
, &ndopts
)) {
1713 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1718 if (ndopts
.nd_opts_tgt_lladdr
) {
1719 lladdr
= ndisc_opt_addr_data(ndopts
.nd_opts_tgt_lladdr
,
1722 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1727 rt
= (struct rt6_info
*) dst
;
1728 if (rt
== net
->ipv6
.ip6_null_entry
) {
1729 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1733 /* Redirect received -> path was valid.
1734 * Look, redirects are sent only in response to data packets,
1735 * so that this nexthop apparently is reachable. --ANK
1737 dst_confirm(&rt
->dst
);
1739 neigh
= __neigh_lookup(&nd_tbl
, target
, skb
->dev
, 1);
1743 /* Duplicate redirect: silently ignore. */
1745 if (neigh
== old_neigh
)
1749 * We have finally decided to accept it.
1752 neigh_update(neigh
, lladdr
, NUD_STALE
,
1753 NEIGH_UPDATE_F_WEAK_OVERRIDE
|
1754 NEIGH_UPDATE_F_OVERRIDE
|
1755 (on_link
? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER
|
1756 NEIGH_UPDATE_F_ISROUTER
))
1759 nrt
= ip6_rt_copy(rt
, dest
);
1763 nrt
->rt6i_flags
= RTF_GATEWAY
|RTF_UP
|RTF_DYNAMIC
|RTF_CACHE
;
1765 nrt
->rt6i_flags
&= ~RTF_GATEWAY
;
1767 nrt
->rt6i_gateway
= *(struct in6_addr
*)neigh
->primary_key
;
1768 nrt
->n
= neigh_clone(neigh
);
1770 if (ip6_ins_rt(nrt
))
1773 netevent
.old
= &rt
->dst
;
1774 netevent
.old_neigh
= old_neigh
;
1775 netevent
.new = &nrt
->dst
;
1776 netevent
.new_neigh
= neigh
;
1777 netevent
.daddr
= dest
;
1778 call_netevent_notifiers(NETEVENT_REDIRECT
, &netevent
);
1780 if (rt
->rt6i_flags
& RTF_CACHE
) {
1781 rt
= (struct rt6_info
*) dst_clone(&rt
->dst
);
1786 neigh_release(neigh
);
1790 * Misc support functions
1793 static struct rt6_info
*ip6_rt_copy(struct rt6_info
*ort
,
1794 const struct in6_addr
*dest
)
1796 struct net
*net
= dev_net(ort
->dst
.dev
);
1797 struct rt6_info
*rt
= ip6_dst_alloc(net
, ort
->dst
.dev
, 0,
1801 rt
->dst
.input
= ort
->dst
.input
;
1802 rt
->dst
.output
= ort
->dst
.output
;
1803 rt
->dst
.flags
|= DST_HOST
;
1805 rt
->rt6i_dst
.addr
= *dest
;
1806 rt
->rt6i_dst
.plen
= 128;
1807 dst_copy_metrics(&rt
->dst
, &ort
->dst
);
1808 rt
->dst
.error
= ort
->dst
.error
;
1809 rt
->rt6i_idev
= ort
->rt6i_idev
;
1811 in6_dev_hold(rt
->rt6i_idev
);
1812 rt
->dst
.lastuse
= jiffies
;
1814 rt
->rt6i_gateway
= ort
->rt6i_gateway
;
1815 rt
->rt6i_flags
= ort
->rt6i_flags
;
1816 if ((ort
->rt6i_flags
& (RTF_DEFAULT
| RTF_ADDRCONF
)) ==
1817 (RTF_DEFAULT
| RTF_ADDRCONF
))
1818 rt6_set_from(rt
, ort
);
1820 rt6_clean_expires(rt
);
1821 rt
->rt6i_metric
= 0;
1823 #ifdef CONFIG_IPV6_SUBTREES
1824 memcpy(&rt
->rt6i_src
, &ort
->rt6i_src
, sizeof(struct rt6key
));
1826 memcpy(&rt
->rt6i_prefsrc
, &ort
->rt6i_prefsrc
, sizeof(struct rt6key
));
1827 rt
->rt6i_table
= ort
->rt6i_table
;
1832 #ifdef CONFIG_IPV6_ROUTE_INFO
1833 static struct rt6_info
*rt6_get_route_info(struct net
*net
,
1834 const struct in6_addr
*prefix
, int prefixlen
,
1835 const struct in6_addr
*gwaddr
, int ifindex
)
1837 struct fib6_node
*fn
;
1838 struct rt6_info
*rt
= NULL
;
1839 struct fib6_table
*table
;
1841 table
= fib6_get_table(net
, RT6_TABLE_INFO
);
1845 write_lock_bh(&table
->tb6_lock
);
1846 fn
= fib6_locate(&table
->tb6_root
, prefix
,prefixlen
, NULL
, 0);
1850 for (rt
= fn
->leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1851 if (rt
->dst
.dev
->ifindex
!= ifindex
)
1853 if ((rt
->rt6i_flags
& (RTF_ROUTEINFO
|RTF_GATEWAY
)) != (RTF_ROUTEINFO
|RTF_GATEWAY
))
1855 if (!ipv6_addr_equal(&rt
->rt6i_gateway
, gwaddr
))
1861 write_unlock_bh(&table
->tb6_lock
);
1865 static struct rt6_info
*rt6_add_route_info(struct net
*net
,
1866 const struct in6_addr
*prefix
, int prefixlen
,
1867 const struct in6_addr
*gwaddr
, int ifindex
,
1870 struct fib6_config cfg
= {
1871 .fc_table
= RT6_TABLE_INFO
,
1872 .fc_metric
= IP6_RT_PRIO_USER
,
1873 .fc_ifindex
= ifindex
,
1874 .fc_dst_len
= prefixlen
,
1875 .fc_flags
= RTF_GATEWAY
| RTF_ADDRCONF
| RTF_ROUTEINFO
|
1876 RTF_UP
| RTF_PREF(pref
),
1877 .fc_nlinfo
.portid
= 0,
1878 .fc_nlinfo
.nlh
= NULL
,
1879 .fc_nlinfo
.nl_net
= net
,
1882 cfg
.fc_dst
= *prefix
;
1883 cfg
.fc_gateway
= *gwaddr
;
1885 /* We should treat it as a default route if prefix length is 0. */
1887 cfg
.fc_flags
|= RTF_DEFAULT
;
1889 ip6_route_add(&cfg
);
1891 return rt6_get_route_info(net
, prefix
, prefixlen
, gwaddr
, ifindex
);
1895 struct rt6_info
*rt6_get_dflt_router(const struct in6_addr
*addr
, struct net_device
*dev
)
1897 struct rt6_info
*rt
;
1898 struct fib6_table
*table
;
1900 table
= fib6_get_table(dev_net(dev
), RT6_TABLE_DFLT
);
1904 write_lock_bh(&table
->tb6_lock
);
1905 for (rt
= table
->tb6_root
.leaf
; rt
; rt
=rt
->dst
.rt6_next
) {
1906 if (dev
== rt
->dst
.dev
&&
1907 ((rt
->rt6i_flags
& (RTF_ADDRCONF
| RTF_DEFAULT
)) == (RTF_ADDRCONF
| RTF_DEFAULT
)) &&
1908 ipv6_addr_equal(&rt
->rt6i_gateway
, addr
))
1913 write_unlock_bh(&table
->tb6_lock
);
1917 struct rt6_info
*rt6_add_dflt_router(const struct in6_addr
*gwaddr
,
1918 struct net_device
*dev
,
1921 struct fib6_config cfg
= {
1922 .fc_table
= RT6_TABLE_DFLT
,
1923 .fc_metric
= IP6_RT_PRIO_USER
,
1924 .fc_ifindex
= dev
->ifindex
,
1925 .fc_flags
= RTF_GATEWAY
| RTF_ADDRCONF
| RTF_DEFAULT
|
1926 RTF_UP
| RTF_EXPIRES
| RTF_PREF(pref
),
1927 .fc_nlinfo
.portid
= 0,
1928 .fc_nlinfo
.nlh
= NULL
,
1929 .fc_nlinfo
.nl_net
= dev_net(dev
),
1932 cfg
.fc_gateway
= *gwaddr
;
1934 ip6_route_add(&cfg
);
1936 return rt6_get_dflt_router(gwaddr
, dev
);
1939 void rt6_purge_dflt_routers(struct net
*net
)
1941 struct rt6_info
*rt
;
1942 struct fib6_table
*table
;
1944 /* NOTE: Keep consistent with rt6_get_dflt_router */
1945 table
= fib6_get_table(net
, RT6_TABLE_DFLT
);
1950 read_lock_bh(&table
->tb6_lock
);
1951 for (rt
= table
->tb6_root
.leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1952 if (rt
->rt6i_flags
& (RTF_DEFAULT
| RTF_ADDRCONF
)) {
1954 read_unlock_bh(&table
->tb6_lock
);
1959 read_unlock_bh(&table
->tb6_lock
);
/*
 * Convert a struct in6_rtmsg (as copied in by ipv6_route_ioctl()) into a
 * struct fib6_config.
 *
 * @net:   namespace recorded in cfg->fc_nlinfo.nl_net
 * @rtmsg: source message
 * @cfg:   destination; fully overwritten
 *
 * @cfg is zeroed first, so every field not assigned below stays 0/NULL.
 * The table is always RT6_TABLE_MAIN, and rtmsg_info is carried over as
 * fc_expires.
 */
1962 static void rtmsg_to_fib6_config(struct net
*net
,
1963 struct in6_rtmsg
*rtmsg
,
1964 struct fib6_config
*cfg
)
1966 memset(cfg
, 0, sizeof(*cfg
));
1968 cfg
->fc_table
= RT6_TABLE_MAIN
;
1969 cfg
->fc_ifindex
= rtmsg
->rtmsg_ifindex
;
1970 cfg
->fc_metric
= rtmsg
->rtmsg_metric
;
1971 cfg
->fc_expires
= rtmsg
->rtmsg_info
;
1972 cfg
->fc_dst_len
= rtmsg
->rtmsg_dst_len
;
1973 cfg
->fc_src_len
= rtmsg
->rtmsg_src_len
;
1974 cfg
->fc_flags
= rtmsg
->rtmsg_flags
;
1976 cfg
->fc_nlinfo
.nl_net
= net
;
1978 cfg
->fc_dst
= rtmsg
->rtmsg_dst
;
1979 cfg
->fc_src
= rtmsg
->rtmsg_src
;
1980 cfg
->fc_gateway
= rtmsg
->rtmsg_gateway
;
1983 int ipv6_route_ioctl(struct net
*net
, unsigned int cmd
, void __user
*arg
)
1985 struct fib6_config cfg
;
1986 struct in6_rtmsg rtmsg
;
1990 case SIOCADDRT
: /* Add a route */
1991 case SIOCDELRT
: /* Delete a route */
1992 if (!capable(CAP_NET_ADMIN
))
1994 err
= copy_from_user(&rtmsg
, arg
,
1995 sizeof(struct in6_rtmsg
));
1999 rtmsg_to_fib6_config(net
, &rtmsg
, &cfg
);
2004 err
= ip6_route_add(&cfg
);
2007 err
= ip6_route_del(&cfg
);
2021 * Drop the packet on the floor
/*
 * Common helper for the discard/prohibit dst handlers.
 *
 * @skb:                  packet being dropped
 * @code:                 ICMPv6 code sent with type ICMPV6_DEST_UNREACH
 * @ipstats_mib_noroutes: IPSTATS_MIB_INNOROUTES or IPSTATS_MIB_OUTNOROUTES
 *
 * Statistics are charged to the namespace and inet6 device of the skb's
 * dst.  In the IPSTATS_MIB_INNOROUTES case, a packet whose destination
 * address type is IPV6_ADDR_ANY is counted as IPSTATS_MIB_INADDRERRORS;
 * the IPSTATS_MIB_OUTNOROUTES case counts @ipstats_mib_noroutes itself.
 * An ICMPv6 destination-unreachable error with @code is then sent.
 *
 * NOTE(review): the break/fall-through between the two cases, the freeing
 * of @skb and the return value are not visible in this excerpt.
 */
2024 static int ip6_pkt_drop(struct sk_buff
*skb
, u8 code
, int ipstats_mib_noroutes
)
2027 struct dst_entry
*dst
= skb_dst(skb
);
2028 switch (ipstats_mib_noroutes
) {
2029 case IPSTATS_MIB_INNOROUTES
:
2030 type
= ipv6_addr_type(&ipv6_hdr(skb
)->daddr
);
2031 if (type
== IPV6_ADDR_ANY
) {
2032 IP6_INC_STATS(dev_net(dst
->dev
), ip6_dst_idev(dst
),
2033 IPSTATS_MIB_INADDRERRORS
);
2037 case IPSTATS_MIB_OUTNOROUTES
:
2038 IP6_INC_STATS(dev_net(dst
->dev
), ip6_dst_idev(dst
),
2039 ipstats_mib_noroutes
);
2042 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, code
, 0);
/*
 * dst input handler installed on reject routes (see the rt->dst.input
 * assignment in the route-add path): drop @skb through ip6_pkt_drop()
 * with ICMPv6 code ICMPV6_NOROUTE, accounted as IPSTATS_MIB_INNOROUTES.
 */
2047 static int ip6_pkt_discard(struct sk_buff
*skb
)
2049 return ip6_pkt_drop(skb
, ICMPV6_NOROUTE
, IPSTATS_MIB_INNOROUTES
);
/*
 * dst output handler installed on reject routes (see the rt->dst.output
 * assignment in the route-add path).  Points skb->dev at the device of
 * the skb's dst, then drops via ip6_pkt_drop() with ICMPV6_NOROUTE,
 * accounted as IPSTATS_MIB_OUTNOROUTES.
 */
2052 static int ip6_pkt_discard_out(struct sk_buff
*skb
)
2054 skb
->dev
= skb_dst(skb
)->dev
;
2055 return ip6_pkt_drop(skb
, ICMPV6_NOROUTE
, IPSTATS_MIB_OUTNOROUTES
);
2058 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
/*
 * Drop @skb through ip6_pkt_drop() with ICMPv6 code
 * ICMPV6_ADM_PROHIBITED, accounted as IPSTATS_MIB_INNOROUTES.
 * Only built when CONFIG_IPV6_MULTIPLE_TABLES is set.
 */
2060 static int ip6_pkt_prohibit(struct sk_buff
*skb
)
2062 return ip6_pkt_drop(skb
, ICMPV6_ADM_PROHIBITED
, IPSTATS_MIB_INNOROUTES
);
/*
 * Output-side counterpart of ip6_pkt_prohibit(): points skb->dev at the
 * device of the skb's dst, then drops via ip6_pkt_drop() with
 * ICMPV6_ADM_PROHIBITED, accounted as IPSTATS_MIB_OUTNOROUTES.
 */
2065 static int ip6_pkt_prohibit_out(struct sk_buff
*skb
)
2067 skb
->dev
= skb_dst(skb
)->dev
;
2068 return ip6_pkt_drop(skb
, ICMPV6_ADM_PROHIBITED
, IPSTATS_MIB_OUTNOROUTES
);
2074 * Allocate a dst for local (unicast / anycast) address.
2077 struct rt6_info
*addrconf_dst_alloc(struct inet6_dev
*idev
,
2078 const struct in6_addr
*addr
,
2081 struct net
*net
= dev_net(idev
->dev
);
2082 struct rt6_info
*rt
= ip6_dst_alloc(net
, net
->loopback_dev
, 0, NULL
);
2086 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2087 return ERR_PTR(-ENOMEM
);
2092 rt
->dst
.flags
|= DST_HOST
;
2093 rt
->dst
.input
= ip6_input
;
2094 rt
->dst
.output
= ip6_output
;
2095 rt
->rt6i_idev
= idev
;
2096 rt
->dst
.obsolete
= -1;
2098 rt
->rt6i_flags
= RTF_UP
| RTF_NONEXTHOP
;
2100 rt
->rt6i_flags
|= RTF_ANYCAST
;
2102 rt
->rt6i_flags
|= RTF_LOCAL
;
2103 err
= rt6_bind_neighbour(rt
, rt
->dst
.dev
);
2106 return ERR_PTR(err
);
2109 rt
->rt6i_dst
.addr
= *addr
;
2110 rt
->rt6i_dst
.plen
= 128;
2111 rt
->rt6i_table
= fib6_get_table(net
, RT6_TABLE_LOCAL
);
2113 atomic_set(&rt
->dst
.__refcnt
, 1);
2118 int ip6_route_get_saddr(struct net
*net
,
2119 struct rt6_info
*rt
,
2120 const struct in6_addr
*daddr
,
2122 struct in6_addr
*saddr
)
2124 struct inet6_dev
*idev
= ip6_dst_idev((struct dst_entry
*)rt
);
2126 if (rt
->rt6i_prefsrc
.plen
)
2127 *saddr
= rt
->rt6i_prefsrc
.addr
;
2129 err
= ipv6_dev_get_saddr(net
, idev
? idev
->dev
: NULL
,
2130 daddr
, prefs
, saddr
);
2134 /* remove deleted ip from prefsrc entries */
2135 struct arg_dev_net_ip
{
2136 struct net_device
*dev
;
2138 struct in6_addr
*addr
;
/*
 * fib6_clean_all() callback used by rt6_remove_prefsrc().
 *
 * @rt:  route being visited
 * @arg: struct arg_dev_net_ip carrying the device, namespace and address
 *
 * If @rt is on the given device (or no device was given), is not the
 * namespace's ip6_null_entry, and its preferred source address equals the
 * given address, the preferred source is cleared by setting
 * rt6i_prefsrc.plen to 0.  The return value is not visible in this
 * excerpt.
 */
2141 static int fib6_remove_prefsrc(struct rt6_info
*rt
, void *arg
)
2143 struct net_device
*dev
= ((struct arg_dev_net_ip
*)arg
)->dev
;
2144 struct net
*net
= ((struct arg_dev_net_ip
*)arg
)->net
;
2145 struct in6_addr
*addr
= ((struct arg_dev_net_ip
*)arg
)->addr
;
2147 if (((void *)rt
->dst
.dev
== dev
|| !dev
) &&
2148 rt
!= net
->ipv6
.ip6_null_entry
&&
2149 ipv6_addr_equal(addr
, &rt
->rt6i_prefsrc
.addr
)) {
2150 /* remove prefsrc entry */
2151 rt
->rt6i_prefsrc
.plen
= 0;
/*
 * Run fib6_remove_prefsrc() over every route in the namespace of
 * ifp->idev->dev, on behalf of the interface address @ifp.
 *
 * The walk argument is a struct arg_dev_net_ip whose .dev is
 * ifp->idev->dev; its remaining initialisers are not visible in this
 * excerpt.
 */
2156 void rt6_remove_prefsrc(struct inet6_ifaddr
*ifp
)
2158 struct net
*net
= dev_net(ifp
->idev
->dev
);
2159 struct arg_dev_net_ip adni
= {
2160 .dev
= ifp
->idev
->dev
,
2164 fib6_clean_all(net
, fib6_remove_prefsrc
, 0, &adni
);
2167 struct arg_dev_net
{
2168 struct net_device
*dev
;
/*
 * Route-walk callback used by rt6_ifdown() (with both fib6_clean_all()
 * and icmp6_clean_all()).
 *
 * @rt:  route being visited
 * @arg: struct arg_dev_net with the device and namespace of interest
 *
 * Selects routes whose dst.dev is adn->dev -- or every route when
 * adn->dev is NULL -- excluding the namespace's ip6_null_entry.  What is
 * returned for selected and non-selected routes is not visible in this
 * excerpt.
 */
2172 static int fib6_ifdown(struct rt6_info
*rt
, void *arg
)
2174 const struct arg_dev_net
*adn
= arg
;
2175 const struct net_device
*dev
= adn
->dev
;
2177 if ((rt
->dst
.dev
== dev
|| !dev
) &&
2178 rt
!= adn
->net
->ipv6
.ip6_null_entry
)
/*
 * Apply fib6_ifdown() to the routes of @net for @dev: first across the
 * FIB via fib6_clean_all(), then via icmp6_clean_all().
 *
 * The initialisers of the struct arg_dev_net walk argument are not
 * visible in this excerpt (presumably @dev and @net -- confirm).
 */
2184 void rt6_ifdown(struct net
*net
, struct net_device
*dev
)
2186 struct arg_dev_net adn
= {
2191 fib6_clean_all(net
, fib6_ifdown
, 0, &adn
);
2192 icmp6_clean_all(fib6_ifdown
, &adn
);
2195 struct rt6_mtu_change_arg
{
2196 struct net_device
*dev
;
2200 static int rt6_mtu_change_route(struct rt6_info
*rt
, void *p_arg
)
2202 struct rt6_mtu_change_arg
*arg
= (struct rt6_mtu_change_arg
*) p_arg
;
2203 struct inet6_dev
*idev
;
2205 /* In IPv6 pmtu discovery is not optional,
2206 so that RTAX_MTU lock cannot disable it.
2207 We still use this lock to block changes
2208 caused by addrconf/ndisc.
2211 idev
= __in6_dev_get(arg
->dev
);
2215 /* For an administrative MTU increase, there is no way to discover
2216 the IPv6 PMTU increase, so the PMTU must be raised here.
2217 Since RFC 1981 doesn't cover administrative MTU increases,
2218 updating the PMTU on an increase is a MUST (e.g. jumbo frames).
2221 If new MTU is less than route PMTU, this new MTU will be the
2222 lowest MTU in the path, update the route PMTU to reflect PMTU
2223 decreases; if new MTU is greater than route PMTU, and the
2224 old MTU is the lowest MTU in the path, update the route PMTU
2225 to reflect the increase. In this case if the other nodes' MTU
2226 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2229 if (rt
->dst
.dev
== arg
->dev
&&
2230 !dst_metric_locked(&rt
->dst
, RTAX_MTU
) &&
2231 (dst_mtu(&rt
->dst
) >= arg
->mtu
||
2232 (dst_mtu(&rt
->dst
) < arg
->mtu
&&
2233 dst_mtu(&rt
->dst
) == idev
->cnf
.mtu6
))) {
2234 dst_metric_set(&rt
->dst
, RTAX_MTU
, arg
->mtu
);
/*
 * Run rt6_mtu_change_route() over every route in @dev's namespace via
 * fib6_clean_all().
 *
 * The initialisers of the struct rt6_mtu_change_arg walk argument are not
 * visible in this excerpt (presumably @dev and @mtu -- confirm).
 */
2239 void rt6_mtu_change(struct net_device
*dev
, unsigned int mtu
)
2241 struct rt6_mtu_change_arg arg
= {
2246 fib6_clean_all(dev_net(dev
), rt6_mtu_change_route
, 0, &arg
);
/*
 * Netlink attribute policy handed to nlmsg_parse() by
 * rtm_to_fib6_config() and inet6_rtm_getroute().
 *
 * RTA_GATEWAY carries a .len of sizeof(struct in6_addr); RTA_OIF, RTA_IIF
 * and RTA_PRIORITY are u32; RTA_METRICS is a nested attribute.  Attributes
 * with no entry in the lines visible here (e.g. RTA_DST, RTA_SRC) get no
 * policy from this table -- the parsers length-check those themselves
 * with nla_len().
 */
2249 static const struct nla_policy rtm_ipv6_policy
[RTA_MAX
+1] = {
2250 [RTA_GATEWAY
] = { .len
= sizeof(struct in6_addr
) },
2251 [RTA_OIF
] = { .type
= NLA_U32
},
2252 [RTA_IIF
] = { .type
= NLA_U32
},
2253 [RTA_PRIORITY
] = { .type
= NLA_U32
},
2254 [RTA_METRICS
] = { .type
= NLA_NESTED
},
2257 static int rtm_to_fib6_config(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
2258 struct fib6_config
*cfg
)
2261 struct nlattr
*tb
[RTA_MAX
+1];
2264 err
= nlmsg_parse(nlh
, sizeof(*rtm
), tb
, RTA_MAX
, rtm_ipv6_policy
);
2269 rtm
= nlmsg_data(nlh
);
2270 memset(cfg
, 0, sizeof(*cfg
));
2272 cfg
->fc_table
= rtm
->rtm_table
;
2273 cfg
->fc_dst_len
= rtm
->rtm_dst_len
;
2274 cfg
->fc_src_len
= rtm
->rtm_src_len
;
2275 cfg
->fc_flags
= RTF_UP
;
2276 cfg
->fc_protocol
= rtm
->rtm_protocol
;
2277 cfg
->fc_type
= rtm
->rtm_type
;
2279 if (rtm
->rtm_type
== RTN_UNREACHABLE
||
2280 rtm
->rtm_type
== RTN_BLACKHOLE
||
2281 rtm
->rtm_type
== RTN_PROHIBIT
||
2282 rtm
->rtm_type
== RTN_THROW
)
2283 cfg
->fc_flags
|= RTF_REJECT
;
2285 if (rtm
->rtm_type
== RTN_LOCAL
)
2286 cfg
->fc_flags
|= RTF_LOCAL
;
2288 cfg
->fc_nlinfo
.portid
= NETLINK_CB(skb
).portid
;
2289 cfg
->fc_nlinfo
.nlh
= nlh
;
2290 cfg
->fc_nlinfo
.nl_net
= sock_net(skb
->sk
);
2292 if (tb
[RTA_GATEWAY
]) {
2293 nla_memcpy(&cfg
->fc_gateway
, tb
[RTA_GATEWAY
], 16);
2294 cfg
->fc_flags
|= RTF_GATEWAY
;
2298 int plen
= (rtm
->rtm_dst_len
+ 7) >> 3;
2300 if (nla_len(tb
[RTA_DST
]) < plen
)
2303 nla_memcpy(&cfg
->fc_dst
, tb
[RTA_DST
], plen
);
2307 int plen
= (rtm
->rtm_src_len
+ 7) >> 3;
2309 if (nla_len(tb
[RTA_SRC
]) < plen
)
2312 nla_memcpy(&cfg
->fc_src
, tb
[RTA_SRC
], plen
);
2315 if (tb
[RTA_PREFSRC
])
2316 nla_memcpy(&cfg
->fc_prefsrc
, tb
[RTA_PREFSRC
], 16);
2319 cfg
->fc_ifindex
= nla_get_u32(tb
[RTA_OIF
]);
2321 if (tb
[RTA_PRIORITY
])
2322 cfg
->fc_metric
= nla_get_u32(tb
[RTA_PRIORITY
]);
2324 if (tb
[RTA_METRICS
]) {
2325 cfg
->fc_mx
= nla_data(tb
[RTA_METRICS
]);
2326 cfg
->fc_mx_len
= nla_len(tb
[RTA_METRICS
]);
2330 cfg
->fc_table
= nla_get_u32(tb
[RTA_TABLE
]);
/*
 * Handle a netlink route-delete request: parse @nlh into a struct
 * fib6_config with rtm_to_fib6_config(), then delete via ip6_route_del().
 *
 * @arg is not referenced in the visible lines.  How a parse error in err
 * is handled is not visible in this excerpt.
 */
2337 static int inet6_rtm_delroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
2339 struct fib6_config cfg
;
2342 err
= rtm_to_fib6_config(skb
, nlh
, &cfg
);
2346 return ip6_route_del(&cfg
);
/*
 * Handle a netlink route-add request: parse @nlh into a struct
 * fib6_config with rtm_to_fib6_config(), then add via ip6_route_add().
 *
 * @arg is not referenced in the visible lines.  How a parse error in err
 * is handled is not visible in this excerpt.
 */
2349 static int inet6_rtm_newroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
2351 struct fib6_config cfg
;
2354 err
= rtm_to_fib6_config(skb
, nlh
, &cfg
);
2358 return ip6_route_add(&cfg
);
/*
 * Size budget for one route netlink message: the aligned rtmsg header
 * plus room for each attribute listed below (four 16-byte addresses, four
 * u32 values, RTAX_MAX u32 metrics and one struct rta_cacheinfo).
 *
 * inet6_rt_notify() allocates its skb with this size and treats an
 * -EMSGSIZE from rt6_fill_node() as a bug here, so any attribute added to
 * rt6_fill_node() needs a matching term in this sum.
 */
2361 static inline size_t rt6_nlmsg_size(void)
2363 return NLMSG_ALIGN(sizeof(struct rtmsg
))
2364 + nla_total_size(16) /* RTA_SRC */
2365 + nla_total_size(16) /* RTA_DST */
2366 + nla_total_size(16) /* RTA_GATEWAY */
2367 + nla_total_size(16) /* RTA_PREFSRC */
2368 + nla_total_size(4) /* RTA_TABLE */
2369 + nla_total_size(4) /* RTA_IIF */
2370 + nla_total_size(4) /* RTA_OIF */
2371 + nla_total_size(4) /* RTA_PRIORITY */
2372 + RTAX_MAX
* nla_total_size(4) /* RTA_METRICS */
2373 + nla_total_size(sizeof(struct rta_cacheinfo
));
2376 static int rt6_fill_node(struct net
*net
,
2377 struct sk_buff
*skb
, struct rt6_info
*rt
,
2378 struct in6_addr
*dst
, struct in6_addr
*src
,
2379 int iif
, int type
, u32 portid
, u32 seq
,
2380 int prefix
, int nowait
, unsigned int flags
)
2383 struct nlmsghdr
*nlh
;
2386 struct neighbour
*n
;
2388 if (prefix
) { /* user wants prefix routes only */
2389 if (!(rt
->rt6i_flags
& RTF_PREFIX_RT
)) {
2390 /* success since this is not a prefix route */
2395 nlh
= nlmsg_put(skb
, portid
, seq
, type
, sizeof(*rtm
), flags
);
2399 rtm
= nlmsg_data(nlh
);
2400 rtm
->rtm_family
= AF_INET6
;
2401 rtm
->rtm_dst_len
= rt
->rt6i_dst
.plen
;
2402 rtm
->rtm_src_len
= rt
->rt6i_src
.plen
;
2405 table
= rt
->rt6i_table
->tb6_id
;
2407 table
= RT6_TABLE_UNSPEC
;
2408 rtm
->rtm_table
= table
;
2409 if (nla_put_u32(skb
, RTA_TABLE
, table
))
2410 goto nla_put_failure
;
2411 if (rt
->rt6i_flags
& RTF_REJECT
) {
2412 switch (rt
->dst
.error
) {
2414 rtm
->rtm_type
= RTN_BLACKHOLE
;
2417 rtm
->rtm_type
= RTN_PROHIBIT
;
2420 rtm
->rtm_type
= RTN_THROW
;
2423 rtm
->rtm_type
= RTN_UNREACHABLE
;
2427 else if (rt
->rt6i_flags
& RTF_LOCAL
)
2428 rtm
->rtm_type
= RTN_LOCAL
;
2429 else if (rt
->dst
.dev
&& (rt
->dst
.dev
->flags
& IFF_LOOPBACK
))
2430 rtm
->rtm_type
= RTN_LOCAL
;
2432 rtm
->rtm_type
= RTN_UNICAST
;
2434 rtm
->rtm_scope
= RT_SCOPE_UNIVERSE
;
2435 rtm
->rtm_protocol
= rt
->rt6i_protocol
;
2436 if (rt
->rt6i_flags
& RTF_DYNAMIC
)
2437 rtm
->rtm_protocol
= RTPROT_REDIRECT
;
2438 else if (rt
->rt6i_flags
& RTF_ADDRCONF
) {
2439 if (rt
->rt6i_flags
& (RTF_DEFAULT
| RTF_ROUTEINFO
))
2440 rtm
->rtm_protocol
= RTPROT_RA
;
2442 rtm
->rtm_protocol
= RTPROT_KERNEL
;
2445 if (rt
->rt6i_flags
& RTF_CACHE
)
2446 rtm
->rtm_flags
|= RTM_F_CLONED
;
2449 if (nla_put(skb
, RTA_DST
, 16, dst
))
2450 goto nla_put_failure
;
2451 rtm
->rtm_dst_len
= 128;
2452 } else if (rtm
->rtm_dst_len
)
2453 if (nla_put(skb
, RTA_DST
, 16, &rt
->rt6i_dst
.addr
))
2454 goto nla_put_failure
;
2455 #ifdef CONFIG_IPV6_SUBTREES
2457 if (nla_put(skb
, RTA_SRC
, 16, src
))
2458 goto nla_put_failure
;
2459 rtm
->rtm_src_len
= 128;
2460 } else if (rtm
->rtm_src_len
&&
2461 nla_put(skb
, RTA_SRC
, 16, &rt
->rt6i_src
.addr
))
2462 goto nla_put_failure
;
2465 #ifdef CONFIG_IPV6_MROUTE
2466 if (ipv6_addr_is_multicast(&rt
->rt6i_dst
.addr
)) {
2467 int err
= ip6mr_get_route(net
, skb
, rtm
, nowait
);
2472 goto nla_put_failure
;
2474 if (err
== -EMSGSIZE
)
2475 goto nla_put_failure
;
2480 if (nla_put_u32(skb
, RTA_IIF
, iif
))
2481 goto nla_put_failure
;
2483 struct in6_addr saddr_buf
;
2484 if (ip6_route_get_saddr(net
, rt
, dst
, 0, &saddr_buf
) == 0 &&
2485 nla_put(skb
, RTA_PREFSRC
, 16, &saddr_buf
))
2486 goto nla_put_failure
;
2489 if (rt
->rt6i_prefsrc
.plen
) {
2490 struct in6_addr saddr_buf
;
2491 saddr_buf
= rt
->rt6i_prefsrc
.addr
;
2492 if (nla_put(skb
, RTA_PREFSRC
, 16, &saddr_buf
))
2493 goto nla_put_failure
;
2496 if (rtnetlink_put_metrics(skb
, dst_metrics_ptr(&rt
->dst
)) < 0)
2497 goto nla_put_failure
;
2502 if (nla_put(skb
, RTA_GATEWAY
, 16, &n
->primary_key
) < 0) {
2504 goto nla_put_failure
;
2510 nla_put_u32(skb
, RTA_OIF
, rt
->dst
.dev
->ifindex
))
2511 goto nla_put_failure
;
2512 if (nla_put_u32(skb
, RTA_PRIORITY
, rt
->rt6i_metric
))
2513 goto nla_put_failure
;
2515 expires
= (rt
->rt6i_flags
& RTF_EXPIRES
) ? rt
->dst
.expires
- jiffies
: 0;
2517 if (rtnl_put_cacheinfo(skb
, &rt
->dst
, 0, expires
, rt
->dst
.error
) < 0)
2518 goto nla_put_failure
;
2520 return nlmsg_end(skb
, nlh
);
2523 nlmsg_cancel(skb
, nlh
);
/*
 * Emit one route into an ongoing netlink dump.
 *
 * @rt:    route to emit
 * @p_arg: struct rt6_rtnl_dump_arg (net, skb and netlink callback state)
 *
 * When the request's payload is at least sizeof(struct rtmsg), prefix is
 * taken from the RTM_F_PREFIX bit of the request's rtm_flags, which makes
 * rt6_fill_node() emit prefix routes only.  The value used for shorter
 * requests is not visible in this excerpt.  The route is written to
 * arg->skb as an RTM_NEWROUTE message flagged NLM_F_MULTI, addressed with
 * the requester's portid and sequence number; rt6_fill_node()'s result is
 * returned.
 */
2527 int rt6_dump_route(struct rt6_info
*rt
, void *p_arg
)
2529 struct rt6_rtnl_dump_arg
*arg
= (struct rt6_rtnl_dump_arg
*) p_arg
;
2532 if (nlmsg_len(arg
->cb
->nlh
) >= sizeof(struct rtmsg
)) {
2533 struct rtmsg
*rtm
= nlmsg_data(arg
->cb
->nlh
);
2534 prefix
= (rtm
->rtm_flags
& RTM_F_PREFIX
) != 0;
2538 return rt6_fill_node(arg
->net
,
2539 arg
->skb
, rt
, NULL
, NULL
, 0, RTM_NEWROUTE
,
2540 NETLINK_CB(arg
->cb
->skb
).portid
, arg
->cb
->nlh
->nlmsg_seq
,
2541 prefix
, 0, NLM_F_MULTI
);
2544 static int inet6_rtm_getroute(struct sk_buff
*in_skb
, struct nlmsghdr
* nlh
, void *arg
)
2546 struct net
*net
= sock_net(in_skb
->sk
);
2547 struct nlattr
*tb
[RTA_MAX
+1];
2548 struct rt6_info
*rt
;
2549 struct sk_buff
*skb
;
2552 int err
, iif
= 0, oif
= 0;
2554 err
= nlmsg_parse(nlh
, sizeof(*rtm
), tb
, RTA_MAX
, rtm_ipv6_policy
);
2559 memset(&fl6
, 0, sizeof(fl6
));
2562 if (nla_len(tb
[RTA_SRC
]) < sizeof(struct in6_addr
))
2565 fl6
.saddr
= *(struct in6_addr
*)nla_data(tb
[RTA_SRC
]);
2569 if (nla_len(tb
[RTA_DST
]) < sizeof(struct in6_addr
))
2572 fl6
.daddr
= *(struct in6_addr
*)nla_data(tb
[RTA_DST
]);
2576 iif
= nla_get_u32(tb
[RTA_IIF
]);
2579 oif
= nla_get_u32(tb
[RTA_OIF
]);
2582 struct net_device
*dev
;
2585 dev
= __dev_get_by_index(net
, iif
);
2591 fl6
.flowi6_iif
= iif
;
2593 if (!ipv6_addr_any(&fl6
.saddr
))
2594 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
2596 rt
= (struct rt6_info
*)ip6_route_input_lookup(net
, dev
, &fl6
,
2599 fl6
.flowi6_oif
= oif
;
2601 rt
= (struct rt6_info
*)ip6_route_output(net
, NULL
, &fl6
);
2604 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
2606 dst_release(&rt
->dst
);
2611 /* Reserve room for dummy headers, this skb can pass
2612 through good chunk of routing engine.
2614 skb_reset_mac_header(skb
);
2615 skb_reserve(skb
, MAX_HEADER
+ sizeof(struct ipv6hdr
));
2617 skb_dst_set(skb
, &rt
->dst
);
2619 err
= rt6_fill_node(net
, skb
, rt
, &fl6
.daddr
, &fl6
.saddr
, iif
,
2620 RTM_NEWROUTE
, NETLINK_CB(in_skb
).portid
,
2621 nlh
->nlmsg_seq
, 0, 0, 0);
2627 err
= rtnl_unicast(skb
, net
, NETLINK_CB(in_skb
).portid
);
2632 void inet6_rt_notify(int event
, struct rt6_info
*rt
, struct nl_info
*info
)
2634 struct sk_buff
*skb
;
2635 struct net
*net
= info
->nl_net
;
2640 seq
= info
->nlh
? info
->nlh
->nlmsg_seq
: 0;
2642 skb
= nlmsg_new(rt6_nlmsg_size(), gfp_any());
2646 err
= rt6_fill_node(net
, skb
, rt
, NULL
, NULL
, 0,
2647 event
, info
->portid
, seq
, 0, 0, 0);
2649 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2650 WARN_ON(err
== -EMSGSIZE
);
2654 rtnl_notify(skb
, net
, info
->portid
, RTNLGRP_IPV6_ROUTE
,
2655 info
->nlh
, gfp_any());
2659 rtnl_set_sk_err(net
, RTNLGRP_IPV6_ROUTE
, err
);
2662 static int ip6_route_dev_notify(struct notifier_block
*this,
2663 unsigned long event
, void *data
)
2665 struct net_device
*dev
= (struct net_device
*)data
;
2666 struct net
*net
= dev_net(dev
);
2668 if (event
== NETDEV_REGISTER
&& (dev
->flags
& IFF_LOOPBACK
)) {
2669 net
->ipv6
.ip6_null_entry
->dst
.dev
= dev
;
2670 net
->ipv6
.ip6_null_entry
->rt6i_idev
= in6_dev_get(dev
);
2671 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2672 net
->ipv6
.ip6_prohibit_entry
->dst
.dev
= dev
;
2673 net
->ipv6
.ip6_prohibit_entry
->rt6i_idev
= in6_dev_get(dev
);
2674 net
->ipv6
.ip6_blk_hole_entry
->dst
.dev
= dev
;
2675 net
->ipv6
.ip6_blk_hole_entry
->rt6i_idev
= in6_dev_get(dev
);
2686 #ifdef CONFIG_PROC_FS
2697 static int rt6_info_route(struct rt6_info
*rt
, void *p_arg
)
2699 struct seq_file
*m
= p_arg
;
2700 struct neighbour
*n
;
2702 seq_printf(m
, "%pi6 %02x ", &rt
->rt6i_dst
.addr
, rt
->rt6i_dst
.plen
);
2704 #ifdef CONFIG_IPV6_SUBTREES
2705 seq_printf(m
, "%pi6 %02x ", &rt
->rt6i_src
.addr
, rt
->rt6i_src
.plen
);
2707 seq_puts(m
, "00000000000000000000000000000000 00 ");
2712 seq_printf(m
, "%pi6", n
->primary_key
);
2714 seq_puts(m
, "00000000000000000000000000000000");
2717 seq_printf(m
, " %08x %08x %08x %08x %8s\n",
2718 rt
->rt6i_metric
, atomic_read(&rt
->dst
.__refcnt
),
2719 rt
->dst
.__use
, rt
->rt6i_flags
,
2720 rt
->dst
.dev
? rt
->dst
.dev
->name
: "");
/*
 * seq_file show routine for the route listing: walks the routes of the
 * namespace stored in m->private with fib6_clean_all_ro(), calling
 * rt6_info_route() on each with @m as its argument.  @v is not referenced
 * in the visible lines.
 */
2724 static int ipv6_route_show(struct seq_file
*m
, void *v
)
2726 struct net
*net
= (struct net
*)m
->private;
2727 fib6_clean_all_ro(net
, rt6_info_route
, 0, m
);
/*
 * open() handler of ipv6_route_proc_fops: sets up a single-record,
 * namespace-aware seq_file backed by ipv6_route_show().
 */
2731 static int ipv6_route_open(struct inode
*inode
, struct file
*file
)
2733 return single_open_net(inode
, file
, ipv6_route_show
);
2736 static const struct file_operations ipv6_route_proc_fops
= {
2737 .owner
= THIS_MODULE
,
2738 .open
= ipv6_route_open
,
2740 .llseek
= seq_lseek
,
2741 .release
= single_release_net
,
/*
 * seq_file show routine for the routing statistics of the namespace in
 * seq->private.  Prints one line of seven hexadecimal fields, in order:
 * fib_nodes, fib_route_nodes, fib_rt_alloc, fib_rt_entries, fib_rt_cache,
 * the current dst entry count of ip6_dst_ops (dst_entries_get_slow()),
 * and fib_discarded_routes.  @v is not referenced in the visible lines.
 */
2744 static int rt6_stats_seq_show(struct seq_file
*seq
, void *v
)
2746 struct net
*net
= (struct net
*)seq
->private;
2747 seq_printf(seq
, "%04x %04x %04x %04x %04x %04x %04x\n",
2748 net
->ipv6
.rt6_stats
->fib_nodes
,
2749 net
->ipv6
.rt6_stats
->fib_route_nodes
,
2750 net
->ipv6
.rt6_stats
->fib_rt_alloc
,
2751 net
->ipv6
.rt6_stats
->fib_rt_entries
,
2752 net
->ipv6
.rt6_stats
->fib_rt_cache
,
2753 dst_entries_get_slow(&net
->ipv6
.ip6_dst_ops
),
2754 net
->ipv6
.rt6_stats
->fib_discarded_routes
);
/*
 * open() handler of rt6_stats_seq_fops: sets up a single-record,
 * namespace-aware seq_file backed by rt6_stats_seq_show().
 */
2759 static int rt6_stats_seq_open(struct inode
*inode
, struct file
*file
)
2761 return single_open_net(inode
, file
, rt6_stats_seq_show
);
2764 static const struct file_operations rt6_stats_seq_fops
= {
2765 .owner
= THIS_MODULE
,
2766 .open
= rt6_stats_seq_open
,
2768 .llseek
= seq_lseek
,
2769 .release
= single_release_net
,
2771 #endif /* CONFIG_PROC_FS */
2773 #ifdef CONFIG_SYSCTL
/*
 * proc handler for the "flush" sysctl entry.
 *
 * The namespace is taken from ctl->extra1 (set per-namespace in
 * ipv6_route_sysctl_init()).  The integer is parsed with proc_dointvec()
 * and a garbage-collection pass is started with fib6_run_gc(); a delay
 * <= 0 is turned into ~0UL.
 *
 * NOTE(review): delay is sampled from flush_delay *before*
 * proc_dointvec() runs, so the value handed to fib6_run_gc() is the one
 * stored prior to this write; and proc_dointvec()'s result is not used in
 * the lines visible here.  Confirm both are intended.  Local
 * declarations, any check of @write and the return statement are not
 * visible in this excerpt.
 */
2776 int ipv6_sysctl_rtcache_flush(ctl_table
*ctl
, int write
,
2777 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2784 net
= (struct net
*)ctl
->extra1
;
2785 delay
= net
->ipv6
.sysctl
.flush_delay
;
2786 proc_dointvec(ctl
, write
, buffer
, lenp
, ppos
);
2787 fib6_run_gc(delay
<= 0 ? ~0UL : (unsigned long)delay
, net
);
2791 ctl_table ipv6_route_table_template
[] = {
2793 .procname
= "flush",
2794 .data
= &init_net
.ipv6
.sysctl
.flush_delay
,
2795 .maxlen
= sizeof(int),
2797 .proc_handler
= ipv6_sysctl_rtcache_flush
2800 .procname
= "gc_thresh",
2801 .data
= &ip6_dst_ops_template
.gc_thresh
,
2802 .maxlen
= sizeof(int),
2804 .proc_handler
= proc_dointvec
,
2807 .procname
= "max_size",
2808 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_max_size
,
2809 .maxlen
= sizeof(int),
2811 .proc_handler
= proc_dointvec
,
2814 .procname
= "gc_min_interval",
2815 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_min_interval
,
2816 .maxlen
= sizeof(int),
2818 .proc_handler
= proc_dointvec_jiffies
,
2821 .procname
= "gc_timeout",
2822 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_timeout
,
2823 .maxlen
= sizeof(int),
2825 .proc_handler
= proc_dointvec_jiffies
,
2828 .procname
= "gc_interval",
2829 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_interval
,
2830 .maxlen
= sizeof(int),
2832 .proc_handler
= proc_dointvec_jiffies
,
2835 .procname
= "gc_elasticity",
2836 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_elasticity
,
2837 .maxlen
= sizeof(int),
2839 .proc_handler
= proc_dointvec
,
2842 .procname
= "mtu_expires",
2843 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_mtu_expires
,
2844 .maxlen
= sizeof(int),
2846 .proc_handler
= proc_dointvec_jiffies
,
2849 .procname
= "min_adv_mss",
2850 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_min_advmss
,
2851 .maxlen
= sizeof(int),
2853 .proc_handler
= proc_dointvec
,
2856 .procname
= "gc_min_interval_ms",
2857 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_min_interval
,
2858 .maxlen
= sizeof(int),
2860 .proc_handler
= proc_dointvec_ms_jiffies
,
2865 struct ctl_table
* __net_init
ipv6_route_sysctl_init(struct net
*net
)
2867 struct ctl_table
*table
;
2869 table
= kmemdup(ipv6_route_table_template
,
2870 sizeof(ipv6_route_table_template
),
2874 table
[0].data
= &net
->ipv6
.sysctl
.flush_delay
;
2875 table
[0].extra1
= net
;
2876 table
[1].data
= &net
->ipv6
.ip6_dst_ops
.gc_thresh
;
2877 table
[2].data
= &net
->ipv6
.sysctl
.ip6_rt_max_size
;
2878 table
[3].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
2879 table
[4].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_timeout
;
2880 table
[5].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_interval
;
2881 table
[6].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
2882 table
[7].data
= &net
->ipv6
.sysctl
.ip6_rt_mtu_expires
;
2883 table
[8].data
= &net
->ipv6
.sysctl
.ip6_rt_min_advmss
;
2884 table
[9].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
/*
 * ip6_route_net_init - per-namespace setup of IPv6 routing state.
 * @net: network namespace being initialised
 *
 * Visible steps: copy the dst_ops template into @net, initialise its entry
 * counter, duplicate the special route entries from their templates
 * (prohibit and blackhole only with CONFIG_IPV6_MULTIPLE_TABLES), and fill
 * in the default route sysctl values. The labels at the end undo the
 * allocations when a later step fails.
 *
 * NOTE(review): this listing has gaps in its embedded numbering; braces,
 * #endif lines, the return statements and some labels (e.g. the targets of
 * "goto out_ip6_dst_ops" and "goto out_ip6_null_entry") are not visible.
 */
2891 static int __net_init
ip6_route_net_init(struct net
*net
)
/* Start from the shared template for this namespace's dst_ops. */
2895 memcpy(&net
->ipv6
.ip6_dst_ops
, &ip6_dst_ops_template
,
2896 sizeof(net
->ipv6
.ip6_dst_ops
));
2898 if (dst_entries_init(&net
->ipv6
.ip6_dst_ops
) < 0)
2899 goto out_ip6_dst_ops
;
/*
 * Null entry: duplicate the template, point dst.path back at the entry
 * itself (through a cast to struct dst_entry *), attach the per-namespace
 * dst_ops and install ip6_template_metrics.
 */
2901 net
->ipv6
.ip6_null_entry
= kmemdup(&ip6_null_entry_template
,
2902 sizeof(*net
->ipv6
.ip6_null_entry
),
2904 if (!net
->ipv6
.ip6_null_entry
)
2905 goto out_ip6_dst_entries
;
2906 net
->ipv6
.ip6_null_entry
->dst
.path
=
2907 (struct dst_entry
*)net
->ipv6
.ip6_null_entry
;
2908 net
->ipv6
.ip6_null_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
2909 dst_init_metrics(&net
->ipv6
.ip6_null_entry
->dst
,
2910 ip6_template_metrics
, true);
/* Same sequence for the prohibit entry; failure frees the null entry. */
2912 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2913 net
->ipv6
.ip6_prohibit_entry
= kmemdup(&ip6_prohibit_entry_template
,
2914 sizeof(*net
->ipv6
.ip6_prohibit_entry
),
2916 if (!net
->ipv6
.ip6_prohibit_entry
)
2917 goto out_ip6_null_entry
;
2918 net
->ipv6
.ip6_prohibit_entry
->dst
.path
=
2919 (struct dst_entry
*)net
->ipv6
.ip6_prohibit_entry
;
2920 net
->ipv6
.ip6_prohibit_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
2921 dst_init_metrics(&net
->ipv6
.ip6_prohibit_entry
->dst
,
2922 ip6_template_metrics
, true);
/* Same sequence for the blackhole entry; failure frees the prohibit entry. */
2924 net
->ipv6
.ip6_blk_hole_entry
= kmemdup(&ip6_blk_hole_entry_template
,
2925 sizeof(*net
->ipv6
.ip6_blk_hole_entry
),
2927 if (!net
->ipv6
.ip6_blk_hole_entry
)
2928 goto out_ip6_prohibit_entry
;
2929 net
->ipv6
.ip6_blk_hole_entry
->dst
.path
=
2930 (struct dst_entry
*)net
->ipv6
.ip6_blk_hole_entry
;
2931 net
->ipv6
.ip6_blk_hole_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
2932 dst_init_metrics(&net
->ipv6
.ip6_blk_hole_entry
->dst
,
2933 ip6_template_metrics
, true);
/*
 * Default sysctl values for this namespace. Values written in terms of HZ
 * are stored in jiffies.
 */
2936 net
->ipv6
.sysctl
.flush_delay
= 0;
2937 net
->ipv6
.sysctl
.ip6_rt_max_size
= 4096;
2938 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
= HZ
/ 2;
2939 net
->ipv6
.sysctl
.ip6_rt_gc_timeout
= 60*HZ
;
2940 net
->ipv6
.sysctl
.ip6_rt_gc_interval
= 30*HZ
;
2941 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
= 9;
2942 net
->ipv6
.sysctl
.ip6_rt_mtu_expires
= 10*60*HZ
;
/*
 * NOTE(review): the 20 and 40 presumably stand for the TCP and IPv6 header
 * sizes; confirm before relying on that reading.
 */
2943 net
->ipv6
.sysctl
.ip6_rt_min_advmss
= IPV6_MIN_MTU
- 20 - 40;
2945 net
->ipv6
.ip6_rt_gc_expire
= 30*HZ
;
/*
 * Error unwinding: release what was set up, in reverse order of
 * acquisition, ending with the dst_ops entry counter.
 */
2951 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2952 out_ip6_prohibit_entry
:
2953 kfree(net
->ipv6
.ip6_prohibit_entry
);
2955 kfree(net
->ipv6
.ip6_null_entry
);
2957 out_ip6_dst_entries
:
2958 dst_entries_destroy(&net
->ipv6
.ip6_dst_ops
);
/*
 * ip6_route_net_exit - per-namespace teardown of IPv6 routing state.
 * @net: network namespace being torn down
 *
 * Frees the special route entries held in @net (prohibit and blackhole only
 * with CONFIG_IPV6_MULTIPLE_TABLES) and destroys the dst_ops entry counter.
 */
2963 static void __net_exit
ip6_route_net_exit(struct net
*net
)
2965 kfree(net
->ipv6
.ip6_null_entry
);
2966 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2967 kfree(net
->ipv6
.ip6_prohibit_entry
);
2968 kfree(net
->ipv6
.ip6_blk_hole_entry
);
/* Released last, after the entries that referenced these dst_ops. */
2970 dst_entries_destroy(&net
->ipv6
.ip6_dst_ops
);
/*
 * ip6_route_net_init_late - late per-namespace setup: procfs entries.
 * @net: network namespace being initialised
 *
 * With CONFIG_PROC_FS, creates the "ipv6_route" entry (mode 0) and the
 * "rt6_stats" entry (mode S_IRUGO) for @net.
 *
 * NOTE(review): the results of proc_net_fops_create() are not used in the
 * visible lines, and the return statement is not visible in this listing.
 */
2973 static int __net_init
ip6_route_net_init_late(struct net
*net
)
2975 #ifdef CONFIG_PROC_FS
2976 proc_net_fops_create(net
, "ipv6_route", 0, &ipv6_route_proc_fops
);
2977 proc_net_fops_create(net
, "rt6_stats", S_IRUGO
, &rt6_stats_seq_fops
);
/*
 * ip6_route_net_exit_late - late per-namespace teardown: procfs entries.
 * @net: network namespace being torn down
 *
 * With CONFIG_PROC_FS, removes the "ipv6_route" and "rt6_stats" entries
 * from @net.
 */
2982 static void __net_exit
ip6_route_net_exit_late(struct net
*net
)
2984 #ifdef CONFIG_PROC_FS
2985 proc_net_remove(net
, "ipv6_route");
2986 proc_net_remove(net
, "rt6_stats");
/*
 * Per-namespace hooks for the core IPv6 routing state:
 * ip6_route_net_init on init and ip6_route_net_exit on exit.
 */
2990 static struct pernet_operations ip6_route_net_ops
= {
2991 .init
= ip6_route_net_init
,
2992 .exit
= ip6_route_net_exit
,
/*
 * ipv6_inetpeer_init - allocate the per-namespace IPv6 inet_peer base.
 * @net: network namespace being initialised
 *
 * Allocates a struct inet_peer_base with kmalloc(GFP_KERNEL), initialises
 * it with inet_peer_base_init() and stores it in net->ipv6.peers.
 *
 * NOTE(review): the embedded numbering jumps from 2997 to 3001, so the
 * allocation-failure handling and the return statement are not visible in
 * this listing.
 */
2995 static int __net_init
ipv6_inetpeer_init(struct net
*net
)
2997 struct inet_peer_base
*bp
= kmalloc(sizeof(*bp
), GFP_KERNEL
);
3001 inet_peer_base_init(bp
);
3002 net
->ipv6
.peers
= bp
;
/*
 * ipv6_inetpeer_exit - detach the per-namespace IPv6 inet_peer base.
 * @net: network namespace being torn down
 *
 * Saves the pointer from net->ipv6.peers, clears that field, then calls
 * inetpeer_invalidate_tree() on the saved base.
 *
 * NOTE(review): releasing the memory of the base itself is not visible in
 * this listing; confirm against the full source.
 */
3006 static void __net_exit
ipv6_inetpeer_exit(struct net
*net
)
3008 struct inet_peer_base
*bp
= net
->ipv6
.peers
;
/* Clear the namespace's pointer before invalidating the tree. */
3010 net
->ipv6
.peers
= NULL
;
3011 inetpeer_invalidate_tree(bp
);
/*
 * Per-namespace hooks for the IPv6 inet_peer base:
 * ipv6_inetpeer_init on init and ipv6_inetpeer_exit on exit.
 */
3015 static struct pernet_operations ipv6_inetpeer_ops
= {
3016 .init
= ipv6_inetpeer_init
,
3017 .exit
= ipv6_inetpeer_exit
,
/*
 * Per-namespace hooks for the late (procfs) part of IPv6 routing setup:
 * ip6_route_net_init_late on init and ip6_route_net_exit_late on exit.
 */
3020 static struct pernet_operations ip6_route_net_late_ops
= {
3021 .init
= ip6_route_net_init_late
,
3022 .exit
= ip6_route_net_exit_late
,
/*
 * Notifier block whose callback is ip6_route_dev_notify; it is registered
 * with register_netdevice_notifier() in ip6_route_init().
 */
3025 static struct notifier_block ip6_route_dev_notifier
= {
3026 .notifier_call
= ip6_route_dev_notify
,
/*
 * ip6_route_init - one-time initialisation of the IPv6 routing subsystem.
 *
 * Visible steps, in order: create the "ip6_dst_cache" slab cache, initialise
 * the blackhole dst_ops entry counter, register the inetpeer and route
 * pernet operations, wire the init_net special entries to the loopback
 * device, initialise the fib6 rules, register the late pernet operations,
 * register the rtnetlink route handlers and finally the netdevice notifier.
 * The labels at the end unwind these steps in reverse order on failure.
 *
 * NOTE(review): this listing has gaps in its embedded numbering; the
 * declaration of ret, most of the conditions guarding the gotos, some
 * labels, the return statements and possibly further init/cleanup calls
 * are not visible here.
 */
3030 int __init
ip6_route_init(void)
/* Slab cache for struct rt6_info, stored in the dst_ops template. */
3035 ip6_dst_ops_template
.kmem_cachep
=
3036 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info
), 0,
3037 SLAB_HWCACHE_ALIGN
, NULL
);
3038 if (!ip6_dst_ops_template
.kmem_cachep
)
3041 ret
= dst_entries_init(&ip6_dst_blackhole_ops
);
3043 goto out_kmem_cache
;
3045 ret
= register_pernet_subsys(&ipv6_inetpeer_ops
);
3047 goto out_dst_entries
;
3049 ret
= register_pernet_subsys(&ip6_route_net_ops
);
3051 goto out_register_inetpeer
;
/* The blackhole dst_ops share the slab cache created above. */
3053 ip6_dst_blackhole_ops
.kmem_cachep
= ip6_dst_ops_template
.kmem_cachep
;
3055 /* Registering of the loopback is done before this portion of code,
3056 * the loopback reference in rt6_info will not be taken, do it
3057 * manually for init_net */
3058 init_net
.ipv6
.ip6_null_entry
->dst
.dev
= init_net
.loopback_dev
;
3059 init_net
.ipv6
.ip6_null_entry
->rt6i_idev
= in6_dev_get(init_net
.loopback_dev
);
/* Same loopback wiring for the prohibit and blackhole entries. */
3060 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3061 init_net
.ipv6
.ip6_prohibit_entry
->dst
.dev
= init_net
.loopback_dev
;
3062 init_net
.ipv6
.ip6_prohibit_entry
->rt6i_idev
= in6_dev_get(init_net
.loopback_dev
);
3063 init_net
.ipv6
.ip6_blk_hole_entry
->dst
.dev
= init_net
.loopback_dev
;
3064 init_net
.ipv6
.ip6_blk_hole_entry
->rt6i_idev
= in6_dev_get(init_net
.loopback_dev
);
/* NOTE(review): the call whose failure triggers this goto is not visible. */
3068 goto out_register_subsys
;
3074 ret
= fib6_rules_init();
3078 ret
= register_pernet_subsys(&ip6_route_net_late_ops
);
3080 goto fib6_rules_init
;
/*
 * Register the PF_INET6 handlers for RTM_NEWROUTE, RTM_DELROUTE and
 * RTM_GETROUTE; a non-zero result from any of them aborts initialisation.
 */
3083 if (__rtnl_register(PF_INET6
, RTM_NEWROUTE
, inet6_rtm_newroute
, NULL
, NULL
) ||
3084 __rtnl_register(PF_INET6
, RTM_DELROUTE
, inet6_rtm_delroute
, NULL
, NULL
) ||
3085 __rtnl_register(PF_INET6
, RTM_GETROUTE
, inet6_rtm_getroute
, NULL
, NULL
))
3086 goto out_register_late_subsys
;
3088 ret
= register_netdevice_notifier(&ip6_route_dev_notifier
);
3090 goto out_register_late_subsys
;
/* Error unwinding: undo the steps above in reverse order. */
3095 out_register_late_subsys
:
3096 unregister_pernet_subsys(&ip6_route_net_late_ops
);
3098 fib6_rules_cleanup();
3103 out_register_subsys
:
3104 unregister_pernet_subsys(&ip6_route_net_ops
);
3105 out_register_inetpeer
:
3106 unregister_pernet_subsys(&ipv6_inetpeer_ops
);
3108 dst_entries_destroy(&ip6_dst_blackhole_ops
);
3110 kmem_cache_destroy(ip6_dst_ops_template
.kmem_cachep
);
/*
 * ip6_route_cleanup - tear down what ip6_route_init() set up.
 *
 * Visible steps, in order: unregister the netdevice notifier and the late
 * pernet operations, clean up the fib6 rules, unregister the inetpeer and
 * route pernet operations, destroy the blackhole dst_ops entry counter and
 * destroy the slab cache.
 *
 * NOTE(review): ipv6_inetpeer_ops is unregistered before ip6_route_net_ops
 * here, whereas the error unwind in ip6_route_init() unregisters
 * ip6_route_net_ops first; confirm the ordering difference is intended.
 * The embedded numbering also jumps from 3118 to 3121, so further cleanup
 * calls may be missing from this listing.
 */
3114 void ip6_route_cleanup(void)
3116 unregister_netdevice_notifier(&ip6_route_dev_notifier
);
3117 unregister_pernet_subsys(&ip6_route_net_late_ops
);
3118 fib6_rules_cleanup();
3121 unregister_pernet_subsys(&ipv6_inetpeer_ops
);
3122 unregister_pernet_subsys(&ip6_route_net_ops
);
3123 dst_entries_destroy(&ip6_dst_blackhole_ops
);
3124 kmem_cache_destroy(ip6_dst_ops_template
.kmem_cachep
);