remove libdss from Makefile
[GitHub/moto-9609/android_kernel_motorola_exynos9610.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
904af04d 57#include <net/dst_metadata.h>
1da177e4 58#include <net/xfrm.h>
8d71740c 59#include <net/netevent.h>
21713ebc 60#include <net/netlink.h>
51ebd318 61#include <net/nexthop.h>
19e42e45 62#include <net/lwtunnel.h>
904af04d 63#include <net/ip_tunnels.h>
ca254490 64#include <net/l3mdev.h>
b811580d 65#include <trace/events/fib6.h>
1da177e4 66
7c0f6ba6 67#include <linux/uaccess.h>
1da177e4
LT
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
/*
 * Result codes used when scoring a route by the state of its next-hop
 * neighbour.  All failure codes are negative so callers can test "< 0".
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* find_match() skips the route */
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,	/* find_match() scores it 0 and requests round-robin */
	RT6_NUD_SUCCEED		= 1
};
79
83a09abd 80static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 82static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 83static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85static void ip6_dst_destroy(struct dst_entry *);
86static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
569d3645 88static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
89
90static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 91static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 92static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 93static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 94static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
95static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
4b32b5ad 99static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 100static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
16a16cd3
DA
101static size_t rt6_nlmsg_size(struct rt6_info *rt);
102static int rt6_fill_node(struct net *net,
103 struct sk_buff *skb, struct rt6_info *rt,
104 struct in6_addr *dst, struct in6_addr *src,
105 int iif, int type, u32 portid, u32 seq,
106 unsigned int flags);
1da177e4 107
70ceb4f5 108#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 109static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 110 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
111 const struct in6_addr *gwaddr,
112 struct net_device *dev,
95c96174 113 unsigned int pref);
efa2cea0 114static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 115 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
116 const struct in6_addr *gwaddr,
117 struct net_device *dev);
70ceb4f5
YH
118#endif
119
8d0b94af
MKL
120struct uncached_list {
121 spinlock_t lock;
122 struct list_head head;
123};
124
125static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
126
127static void rt6_uncached_list_add(struct rt6_info *rt)
128{
129 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
130
8d0b94af
MKL
131 rt->rt6i_uncached_list = ul;
132
133 spin_lock_bh(&ul->lock);
134 list_add_tail(&rt->rt6i_uncached, &ul->head);
135 spin_unlock_bh(&ul->lock);
136}
137
138static void rt6_uncached_list_del(struct rt6_info *rt)
139{
140 if (!list_empty(&rt->rt6i_uncached)) {
141 struct uncached_list *ul = rt->rt6i_uncached_list;
142
143 spin_lock_bh(&ul->lock);
144 list_del(&rt->rt6i_uncached);
145 spin_unlock_bh(&ul->lock);
146 }
147}
148
149static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
150{
151 struct net_device *loopback_dev = net->loopback_dev;
152 int cpu;
153
e332bc67
EB
154 if (dev == loopback_dev)
155 return;
156
8d0b94af
MKL
157 for_each_possible_cpu(cpu) {
158 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
159 struct rt6_info *rt;
160
161 spin_lock_bh(&ul->lock);
162 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
163 struct inet6_dev *rt_idev = rt->rt6i_idev;
164 struct net_device *rt_dev = rt->dst.dev;
165
e332bc67 166 if (rt_idev->dev == dev) {
8d0b94af
MKL
167 rt->rt6i_idev = in6_dev_get(loopback_dev);
168 in6_dev_put(rt_idev);
169 }
170
e332bc67 171 if (rt_dev == dev) {
8d0b94af
MKL
172 rt->dst.dev = loopback_dev;
173 dev_hold(rt->dst.dev);
174 dev_put(rt_dev);
175 }
176 }
177 spin_unlock_bh(&ul->lock);
178 }
179}
180
d52d3997
MKL
181static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
182{
183 return dst_metrics_write_ptr(rt->dst.from);
184}
185
06582540
DM
186static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
187{
4b32b5ad 188 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 189
d52d3997
MKL
190 if (rt->rt6i_flags & RTF_PCPU)
191 return rt6_pcpu_cow_metrics(rt);
192 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
193 return NULL;
194 else
3b471175 195 return dst_cow_metrics_generic(dst, old);
06582540
DM
196}
197
f894cbf8
DM
198static inline const void *choose_neigh_daddr(struct rt6_info *rt,
199 struct sk_buff *skb,
200 const void *daddr)
39232973
DM
201{
202 struct in6_addr *p = &rt->rt6i_gateway;
203
a7563f34 204 if (!ipv6_addr_any(p))
39232973 205 return (const void *) p;
f894cbf8
DM
206 else if (skb)
207 return &ipv6_hdr(skb)->daddr;
39232973
DM
208 return daddr;
209}
210
f894cbf8
DM
211static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
212 struct sk_buff *skb,
213 const void *daddr)
d3aaeb38 214{
39232973
DM
215 struct rt6_info *rt = (struct rt6_info *) dst;
216 struct neighbour *n;
217
f894cbf8 218 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 219 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
220 if (n)
221 return n;
222 return neigh_create(&nd_tbl, daddr, dst->dev);
223}
224
63fca65d
JA
225static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
226{
227 struct net_device *dev = dst->dev;
228 struct rt6_info *rt = (struct rt6_info *)dst;
229
230 daddr = choose_neigh_daddr(rt, NULL, daddr);
231 if (!daddr)
232 return;
233 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
234 return;
235 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
236 return;
237 __ipv6_confirm_neigh(dev, daddr);
238}
239
9a7ec3a9 240static struct dst_ops ip6_dst_ops_template = {
1da177e4 241 .family = AF_INET6,
1da177e4
LT
242 .gc = ip6_dst_gc,
243 .gc_thresh = 1024,
244 .check = ip6_dst_check,
0dbaee3b 245 .default_advmss = ip6_default_advmss,
ebb762f2 246 .mtu = ip6_mtu,
06582540 247 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
248 .destroy = ip6_dst_destroy,
249 .ifdown = ip6_dst_ifdown,
250 .negative_advice = ip6_negative_advice,
251 .link_failure = ip6_link_failure,
252 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 253 .redirect = rt6_do_redirect,
9f8955cc 254 .local_out = __ip6_local_out,
d3aaeb38 255 .neigh_lookup = ip6_neigh_lookup,
63fca65d 256 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
257};
258
ebb762f2 259static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 260{
618f9bc7
SK
261 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
262
263 return mtu ? : dst->dev->mtu;
ec831ea7
RD
264}
265
6700c270
DM
266static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
267 struct sk_buff *skb, u32 mtu)
14e50e57
DM
268{
269}
270
6700c270
DM
/* Redirects are deliberately ignored for blackhole dsts. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
275
14e50e57
DM
276static struct dst_ops ip6_dst_blackhole_ops = {
277 .family = AF_INET6,
14e50e57
DM
278 .destroy = ip6_dst_destroy,
279 .check = ip6_dst_check,
ebb762f2 280 .mtu = ip6_blackhole_mtu,
214f45c9 281 .default_advmss = ip6_default_advmss,
14e50e57 282 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 283 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 284 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 285 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
286};
287
62fa8a84 288static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 289 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
290};
291
fb0af4c7 292static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
293 .dst = {
294 .__refcnt = ATOMIC_INIT(1),
295 .__use = 1,
2c20cbd7 296 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 297 .error = -ENETUNREACH,
d8d1f30b
CG
298 .input = ip6_pkt_discard,
299 .output = ip6_pkt_discard_out,
1da177e4
LT
300 },
301 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 302 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
303 .rt6i_metric = ~(u32) 0,
304 .rt6i_ref = ATOMIC_INIT(1),
305};
306
101367c2
TG
307#ifdef CONFIG_IPV6_MULTIPLE_TABLES
308
fb0af4c7 309static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
310 .dst = {
311 .__refcnt = ATOMIC_INIT(1),
312 .__use = 1,
2c20cbd7 313 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 314 .error = -EACCES,
d8d1f30b
CG
315 .input = ip6_pkt_prohibit,
316 .output = ip6_pkt_prohibit_out,
101367c2
TG
317 },
318 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 319 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
320 .rt6i_metric = ~(u32) 0,
321 .rt6i_ref = ATOMIC_INIT(1),
322};
323
fb0af4c7 324static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
325 .dst = {
326 .__refcnt = ATOMIC_INIT(1),
327 .__use = 1,
2c20cbd7 328 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 329 .error = -EINVAL,
d8d1f30b 330 .input = dst_discard,
ede2059d 331 .output = dst_discard_out,
101367c2
TG
332 },
333 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 334 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
335 .rt6i_metric = ~(u32) 0,
336 .rt6i_ref = ATOMIC_INIT(1),
337};
338
339#endif
340
ebfa45f0
MKL
341static void rt6_info_init(struct rt6_info *rt)
342{
343 struct dst_entry *dst = &rt->dst;
344
345 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
346 INIT_LIST_HEAD(&rt->rt6i_siblings);
347 INIT_LIST_HEAD(&rt->rt6i_uncached);
348}
349
1da177e4 350/* allocate dst with ip6_dst_ops */
d52d3997
MKL
351static struct rt6_info *__ip6_dst_alloc(struct net *net,
352 struct net_device *dev,
ad706862 353 int flags)
1da177e4 354{
97bab73f 355 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 356 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 357
ebfa45f0
MKL
358 if (rt)
359 rt6_info_init(rt);
8104891b 360
cf911662 361 return rt;
1da177e4
LT
362}
363
9ab179d8
DA
364struct rt6_info *ip6_dst_alloc(struct net *net,
365 struct net_device *dev,
366 int flags)
d52d3997 367{
ad706862 368 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
369
370 if (rt) {
371 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
372 if (rt->rt6i_pcpu) {
373 int cpu;
374
375 for_each_possible_cpu(cpu) {
376 struct rt6_info **p;
377
378 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
379 /* no one shares rt */
380 *p = NULL;
381 }
382 } else {
587fea74 383 dst_release_immediate(&rt->dst);
d52d3997
MKL
384 return NULL;
385 }
386 }
387
388 return rt;
389}
9ab179d8 390EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 391
1da177e4
LT
392static void ip6_dst_destroy(struct dst_entry *dst)
393{
394 struct rt6_info *rt = (struct rt6_info *)dst;
ecd98837 395 struct dst_entry *from = dst->from;
8d0b94af 396 struct inet6_dev *idev;
1da177e4 397
4b32b5ad 398 dst_destroy_metrics_generic(dst);
87775312 399 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
400 rt6_uncached_list_del(rt);
401
402 idev = rt->rt6i_idev;
38308473 403 if (idev) {
1da177e4
LT
404 rt->rt6i_idev = NULL;
405 in6_dev_put(idev);
1ab1457c 406 }
1716a961 407
ecd98837
YH
408 dst->from = NULL;
409 dst_release(from);
b3419363
DM
410}
411
1da177e4
LT
412static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
413 int how)
414{
415 struct rt6_info *rt = (struct rt6_info *)dst;
416 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 417 struct net_device *loopback_dev =
c346dca1 418 dev_net(dev)->loopback_dev;
1da177e4 419
e5645f51
WW
420 if (idev && idev->dev != loopback_dev) {
421 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
422 if (loopback_idev) {
423 rt->rt6i_idev = loopback_idev;
424 in6_dev_put(idev);
97cac082 425 }
1da177e4
LT
426 }
427}
428
5973fb1e
MKL
429static bool __rt6_check_expired(const struct rt6_info *rt)
430{
431 if (rt->rt6i_flags & RTF_EXPIRES)
432 return time_after(jiffies, rt->dst.expires);
433 else
434 return false;
435}
436
a50feda5 437static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 438{
1716a961
G
439 if (rt->rt6i_flags & RTF_EXPIRES) {
440 if (time_after(jiffies, rt->dst.expires))
a50feda5 441 return true;
1716a961 442 } else if (rt->dst.from) {
1e2ea8ad
XL
443 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
444 rt6_check_expired((struct rt6_info *)rt->dst.from);
1716a961 445 }
a50feda5 446 return false;
1da177e4
LT
447}
448
51ebd318 449static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
450 struct flowi6 *fl6, int oif,
451 int strict)
51ebd318
ND
452{
453 struct rt6_info *sibling, *next_sibling;
454 int route_choosen;
455
b673d6cc
JS
456 /* We might have already computed the hash for ICMPv6 errors. In such
457 * case it will always be non-zero. Otherwise now is the time to do it.
458 */
459 if (!fl6->mp_hash)
460 fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
461
462 route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
51ebd318
ND
463 /* Don't change the route, if route_choosen == 0
464 * (siblings does not include ourself)
465 */
466 if (route_choosen)
467 list_for_each_entry_safe(sibling, next_sibling,
468 &match->rt6i_siblings, rt6i_siblings) {
469 route_choosen--;
470 if (route_choosen == 0) {
52bd4c0c
ND
471 if (rt6_score_route(sibling, oif, strict) < 0)
472 break;
51ebd318
ND
473 match = sibling;
474 break;
475 }
476 }
477 return match;
478}
479
1da177e4 480/*
c71099ac 481 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
482 */
483
8ed67789
DL
484static inline struct rt6_info *rt6_device_match(struct net *net,
485 struct rt6_info *rt,
b71d1d42 486 const struct in6_addr *saddr,
1da177e4 487 int oif,
d420895e 488 int flags)
1da177e4
LT
489{
490 struct rt6_info *local = NULL;
491 struct rt6_info *sprt;
492
dd3abc4e
YH
493 if (!oif && ipv6_addr_any(saddr))
494 goto out;
495
d8d1f30b 496 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 497 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
498
499 if (oif) {
1da177e4
LT
500 if (dev->ifindex == oif)
501 return sprt;
502 if (dev->flags & IFF_LOOPBACK) {
38308473 503 if (!sprt->rt6i_idev ||
1da177e4 504 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 505 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 506 continue;
17fb0b2b
DA
507 if (local &&
508 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
509 continue;
510 }
511 local = sprt;
512 }
dd3abc4e
YH
513 } else {
514 if (ipv6_chk_addr(net, saddr, dev,
515 flags & RT6_LOOKUP_F_IFACE))
516 return sprt;
1da177e4 517 }
dd3abc4e 518 }
1da177e4 519
dd3abc4e 520 if (oif) {
1da177e4
LT
521 if (local)
522 return local;
523
d420895e 524 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 525 return net->ipv6.ip6_null_entry;
1da177e4 526 }
dd3abc4e 527out:
1da177e4
LT
528 return rt;
529}
530
27097255 531#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
532struct __rt6_probe_work {
533 struct work_struct work;
534 struct in6_addr target;
535 struct net_device *dev;
536};
537
538static void rt6_probe_deferred(struct work_struct *w)
539{
540 struct in6_addr mcaddr;
541 struct __rt6_probe_work *work =
542 container_of(w, struct __rt6_probe_work, work);
543
544 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 545 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 546 dev_put(work->dev);
662f5533 547 kfree(work);
c2f17e82
HFS
548}
549
27097255
YH
550static void rt6_probe(struct rt6_info *rt)
551{
990edb42 552 struct __rt6_probe_work *work;
f2c31e32 553 struct neighbour *neigh;
27097255
YH
554 /*
555 * Okay, this does not seem to be appropriate
556 * for now, however, we need to check if it
557 * is really so; aka Router Reachability Probing.
558 *
559 * Router Reachability Probe MUST be rate-limited
560 * to no more than one per minute.
561 */
2152caea 562 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 563 return;
2152caea
YH
564 rcu_read_lock_bh();
565 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
566 if (neigh) {
8d6c31bf
MKL
567 if (neigh->nud_state & NUD_VALID)
568 goto out;
569
990edb42 570 work = NULL;
2152caea 571 write_lock(&neigh->lock);
990edb42
MKL
572 if (!(neigh->nud_state & NUD_VALID) &&
573 time_after(jiffies,
574 neigh->updated +
575 rt->rt6i_idev->cnf.rtr_probe_interval)) {
576 work = kmalloc(sizeof(*work), GFP_ATOMIC);
577 if (work)
578 __neigh_set_probe_once(neigh);
c2f17e82 579 }
2152caea 580 write_unlock(&neigh->lock);
990edb42
MKL
581 } else {
582 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 583 }
990edb42
MKL
584
585 if (work) {
586 INIT_WORK(&work->work, rt6_probe_deferred);
587 work->target = rt->rt6i_gateway;
588 dev_hold(rt->dst.dev);
589 work->dev = rt->dst.dev;
590 schedule_work(&work->work);
591 }
592
8d6c31bf 593out:
2152caea 594 rcu_read_unlock_bh();
27097255
YH
595}
596#else
/* Without CONFIG_IPV6_ROUTER_PREF no probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
600#endif
601
1da177e4 602/*
554cfb7e 603 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 604 */
b6f99a21 605static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 606{
d1918542 607 struct net_device *dev = rt->dst.dev;
161980f4 608 if (!oif || dev->ifindex == oif)
554cfb7e 609 return 2;
161980f4
DM
610 if ((dev->flags & IFF_LOOPBACK) &&
611 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
612 return 1;
613 return 0;
554cfb7e 614}
1da177e4 615
afc154e9 616static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 617{
f2c31e32 618 struct neighbour *neigh;
afc154e9 619 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 620
4d0c5911
YH
621 if (rt->rt6i_flags & RTF_NONEXTHOP ||
622 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 623 return RT6_NUD_SUCCEED;
145a3621
YH
624
625 rcu_read_lock_bh();
626 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
627 if (neigh) {
628 read_lock(&neigh->lock);
554cfb7e 629 if (neigh->nud_state & NUD_VALID)
afc154e9 630 ret = RT6_NUD_SUCCEED;
398bcbeb 631#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 632 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 633 ret = RT6_NUD_SUCCEED;
7e980569
JB
634 else
635 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 636#endif
145a3621 637 read_unlock(&neigh->lock);
afc154e9
HFS
638 } else {
639 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 640 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 641 }
145a3621
YH
642 rcu_read_unlock_bh();
643
a5a81f0b 644 return ret;
1da177e4
LT
645}
646
554cfb7e
YH
647static int rt6_score_route(struct rt6_info *rt, int oif,
648 int strict)
1da177e4 649{
a5a81f0b 650 int m;
1ab1457c 651
4d0c5911 652 m = rt6_check_dev(rt, oif);
77d16f45 653 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 654 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
655#ifdef CONFIG_IPV6_ROUTER_PREF
656 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
657#endif
afc154e9
HFS
658 if (strict & RT6_LOOKUP_F_REACHABLE) {
659 int n = rt6_check_neigh(rt);
660 if (n < 0)
661 return n;
662 }
554cfb7e
YH
663 return m;
664}
665
f11e6659 666static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
667 int *mpri, struct rt6_info *match,
668 bool *do_rr)
554cfb7e 669{
f11e6659 670 int m;
afc154e9 671 bool match_do_rr = false;
35103d11
AG
672 struct inet6_dev *idev = rt->rt6i_idev;
673 struct net_device *dev = rt->dst.dev;
674
675 if (dev && !netif_carrier_ok(dev) &&
d5d32e4b
DA
676 idev->cnf.ignore_routes_with_linkdown &&
677 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 678 goto out;
f11e6659
DM
679
680 if (rt6_check_expired(rt))
681 goto out;
682
683 m = rt6_score_route(rt, oif, strict);
7e980569 684 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
685 match_do_rr = true;
686 m = 0; /* lowest valid score */
7e980569 687 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 688 goto out;
afc154e9
HFS
689 }
690
691 if (strict & RT6_LOOKUP_F_REACHABLE)
692 rt6_probe(rt);
f11e6659 693
7e980569 694 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 695 if (m > *mpri) {
afc154e9 696 *do_rr = match_do_rr;
f11e6659
DM
697 *mpri = m;
698 match = rt;
f11e6659 699 }
f11e6659
DM
700out:
701 return match;
702}
703
704static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
705 struct rt6_info *rr_head,
afc154e9
HFS
706 u32 metric, int oif, int strict,
707 bool *do_rr)
f11e6659 708{
9fbdcfaf 709 struct rt6_info *rt, *match, *cont;
554cfb7e 710 int mpri = -1;
1da177e4 711
f11e6659 712 match = NULL;
9fbdcfaf
SK
713 cont = NULL;
714 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
715 if (rt->rt6i_metric != metric) {
716 cont = rt;
717 break;
718 }
719
720 match = find_match(rt, oif, strict, &mpri, match, do_rr);
721 }
722
723 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
724 if (rt->rt6i_metric != metric) {
725 cont = rt;
726 break;
727 }
728
afc154e9 729 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
730 }
731
732 if (match || !cont)
733 return match;
734
735 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 736 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 737
f11e6659
DM
738 return match;
739}
1da177e4 740
f11e6659
DM
741static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
742{
743 struct rt6_info *match, *rt0;
8ed67789 744 struct net *net;
afc154e9 745 bool do_rr = false;
1da177e4 746
f11e6659
DM
747 rt0 = fn->rr_ptr;
748 if (!rt0)
749 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 750
afc154e9
HFS
751 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
752 &do_rr);
1da177e4 753
afc154e9 754 if (do_rr) {
d8d1f30b 755 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 756
554cfb7e 757 /* no entries matched; do round-robin */
f11e6659
DM
758 if (!next || next->rt6i_metric != rt0->rt6i_metric)
759 next = fn->leaf;
760
761 if (next != rt0)
762 fn->rr_ptr = next;
1da177e4 763 }
1da177e4 764
d1918542 765 net = dev_net(rt0->dst.dev);
a02cec21 766 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
767}
768
8b9df265
MKL
769static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
770{
771 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
772}
773
70ceb4f5
YH
774#ifdef CONFIG_IPV6_ROUTE_INFO
775int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 776 const struct in6_addr *gwaddr)
70ceb4f5 777{
c346dca1 778 struct net *net = dev_net(dev);
70ceb4f5
YH
779 struct route_info *rinfo = (struct route_info *) opt;
780 struct in6_addr prefix_buf, *prefix;
781 unsigned int pref;
4bed72e4 782 unsigned long lifetime;
70ceb4f5
YH
783 struct rt6_info *rt;
784
785 if (len < sizeof(struct route_info)) {
786 return -EINVAL;
787 }
788
789 /* Sanity check for prefix_len and length */
790 if (rinfo->length > 3) {
791 return -EINVAL;
792 } else if (rinfo->prefix_len > 128) {
793 return -EINVAL;
794 } else if (rinfo->prefix_len > 64) {
795 if (rinfo->length < 2) {
796 return -EINVAL;
797 }
798 } else if (rinfo->prefix_len > 0) {
799 if (rinfo->length < 1) {
800 return -EINVAL;
801 }
802 }
803
804 pref = rinfo->route_pref;
805 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 806 return -EINVAL;
70ceb4f5 807
4bed72e4 808 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
809
810 if (rinfo->length == 3)
811 prefix = (struct in6_addr *)rinfo->prefix;
812 else {
813 /* this function is safe */
814 ipv6_addr_prefix(&prefix_buf,
815 (struct in6_addr *)rinfo->prefix,
816 rinfo->prefix_len);
817 prefix = &prefix_buf;
818 }
819
f104a567
DJ
820 if (rinfo->prefix_len == 0)
821 rt = rt6_get_dflt_router(gwaddr, dev);
822 else
823 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 824 gwaddr, dev);
70ceb4f5
YH
825
826 if (rt && !lifetime) {
e0a1ad73 827 ip6_del_rt(rt);
70ceb4f5
YH
828 rt = NULL;
829 }
830
831 if (!rt && lifetime)
830218c1
DA
832 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
833 dev, pref);
70ceb4f5
YH
834 else if (rt)
835 rt->rt6i_flags = RTF_ROUTEINFO |
836 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
837
838 if (rt) {
1716a961
G
839 if (!addrconf_finite_timeout(lifetime))
840 rt6_clean_expires(rt);
841 else
842 rt6_set_expires(rt, jiffies + HZ * lifetime);
843
94e187c0 844 ip6_rt_put(rt);
70ceb4f5
YH
845 }
846 return 0;
847}
848#endif
849
a3c00e46
MKL
850static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
851 struct in6_addr *saddr)
852{
853 struct fib6_node *pn;
854 while (1) {
855 if (fn->fn_flags & RTN_TL_ROOT)
856 return NULL;
857 pn = fn->parent;
858 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
859 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
860 else
861 fn = pn;
862 if (fn->fn_flags & RTN_RTINFO)
863 return fn;
864 }
865}
c71099ac 866
8ed67789
DL
867static struct rt6_info *ip6_pol_route_lookup(struct net *net,
868 struct fib6_table *table,
4c9483b2 869 struct flowi6 *fl6, int flags)
1da177e4
LT
870{
871 struct fib6_node *fn;
872 struct rt6_info *rt;
873
d1b820bd
DA
874 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
875 flags &= ~RT6_LOOKUP_F_IFACE;
876
c71099ac 877 read_lock_bh(&table->tb6_lock);
4c9483b2 878 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
879restart:
880 rt = fn->leaf;
4c9483b2 881 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 882 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 883 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
884 if (rt == net->ipv6.ip6_null_entry) {
885 fn = fib6_backtrack(fn, &fl6->saddr);
886 if (fn)
887 goto restart;
888 }
d8d1f30b 889 dst_use(&rt->dst, jiffies);
c71099ac 890 read_unlock_bh(&table->tb6_lock);
b811580d
DA
891
892 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
893
c71099ac
TG
894 return rt;
895
896}
897
67ba4152 898struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
899 int flags)
900{
901 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
902}
903EXPORT_SYMBOL_GPL(ip6_route_lookup);
904
9acd9f3a
YH
905struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
906 const struct in6_addr *saddr, int oif, int strict)
c71099ac 907{
4c9483b2
DM
908 struct flowi6 fl6 = {
909 .flowi6_oif = oif,
910 .daddr = *daddr,
c71099ac
TG
911 };
912 struct dst_entry *dst;
77d16f45 913 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 914
adaa70bb 915 if (saddr) {
4c9483b2 916 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
917 flags |= RT6_LOOKUP_F_HAS_SADDR;
918 }
919
4c9483b2 920 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
921 if (dst->error == 0)
922 return (struct rt6_info *) dst;
923
924 dst_release(dst);
925
1da177e4
LT
926 return NULL;
927}
7159039a
YH
928EXPORT_SYMBOL(rt6_lookup);
929
/* ip6_ins_rt is called with table->tb6_lock NOT held.
 * It takes a new route entry; if the addition fails for any reason, the
 * route is released.
 * The caller must hold a reference on the dst before calling it.
 */
935
e5fd387a 936static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
333c4301
DA
937 struct mx6_config *mxc,
938 struct netlink_ext_ack *extack)
1da177e4
LT
939{
940 int err;
c71099ac 941 struct fib6_table *table;
1da177e4 942
c71099ac
TG
943 table = rt->rt6i_table;
944 write_lock_bh(&table->tb6_lock);
333c4301 945 err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
c71099ac 946 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
947
948 return err;
949}
950
40e22e8f
TG
951int ip6_ins_rt(struct rt6_info *rt)
952{
e715b6d3
FW
953 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
954 struct mx6_config mxc = { .mx = NULL, };
955
1cfb71ee
WW
956 /* Hold dst to account for the reference from the fib6 tree */
957 dst_hold(&rt->dst);
333c4301 958 return __ip6_ins_rt(rt, &info, &mxc, NULL);
40e22e8f
TG
959}
960
4832c30d
DA
961/* called with rcu_lock held */
962static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
963{
964 struct net_device *dev = rt->dst.dev;
965
7afe2e66 966 if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
4832c30d
DA
967 /* for copies of local routes, dst->dev needs to be the
968 * device if it is a master device, the master device if
969 * device is enslaved, and the loopback as the default
970 */
971 if (netif_is_l3_slave(dev) &&
972 !rt6_need_strict(&rt->rt6i_dst.addr))
973 dev = l3mdev_master_dev_rcu(dev);
974 else if (!netif_is_l3_master(dev))
975 dev = dev_net(dev)->loopback_dev;
976 /* last case is netif_is_l3_master(dev) is true in which
977 * case we want dev returned to be dev
978 */
979 }
980
981 return dev;
982}
983
8b9df265
MKL
984static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
985 const struct in6_addr *daddr,
986 const struct in6_addr *saddr)
1da177e4 987{
4832c30d 988 struct net_device *dev;
1da177e4
LT
989 struct rt6_info *rt;
990
991 /*
992 * Clone the route.
993 */
994
d52d3997 995 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
83a09abd 996 ort = (struct rt6_info *)ort->dst.from;
1da177e4 997
4832c30d
DA
998 rcu_read_lock();
999 dev = ip6_rt_get_dev_rcu(ort);
1000 rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1001 rcu_read_unlock();
83a09abd
MKL
1002 if (!rt)
1003 return NULL;
1004
1005 ip6_rt_copy_init(rt, ort);
1006 rt->rt6i_flags |= RTF_CACHE;
1007 rt->rt6i_metric = 0;
1008 rt->dst.flags |= DST_HOST;
1009 rt->rt6i_dst.addr = *daddr;
1010 rt->rt6i_dst.plen = 128;
1da177e4 1011
83a09abd
MKL
1012 if (!rt6_is_gw_or_nonexthop(ort)) {
1013 if (ort->rt6i_dst.plen != 128 &&
1014 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1015 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1016#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1017 if (rt->rt6i_src.plen && saddr) {
1018 rt->rt6i_src.addr = *saddr;
1019 rt->rt6i_src.plen = 128;
8b9df265 1020 }
83a09abd 1021#endif
95a9a5ba 1022 }
1da177e4 1023
95a9a5ba
YH
1024 return rt;
1025}
1da177e4 1026
d52d3997
MKL
1027static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1028{
4832c30d 1029 struct net_device *dev;
d52d3997
MKL
1030 struct rt6_info *pcpu_rt;
1031
4832c30d
DA
1032 rcu_read_lock();
1033 dev = ip6_rt_get_dev_rcu(rt);
1034 pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
1035 rcu_read_unlock();
d52d3997
MKL
1036 if (!pcpu_rt)
1037 return NULL;
1038 ip6_rt_copy_init(pcpu_rt, rt);
1039 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1040 pcpu_rt->rt6i_flags |= RTF_PCPU;
1041 return pcpu_rt;
1042}
1043
1044/* It should be called with read_lock_bh(&tb6_lock) acquired */
1045static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1046{
a73e4195 1047 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1048
1049 p = this_cpu_ptr(rt->rt6i_pcpu);
1050 pcpu_rt = *p;
1051
a73e4195
MKL
1052 if (pcpu_rt) {
1053 dst_hold(&pcpu_rt->dst);
1054 rt6_dst_from_metrics_check(pcpu_rt);
1055 }
1056 return pcpu_rt;
1057}
1058
1059static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1060{
add9f2a4 1061 struct fib6_table *table = rt->rt6i_table;
a73e4195 1062 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1063
1064 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1065 if (!pcpu_rt) {
1066 struct net *net = dev_net(rt->dst.dev);
1067
9c7370a1
MKL
1068 dst_hold(&net->ipv6.ip6_null_entry->dst);
1069 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1070 }
1071
add9f2a4
GKH
1072 read_lock_bh(&table->tb6_lock);
1073 if (rt->rt6i_pcpu) {
1074 p = this_cpu_ptr(rt->rt6i_pcpu);
1075 prev = cmpxchg(p, NULL, pcpu_rt);
1076 if (prev) {
1077 /* If someone did it before us, return prev instead */
1078 dst_release_immediate(&pcpu_rt->dst);
1079 pcpu_rt = prev;
1080 }
1081 } else {
1082 /* rt has been removed from the fib6 tree
1083 * before we have a chance to acquire the read_lock.
1084 * In this case, don't brother to create a pcpu rt
1085 * since rt is going away anyway. The next
1086 * dst_check() will trigger a re-lookup.
1087 */
587fea74 1088 dst_release_immediate(&pcpu_rt->dst);
add9f2a4 1089 pcpu_rt = rt;
d52d3997 1090 }
add9f2a4 1091 dst_hold(&pcpu_rt->dst);
d52d3997 1092 rt6_dst_from_metrics_check(pcpu_rt);
add9f2a4 1093 read_unlock_bh(&table->tb6_lock);
d52d3997
MKL
1094 return pcpu_rt;
1095}
1096
9ff74384
DA
1097struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1098 int oif, struct flowi6 *fl6, int flags)
1da177e4 1099{
367efcb9 1100 struct fib6_node *fn, *saved_fn;
45e4fd26 1101 struct rt6_info *rt;
c71099ac 1102 int strict = 0;
1da177e4 1103
77d16f45 1104 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1105 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1106 if (net->ipv6.devconf_all->forwarding == 0)
1107 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1108
c71099ac 1109 read_lock_bh(&table->tb6_lock);
1da177e4 1110
4c9483b2 1111 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1112 saved_fn = fn;
1da177e4 1113
ca254490
DA
1114 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1115 oif = 0;
1116
a3c00e46 1117redo_rt6_select:
367efcb9 1118 rt = rt6_select(fn, oif, strict);
52bd4c0c 1119 if (rt->rt6i_nsiblings)
367efcb9 1120 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1121 if (rt == net->ipv6.ip6_null_entry) {
1122 fn = fib6_backtrack(fn, &fl6->saddr);
1123 if (fn)
1124 goto redo_rt6_select;
367efcb9
MKL
1125 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1126 /* also consider unreachable route */
1127 strict &= ~RT6_LOOKUP_F_REACHABLE;
1128 fn = saved_fn;
1129 goto redo_rt6_select;
367efcb9 1130 }
a3c00e46
MKL
1131 }
1132
fb9de91e 1133
3da59bd9 1134 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
d52d3997
MKL
1135 dst_use(&rt->dst, jiffies);
1136 read_unlock_bh(&table->tb6_lock);
1137
1138 rt6_dst_from_metrics_check(rt);
b811580d
DA
1139
1140 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
d52d3997 1141 return rt;
3da59bd9
MKL
1142 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1143 !(rt->rt6i_flags & RTF_GATEWAY))) {
1144 /* Create a RTF_CACHE clone which will not be
1145 * owned by the fib6 tree. It is for the special case where
1146 * the daddr in the skb during the neighbor look-up is different
1147 * from the fl6->daddr used to look-up route here.
1148 */
1149
1150 struct rt6_info *uncached_rt;
1151
d52d3997
MKL
1152 dst_use(&rt->dst, jiffies);
1153 read_unlock_bh(&table->tb6_lock);
1154
3da59bd9
MKL
1155 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1156 dst_release(&rt->dst);
c71099ac 1157
1cfb71ee
WW
1158 if (uncached_rt) {
1159 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1160 * No need for another dst_hold()
1161 */
8d0b94af 1162 rt6_uncached_list_add(uncached_rt);
1cfb71ee 1163 } else {
3da59bd9 1164 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1165 dst_hold(&uncached_rt->dst);
1166 }
b811580d
DA
1167
1168 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
3da59bd9 1169 return uncached_rt;
3da59bd9 1170
d52d3997
MKL
1171 } else {
1172 /* Get a percpu copy */
1173
1174 struct rt6_info *pcpu_rt;
1175
1176 rt->dst.lastuse = jiffies;
1177 rt->dst.__use++;
1178 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1179
9c7370a1
MKL
1180 if (pcpu_rt) {
1181 read_unlock_bh(&table->tb6_lock);
1182 } else {
add9f2a4
GKH
1183 /* We have to do the read_unlock first
1184 * because rt6_make_pcpu_route() may trigger
1185 * ip6_dst_gc() which will take the write_lock.
1186 */
1187 dst_hold(&rt->dst);
1188 read_unlock_bh(&table->tb6_lock);
1189 pcpu_rt = rt6_make_pcpu_route(rt);
1190 dst_release(&rt->dst);
9c7370a1 1191 }
d52d3997 1192
b811580d 1193 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
d52d3997 1194 return pcpu_rt;
add9f2a4 1195
d52d3997 1196 }
1da177e4 1197}
9ff74384 1198EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1199
8ed67789 1200static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1201 struct flowi6 *fl6, int flags)
4acad72d 1202{
4c9483b2 1203 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1204}
1205
d409b847
MB
1206struct dst_entry *ip6_route_input_lookup(struct net *net,
1207 struct net_device *dev,
1208 struct flowi6 *fl6, int flags)
72331bc0
SL
1209{
1210 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1211 flags |= RT6_LOOKUP_F_IFACE;
1212
1213 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1214}
d409b847 1215EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1216
23aebdac
JS
1217static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1218 struct flow_keys *keys)
1219{
1220 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1221 const struct ipv6hdr *key_iph = outer_iph;
1222 const struct ipv6hdr *inner_iph;
1223 const struct icmp6hdr *icmph;
1224 struct ipv6hdr _inner_iph;
cb9e5a08 1225 struct icmp6hdr _icmph;
23aebdac
JS
1226
1227 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1228 goto out;
1229
cb9e5a08
ED
1230 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1231 sizeof(_icmph), &_icmph);
1232 if (!icmph)
1233 goto out;
1234
23aebdac
JS
1235 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1236 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1237 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1238 icmph->icmp6_type != ICMPV6_PARAMPROB)
1239 goto out;
1240
1241 inner_iph = skb_header_pointer(skb,
1242 skb_transport_offset(skb) + sizeof(*icmph),
1243 sizeof(_inner_iph), &_inner_iph);
1244 if (!inner_iph)
1245 goto out;
1246
1247 key_iph = inner_iph;
1248out:
1249 memset(keys, 0, sizeof(*keys));
1250 keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1251 keys->addrs.v6addrs.src = key_iph->saddr;
1252 keys->addrs.v6addrs.dst = key_iph->daddr;
3f8f52c5 1253 keys->tags.flow_label = ip6_flowlabel(key_iph);
23aebdac
JS
1254 keys->basic.ip_proto = key_iph->nexthdr;
1255}
1256
1257/* if skb is set it will be used and fl6 can be NULL */
1258u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
1259{
1260 struct flow_keys hash_keys;
1261
1262 if (skb) {
1263 ip6_multipath_l3_keys(skb, &hash_keys);
1264 return flow_hash_from_keys(&hash_keys);
1265 }
1266
1267 return get_hash_from_flowi6(fl6);
1268}
1269
c71099ac
TG
1270void ip6_route_input(struct sk_buff *skb)
1271{
b71d1d42 1272 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1273 struct net *net = dev_net(skb->dev);
adaa70bb 1274 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1275 struct ip_tunnel_info *tun_info;
4c9483b2 1276 struct flowi6 fl6 = {
e0d56fdd 1277 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
1278 .daddr = iph->daddr,
1279 .saddr = iph->saddr,
6502ca52 1280 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1281 .flowi6_mark = skb->mark,
1282 .flowi6_proto = iph->nexthdr,
c71099ac 1283 };
adaa70bb 1284
904af04d 1285 tun_info = skb_tunnel_info(skb);
46fa062a 1286 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1287 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
23aebdac
JS
1288 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
1289 fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
06e9d040 1290 skb_dst_drop(skb);
72331bc0 1291 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1292}
1293
8ed67789 1294static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1295 struct flowi6 *fl6, int flags)
1da177e4 1296{
4c9483b2 1297 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1298}
1299
6f21c96a
PA
1300struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1301 struct flowi6 *fl6, int flags)
c71099ac 1302{
d46a9d67 1303 bool any_src;
c71099ac 1304
4c1feac5
DA
1305 if (rt6_need_strict(&fl6->daddr)) {
1306 struct dst_entry *dst;
1307
1308 dst = l3mdev_link_scope_lookup(net, fl6);
1309 if (dst)
1310 return dst;
1311 }
ca254490 1312
1fb9489b 1313 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1314
d46a9d67 1315 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1316 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1317 (fl6->flowi6_oif && any_src))
77d16f45 1318 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1319
d46a9d67 1320 if (!any_src)
adaa70bb 1321 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1322 else if (sk)
1323 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1324
4c9483b2 1325 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1326}
6f21c96a 1327EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 1328
2774c131 1329struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1330{
5c1e6aa3 1331 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 1332 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
1333 struct dst_entry *new = NULL;
1334
1dbe3252 1335 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 1336 DST_OBSOLETE_DEAD, 0);
14e50e57 1337 if (rt) {
0a1f5962 1338 rt6_info_init(rt);
8104891b 1339
0a1f5962 1340 new = &rt->dst;
14e50e57 1341 new->__use = 1;
352e512c 1342 new->input = dst_discard;
ede2059d 1343 new->output = dst_discard_out;
14e50e57 1344
0a1f5962 1345 dst_copy_metrics(new, &ort->dst);
14e50e57 1346
1dbe3252 1347 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 1348 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 1349 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
1350 rt->rt6i_metric = 0;
1351
1352 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1353#ifdef CONFIG_IPV6_SUBTREES
1354 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1355#endif
14e50e57
DM
1356 }
1357
69ead7af
DM
1358 dst_release(dst_orig);
1359 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1360}
14e50e57 1361
1da177e4
LT
1362/*
1363 * Destination cache support functions
1364 */
1365
4b32b5ad
MKL
1366static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1367{
1368 if (rt->dst.from &&
1369 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1370 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1371}
1372
3da59bd9
MKL
1373static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1374{
36143645 1375 u32 rt_cookie = 0;
c5cff856
WW
1376
1377 if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
3da59bd9
MKL
1378 return NULL;
1379
1380 if (rt6_check_expired(rt))
1381 return NULL;
1382
1383 return &rt->dst;
1384}
1385
1386static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1387{
5973fb1e
MKL
1388 if (!__rt6_check_expired(rt) &&
1389 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3da59bd9
MKL
1390 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1391 return &rt->dst;
1392 else
1393 return NULL;
1394}
1395
1da177e4
LT
1396static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1397{
1398 struct rt6_info *rt;
1399
1400 rt = (struct rt6_info *) dst;
1401
6f3118b5
ND
1402 /* All IPV6 dsts are created with ->obsolete set to the value
1403 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1404 * into this function always.
1405 */
e3bc10bd 1406
4b32b5ad
MKL
1407 rt6_dst_from_metrics_check(rt);
1408
02bcf4e0 1409 if (rt->rt6i_flags & RTF_PCPU ||
a4c2fd7f 1410 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
3da59bd9
MKL
1411 return rt6_dst_from_check(rt, cookie);
1412 else
1413 return rt6_check(rt, cookie);
1da177e4
LT
1414}
1415
1416static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1417{
1418 struct rt6_info *rt = (struct rt6_info *) dst;
1419
1420 if (rt) {
54c1a859
YH
1421 if (rt->rt6i_flags & RTF_CACHE) {
1422 if (rt6_check_expired(rt)) {
1423 ip6_del_rt(rt);
1424 dst = NULL;
1425 }
1426 } else {
1da177e4 1427 dst_release(dst);
54c1a859
YH
1428 dst = NULL;
1429 }
1da177e4 1430 }
54c1a859 1431 return dst;
1da177e4
LT
1432}
1433
1434static void ip6_link_failure(struct sk_buff *skb)
1435{
1436 struct rt6_info *rt;
1437
3ffe533c 1438 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1439
adf30907 1440 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1441 if (rt) {
1eb4f758 1442 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0
WW
1443 if (dst_hold_safe(&rt->dst))
1444 ip6_del_rt(rt);
c5cff856
WW
1445 } else {
1446 struct fib6_node *fn;
1447
1448 rcu_read_lock();
1449 fn = rcu_dereference(rt->rt6i_node);
1450 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
1451 fn->fn_sernum = -1;
1452 rcu_read_unlock();
1eb4f758 1453 }
1da177e4
LT
1454 }
1455}
1456
45e4fd26
MKL
1457static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1458{
1459 struct net *net = dev_net(rt->dst.dev);
1460
1461 rt->rt6i_flags |= RTF_MODIFIED;
1462 rt->rt6i_pmtu = mtu;
1463 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1464}
1465
0d3f6d29
MKL
1466static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1467{
1468 return !(rt->rt6i_flags & RTF_CACHE) &&
4e587ea7
WW
1469 (rt->rt6i_flags & RTF_PCPU ||
1470 rcu_access_pointer(rt->rt6i_node));
0d3f6d29
MKL
1471}
1472
45e4fd26
MKL
1473static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1474 const struct ipv6hdr *iph, u32 mtu)
1da177e4 1475{
0dec879f 1476 const struct in6_addr *daddr, *saddr;
67ba4152 1477 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1478
19bda36c
XL
1479 if (dst_metric_locked(dst, RTAX_MTU))
1480 return;
1481
0dec879f
JA
1482 if (iph) {
1483 daddr = &iph->daddr;
1484 saddr = &iph->saddr;
1485 } else if (sk) {
1486 daddr = &sk->sk_v6_daddr;
1487 saddr = &inet6_sk(sk)->saddr;
1488 } else {
1489 daddr = NULL;
1490 saddr = NULL;
1491 }
1492 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
1493 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1494 if (mtu >= dst_mtu(dst))
1495 return;
9d289715 1496
0d3f6d29 1497 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 1498 rt6_do_update_pmtu(rt6, mtu);
0dec879f 1499 } else if (daddr) {
45e4fd26
MKL
1500 struct rt6_info *nrt6;
1501
45e4fd26
MKL
1502 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1503 if (nrt6) {
1504 rt6_do_update_pmtu(nrt6, mtu);
1505
1506 /* ip6_ins_rt(nrt6) will bump the
1507 * rt6->rt6i_node->fn_sernum
1508 * which will fail the next rt6_check() and
1509 * invalidate the sk->sk_dst_cache.
1510 */
1511 ip6_ins_rt(nrt6);
1cfb71ee
WW
1512 /* Release the reference taken in
1513 * ip6_rt_cache_alloc()
1514 */
1515 dst_release(&nrt6->dst);
45e4fd26 1516 }
1da177e4
LT
1517 }
1518}
1519
45e4fd26
MKL
1520static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1521 struct sk_buff *skb, u32 mtu)
1522{
1523 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1524}
1525
42ae66c8 1526void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 1527 int oif, u32 mark, kuid_t uid)
81aded24
DM
1528{
1529 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1530 struct dst_entry *dst;
1531 struct flowi6 fl6;
1532
1533 memset(&fl6, 0, sizeof(fl6));
1534 fl6.flowi6_oif = oif;
1b3c61dc 1535 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1536 fl6.daddr = iph->daddr;
1537 fl6.saddr = iph->saddr;
6502ca52 1538 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 1539 fl6.flowi6_uid = uid;
81aded24
DM
1540
1541 dst = ip6_route_output(net, NULL, &fl6);
1542 if (!dst->error)
45e4fd26 1543 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
1544 dst_release(dst);
1545}
1546EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1547
1548void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1549{
590f89a7 1550 int oif = sk->sk_bound_dev_if;
33c162a9
MKL
1551 struct dst_entry *dst;
1552
590f89a7
DA
1553 if (!oif && skb->dev)
1554 oif = l3mdev_master_ifindex(skb->dev);
1555
1556 ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
1557
1558 dst = __sk_dst_get(sk);
1559 if (!dst || !dst->obsolete ||
1560 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1561 return;
1562
1563 bh_lock_sock(sk);
1564 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1565 ip6_datagram_dst_update(sk, false);
1566 bh_unlock_sock(sk);
81aded24
DM
1567}
1568EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1569
b55b76b2
DJ
/* Handle redirects */

/*
 * Lookup key for redirect handling: an ordinary flow plus the address
 * that the candidate route's gateway is compared against.
 *
 * fl6 must remain the first member: ip6_route_redirect() passes
 * &rdfl.fl6 to fib6_rule_lookup(), and __ip6_route_redirect() casts the
 * struct flowi6 pointer it receives back to struct ip6rd_flowi.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
1575
1576static struct rt6_info *__ip6_route_redirect(struct net *net,
1577 struct fib6_table *table,
1578 struct flowi6 *fl6,
1579 int flags)
1580{
1581 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1582 struct rt6_info *rt;
1583 struct fib6_node *fn;
1584
1585 /* Get the "current" route for this destination and
67c408cf 1586 * check if the redirect has come from appropriate router.
b55b76b2
DJ
1587 *
1588 * RFC 4861 specifies that redirects should only be
1589 * accepted if they come from the nexthop to the target.
1590 * Due to the way the routes are chosen, this notion
1591 * is a bit fuzzy and one might need to check all possible
1592 * routes.
1593 */
1594
1595 read_lock_bh(&table->tb6_lock);
1596 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1597restart:
1598 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1599 if (rt6_check_expired(rt))
1600 continue;
1601 if (rt->dst.error)
1602 break;
1603 if (!(rt->rt6i_flags & RTF_GATEWAY))
1604 continue;
1605 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1606 continue;
1607 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1608 continue;
1609 break;
1610 }
1611
1612 if (!rt)
1613 rt = net->ipv6.ip6_null_entry;
1614 else if (rt->dst.error) {
1615 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
1616 goto out;
1617 }
1618
1619 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
1620 fn = fib6_backtrack(fn, &fl6->saddr);
1621 if (fn)
1622 goto restart;
b55b76b2 1623 }
a3c00e46 1624
b0a1ba59 1625out:
b55b76b2
DJ
1626 dst_hold(&rt->dst);
1627
1628 read_unlock_bh(&table->tb6_lock);
1629
b811580d 1630 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
b55b76b2
DJ
1631 return rt;
1632};
1633
1634static struct dst_entry *ip6_route_redirect(struct net *net,
1635 const struct flowi6 *fl6,
1636 const struct in6_addr *gateway)
1637{
1638 int flags = RT6_LOOKUP_F_HAS_SADDR;
1639 struct ip6rd_flowi rdfl;
1640
1641 rdfl.fl6 = *fl6;
1642 rdfl.gateway = *gateway;
1643
1644 return fib6_rule_lookup(net, &rdfl.fl6,
1645 flags, __ip6_route_redirect);
1646}
1647
e2d118a1
LC
1648void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1649 kuid_t uid)
3a5ad2ee
DM
1650{
1651 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1652 struct dst_entry *dst;
1653 struct flowi6 fl6;
1654
1655 memset(&fl6, 0, sizeof(fl6));
e374c618 1656 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
1657 fl6.flowi6_oif = oif;
1658 fl6.flowi6_mark = mark;
3a5ad2ee
DM
1659 fl6.daddr = iph->daddr;
1660 fl6.saddr = iph->saddr;
6502ca52 1661 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 1662 fl6.flowi6_uid = uid;
3a5ad2ee 1663
b55b76b2
DJ
1664 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1665 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1666 dst_release(dst);
1667}
1668EXPORT_SYMBOL_GPL(ip6_redirect);
1669
c92a59ec
DJ
1670void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1671 u32 mark)
1672{
1673 const struct ipv6hdr *iph = ipv6_hdr(skb);
1674 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1675 struct dst_entry *dst;
1676 struct flowi6 fl6;
1677
1678 memset(&fl6, 0, sizeof(fl6));
e374c618 1679 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
1680 fl6.flowi6_oif = oif;
1681 fl6.flowi6_mark = mark;
c92a59ec
DJ
1682 fl6.daddr = msg->dest;
1683 fl6.saddr = iph->daddr;
e2d118a1 1684 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 1685
b55b76b2
DJ
1686 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1687 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
1688 dst_release(dst);
1689}
1690
3a5ad2ee
DM
1691void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1692{
e2d118a1
LC
1693 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1694 sk->sk_uid);
3a5ad2ee
DM
1695}
1696EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1697
0dbaee3b 1698static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1699{
0dbaee3b
DM
1700 struct net_device *dev = dst->dev;
1701 unsigned int mtu = dst_mtu(dst);
1702 struct net *net = dev_net(dev);
1703
1da177e4
LT
1704 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1705
5578689a
DL
1706 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1707 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1708
1709 /*
1ab1457c
YH
1710 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1711 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1712 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1713 * rely only on pmtu discovery"
1714 */
1715 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1716 mtu = IPV6_MAXPLEN;
1717 return mtu;
1718}
1719
ebb762f2 1720static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1721{
4b32b5ad
MKL
1722 const struct rt6_info *rt = (const struct rt6_info *)dst;
1723 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 1724 struct inet6_dev *idev;
618f9bc7 1725
4b32b5ad
MKL
1726 if (mtu)
1727 goto out;
1728
1729 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 1730 if (mtu)
30f78d8e 1731 goto out;
618f9bc7
SK
1732
1733 mtu = IPV6_MIN_MTU;
d33e4553
DM
1734
1735 rcu_read_lock();
1736 idev = __in6_dev_get(dst->dev);
1737 if (idev)
1738 mtu = idev->cnf.mtu6;
1739 rcu_read_unlock();
1740
30f78d8e 1741out:
14972cbd
RP
1742 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1743
1744 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
1745}
1746
3b00944c 1747struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 1748 struct flowi6 *fl6)
1da177e4 1749{
87a11578 1750 struct dst_entry *dst;
1da177e4
LT
1751 struct rt6_info *rt;
1752 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1753 struct net *net = dev_net(dev);
1da177e4 1754
38308473 1755 if (unlikely(!idev))
122bdf67 1756 return ERR_PTR(-ENODEV);
1da177e4 1757
ad706862 1758 rt = ip6_dst_alloc(net, dev, 0);
38308473 1759 if (unlikely(!rt)) {
1da177e4 1760 in6_dev_put(idev);
87a11578 1761 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1762 goto out;
1763 }
1764
8e2ec639 1765 rt->dst.flags |= DST_HOST;
4b2e0f09 1766 rt->dst.input = ip6_input;
8e2ec639 1767 rt->dst.output = ip6_output;
550bab42 1768 rt->rt6i_gateway = fl6->daddr;
87a11578 1769 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1770 rt->rt6i_dst.plen = 128;
1771 rt->rt6i_idev = idev;
14edd87d 1772 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1773
587fea74
WW
1774 /* Add this dst into uncached_list so that rt6_ifdown() can
1775 * do proper release of the net_device
1776 */
1777 rt6_uncached_list_add(rt);
1da177e4 1778
87a11578
DM
1779 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1780
1da177e4 1781out:
87a11578 1782 return dst;
1da177e4
LT
1783}
1784
569d3645 1785static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1786{
86393e52 1787 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1788 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1789 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1790 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1791 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1792 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1793 int entries;
7019b78e 1794
fc66f95c 1795 entries = dst_entries_get_fast(ops);
49a18d86 1796 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 1797 entries <= rt_max_size)
1da177e4
LT
1798 goto out;
1799
6891a346 1800 net->ipv6.ip6_rt_gc_expire++;
14956643 1801 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
1802 entries = dst_entries_get_slow(ops);
1803 if (entries < ops->gc_thresh)
7019b78e 1804 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1805out:
7019b78e 1806 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1807 return entries > rt_max_size;
1da177e4
LT
1808}
1809
e715b6d3
FW
1810static int ip6_convert_metrics(struct mx6_config *mxc,
1811 const struct fib6_config *cfg)
1812{
c3a8d947 1813 bool ecn_ca = false;
e715b6d3
FW
1814 struct nlattr *nla;
1815 int remaining;
1816 u32 *mp;
1817
63159f29 1818 if (!cfg->fc_mx)
e715b6d3
FW
1819 return 0;
1820
1821 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1822 if (unlikely(!mp))
1823 return -ENOMEM;
1824
1825 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1826 int type = nla_type(nla);
1bb14807 1827 u32 val;
e715b6d3 1828
1bb14807
DB
1829 if (!type)
1830 continue;
1831 if (unlikely(type > RTAX_MAX))
1832 goto err;
ea697639 1833
1bb14807
DB
1834 if (type == RTAX_CC_ALGO) {
1835 char tmp[TCP_CA_NAME_MAX];
e715b6d3 1836
1bb14807 1837 nla_strlcpy(tmp, nla, sizeof(tmp));
c3a8d947 1838 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1bb14807
DB
1839 if (val == TCP_CA_UNSPEC)
1840 goto err;
1841 } else {
1842 val = nla_get_u32(nla);
e715b6d3 1843 }
626abd59
PA
1844 if (type == RTAX_HOPLIMIT && val > 255)
1845 val = 255;
b8d3e416
DB
1846 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1847 goto err;
1bb14807
DB
1848
1849 mp[type - 1] = val;
1850 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
1851 }
1852
c3a8d947
DB
1853 if (ecn_ca) {
1854 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1855 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1856 }
e715b6d3 1857
c3a8d947 1858 mxc->mx = mp;
e715b6d3
FW
1859 return 0;
1860 err:
1861 kfree(mp);
1862 return -EINVAL;
1863}
1da177e4 1864
8c14586f
DA
1865static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1866 struct fib6_config *cfg,
1867 const struct in6_addr *gw_addr)
1868{
1869 struct flowi6 fl6 = {
1870 .flowi6_oif = cfg->fc_ifindex,
1871 .daddr = *gw_addr,
1872 .saddr = cfg->fc_prefsrc,
1873 };
1874 struct fib6_table *table;
1875 struct rt6_info *rt;
d5d32e4b 1876 int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
8c14586f
DA
1877
1878 table = fib6_get_table(net, cfg->fc_table);
1879 if (!table)
1880 return NULL;
1881
1882 if (!ipv6_addr_any(&cfg->fc_prefsrc))
1883 flags |= RT6_LOOKUP_F_HAS_SADDR;
1884
1885 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1886
1887 /* if table lookup failed, fall back to full lookup */
1888 if (rt == net->ipv6.ip6_null_entry) {
1889 ip6_rt_put(rt);
1890 rt = NULL;
1891 }
1892
1893 return rt;
1894}
1895
333c4301
DA
1896static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
1897 struct netlink_ext_ack *extack)
1da177e4 1898{
5578689a 1899 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1900 struct rt6_info *rt = NULL;
1901 struct net_device *dev = NULL;
1902 struct inet6_dev *idev = NULL;
c71099ac 1903 struct fib6_table *table;
1da177e4 1904 int addr_type;
8c5b83f0 1905 int err = -EINVAL;
1da177e4 1906
557c44be 1907 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
1908 if (cfg->fc_flags & RTF_PCPU) {
1909 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 1910 goto out;
d5d531cb 1911 }
557c44be 1912
d5d531cb
DA
1913 if (cfg->fc_dst_len > 128) {
1914 NL_SET_ERR_MSG(extack, "Invalid prefix length");
1915 goto out;
1916 }
1917 if (cfg->fc_src_len > 128) {
1918 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 1919 goto out;
d5d531cb 1920 }
1da177e4 1921#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
1922 if (cfg->fc_src_len) {
1923 NL_SET_ERR_MSG(extack,
1924 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 1925 goto out;
d5d531cb 1926 }
1da177e4 1927#endif
86872cb5 1928 if (cfg->fc_ifindex) {
1da177e4 1929 err = -ENODEV;
5578689a 1930 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1931 if (!dev)
1932 goto out;
1933 idev = in6_dev_get(dev);
1934 if (!idev)
1935 goto out;
1936 }
1937
86872cb5
TG
1938 if (cfg->fc_metric == 0)
1939 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1940
d71314b4 1941 err = -ENOBUFS;
38308473
DM
1942 if (cfg->fc_nlinfo.nlh &&
1943 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1944 table = fib6_get_table(net, cfg->fc_table);
38308473 1945 if (!table) {
f3213831 1946 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1947 table = fib6_new_table(net, cfg->fc_table);
1948 }
1949 } else {
1950 table = fib6_new_table(net, cfg->fc_table);
1951 }
38308473
DM
1952
1953 if (!table)
c71099ac 1954 goto out;
c71099ac 1955
ad706862
MKL
1956 rt = ip6_dst_alloc(net, NULL,
1957 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 1958
38308473 1959 if (!rt) {
1da177e4
LT
1960 err = -ENOMEM;
1961 goto out;
1962 }
1963
1716a961
G
1964 if (cfg->fc_flags & RTF_EXPIRES)
1965 rt6_set_expires(rt, jiffies +
1966 clock_t_to_jiffies(cfg->fc_expires));
1967 else
1968 rt6_clean_expires(rt);
1da177e4 1969
86872cb5
TG
1970 if (cfg->fc_protocol == RTPROT_UNSPEC)
1971 cfg->fc_protocol = RTPROT_BOOT;
1972 rt->rt6i_protocol = cfg->fc_protocol;
1973
1974 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1975
1976 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1977 rt->dst.input = ip6_mc_input;
ab79ad14
1978 else if (cfg->fc_flags & RTF_LOCAL)
1979 rt->dst.input = ip6_input;
1da177e4 1980 else
d8d1f30b 1981 rt->dst.input = ip6_forward;
1da177e4 1982
d8d1f30b 1983 rt->dst.output = ip6_output;
1da177e4 1984
19e42e45
RP
1985 if (cfg->fc_encap) {
1986 struct lwtunnel_state *lwtstate;
1987
30357d7d 1988 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 1989 cfg->fc_encap, AF_INET6, cfg,
9ae28727 1990 &lwtstate, extack);
19e42e45
RP
1991 if (err)
1992 goto out;
61adedf3
JB
1993 rt->dst.lwtstate = lwtstate_get(lwtstate);
1994 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1995 rt->dst.lwtstate->orig_output = rt->dst.output;
1996 rt->dst.output = lwtunnel_output;
25368623 1997 }
61adedf3
JB
1998 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1999 rt->dst.lwtstate->orig_input = rt->dst.input;
2000 rt->dst.input = lwtunnel_input;
25368623 2001 }
19e42e45
RP
2002 }
2003
86872cb5
TG
2004 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2005 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 2006 if (rt->rt6i_dst.plen == 128)
e5fd387a 2007 rt->dst.flags |= DST_HOST;
e5fd387a 2008
1da177e4 2009#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
2010 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
2011 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
2012#endif
2013
86872cb5 2014 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
2015
2016 /* We cannot add true routes via loopback here,
2017 they would result in kernel looping; promote them to reject routes
2018 */
86872cb5 2019 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
2020 (dev && (dev->flags & IFF_LOOPBACK) &&
2021 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2022 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 2023 /* hold loopback dev/idev if we haven't done so. */
5578689a 2024 if (dev != net->loopback_dev) {
1da177e4
LT
2025 if (dev) {
2026 dev_put(dev);
2027 in6_dev_put(idev);
2028 }
5578689a 2029 dev = net->loopback_dev;
1da177e4
LT
2030 dev_hold(dev);
2031 idev = in6_dev_get(dev);
2032 if (!idev) {
2033 err = -ENODEV;
2034 goto out;
2035 }
2036 }
1da177e4 2037 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
2038 switch (cfg->fc_type) {
2039 case RTN_BLACKHOLE:
2040 rt->dst.error = -EINVAL;
ede2059d 2041 rt->dst.output = dst_discard_out;
7150aede 2042 rt->dst.input = dst_discard;
ef2c7d7b
ND
2043 break;
2044 case RTN_PROHIBIT:
2045 rt->dst.error = -EACCES;
7150aede
K
2046 rt->dst.output = ip6_pkt_prohibit_out;
2047 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 2048 break;
b4949ab2 2049 case RTN_THROW:
0315e382 2050 case RTN_UNREACHABLE:
ef2c7d7b 2051 default:
7150aede 2052 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
0315e382
NF
2053 : (cfg->fc_type == RTN_UNREACHABLE)
2054 ? -EHOSTUNREACH : -ENETUNREACH;
7150aede
K
2055 rt->dst.output = ip6_pkt_discard_out;
2056 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
2057 break;
2058 }
1da177e4
LT
2059 goto install_route;
2060 }
2061
86872cb5 2062 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 2063 const struct in6_addr *gw_addr;
1da177e4
LT
2064 int gwa_type;
2065
86872cb5 2066 gw_addr = &cfg->fc_gateway;
330567b7 2067 gwa_type = ipv6_addr_type(gw_addr);
48ed7b26
FW
2068
2069 /* if gw_addr is local we will fail to detect this in case
2070 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2071 * will return already-added prefix route via interface that
2072 * prefix route was assigned to, which might be non-loopback.
2073 */
2074 err = -EINVAL;
330567b7
FW
2075 if (ipv6_chk_addr_and_flags(net, gw_addr,
2076 gwa_type & IPV6_ADDR_LINKLOCAL ?
d5d531cb
DA
2077 dev : NULL, 0, 0)) {
2078 NL_SET_ERR_MSG(extack, "Invalid gateway address");
48ed7b26 2079 goto out;
d5d531cb 2080 }
4e3fd7a0 2081 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
2082
2083 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
8c14586f 2084 struct rt6_info *grt = NULL;
1da177e4
LT
2085
2086 /* IPv6 strictly inhibits using not link-local
2087 addresses as nexthop address.
2088 Otherwise, router will not able to send redirects.
2089 It is very good, but in some (rare!) circumstances
2090 (SIT, PtP, NBMA NOARP links) it is handy to allow
2091 some exceptions. --ANK
96d5822c
EN
2092 We allow IPv4-mapped nexthops to support RFC4798-type
2093 addressing
1da177e4 2094 */
96d5822c 2095 if (!(gwa_type & (IPV6_ADDR_UNICAST |
d5d531cb
DA
2096 IPV6_ADDR_MAPPED))) {
2097 NL_SET_ERR_MSG(extack,
2098 "Invalid gateway address");
1da177e4 2099 goto out;
d5d531cb 2100 }
1da177e4 2101
a435a07f 2102 if (cfg->fc_table) {
8c14586f
DA
2103 grt = ip6_nh_lookup_table(net, cfg, gw_addr);
2104
a435a07f
VB
2105 if (grt) {
2106 if (grt->rt6i_flags & RTF_GATEWAY ||
2107 (dev && dev != grt->dst.dev)) {
2108 ip6_rt_put(grt);
2109 grt = NULL;
2110 }
2111 }
2112 }
2113
8c14586f
DA
2114 if (!grt)
2115 grt = rt6_lookup(net, gw_addr, NULL,
2116 cfg->fc_ifindex, 1);
1da177e4
LT
2117
2118 err = -EHOSTUNREACH;
38308473 2119 if (!grt)
1da177e4
LT
2120 goto out;
2121 if (dev) {
d1918542 2122 if (dev != grt->dst.dev) {
94e187c0 2123 ip6_rt_put(grt);
1da177e4
LT
2124 goto out;
2125 }
2126 } else {
d1918542 2127 dev = grt->dst.dev;
1da177e4
LT
2128 idev = grt->rt6i_idev;
2129 dev_hold(dev);
2130 in6_dev_hold(grt->rt6i_idev);
2131 }
38308473 2132 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 2133 err = 0;
94e187c0 2134 ip6_rt_put(grt);
1da177e4
LT
2135
2136 if (err)
2137 goto out;
2138 }
2139 err = -EINVAL;
d5d531cb
DA
2140 if (!dev) {
2141 NL_SET_ERR_MSG(extack, "Egress device not specified");
2142 goto out;
2143 } else if (dev->flags & IFF_LOOPBACK) {
2144 NL_SET_ERR_MSG(extack,
2145 "Egress device can not be loopback device for this route");
1da177e4 2146 goto out;
d5d531cb 2147 }
1da177e4
LT
2148 }
2149
2150 err = -ENODEV;
38308473 2151 if (!dev)
1da177e4
LT
2152 goto out;
2153
c3968a85
DW
2154 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2155 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 2156 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
2157 err = -EINVAL;
2158 goto out;
2159 }
4e3fd7a0 2160 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
2161 rt->rt6i_prefsrc.plen = 128;
2162 } else
2163 rt->rt6i_prefsrc.plen = 0;
2164
86872cb5 2165 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
2166
2167install_route:
d8d1f30b 2168 rt->dst.dev = dev;
1da177e4 2169 rt->rt6i_idev = idev;
c71099ac 2170 rt->rt6i_table = table;
63152fc0 2171
c346dca1 2172 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 2173
8c5b83f0 2174 return rt;
6b9ea5a6
RP
2175out:
2176 if (dev)
2177 dev_put(dev);
2178 if (idev)
2179 in6_dev_put(idev);
587fea74
WW
2180 if (rt)
2181 dst_release_immediate(&rt->dst);
6b9ea5a6 2182
8c5b83f0 2183 return ERR_PTR(err);
6b9ea5a6
RP
2184}
2185
333c4301
DA
2186int ip6_route_add(struct fib6_config *cfg,
2187 struct netlink_ext_ack *extack)
6b9ea5a6
RP
2188{
2189 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 2190 struct rt6_info *rt;
6b9ea5a6
RP
2191 int err;
2192
333c4301 2193 rt = ip6_route_info_create(cfg, extack);
8c5b83f0
RP
2194 if (IS_ERR(rt)) {
2195 err = PTR_ERR(rt);
2196 rt = NULL;
6b9ea5a6 2197 goto out;
8c5b83f0 2198 }
6b9ea5a6 2199
e715b6d3
FW
2200 err = ip6_convert_metrics(&mxc, cfg);
2201 if (err)
2202 goto out;
1da177e4 2203
333c4301 2204 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
e715b6d3
FW
2205
2206 kfree(mxc.mx);
6b9ea5a6 2207
e715b6d3 2208 return err;
1da177e4 2209out:
587fea74
WW
2210 if (rt)
2211 dst_release_immediate(&rt->dst);
6b9ea5a6 2212
1da177e4
LT
2213 return err;
2214}
2215
86872cb5 2216static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2217{
2218 int err;
c71099ac 2219 struct fib6_table *table;
d1918542 2220 struct net *net = dev_net(rt->dst.dev);
1da177e4 2221
a4c2fd7f 2222 if (rt == net->ipv6.ip6_null_entry) {
6825a26c
G
2223 err = -ENOENT;
2224 goto out;
2225 }
6c813a72 2226
c71099ac
TG
2227 table = rt->rt6i_table;
2228 write_lock_bh(&table->tb6_lock);
86872cb5 2229 err = fib6_del(rt, info);
c71099ac 2230 write_unlock_bh(&table->tb6_lock);
1da177e4 2231
6825a26c 2232out:
94e187c0 2233 ip6_rt_put(rt);
1da177e4
LT
2234 return err;
2235}
2236
e0a1ad73
TG
2237int ip6_del_rt(struct rt6_info *rt)
2238{
4d1169c1 2239 struct nl_info info = {
d1918542 2240 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2241 };
528c4ceb 2242 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2243}
2244
0ae81335
DA
2245static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2246{
2247 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 2248 struct net *net = info->nl_net;
16a16cd3 2249 struct sk_buff *skb = NULL;
0ae81335 2250 struct fib6_table *table;
e3330039 2251 int err = -ENOENT;
0ae81335 2252
e3330039
WC
2253 if (rt == net->ipv6.ip6_null_entry)
2254 goto out_put;
0ae81335
DA
2255 table = rt->rt6i_table;
2256 write_lock_bh(&table->tb6_lock);
2257
2258 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2259 struct rt6_info *sibling, *next_sibling;
2260
16a16cd3
DA
2261 /* prefer to send a single notification with all hops */
2262 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
2263 if (skb) {
2264 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2265
e3330039 2266 if (rt6_fill_node(net, skb, rt,
16a16cd3
DA
2267 NULL, NULL, 0, RTM_DELROUTE,
2268 info->portid, seq, 0) < 0) {
2269 kfree_skb(skb);
2270 skb = NULL;
2271 } else
2272 info->skip_notify = 1;
2273 }
2274
0ae81335
DA
2275 list_for_each_entry_safe(sibling, next_sibling,
2276 &rt->rt6i_siblings,
2277 rt6i_siblings) {
2278 err = fib6_del(sibling, info);
2279 if (err)
e3330039 2280 goto out_unlock;
0ae81335
DA
2281 }
2282 }
2283
2284 err = fib6_del(rt, info);
e3330039 2285out_unlock:
0ae81335 2286 write_unlock_bh(&table->tb6_lock);
e3330039 2287out_put:
0ae81335 2288 ip6_rt_put(rt);
16a16cd3
DA
2289
2290 if (skb) {
e3330039 2291 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
2292 info->nlh, gfp_any());
2293 }
0ae81335
DA
2294 return err;
2295}
2296
333c4301
DA
2297static int ip6_route_del(struct fib6_config *cfg,
2298 struct netlink_ext_ack *extack)
1da177e4 2299{
c71099ac 2300 struct fib6_table *table;
1da177e4
LT
2301 struct fib6_node *fn;
2302 struct rt6_info *rt;
2303 int err = -ESRCH;
2304
5578689a 2305 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
2306 if (!table) {
2307 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 2308 return err;
d5d531cb 2309 }
c71099ac
TG
2310
2311 read_lock_bh(&table->tb6_lock);
1da177e4 2312
c71099ac 2313 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
2314 &cfg->fc_dst, cfg->fc_dst_len,
2315 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 2316
1da177e4 2317 if (fn) {
d8d1f30b 2318 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
2319 if ((rt->rt6i_flags & RTF_CACHE) &&
2320 !(cfg->fc_flags & RTF_CACHE))
2321 continue;
86872cb5 2322 if (cfg->fc_ifindex &&
d1918542
DM
2323 (!rt->dst.dev ||
2324 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 2325 continue;
86872cb5
TG
2326 if (cfg->fc_flags & RTF_GATEWAY &&
2327 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2328 continue;
86872cb5 2329 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2330 continue;
c2ed1880
M
2331 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2332 continue;
d8d1f30b 2333 dst_hold(&rt->dst);
c71099ac 2334 read_unlock_bh(&table->tb6_lock);
1da177e4 2335
0ae81335
DA
2336 /* if gateway was specified only delete the one hop */
2337 if (cfg->fc_flags & RTF_GATEWAY)
2338 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2339
2340 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
2341 }
2342 }
c71099ac 2343 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2344
2345 return err;
2346}
2347
6700c270 2348static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 2349{
a6279458 2350 struct netevent_redirect netevent;
e8599ff4 2351 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
2352 struct ndisc_options ndopts;
2353 struct inet6_dev *in6_dev;
2354 struct neighbour *neigh;
71bcdba0 2355 struct rd_msg *msg;
6e157b6a
DM
2356 int optlen, on_link;
2357 u8 *lladdr;
e8599ff4 2358
29a3cad5 2359 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 2360 optlen -= sizeof(*msg);
e8599ff4
DM
2361
2362 if (optlen < 0) {
6e157b6a 2363 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
2364 return;
2365 }
2366
71bcdba0 2367 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 2368
71bcdba0 2369 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 2370 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
2371 return;
2372 }
2373
6e157b6a 2374 on_link = 0;
71bcdba0 2375 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 2376 on_link = 1;
71bcdba0 2377 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 2378 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 2379 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
2380 return;
2381 }
2382
2383 in6_dev = __in6_dev_get(skb->dev);
2384 if (!in6_dev)
2385 return;
2386 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2387 return;
2388
2389 /* RFC2461 8.1:
2390 * The IP source address of the Redirect MUST be the same as the current
2391 * first-hop router for the specified ICMP Destination Address.
2392 */
2393
f997c55c 2394 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
2395 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2396 return;
2397 }
6e157b6a
DM
2398
2399 lladdr = NULL;
e8599ff4
DM
2400 if (ndopts.nd_opts_tgt_lladdr) {
2401 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2402 skb->dev);
2403 if (!lladdr) {
2404 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2405 return;
2406 }
2407 }
2408
6e157b6a 2409 rt = (struct rt6_info *) dst;
ec13ad1d 2410 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 2411 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 2412 return;
6e157b6a 2413 }
e8599ff4 2414
6e157b6a
DM
2415 /* Redirect received -> path was valid.
2416 * Look, redirects are sent only in response to data packets,
2417 * so that this nexthop apparently is reachable. --ANK
2418 */
0dec879f 2419 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 2420
71bcdba0 2421 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
2422 if (!neigh)
2423 return;
a6279458 2424
1da177e4
LT
2425 /*
2426 * We have finally decided to accept it.
2427 */
2428
f997c55c 2429 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
2430 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2431 NEIGH_UPDATE_F_OVERRIDE|
2432 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
2433 NEIGH_UPDATE_F_ISROUTER)),
2434 NDISC_REDIRECT, &ndopts);
1da177e4 2435
83a09abd 2436 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 2437 if (!nrt)
1da177e4
LT
2438 goto out;
2439
2440 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2441 if (on_link)
2442 nrt->rt6i_flags &= ~RTF_GATEWAY;
2443
b91d5329 2444 nrt->rt6i_protocol = RTPROT_REDIRECT;
4e3fd7a0 2445 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 2446
40e22e8f 2447 if (ip6_ins_rt(nrt))
1cfb71ee 2448 goto out_release;
1da177e4 2449
d8d1f30b
CG
2450 netevent.old = &rt->dst;
2451 netevent.new = &nrt->dst;
71bcdba0 2452 netevent.daddr = &msg->dest;
60592833 2453 netevent.neigh = neigh;
8d71740c
TT
2454 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2455
38308473 2456 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 2457 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 2458 ip6_del_rt(rt);
1da177e4
LT
2459 }
2460
1cfb71ee
WW
2461out_release:
2462 /* Release the reference taken in
2463 * ip6_rt_cache_alloc()
2464 */
2465 dst_release(&nrt->dst);
2466
1da177e4 2467out:
e8599ff4 2468 neigh_release(neigh);
6e157b6a
DM
2469}
2470
1da177e4
LT
2471/*
2472 * Misc support functions
2473 */
2474
4b32b5ad
MKL
2475static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2476{
2477 BUG_ON(from->dst.from);
2478
2479 rt->rt6i_flags &= ~RTF_EXPIRES;
2480 dst_hold(&from->dst);
2481 rt->dst.from = &from->dst;
2482 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2483}
2484
83a09abd
MKL
2485static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2486{
2487 rt->dst.input = ort->dst.input;
2488 rt->dst.output = ort->dst.output;
2489 rt->rt6i_dst = ort->rt6i_dst;
2490 rt->dst.error = ort->dst.error;
2491 rt->rt6i_idev = ort->rt6i_idev;
2492 if (rt->rt6i_idev)
2493 in6_dev_hold(rt->rt6i_idev);
2494 rt->dst.lastuse = jiffies;
2495 rt->rt6i_gateway = ort->rt6i_gateway;
2496 rt->rt6i_flags = ort->rt6i_flags;
2497 rt6_set_from(rt, ort);
2498 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 2499#ifdef CONFIG_IPV6_SUBTREES
83a09abd 2500 rt->rt6i_src = ort->rt6i_src;
1da177e4 2501#endif
83a09abd
MKL
2502 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2503 rt->rt6i_table = ort->rt6i_table;
61adedf3 2504 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
2505}
2506
70ceb4f5 2507#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 2508static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 2509 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
2510 const struct in6_addr *gwaddr,
2511 struct net_device *dev)
70ceb4f5 2512{
deb4d858 2513 u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO);
70ceb4f5
YH
2514 struct fib6_node *fn;
2515 struct rt6_info *rt = NULL;
c71099ac
TG
2516 struct fib6_table *table;
2517
830218c1 2518 table = fib6_get_table(net, tb_id);
38308473 2519 if (!table)
c71099ac 2520 return NULL;
70ceb4f5 2521
5744dd9b 2522 read_lock_bh(&table->tb6_lock);
67ba4152 2523 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
2524 if (!fn)
2525 goto out;
2526
d8d1f30b 2527 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
deb4d858 2528 if (rt->dst.dev->ifindex != dev->ifindex)
70ceb4f5
YH
2529 continue;
2530 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2531 continue;
2532 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2533 continue;
d8d1f30b 2534 dst_hold(&rt->dst);
70ceb4f5
YH
2535 break;
2536 }
2537out:
5744dd9b 2538 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
2539 return rt;
2540}
2541
efa2cea0 2542static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 2543 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
2544 const struct in6_addr *gwaddr,
2545 struct net_device *dev,
95c96174 2546 unsigned int pref)
70ceb4f5 2547{
86872cb5 2548 struct fib6_config cfg = {
238fc7ea 2549 .fc_metric = IP6_RT_PRIO_USER,
830218c1 2550 .fc_ifindex = dev->ifindex,
86872cb5
TG
2551 .fc_dst_len = prefixlen,
2552 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2553 RTF_UP | RTF_PREF(pref),
b91d5329 2554 .fc_protocol = RTPROT_RA,
15e47304 2555 .fc_nlinfo.portid = 0,
efa2cea0
DL
2556 .fc_nlinfo.nlh = NULL,
2557 .fc_nlinfo.nl_net = net,
86872cb5
TG
2558 };
2559
deb4d858 2560 cfg.fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO),
4e3fd7a0
AD
2561 cfg.fc_dst = *prefix;
2562 cfg.fc_gateway = *gwaddr;
70ceb4f5 2563
e317da96
YH
2564 /* We should treat it as a default route if prefix length is 0. */
2565 if (!prefixlen)
86872cb5 2566 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2567
333c4301 2568 ip6_route_add(&cfg, NULL);
70ceb4f5 2569
830218c1 2570 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
2571}
2572#endif
2573
b71d1d42 2574struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 2575{
deb4d858 2576 u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_MAIN);
1da177e4 2577 struct rt6_info *rt;
c71099ac 2578 struct fib6_table *table;
1da177e4 2579
830218c1 2580 table = fib6_get_table(dev_net(dev), tb_id);
38308473 2581 if (!table)
c71099ac 2582 return NULL;
1da177e4 2583
5744dd9b 2584 read_lock_bh(&table->tb6_lock);
67ba4152 2585 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2586 if (dev == rt->dst.dev &&
045927ff 2587 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
2588 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2589 break;
2590 }
2591 if (rt)
d8d1f30b 2592 dst_hold(&rt->dst);
5744dd9b 2593 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2594 return rt;
2595}
2596
b71d1d42 2597struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
2598 struct net_device *dev,
2599 unsigned int pref)
1da177e4 2600{
86872cb5 2601 struct fib6_config cfg = {
deb4d858 2602 .fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_DFLT),
238fc7ea 2603 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2604 .fc_ifindex = dev->ifindex,
2605 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2606 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 2607 .fc_protocol = RTPROT_RA,
15e47304 2608 .fc_nlinfo.portid = 0,
5578689a 2609 .fc_nlinfo.nlh = NULL,
c346dca1 2610 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 2611 };
1da177e4 2612
4e3fd7a0 2613 cfg.fc_gateway = *gwaddr;
1da177e4 2614
333c4301 2615 if (!ip6_route_add(&cfg, NULL)) {
830218c1
DA
2616 struct fib6_table *table;
2617
2618 table = fib6_get_table(dev_net(dev), cfg.fc_table);
2619 if (table)
2620 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
2621 }
1da177e4 2622
1da177e4
LT
2623 return rt6_get_dflt_router(gwaddr, dev);
2624}
2625
deb4d858
LC
2626int rt6_addrconf_purge(struct rt6_info *rt, void *arg) {
2627 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2628 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2))
2629 return -1;
2630 return 0;
830218c1
DA
2631}
2632
2633void rt6_purge_dflt_routers(struct net *net)
2634{
deb4d858 2635 fib6_clean_all(net, rt6_addrconf_purge, NULL);
1da177e4
LT
2636}
2637
5578689a
DL
2638static void rtmsg_to_fib6_config(struct net *net,
2639 struct in6_rtmsg *rtmsg,
86872cb5
TG
2640 struct fib6_config *cfg)
2641{
2642 memset(cfg, 0, sizeof(*cfg));
2643
ca254490
DA
2644 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2645 : RT6_TABLE_MAIN;
86872cb5
TG
2646 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2647 cfg->fc_metric = rtmsg->rtmsg_metric;
2648 cfg->fc_expires = rtmsg->rtmsg_info;
2649 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2650 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2651 cfg->fc_flags = rtmsg->rtmsg_flags;
2652
5578689a 2653 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2654
4e3fd7a0
AD
2655 cfg->fc_dst = rtmsg->rtmsg_dst;
2656 cfg->fc_src = rtmsg->rtmsg_src;
2657 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2658}
2659
5578689a 2660int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2661{
86872cb5 2662 struct fib6_config cfg;
1da177e4
LT
2663 struct in6_rtmsg rtmsg;
2664 int err;
2665
67ba4152 2666 switch (cmd) {
1da177e4
LT
2667 case SIOCADDRT: /* Add a route */
2668 case SIOCDELRT: /* Delete a route */
af31f412 2669 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2670 return -EPERM;
2671 err = copy_from_user(&rtmsg, arg,
2672 sizeof(struct in6_rtmsg));
2673 if (err)
2674 return -EFAULT;
86872cb5 2675
5578689a 2676 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2677
1da177e4
LT
2678 rtnl_lock();
2679 switch (cmd) {
2680 case SIOCADDRT:
333c4301 2681 err = ip6_route_add(&cfg, NULL);
1da177e4
LT
2682 break;
2683 case SIOCDELRT:
333c4301 2684 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
2685 break;
2686 default:
2687 err = -EINVAL;
2688 }
2689 rtnl_unlock();
2690
2691 return err;
3ff50b79 2692 }
1da177e4
LT
2693
2694 return -EINVAL;
2695}
2696
2697/*
2698 * Drop the packet on the floor
2699 */
2700
d5fdd6ba 2701static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2702{
612f09e8 2703 int type;
adf30907 2704 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2705 switch (ipstats_mib_noroutes) {
2706 case IPSTATS_MIB_INNOROUTES:
0660e03f 2707 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2708 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2709 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2710 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2711 break;
2712 }
2713 /* FALLTHROUGH */
2714 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2715 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2716 ipstats_mib_noroutes);
612f09e8
YH
2717 break;
2718 }
3ffe533c 2719 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2720 kfree_skb(skb);
2721 return 0;
2722}
2723
9ce8ade0
TG
2724static int ip6_pkt_discard(struct sk_buff *skb)
2725{
612f09e8 2726 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2727}
2728
ede2059d 2729static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 2730{
adf30907 2731 skb->dev = skb_dst(skb)->dev;
612f09e8 2732 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2733}
2734
9ce8ade0
TG
2735static int ip6_pkt_prohibit(struct sk_buff *skb)
2736{
612f09e8 2737 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2738}
2739
ede2059d 2740static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 2741{
adf30907 2742 skb->dev = skb_dst(skb)->dev;
612f09e8 2743 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2744}
2745
1da177e4
LT
2746/*
2747 * Allocate a dst for local (unicast / anycast) address.
2748 */
2749
2750struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2751 const struct in6_addr *addr,
8f031519 2752 bool anycast)
1da177e4 2753{
ca254490 2754 u32 tb_id;
c346dca1 2755 struct net *net = dev_net(idev->dev);
4832c30d 2756 struct net_device *dev = idev->dev;
5f02ce24
DA
2757 struct rt6_info *rt;
2758
5f02ce24 2759 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
a3300ef4 2760 if (!rt)
1da177e4
LT
2761 return ERR_PTR(-ENOMEM);
2762
1da177e4
LT
2763 in6_dev_hold(idev);
2764
11d53b49 2765 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2766 rt->dst.input = ip6_input;
2767 rt->dst.output = ip6_output;
1da177e4 2768 rt->rt6i_idev = idev;
1da177e4 2769
94b5e0f9 2770 rt->rt6i_protocol = RTPROT_KERNEL;
1da177e4 2771 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2772 if (anycast)
2773 rt->rt6i_flags |= RTF_ANYCAST;
2774 else
1da177e4 2775 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 2776
550bab42 2777 rt->rt6i_gateway = *addr;
4e3fd7a0 2778 rt->rt6i_dst.addr = *addr;
1da177e4 2779 rt->rt6i_dst.plen = 128;
ca254490
DA
2780 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2781 rt->rt6i_table = fib6_get_table(net, tb_id);
1da177e4 2782
1da177e4
LT
2783 return rt;
2784}
2785
c3968a85
DW
2786/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device to match; NULL matches any device */
	struct net *net;	/* namespace whose null entry is skipped */
	struct in6_addr *addr;	/* address compared against rt6i_prefsrc.addr */
};
2792
2793static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2794{
2795 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2796 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2797 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2798
d1918542 2799 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2800 rt != net->ipv6.ip6_null_entry &&
2801 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2802 /* remove prefsrc entry */
2803 rt->rt6i_prefsrc.plen = 0;
2804 }
2805 return 0;
2806}
2807
2808void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2809{
2810 struct net *net = dev_net(ifp->idev->dev);
2811 struct arg_dev_net_ip adni = {
2812 .dev = ifp->idev->dev,
2813 .net = net,
2814 .addr = &ifp->addr,
2815 };
0c3584d5 2816 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
2817}
2818
be7a010d
DJ
2819#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2820#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2821
2822/* Remove routers and update dst entries when gateway turn into host. */
2823static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2824{
2825 struct in6_addr *gateway = (struct in6_addr *)arg;
2826
2827 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2828 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2829 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2830 return -1;
2831 }
2832 return 0;
2833}
2834
2835void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2836{
2837 fib6_clean_all(net, fib6_clean_tohost, gateway);
2838}
2839
8ed67789
DL
/* Argument bundle handed to fib6_ifdown() through fib6_clean_all(). */
struct arg_dev_net {
	struct net_device *dev;	/* device to match; NULL matches any device */
	struct net *net;	/* namespace whose null entry is skipped */
};
2844
a1a22c12 2845/* called with write lock held for table with rt */
1da177e4
LT
2846static int fib6_ifdown(struct rt6_info *rt, void *arg)
2847{
bc3ef660 2848 const struct arg_dev_net *adn = arg;
2849 const struct net_device *dev = adn->dev;
8ed67789 2850
d1918542 2851 if ((rt->dst.dev == dev || !dev) &&
a1a22c12
DA
2852 rt != adn->net->ipv6.ip6_null_entry &&
2853 (rt->rt6i_nsiblings == 0 ||
8397ed36 2854 (dev && netdev_unregistering(dev)) ||
a1a22c12 2855 !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
1da177e4 2856 return -1;
c159d30c 2857
1da177e4
LT
2858 return 0;
2859}
2860
f3db4851 2861void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2862{
8ed67789
DL
2863 struct arg_dev_net adn = {
2864 .dev = dev,
2865 .net = net,
2866 };
2867
0c3584d5 2868 fib6_clean_all(net, fib6_ifdown, &adn);
e332bc67
EB
2869 if (dev)
2870 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
2871}
2872
/* Argument bundle handed to rt6_mtu_change_route() through fib6_clean_all(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU value */
};
2877
2878static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2879{
2880 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2881 struct inet6_dev *idev;
2882
2883 /* In IPv6 pmtu discovery is not optional,
2884 so that RTAX_MTU lock cannot disable it.
2885 We still use this lock to block changes
2886 caused by addrconf/ndisc.
2887 */
2888
2889 idev = __in6_dev_get(arg->dev);
38308473 2890 if (!idev)
1da177e4
LT
2891 return 0;
2892
2893 /* For administrative MTU increase, there is no way to discover
2894 IPv6 PMTU increase, so PMTU increase should be updated here.
2895 Since RFC 1981 doesn't include administrative MTU increase
2896 update PMTU increase is a MUST. (i.e. jumbo frame)
2897 */
2898 /*
2899 If new MTU is less than route PMTU, this new MTU will be the
2900 lowest MTU in the path, update the route PMTU to reflect PMTU
2901 decreases; if new MTU is greater than route PMTU, and the
2902 old MTU is the lowest MTU in the path, update the route PMTU
2903 to reflect the increase. In this case if the other nodes' MTU
2904 also have the lowest MTU, TOO BIG MESSAGE will be lead to
67c408cf 2905 PMTU discovery.
1da177e4 2906 */
d1918542 2907 if (rt->dst.dev == arg->dev &&
fb56be83 2908 dst_metric_raw(&rt->dst, RTAX_MTU) &&
4b32b5ad
MKL
2909 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2910 if (rt->rt6i_flags & RTF_CACHE) {
2911 /* For RTF_CACHE with rt6i_pmtu == 0
2912 * (i.e. a redirected route),
2913 * the metrics of its rt->dst.from has already
2914 * been updated.
2915 */
2916 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2917 rt->rt6i_pmtu = arg->mtu;
2918 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2919 (dst_mtu(&rt->dst) < arg->mtu &&
2920 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2921 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2922 }
566cfd8f 2923 }
1da177e4
LT
2924 return 0;
2925}
2926
95c96174 2927void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2928{
c71099ac
TG
2929 struct rt6_mtu_change_arg arg = {
2930 .dev = dev,
2931 .mtu = mtu,
2932 };
1da177e4 2933
0c3584d5 2934 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
2935}
2936
ef7c79ed 2937static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2938 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
8d34c677 2939 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
86872cb5 2940 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2941 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2942 [RTA_PRIORITY] = { .type = NLA_U32 },
2943 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2944 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 2945 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
2946 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
2947 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 2948 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 2949 [RTA_UID] = { .type = NLA_U32 },
3b45a410 2950 [RTA_MARK] = { .type = NLA_U32 },
8d34c677 2951 [RTA_TABLE] = { .type = NLA_U32 },
86872cb5
TG
2952};
2953
2954static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
2955 struct fib6_config *cfg,
2956 struct netlink_ext_ack *extack)
1da177e4 2957{
86872cb5
TG
2958 struct rtmsg *rtm;
2959 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 2960 unsigned int pref;
86872cb5 2961 int err;
1da177e4 2962
fceb6435
JB
2963 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
2964 NULL);
86872cb5
TG
2965 if (err < 0)
2966 goto errout;
1da177e4 2967
86872cb5
TG
2968 err = -EINVAL;
2969 rtm = nlmsg_data(nlh);
2970 memset(cfg, 0, sizeof(*cfg));
2971
2972 cfg->fc_table = rtm->rtm_table;
2973 cfg->fc_dst_len = rtm->rtm_dst_len;
2974 cfg->fc_src_len = rtm->rtm_src_len;
2975 cfg->fc_flags = RTF_UP;
2976 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2977 cfg->fc_type = rtm->rtm_type;
86872cb5 2978
ef2c7d7b
ND
2979 if (rtm->rtm_type == RTN_UNREACHABLE ||
2980 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2981 rtm->rtm_type == RTN_PROHIBIT ||
2982 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2983 cfg->fc_flags |= RTF_REJECT;
2984
ab79ad14
2985 if (rtm->rtm_type == RTN_LOCAL)
2986 cfg->fc_flags |= RTF_LOCAL;
2987
1f56a01f
MKL
2988 if (rtm->rtm_flags & RTM_F_CLONED)
2989 cfg->fc_flags |= RTF_CACHE;
2990
15e47304 2991 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2992 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2993 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2994
2995 if (tb[RTA_GATEWAY]) {
67b61f6c 2996 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 2997 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2998 }
87964ee1
DA
2999 if (tb[RTA_VIA]) {
3000 NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
3001 goto errout;
3002 }
86872cb5
TG
3003
3004 if (tb[RTA_DST]) {
3005 int plen = (rtm->rtm_dst_len + 7) >> 3;
3006
3007 if (nla_len(tb[RTA_DST]) < plen)
3008 goto errout;
3009
3010 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 3011 }
86872cb5
TG
3012
3013 if (tb[RTA_SRC]) {
3014 int plen = (rtm->rtm_src_len + 7) >> 3;
3015
3016 if (nla_len(tb[RTA_SRC]) < plen)
3017 goto errout;
3018
3019 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 3020 }
86872cb5 3021
c3968a85 3022 if (tb[RTA_PREFSRC])
67b61f6c 3023 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 3024
86872cb5
TG
3025 if (tb[RTA_OIF])
3026 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
3027
3028 if (tb[RTA_PRIORITY])
3029 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
3030
3031 if (tb[RTA_METRICS]) {
3032 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
3033 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 3034 }
86872cb5
TG
3035
3036 if (tb[RTA_TABLE])
3037 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
3038
51ebd318
ND
3039 if (tb[RTA_MULTIPATH]) {
3040 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
3041 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
3042
3043 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 3044 cfg->fc_mp_len, extack);
9ed59592
DA
3045 if (err < 0)
3046 goto errout;
51ebd318
ND
3047 }
3048
c78ba6d6
LR
3049 if (tb[RTA_PREF]) {
3050 pref = nla_get_u8(tb[RTA_PREF]);
3051 if (pref != ICMPV6_ROUTER_PREF_LOW &&
3052 pref != ICMPV6_ROUTER_PREF_HIGH)
3053 pref = ICMPV6_ROUTER_PREF_MEDIUM;
3054 cfg->fc_flags |= RTF_PREF(pref);
3055 }
3056
19e42e45
RP
3057 if (tb[RTA_ENCAP])
3058 cfg->fc_encap = tb[RTA_ENCAP];
3059
9ed59592 3060 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
3061 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
3062
c255bd68 3063 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
3064 if (err < 0)
3065 goto errout;
3066 }
3067
32bc201e
XL
3068 if (tb[RTA_EXPIRES]) {
3069 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
3070
3071 if (addrconf_finite_timeout(timeout)) {
3072 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
3073 cfg->fc_flags |= RTF_EXPIRES;
3074 }
3075 }
3076
86872cb5
TG
3077 err = 0;
3078errout:
3079 return err;
1da177e4
LT
3080}
3081
6b9ea5a6
RP
3082struct rt6_nh {
3083 struct rt6_info *rt6_info;
3084 struct fib6_config r_cfg;
3085 struct mx6_config mxc;
3086 struct list_head next;
3087};
3088
3089static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3090{
3091 struct rt6_nh *nh;
3092
3093 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 3094 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
3095 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
3096 nh->r_cfg.fc_ifindex);
3097 }
3098}
3099
3100static int ip6_route_info_append(struct list_head *rt6_nh_list,
3101 struct rt6_info *rt, struct fib6_config *r_cfg)
3102{
3103 struct rt6_nh *nh;
6b9ea5a6
RP
3104 int err = -EEXIST;
3105
3106 list_for_each_entry(nh, rt6_nh_list, next) {
3107 /* check if rt6_info already exists */
f06b7549 3108 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
6b9ea5a6
RP
3109 return err;
3110 }
3111
3112 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
3113 if (!nh)
3114 return -ENOMEM;
3115 nh->rt6_info = rt;
3116 err = ip6_convert_metrics(&nh->mxc, r_cfg);
3117 if (err) {
3118 kfree(nh);
3119 return err;
3120 }
3121 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3122 list_add_tail(&nh->next, rt6_nh_list);
3123
3124 return 0;
3125}
3126
3b1137fe
DA
3127static void ip6_route_mpath_notify(struct rt6_info *rt,
3128 struct rt6_info *rt_last,
3129 struct nl_info *info,
3130 __u16 nlflags)
3131{
3132 /* if this is an APPEND route, then rt points to the first route
3133 * inserted and rt_last points to last route inserted. Userspace
3134 * wants a consistent dump of the route which starts at the first
3135 * nexthop. Since sibling routes are always added at the end of
3136 * the list, find the first sibling of the last route appended
3137 */
3138 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
3139 rt = list_first_entry(&rt_last->rt6i_siblings,
3140 struct rt6_info,
3141 rt6i_siblings);
3142 }
3143
3144 if (rt)
3145 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
3146}
3147
333c4301
DA
3148static int ip6_route_multipath_add(struct fib6_config *cfg,
3149 struct netlink_ext_ack *extack)
51ebd318 3150{
3b1137fe
DA
3151 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
3152 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
3153 struct fib6_config r_cfg;
3154 struct rtnexthop *rtnh;
6b9ea5a6
RP
3155 struct rt6_info *rt;
3156 struct rt6_nh *err_nh;
3157 struct rt6_nh *nh, *nh_safe;
3b1137fe 3158 __u16 nlflags;
51ebd318
ND
3159 int remaining;
3160 int attrlen;
6b9ea5a6
RP
3161 int err = 1;
3162 int nhn = 0;
3163 int replace = (cfg->fc_nlinfo.nlh &&
3164 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3165 LIST_HEAD(rt6_nh_list);
51ebd318 3166
3b1137fe
DA
3167 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
3168 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
3169 nlflags |= NLM_F_APPEND;
3170
35f1b4e9 3171 remaining = cfg->fc_mp_len;
51ebd318 3172 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 3173
6b9ea5a6
RP
3174 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
3175 * rt6_info structs per nexthop
3176 */
51ebd318
ND
3177 while (rtnh_ok(rtnh, remaining)) {
3178 memcpy(&r_cfg, cfg, sizeof(*cfg));
3179 if (rtnh->rtnh_ifindex)
3180 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3181
3182 attrlen = rtnh_attrlen(rtnh);
3183 if (attrlen > 0) {
3184 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3185
3186 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3187 if (nla) {
67b61f6c 3188 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
3189 r_cfg.fc_flags |= RTF_GATEWAY;
3190 }
19e42e45
RP
3191 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3192 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3193 if (nla)
3194 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 3195 }
6b9ea5a6 3196
333c4301 3197 rt = ip6_route_info_create(&r_cfg, extack);
8c5b83f0
RP
3198 if (IS_ERR(rt)) {
3199 err = PTR_ERR(rt);
3200 rt = NULL;
6b9ea5a6 3201 goto cleanup;
8c5b83f0 3202 }
6b9ea5a6
RP
3203
3204 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 3205 if (err) {
587fea74 3206 dst_release_immediate(&rt->dst);
6b9ea5a6
RP
3207 goto cleanup;
3208 }
3209
3210 rtnh = rtnh_next(rtnh, &remaining);
3211 }
3212
3b1137fe
DA
3213 /* for add and replace send one notification with all nexthops.
3214 * Skip the notification in fib6_add_rt2node and send one with
3215 * the full route when done
3216 */
3217 info->skip_notify = 1;
3218
6b9ea5a6
RP
3219 err_nh = NULL;
3220 list_for_each_entry(nh, &rt6_nh_list, next) {
333c4301 3221 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
57d6f87a
DA
3222
3223 if (!err) {
3224 /* save reference to last route successfully inserted */
3225 rt_last = nh->rt6_info;
3226
3227 /* save reference to first route for notification */
3228 if (!rt_notif)
3229 rt_notif = nh->rt6_info;
3230 }
3b1137fe 3231
6b9ea5a6
RP
3232 /* nh->rt6_info is used or freed at this point, reset to NULL*/
3233 nh->rt6_info = NULL;
3234 if (err) {
3235 if (replace && nhn)
3236 ip6_print_replace_route_err(&rt6_nh_list);
3237 err_nh = nh;
3238 goto add_errout;
51ebd318 3239 }
6b9ea5a6 3240
1a72418b 3241 /* Because each route is added like a single route we remove
27596472
MK
3242 * these flags after the first nexthop: if there is a collision,
3243 * we have already failed to add the first nexthop:
3244 * fib6_add_rt2node() has rejected it; when replacing, old
3245 * nexthops have been replaced by first new, the rest should
3246 * be added to it.
1a72418b 3247 */
27596472
MK
3248 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3249 NLM_F_REPLACE);
6b9ea5a6
RP
3250 nhn++;
3251 }
3252
3b1137fe
DA
3253 /* success ... tell user about new route */
3254 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
3255 goto cleanup;
3256
3257add_errout:
3b1137fe
DA
3258 /* send notification for routes that were added so that
3259 * the delete notifications sent by ip6_route_del are
3260 * coherent
3261 */
3262 if (rt_notif)
3263 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3264
6b9ea5a6
RP
3265 /* Delete routes that were already added */
3266 list_for_each_entry(nh, &rt6_nh_list, next) {
3267 if (err_nh == nh)
3268 break;
333c4301 3269 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
3270 }
3271
3272cleanup:
3273 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
587fea74
WW
3274 if (nh->rt6_info)
3275 dst_release_immediate(&nh->rt6_info->dst);
52fe51f8 3276 kfree(nh->mxc.mx);
6b9ea5a6
RP
3277 list_del(&nh->next);
3278 kfree(nh);
3279 }
3280
3281 return err;
3282}
3283
333c4301
DA
3284static int ip6_route_multipath_del(struct fib6_config *cfg,
3285 struct netlink_ext_ack *extack)
6b9ea5a6
RP
3286{
3287 struct fib6_config r_cfg;
3288 struct rtnexthop *rtnh;
3289 int remaining;
3290 int attrlen;
3291 int err = 1, last_err = 0;
3292
3293 remaining = cfg->fc_mp_len;
3294 rtnh = (struct rtnexthop *)cfg->fc_mp;
3295
3296 /* Parse a Multipath Entry */
3297 while (rtnh_ok(rtnh, remaining)) {
3298 memcpy(&r_cfg, cfg, sizeof(*cfg));
3299 if (rtnh->rtnh_ifindex)
3300 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3301
3302 attrlen = rtnh_attrlen(rtnh);
3303 if (attrlen > 0) {
3304 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3305
3306 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3307 if (nla) {
3308 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3309 r_cfg.fc_flags |= RTF_GATEWAY;
3310 }
3311 }
333c4301 3312 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
3313 if (err)
3314 last_err = err;
3315
51ebd318
ND
3316 rtnh = rtnh_next(rtnh, &remaining);
3317 }
3318
3319 return last_err;
3320}
3321
c21ef3e3
DA
3322static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3323 struct netlink_ext_ack *extack)
1da177e4 3324{
86872cb5
TG
3325 struct fib6_config cfg;
3326 int err;
1da177e4 3327
333c4301 3328 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
3329 if (err < 0)
3330 return err;
3331
51ebd318 3332 if (cfg.fc_mp)
333c4301 3333 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
3334 else {
3335 cfg.fc_delete_all_nh = 1;
333c4301 3336 return ip6_route_del(&cfg, extack);
0ae81335 3337 }
1da177e4
LT
3338}
3339
c21ef3e3
DA
3340static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3341 struct netlink_ext_ack *extack)
1da177e4 3342{
86872cb5
TG
3343 struct fib6_config cfg;
3344 int err;
1da177e4 3345
333c4301 3346 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
3347 if (err < 0)
3348 return err;
3349
51ebd318 3350 if (cfg.fc_mp)
333c4301 3351 return ip6_route_multipath_add(&cfg, extack);
51ebd318 3352 else
333c4301 3353 return ip6_route_add(&cfg, extack);
1da177e4
LT
3354}
3355
beb1afac 3356static size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f 3357{
beb1afac
DA
3358 int nexthop_len = 0;
3359
3360 if (rt->rt6i_nsiblings) {
3361 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
3362 + NLA_ALIGN(sizeof(struct rtnexthop))
3363 + nla_total_size(16) /* RTA_GATEWAY */
beb1afac
DA
3364 + lwtunnel_get_encap_size(rt->dst.lwtstate);
3365
3366 nexthop_len *= rt->rt6i_nsiblings;
3367 }
3368
339bf98f
TG
3369 return NLMSG_ALIGN(sizeof(struct rtmsg))
3370 + nla_total_size(16) /* RTA_SRC */
3371 + nla_total_size(16) /* RTA_DST */
3372 + nla_total_size(16) /* RTA_GATEWAY */
3373 + nla_total_size(16) /* RTA_PREFSRC */
3374 + nla_total_size(4) /* RTA_TABLE */
3375 + nla_total_size(4) /* RTA_IIF */
3376 + nla_total_size(4) /* RTA_OIF */
3377 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 3378 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 3379 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 3380 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 3381 + nla_total_size(1) /* RTA_PREF */
beb1afac
DA
3382 + lwtunnel_get_encap_size(rt->dst.lwtstate)
3383 + nexthop_len;
3384}
3385
3386static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
5be083ce 3387 unsigned int *flags, bool skip_oif)
beb1afac
DA
3388{
3389 if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
3390 *flags |= RTNH_F_LINKDOWN;
3391 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3392 *flags |= RTNH_F_DEAD;
3393 }
3394
3395 if (rt->rt6i_flags & RTF_GATEWAY) {
3396 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3397 goto nla_put_failure;
3398 }
3399
fe400799 3400 if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
3401 *flags |= RTNH_F_OFFLOAD;
3402
5be083ce
DA
3403 /* not needed for multipath encoding b/c it has a rtnexthop struct */
3404 if (!skip_oif && rt->dst.dev &&
beb1afac
DA
3405 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3406 goto nla_put_failure;
3407
3408 if (rt->dst.lwtstate &&
3409 lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3410 goto nla_put_failure;
3411
3412 return 0;
3413
3414nla_put_failure:
3415 return -EMSGSIZE;
3416}
3417
5be083ce 3418/* add multipath next hop */
beb1afac
DA
3419static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
3420{
3421 struct rtnexthop *rtnh;
3422 unsigned int flags = 0;
3423
3424 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
3425 if (!rtnh)
3426 goto nla_put_failure;
3427
3428 rtnh->rtnh_hops = 0;
3429 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
3430
5be083ce 3431 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
3432 goto nla_put_failure;
3433
3434 rtnh->rtnh_flags = flags;
3435
3436 /* length of rtnetlink header + attributes */
3437 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
3438
3439 return 0;
3440
3441nla_put_failure:
3442 return -EMSGSIZE;
339bf98f
TG
3443}
3444
191cd582
BH
3445static int rt6_fill_node(struct net *net,
3446 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 3447 struct in6_addr *dst, struct in6_addr *src,
15e47304 3448 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 3449 unsigned int flags)
1da177e4 3450{
4b32b5ad 3451 u32 metrics[RTAX_MAX];
1da177e4 3452 struct rtmsg *rtm;
2d7202bf 3453 struct nlmsghdr *nlh;
e3703b3d 3454 long expires;
9e762a4a 3455 u32 table;
1da177e4 3456
15e47304 3457 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 3458 if (!nlh)
26932566 3459 return -EMSGSIZE;
2d7202bf
TG
3460
3461 rtm = nlmsg_data(nlh);
1da177e4
LT
3462 rtm->rtm_family = AF_INET6;
3463 rtm->rtm_dst_len = rt->rt6i_dst.plen;
3464 rtm->rtm_src_len = rt->rt6i_src.plen;
3465 rtm->rtm_tos = 0;
c71099ac 3466 if (rt->rt6i_table)
9e762a4a 3467 table = rt->rt6i_table->tb6_id;
c71099ac 3468 else
9e762a4a 3469 table = RT6_TABLE_UNSPEC;
b3dc5b18 3470 rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
c78679e8
DM
3471 if (nla_put_u32(skb, RTA_TABLE, table))
3472 goto nla_put_failure;
ef2c7d7b
ND
3473 if (rt->rt6i_flags & RTF_REJECT) {
3474 switch (rt->dst.error) {
3475 case -EINVAL:
3476 rtm->rtm_type = RTN_BLACKHOLE;
3477 break;
3478 case -EACCES:
3479 rtm->rtm_type = RTN_PROHIBIT;
3480 break;
b4949ab2
ND
3481 case -EAGAIN:
3482 rtm->rtm_type = RTN_THROW;
3483 break;
ef2c7d7b
ND
3484 default:
3485 rtm->rtm_type = RTN_UNREACHABLE;
3486 break;
3487 }
3488 }
38308473 3489 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 3490 rtm->rtm_type = RTN_LOCAL;
4ee39733
DA
3491 else if (rt->rt6i_flags & RTF_ANYCAST)
3492 rtm->rtm_type = RTN_ANYCAST;
d1918542 3493 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
3494 rtm->rtm_type = RTN_LOCAL;
3495 else
3496 rtm->rtm_type = RTN_UNICAST;
3497 rtm->rtm_flags = 0;
3498 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3499 rtm->rtm_protocol = rt->rt6i_protocol;
1da177e4 3500
38308473 3501 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
3502 rtm->rtm_flags |= RTM_F_CLONED;
3503
3504 if (dst) {
930345ea 3505 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 3506 goto nla_put_failure;
1ab1457c 3507 rtm->rtm_dst_len = 128;
1da177e4 3508 } else if (rtm->rtm_dst_len)
930345ea 3509 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 3510 goto nla_put_failure;
1da177e4
LT
3511#ifdef CONFIG_IPV6_SUBTREES
3512 if (src) {
930345ea 3513 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 3514 goto nla_put_failure;
1ab1457c 3515 rtm->rtm_src_len = 128;
c78679e8 3516 } else if (rtm->rtm_src_len &&
930345ea 3517 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 3518 goto nla_put_failure;
1da177e4 3519#endif
7bc570c8
YH
3520 if (iif) {
3521#ifdef CONFIG_IPV6_MROUTE
3522 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
fd61c6ba
DA
3523 int err = ip6mr_get_route(net, skb, rtm, portid);
3524
3525 if (err == 0)
3526 return 0;
3527 if (err < 0)
3528 goto nla_put_failure;
7bc570c8
YH
3529 } else
3530#endif
c78679e8
DM
3531 if (nla_put_u32(skb, RTA_IIF, iif))
3532 goto nla_put_failure;
7bc570c8 3533 } else if (dst) {
1da177e4 3534 struct in6_addr saddr_buf;
c78679e8 3535 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 3536 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3537 goto nla_put_failure;
1da177e4 3538 }
2d7202bf 3539
c3968a85
DW
3540 if (rt->rt6i_prefsrc.plen) {
3541 struct in6_addr saddr_buf;
4e3fd7a0 3542 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 3543 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3544 goto nla_put_failure;
c3968a85
DW
3545 }
3546
4b32b5ad
MKL
3547 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3548 if (rt->rt6i_pmtu)
3549 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3550 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
3551 goto nla_put_failure;
3552
c78679e8
DM
3553 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3554 goto nla_put_failure;
8253947e 3555
beb1afac
DA
3556 /* For multipath routes, walk the siblings list and add
3557 * each as a nexthop within RTA_MULTIPATH.
3558 */
3559 if (rt->rt6i_nsiblings) {
3560 struct rt6_info *sibling, *next_sibling;
3561 struct nlattr *mp;
3562
3563 mp = nla_nest_start(skb, RTA_MULTIPATH);
3564 if (!mp)
3565 goto nla_put_failure;
3566
3567 if (rt6_add_nexthop(skb, rt) < 0)
3568 goto nla_put_failure;
3569
3570 list_for_each_entry_safe(sibling, next_sibling,
3571 &rt->rt6i_siblings, rt6i_siblings) {
3572 if (rt6_add_nexthop(skb, sibling) < 0)
3573 goto nla_put_failure;
3574 }
3575
3576 nla_nest_end(skb, mp);
3577 } else {
5be083ce 3578 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
3579 goto nla_put_failure;
3580 }
3581
8253947e 3582 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 3583
87a50699 3584 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 3585 goto nla_put_failure;
2d7202bf 3586
c78ba6d6
LR
3587 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3588 goto nla_put_failure;
3589
19e42e45 3590
053c095a
JB
3591 nlmsg_end(skb, nlh);
3592 return 0;
2d7202bf
TG
3593
3594nla_put_failure:
26932566
PM
3595 nlmsg_cancel(skb, nlh);
3596 return -EMSGSIZE;
1da177e4
LT
3597}
3598
1b43af54 3599int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
3600{
3601 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
3602 struct net *net = arg->net;
3603
3604 if (rt == net->ipv6.ip6_null_entry)
3605 return 0;
1da177e4 3606
2d7202bf
TG
3607 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3608 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
3609
3610 /* user wants prefix routes only */
3611 if (rtm->rtm_flags & RTM_F_PREFIX &&
3612 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
3613 /* success since this is not a prefix route */
3614 return 1;
3615 }
3616 }
1da177e4 3617
1f17e2f2 3618 return rt6_fill_node(net,
191cd582 3619 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 3620 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
f8cfe2ce 3621 NLM_F_MULTI);
1da177e4
LT
3622}
3623
c21ef3e3
DA
3624static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3625 struct netlink_ext_ack *extack)
1da177e4 3626{
3b1e0a65 3627 struct net *net = sock_net(in_skb->sk);
ab364a6f 3628 struct nlattr *tb[RTA_MAX+1];
18c3a61c
RP
3629 int err, iif = 0, oif = 0;
3630 struct dst_entry *dst;
ab364a6f 3631 struct rt6_info *rt;
1da177e4 3632 struct sk_buff *skb;
ab364a6f 3633 struct rtmsg *rtm;
4c9483b2 3634 struct flowi6 fl6;
18c3a61c 3635 bool fibmatch;
1da177e4 3636
fceb6435 3637 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 3638 extack);
ab364a6f
TG
3639 if (err < 0)
3640 goto errout;
1da177e4 3641
ab364a6f 3642 err = -EINVAL;
4c9483b2 3643 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
3644 rtm = nlmsg_data(nlh);
3645 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 3646 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 3647
ab364a6f
TG
3648 if (tb[RTA_SRC]) {
3649 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3650 goto errout;
3651
4e3fd7a0 3652 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
3653 }
3654
3655 if (tb[RTA_DST]) {
3656 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3657 goto errout;
3658
4e3fd7a0 3659 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
3660 }
3661
3662 if (tb[RTA_IIF])
3663 iif = nla_get_u32(tb[RTA_IIF]);
3664
3665 if (tb[RTA_OIF])
72331bc0 3666 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 3667
2e47b291
LC
3668 if (tb[RTA_MARK])
3669 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3670
622ec2c9
LC
3671 if (tb[RTA_UID])
3672 fl6.flowi6_uid = make_kuid(current_user_ns(),
3673 nla_get_u32(tb[RTA_UID]));
3674 else
3675 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
3676
1da177e4
LT
3677 if (iif) {
3678 struct net_device *dev;
72331bc0
SL
3679 int flags = 0;
3680
121622db
FW
3681 rcu_read_lock();
3682
3683 dev = dev_get_by_index_rcu(net, iif);
1da177e4 3684 if (!dev) {
121622db 3685 rcu_read_unlock();
1da177e4 3686 err = -ENODEV;
ab364a6f 3687 goto errout;
1da177e4 3688 }
72331bc0
SL
3689
3690 fl6.flowi6_iif = iif;
3691
3692 if (!ipv6_addr_any(&fl6.saddr))
3693 flags |= RT6_LOOKUP_F_HAS_SADDR;
3694
33392196 3695 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
121622db
FW
3696
3697 rcu_read_unlock();
72331bc0
SL
3698 } else {
3699 fl6.flowi6_oif = oif;
3700
33392196 3701 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
3702 }
3703
18c3a61c
RP
3704
3705 rt = container_of(dst, struct rt6_info, dst);
3706 if (rt->dst.error) {
3707 err = rt->dst.error;
3708 ip6_rt_put(rt);
3709 goto errout;
1da177e4
LT
3710 }
3711
9d6acb3b
WC
3712 if (rt == net->ipv6.ip6_null_entry) {
3713 err = rt->dst.error;
3714 ip6_rt_put(rt);
3715 goto errout;
3716 }
3717
33392196
IS
3718 if (fibmatch && rt->dst.from) {
3719 struct rt6_info *ort = container_of(rt->dst.from,
3720 struct rt6_info, dst);
3721
3722 dst_hold(&ort->dst);
3723 ip6_rt_put(rt);
3724 rt = ort;
3725 }
3726
ab364a6f 3727 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 3728 if (!skb) {
94e187c0 3729 ip6_rt_put(rt);
ab364a6f
TG
3730 err = -ENOBUFS;
3731 goto errout;
3732 }
1da177e4 3733
d8d1f30b 3734 skb_dst_set(skb, &rt->dst);
18c3a61c
RP
3735 if (fibmatch)
3736 err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
3737 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3738 nlh->nlmsg_seq, 0);
3739 else
3740 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3741 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3742 nlh->nlmsg_seq, 0);
1da177e4 3743 if (err < 0) {
ab364a6f
TG
3744 kfree_skb(skb);
3745 goto errout;
1da177e4
LT
3746 }
3747
15e47304 3748 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 3749errout:
1da177e4 3750 return err;
1da177e4
LT
3751}
3752
37a1d361
RP
3753void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3754 unsigned int nlm_flags)
1da177e4
LT
3755{
3756 struct sk_buff *skb;
5578689a 3757 struct net *net = info->nl_net;
528c4ceb
DL
3758 u32 seq;
3759 int err;
3760
3761 err = -ENOBUFS;
38308473 3762 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 3763
19e42e45 3764 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 3765 if (!skb)
21713ebc
TG
3766 goto errout;
3767
191cd582 3768 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
f8cfe2ce 3769 event, info->portid, seq, nlm_flags);
26932566
PM
3770 if (err < 0) {
3771 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3772 WARN_ON(err == -EMSGSIZE);
3773 kfree_skb(skb);
3774 goto errout;
3775 }
15e47304 3776 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
3777 info->nlh, gfp_any());
3778 return;
21713ebc
TG
3779errout:
3780 if (err < 0)
5578689a 3781 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
3782}
3783
8ed67789 3784static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 3785 unsigned long event, void *ptr)
8ed67789 3786{
351638e7 3787 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 3788 struct net *net = dev_net(dev);
8ed67789 3789
242d3a49
WC
3790 if (!(dev->flags & IFF_LOOPBACK))
3791 return NOTIFY_OK;
3792
3793 if (event == NETDEV_REGISTER) {
d8d1f30b 3794 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
3795 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3796#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3797 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 3798 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 3799 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 3800 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 3801#endif
76da0704
WC
3802 } else if (event == NETDEV_UNREGISTER &&
3803 dev->reg_state != NETREG_UNREGISTERED) {
3804 /* NETDEV_UNREGISTER could be fired for multiple times by
3805 * netdev_wait_allrefs(). Make sure we only call this once.
3806 */
12d94a80 3807 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 3808#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
3809 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
3810 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
3811#endif
3812 }
3813
3814 return NOTIFY_OK;
3815}
3816
1da177e4
LT
3817/*
3818 * /proc
3819 */
3820
3821#ifdef CONFIG_PROC_FS
3822
33120b30
AD
3823static const struct file_operations ipv6_route_proc_fops = {
3824 .owner = THIS_MODULE,
3825 .open = ipv6_route_open,
3826 .read = seq_read,
3827 .llseek = seq_lseek,
8d2ca1d7 3828 .release = seq_release_net,
33120b30
AD
3829};
3830
1da177e4
LT
3831static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3832{
69ddb805 3833 struct net *net = (struct net *)seq->private;
1da177e4 3834 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
3835 net->ipv6.rt6_stats->fib_nodes,
3836 net->ipv6.rt6_stats->fib_route_nodes,
3837 net->ipv6.rt6_stats->fib_rt_alloc,
3838 net->ipv6.rt6_stats->fib_rt_entries,
3839 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 3840 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 3841 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
3842
3843 return 0;
3844}
3845
3846static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3847{
de05c557 3848 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
3849}
3850
9a32144e 3851static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
3852 .owner = THIS_MODULE,
3853 .open = rt6_stats_seq_open,
3854 .read = seq_read,
3855 .llseek = seq_lseek,
b6fcbdb4 3856 .release = single_release_net,
1da177e4
LT
3857};
3858#endif /* CONFIG_PROC_FS */
3859
3860#ifdef CONFIG_SYSCTL
3861
1da177e4 3862static
fe2c6338 3863int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
3864 void __user *buffer, size_t *lenp, loff_t *ppos)
3865{
c486da34
LAG
3866 struct net *net;
3867 int delay;
3868 if (!write)
1da177e4 3869 return -EINVAL;
c486da34
LAG
3870
3871 net = (struct net *)ctl->extra1;
3872 delay = net->ipv6.sysctl.flush_delay;
3873 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 3874 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 3875 return 0;
1da177e4
LT
3876}
3877
fe2c6338 3878struct ctl_table ipv6_route_table_template[] = {
1ab1457c 3879 {
1da177e4 3880 .procname = "flush",
4990509f 3881 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 3882 .maxlen = sizeof(int),
89c8b3a1 3883 .mode = 0200,
6d9f239a 3884 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
3885 },
3886 {
1da177e4 3887 .procname = "gc_thresh",
9a7ec3a9 3888 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
3889 .maxlen = sizeof(int),
3890 .mode = 0644,
6d9f239a 3891 .proc_handler = proc_dointvec,
1da177e4
LT
3892 },
3893 {
1da177e4 3894 .procname = "max_size",
4990509f 3895 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
3896 .maxlen = sizeof(int),
3897 .mode = 0644,
6d9f239a 3898 .proc_handler = proc_dointvec,
1da177e4
LT
3899 },
3900 {
1da177e4 3901 .procname = "gc_min_interval",
4990509f 3902 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3903 .maxlen = sizeof(int),
3904 .mode = 0644,
6d9f239a 3905 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3906 },
3907 {
1da177e4 3908 .procname = "gc_timeout",
4990509f 3909 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
3910 .maxlen = sizeof(int),
3911 .mode = 0644,
6d9f239a 3912 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3913 },
3914 {
1da177e4 3915 .procname = "gc_interval",
4990509f 3916 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
3917 .maxlen = sizeof(int),
3918 .mode = 0644,
6d9f239a 3919 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3920 },
3921 {
1da177e4 3922 .procname = "gc_elasticity",
4990509f 3923 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
3924 .maxlen = sizeof(int),
3925 .mode = 0644,
f3d3f616 3926 .proc_handler = proc_dointvec,
1da177e4
LT
3927 },
3928 {
1da177e4 3929 .procname = "mtu_expires",
4990509f 3930 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
3931 .maxlen = sizeof(int),
3932 .mode = 0644,
6d9f239a 3933 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3934 },
3935 {
1da177e4 3936 .procname = "min_adv_mss",
4990509f 3937 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
3938 .maxlen = sizeof(int),
3939 .mode = 0644,
f3d3f616 3940 .proc_handler = proc_dointvec,
1da177e4
LT
3941 },
3942 {
1da177e4 3943 .procname = "gc_min_interval_ms",
4990509f 3944 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3945 .maxlen = sizeof(int),
3946 .mode = 0644,
6d9f239a 3947 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 3948 },
f8572d8f 3949 { }
1da177e4
LT
3950};
3951
2c8c1e72 3952struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
3953{
3954 struct ctl_table *table;
3955
3956 table = kmemdup(ipv6_route_table_template,
3957 sizeof(ipv6_route_table_template),
3958 GFP_KERNEL);
5ee09105
YH
3959
3960 if (table) {
3961 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 3962 table[0].extra1 = net;
86393e52 3963 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
3964 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3965 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3966 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3967 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3968 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3969 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3970 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 3971 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
3972
3973 /* Don't export sysctls to unprivileged users */
3974 if (net->user_ns != &init_user_ns)
3975 table[0].procname = NULL;
5ee09105
YH
3976 }
3977
760f2d01
DL
3978 return table;
3979}
1da177e4
LT
3980#endif
3981
2c8c1e72 3982static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3983{
633d424b 3984 int ret = -ENOMEM;
8ed67789 3985
86393e52
AD
3986 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3987 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3988
fc66f95c
ED
3989 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3990 goto out_ip6_dst_ops;
3991
8ed67789
DL
3992 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3993 sizeof(*net->ipv6.ip6_null_entry),
3994 GFP_KERNEL);
3995 if (!net->ipv6.ip6_null_entry)
fc66f95c 3996 goto out_ip6_dst_entries;
d8d1f30b 3997 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3998 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3999 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4000 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
4001 ip6_template_metrics, true);
8ed67789
DL
4002
4003#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 4004 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
4005 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
4006 sizeof(*net->ipv6.ip6_prohibit_entry),
4007 GFP_KERNEL);
68fffc67
PZ
4008 if (!net->ipv6.ip6_prohibit_entry)
4009 goto out_ip6_null_entry;
d8d1f30b 4010 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 4011 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 4012 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4013 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
4014 ip6_template_metrics, true);
8ed67789
DL
4015
4016 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
4017 sizeof(*net->ipv6.ip6_blk_hole_entry),
4018 GFP_KERNEL);
68fffc67
PZ
4019 if (!net->ipv6.ip6_blk_hole_entry)
4020 goto out_ip6_prohibit_entry;
d8d1f30b 4021 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 4022 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 4023 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4024 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
4025 ip6_template_metrics, true);
8ed67789
DL
4026#endif
4027
b339a47c
PZ
4028 net->ipv6.sysctl.flush_delay = 0;
4029 net->ipv6.sysctl.ip6_rt_max_size = 4096;
4030 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
4031 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
4032 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
4033 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
4034 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
4035 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
4036
6891a346
BT
4037 net->ipv6.ip6_rt_gc_expire = 30*HZ;
4038
8ed67789
DL
4039 ret = 0;
4040out:
4041 return ret;
f2fc6a54 4042
68fffc67
PZ
4043#ifdef CONFIG_IPV6_MULTIPLE_TABLES
4044out_ip6_prohibit_entry:
4045 kfree(net->ipv6.ip6_prohibit_entry);
4046out_ip6_null_entry:
4047 kfree(net->ipv6.ip6_null_entry);
4048#endif
fc66f95c
ED
4049out_ip6_dst_entries:
4050 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 4051out_ip6_dst_ops:
f2fc6a54 4052 goto out;
cdb18761
DL
4053}
4054
2c8c1e72 4055static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 4056{
8ed67789
DL
4057 kfree(net->ipv6.ip6_null_entry);
4058#ifdef CONFIG_IPV6_MULTIPLE_TABLES
4059 kfree(net->ipv6.ip6_prohibit_entry);
4060 kfree(net->ipv6.ip6_blk_hole_entry);
4061#endif
41bb78b4 4062 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
4063}
4064
d189634e
TG
4065static int __net_init ip6_route_net_init_late(struct net *net)
4066{
4067#ifdef CONFIG_PROC_FS
d4beaa66
G
4068 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
4069 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
4070#endif
4071 return 0;
4072}
4073
4074static void __net_exit ip6_route_net_exit_late(struct net *net)
4075{
4076#ifdef CONFIG_PROC_FS
ece31ffd
G
4077 remove_proc_entry("ipv6_route", net->proc_net);
4078 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
4079#endif
4080}
4081
cdb18761
DL
4082static struct pernet_operations ip6_route_net_ops = {
4083 .init = ip6_route_net_init,
4084 .exit = ip6_route_net_exit,
4085};
4086
c3426b47
DM
4087static int __net_init ipv6_inetpeer_init(struct net *net)
4088{
4089 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4090
4091 if (!bp)
4092 return -ENOMEM;
4093 inet_peer_base_init(bp);
4094 net->ipv6.peers = bp;
4095 return 0;
4096}
4097
4098static void __net_exit ipv6_inetpeer_exit(struct net *net)
4099{
4100 struct inet_peer_base *bp = net->ipv6.peers;
4101
4102 net->ipv6.peers = NULL;
56a6b248 4103 inetpeer_invalidate_tree(bp);
c3426b47
DM
4104 kfree(bp);
4105}
4106
2b823f72 4107static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
4108 .init = ipv6_inetpeer_init,
4109 .exit = ipv6_inetpeer_exit,
4110};
4111
d189634e
TG
4112static struct pernet_operations ip6_route_net_late_ops = {
4113 .init = ip6_route_net_init_late,
4114 .exit = ip6_route_net_exit_late,
4115};
4116
8ed67789
DL
4117static struct notifier_block ip6_route_dev_notifier = {
4118 .notifier_call = ip6_route_dev_notify,
242d3a49 4119 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
4120};
4121
2f460933
WC
4122void __init ip6_route_init_special_entries(void)
4123{
4124 /* Registering of the loopback is done before this portion of code,
4125 * the loopback reference in rt6_info will not be taken, do it
4126 * manually for init_net */
4127 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
4128 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4129 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4130 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
4131 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4132 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
4133 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4134 #endif
4135}
4136
433d49c3 4137int __init ip6_route_init(void)
1da177e4 4138{
433d49c3 4139 int ret;
8d0b94af 4140 int cpu;
433d49c3 4141
9a7ec3a9
DL
4142 ret = -ENOMEM;
4143 ip6_dst_ops_template.kmem_cachep =
e5d679f3 4144 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 4145 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 4146 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 4147 goto out;
14e50e57 4148
fc66f95c 4149 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 4150 if (ret)
bdb3289f 4151 goto out_kmem_cache;
bdb3289f 4152
c3426b47
DM
4153 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
4154 if (ret)
e8803b6c 4155 goto out_dst_entries;
2a0c451a 4156
7e52b33b
DM
4157 ret = register_pernet_subsys(&ip6_route_net_ops);
4158 if (ret)
4159 goto out_register_inetpeer;
c3426b47 4160
5dc121e9
AE
4161 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
4162
e8803b6c 4163 ret = fib6_init();
433d49c3 4164 if (ret)
8ed67789 4165 goto out_register_subsys;
433d49c3 4166
433d49c3
DL
4167 ret = xfrm6_init();
4168 if (ret)
e8803b6c 4169 goto out_fib6_init;
c35b7e72 4170
433d49c3
DL
4171 ret = fib6_rules_init();
4172 if (ret)
4173 goto xfrm6_init;
7e5449c2 4174
d189634e
TG
4175 ret = register_pernet_subsys(&ip6_route_net_late_ops);
4176 if (ret)
4177 goto fib6_rules_init;
4178
433d49c3 4179 ret = -ENOBUFS;
b97bac64
FW
4180 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
4181 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
e3a22b7f
FW
4182 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
4183 RTNL_FLAG_DOIT_UNLOCKED))
d189634e 4184 goto out_register_late_subsys;
c127ea2c 4185
8ed67789 4186 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 4187 if (ret)
d189634e 4188 goto out_register_late_subsys;
8ed67789 4189
8d0b94af
MKL
4190 for_each_possible_cpu(cpu) {
4191 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
4192
4193 INIT_LIST_HEAD(&ul->head);
4194 spin_lock_init(&ul->lock);
4195 }
4196
433d49c3
DL
4197out:
4198 return ret;
4199
d189634e
TG
4200out_register_late_subsys:
4201 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 4202fib6_rules_init:
433d49c3
DL
4203 fib6_rules_cleanup();
4204xfrm6_init:
433d49c3 4205 xfrm6_fini();
2a0c451a
TG
4206out_fib6_init:
4207 fib6_gc_cleanup();
8ed67789
DL
4208out_register_subsys:
4209 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
4210out_register_inetpeer:
4211 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
4212out_dst_entries:
4213 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 4214out_kmem_cache:
f2fc6a54 4215 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 4216 goto out;
1da177e4
LT
4217}
4218
4219void ip6_route_cleanup(void)
4220{
8ed67789 4221 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 4222 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 4223 fib6_rules_cleanup();
1da177e4 4224 xfrm6_fini();
1da177e4 4225 fib6_gc_cleanup();
c3426b47 4226 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 4227 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 4228 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 4229 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 4230}