inetpeer: add parameter net for inet_getpeer_v4,v6
net/ipv6/route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60
61 #include <asm/uaccess.h>
62
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68 const struct in6_addr *dest);
69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void ip6_dst_destroy(struct dst_entry *);
74 static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
76 static int ip6_dst_gc(struct dst_ops *ops);
77
78 static int ip6_pkt_discard(struct sk_buff *skb);
79 static int ip6_pkt_discard_out(struct sk_buff *skb);
80 static void ip6_link_failure(struct sk_buff *skb);
81 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
82
83 #ifdef CONFIG_IPV6_ROUTE_INFO
84 static struct rt6_info *rt6_add_route_info(struct net *net,
85 const struct in6_addr *prefix, int prefixlen,
86 const struct in6_addr *gwaddr, int ifindex,
87 unsigned int pref);
88 static struct rt6_info *rt6_get_route_info(struct net *net,
89 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex);
91 #endif
92
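/* Copy-on-write metrics for host routes: writable metrics live in the bound
 * inet_peer; the shared read-only values are copied there on first write and
 * then swapped into dst->_metrics with cmpxchg(). */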
93 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
94 {
95 struct rt6_info *rt = (struct rt6_info *) dst;
96 struct inet_peer *peer;
97 u32 *p = NULL;
98
99 if (!(rt->dst.flags & DST_HOST))
100 return NULL;
101
102 if (!rt->rt6i_peer)
103 rt6_bind_peer(rt, 1);
104
105 peer = rt->rt6i_peer;
106 if (peer) {
107 u32 *old_p = __DST_METRICS_PTR(old);
108 unsigned long prev, new;
109
110 p = peer->metrics;
111 if (inet_metrics_new(peer))
112 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113
114 new = (unsigned long) p;
115 prev = cmpxchg(&dst->_metrics, old, new);
116
117 if (prev != old) {
118 p = __DST_METRICS_PTR(prev);
119 if (prev & DST_METRICS_READ_ONLY)
120 p = NULL;
121 }
122 }
123 return p;
124 }
125
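/* Neighbour lookups use the route's gateway address when one is set,
 * otherwise the packet's destination address. */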
126 static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
127 {
128 struct in6_addr *p = &rt->rt6i_gateway;
129
130 if (!ipv6_addr_any(p))
131 return (const void *) p;
132 return daddr;
133 }
134
135 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
136 {
137 struct rt6_info *rt = (struct rt6_info *) dst;
138 struct neighbour *n;
139
140 daddr = choose_neigh_daddr(rt, daddr);
141 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
142 if (n)
143 return n;
144 return neigh_create(&nd_tbl, daddr, dst->dev);
145 }
146
147 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
148 {
149 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
150 if (!n) {
151 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
152 if (IS_ERR(n))
153 return PTR_ERR(n);
154 }
155 dst_set_neighbour(&rt->dst, n);
156
157 return 0;
158 }
159
160 static struct dst_ops ip6_dst_ops_template = {
161 .family = AF_INET6,
162 .protocol = cpu_to_be16(ETH_P_IPV6),
163 .gc = ip6_dst_gc,
164 .gc_thresh = 1024,
165 .check = ip6_dst_check,
166 .default_advmss = ip6_default_advmss,
167 .mtu = ip6_mtu,
168 .cow_metrics = ipv6_cow_metrics,
169 .destroy = ip6_dst_destroy,
170 .ifdown = ip6_dst_ifdown,
171 .negative_advice = ip6_negative_advice,
172 .link_failure = ip6_link_failure,
173 .update_pmtu = ip6_rt_update_pmtu,
174 .local_out = __ip6_local_out,
175 .neigh_lookup = ip6_neigh_lookup,
176 };
177
178 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
179 {
180 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
181
182 return mtu ? : dst->dev->mtu;
183 }
184
185 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
186 {
187 }
188
189 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
190 unsigned long old)
191 {
192 return NULL;
193 }
194
195 static struct dst_ops ip6_dst_blackhole_ops = {
196 .family = AF_INET6,
197 .protocol = cpu_to_be16(ETH_P_IPV6),
198 .destroy = ip6_dst_destroy,
199 .check = ip6_dst_check,
200 .mtu = ip6_blackhole_mtu,
201 .default_advmss = ip6_default_advmss,
202 .update_pmtu = ip6_rt_blackhole_update_pmtu,
203 .cow_metrics = ip6_rt_blackhole_cow_metrics,
204 .neigh_lookup = ip6_neigh_lookup,
205 };
206
207 static const u32 ip6_template_metrics[RTAX_MAX] = {
208 [RTAX_HOPLIMIT - 1] = 255,
209 };
210
211 static struct rt6_info ip6_null_entry_template = {
212 .dst = {
213 .__refcnt = ATOMIC_INIT(1),
214 .__use = 1,
215 .obsolete = -1,
216 .error = -ENETUNREACH,
217 .input = ip6_pkt_discard,
218 .output = ip6_pkt_discard_out,
219 },
220 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
221 .rt6i_protocol = RTPROT_KERNEL,
222 .rt6i_metric = ~(u32) 0,
223 .rt6i_ref = ATOMIC_INIT(1),
224 };
225
226 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
227
228 static int ip6_pkt_prohibit(struct sk_buff *skb);
229 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
230
231 static struct rt6_info ip6_prohibit_entry_template = {
232 .dst = {
233 .__refcnt = ATOMIC_INIT(1),
234 .__use = 1,
235 .obsolete = -1,
236 .error = -EACCES,
237 .input = ip6_pkt_prohibit,
238 .output = ip6_pkt_prohibit_out,
239 },
240 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
241 .rt6i_protocol = RTPROT_KERNEL,
242 .rt6i_metric = ~(u32) 0,
243 .rt6i_ref = ATOMIC_INIT(1),
244 };
245
246 static struct rt6_info ip6_blk_hole_entry_template = {
247 .dst = {
248 .__refcnt = ATOMIC_INIT(1),
249 .__use = 1,
250 .obsolete = -1,
251 .error = -EINVAL,
252 .input = dst_discard,
253 .output = dst_discard,
254 },
255 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
256 .rt6i_protocol = RTPROT_KERNEL,
257 .rt6i_metric = ~(u32) 0,
258 .rt6i_ref = ATOMIC_INIT(1),
259 };
260
261 #endif
262
263 /* allocate dst with ip6_dst_ops */
264 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
265 struct net_device *dev,
266 int flags)
267 {
268 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
269
270 if (rt)
271 memset(&rt->rt6i_table, 0,
272 sizeof(*rt) - sizeof(struct dst_entry));
273
274 return rt;
275 }
276
277 static void ip6_dst_destroy(struct dst_entry *dst)
278 {
279 struct rt6_info *rt = (struct rt6_info *)dst;
280 struct inet6_dev *idev = rt->rt6i_idev;
281 struct inet_peer *peer = rt->rt6i_peer;
282
283 if (!(rt->dst.flags & DST_HOST))
284 dst_destroy_metrics_generic(dst);
285
286 if (idev) {
287 rt->rt6i_idev = NULL;
288 in6_dev_put(idev);
289 }
290
291 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
292 dst_release(dst->from);
293
294 if (peer) {
295 rt->rt6i_peer = NULL;
296 inet_putpeer(peer);
297 }
298 }
299
300 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
301
302 static u32 rt6_peer_genid(void)
303 {
304 return atomic_read(&__rt6_peer_genid);
305 }
306
307 void rt6_bind_peer(struct rt6_info *rt, int create)
308 {
309 struct net *net = dev_net(rt->dst.dev);
310 struct inet_peer *peer;
311
312 peer = inet_getpeer_v6(net, &rt->rt6i_dst.addr, create);
313 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
314 inet_putpeer(peer);
315 else
316 rt->rt6i_peer_genid = rt6_peer_genid();
317 }
318
319 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
320 int how)
321 {
322 struct rt6_info *rt = (struct rt6_info *)dst;
323 struct inet6_dev *idev = rt->rt6i_idev;
324 struct net_device *loopback_dev =
325 dev_net(dev)->loopback_dev;
326
327 if (dev != loopback_dev && idev && idev->dev == dev) {
328 struct inet6_dev *loopback_idev =
329 in6_dev_get(loopback_dev);
330 if (loopback_idev) {
331 rt->rt6i_idev = loopback_idev;
332 in6_dev_put(idev);
333 }
334 }
335 }
336
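/* A route is expired either directly (RTF_EXPIRES) or through the route it
 * was copied from (dst.from). */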
337 static bool rt6_check_expired(const struct rt6_info *rt)
338 {
339 struct rt6_info *ort = NULL;
340
341 if (rt->rt6i_flags & RTF_EXPIRES) {
342 if (time_after(jiffies, rt->dst.expires))
343 return true;
344 } else if (rt->dst.from) {
345 ort = (struct rt6_info *) rt->dst.from;
346 return (ort->rt6i_flags & RTF_EXPIRES) &&
347 time_after(jiffies, ort->dst.expires);
348 }
349 return false;
350 }
351
352 static bool rt6_need_strict(const struct in6_addr *daddr)
353 {
354 return ipv6_addr_type(daddr) &
355 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
356 }
357
358 /*
359 * Route lookup. Any table->tb6_lock is implied.
360 */
361
362 static inline struct rt6_info *rt6_device_match(struct net *net,
363 struct rt6_info *rt,
364 const struct in6_addr *saddr,
365 int oif,
366 int flags)
367 {
368 struct rt6_info *local = NULL;
369 struct rt6_info *sprt;
370
371 if (!oif && ipv6_addr_any(saddr))
372 goto out;
373
374 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
375 struct net_device *dev = sprt->dst.dev;
376
377 if (oif) {
378 if (dev->ifindex == oif)
379 return sprt;
380 if (dev->flags & IFF_LOOPBACK) {
381 if (!sprt->rt6i_idev ||
382 sprt->rt6i_idev->dev->ifindex != oif) {
383 if (flags & RT6_LOOKUP_F_IFACE && oif)
384 continue;
385 if (local && (!oif ||
386 local->rt6i_idev->dev->ifindex == oif))
387 continue;
388 }
389 local = sprt;
390 }
391 } else {
392 if (ipv6_chk_addr(net, saddr, dev,
393 flags & RT6_LOOKUP_F_IFACE))
394 return sprt;
395 }
396 }
397
398 if (oif) {
399 if (local)
400 return local;
401
402 if (flags & RT6_LOOKUP_F_IFACE)
403 return net->ipv6.ip6_null_entry;
404 }
405 out:
406 return rt;
407 }
408
409 #ifdef CONFIG_IPV6_ROUTER_PREF
410 static void rt6_probe(struct rt6_info *rt)
411 {
412 struct neighbour *neigh;
413 /*
414 * Okay, this does not seem to be appropriate
415 * for now, however, we need to check if it
416 * is really so; aka Router Reachability Probing.
417 *
418 * Router Reachability Probe MUST be rate-limited
419 * to no more than one per minute.
420 */
421 rcu_read_lock();
422 neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
423 if (!neigh || (neigh->nud_state & NUD_VALID))
424 goto out;
425 read_lock_bh(&neigh->lock);
426 if (!(neigh->nud_state & NUD_VALID) &&
427 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
428 struct in6_addr mcaddr;
429 struct in6_addr *target;
430
431 neigh->updated = jiffies;
432 read_unlock_bh(&neigh->lock);
433
434 target = (struct in6_addr *)&neigh->primary_key;
435 addrconf_addr_solict_mult(target, &mcaddr);
436 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
437 } else {
438 read_unlock_bh(&neigh->lock);
439 }
440 out:
441 rcu_read_unlock();
442 }
443 #else
444 static inline void rt6_probe(struct rt6_info *rt)
445 {
446 }
447 #endif
448
449 /*
450 * Default Router Selection (RFC 2461 6.3.6)
451 */
452 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
453 {
454 struct net_device *dev = rt->dst.dev;
455 if (!oif || dev->ifindex == oif)
456 return 2;
457 if ((dev->flags & IFF_LOOPBACK) &&
458 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
459 return 1;
460 return 0;
461 }
462
463 static inline int rt6_check_neigh(struct rt6_info *rt)
464 {
465 struct neighbour *neigh;
466 int m;
467
468 rcu_read_lock();
469 neigh = dst_get_neighbour_noref(&rt->dst);
470 if (rt->rt6i_flags & RTF_NONEXTHOP ||
471 !(rt->rt6i_flags & RTF_GATEWAY))
472 m = 1;
473 else if (neigh) {
474 read_lock_bh(&neigh->lock);
475 if (neigh->nud_state & NUD_VALID)
476 m = 2;
477 #ifdef CONFIG_IPV6_ROUTER_PREF
478 else if (neigh->nud_state & NUD_FAILED)
479 m = 0;
480 #endif
481 else
482 m = 1;
483 read_unlock_bh(&neigh->lock);
484 } else
485 m = 0;
486 rcu_read_unlock();
487 return m;
488 }
489
490 static int rt6_score_route(struct rt6_info *rt, int oif,
491 int strict)
492 {
493 int m, n;
494
495 m = rt6_check_dev(rt, oif);
496 if (!m && (strict & RT6_LOOKUP_F_IFACE))
497 return -1;
498 #ifdef CONFIG_IPV6_ROUTER_PREF
499 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
500 #endif
501 n = rt6_check_neigh(rt);
502 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
503 return -1;
504 return m;
505 }
506
507 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
508 int *mpri, struct rt6_info *match)
509 {
510 int m;
511
512 if (rt6_check_expired(rt))
513 goto out;
514
515 m = rt6_score_route(rt, oif, strict);
516 if (m < 0)
517 goto out;
518
519 if (m > *mpri) {
520 if (strict & RT6_LOOKUP_F_REACHABLE)
521 rt6_probe(match);
522 *mpri = m;
523 match = rt;
524 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
525 rt6_probe(rt);
526 }
527
528 out:
529 return match;
530 }
531
532 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
533 struct rt6_info *rr_head,
534 u32 metric, int oif, int strict)
535 {
536 struct rt6_info *rt, *match;
537 int mpri = -1;
538
539 match = NULL;
540 for (rt = rr_head; rt && rt->rt6i_metric == metric;
541 rt = rt->dst.rt6_next)
542 match = find_match(rt, oif, strict, &mpri, match);
543 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
544 rt = rt->dst.rt6_next)
545 match = find_match(rt, oif, strict, &mpri, match);
546
547 return match;
548 }
549
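/* Pick the best route at this fib6 node; when no route scored as reachable,
 * advance rr_ptr to the next sibling of the same metric so that subsequent
 * lookups round-robin over the candidates. */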
550 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
551 {
552 struct rt6_info *match, *rt0;
553 struct net *net;
554
555 rt0 = fn->rr_ptr;
556 if (!rt0)
557 fn->rr_ptr = rt0 = fn->leaf;
558
559 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
560
561 if (!match &&
562 (strict & RT6_LOOKUP_F_REACHABLE)) {
563 struct rt6_info *next = rt0->dst.rt6_next;
564
565 /* no entries matched; do round-robin */
566 if (!next || next->rt6i_metric != rt0->rt6i_metric)
567 next = fn->leaf;
568
569 if (next != rt0)
570 fn->rr_ptr = next;
571 }
572
573 net = dev_net(rt0->dst.dev);
574 return match ? match : net->ipv6.ip6_null_entry;
575 }
576
577 #ifdef CONFIG_IPV6_ROUTE_INFO
578 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
579 const struct in6_addr *gwaddr)
580 {
581 struct net *net = dev_net(dev);
582 struct route_info *rinfo = (struct route_info *) opt;
583 struct in6_addr prefix_buf, *prefix;
584 unsigned int pref;
585 unsigned long lifetime;
586 struct rt6_info *rt;
587
588 if (len < sizeof(struct route_info)) {
589 return -EINVAL;
590 }
591
592 /* Sanity check for prefix_len and length */
593 if (rinfo->length > 3) {
594 return -EINVAL;
595 } else if (rinfo->prefix_len > 128) {
596 return -EINVAL;
597 } else if (rinfo->prefix_len > 64) {
598 if (rinfo->length < 2) {
599 return -EINVAL;
600 }
601 } else if (rinfo->prefix_len > 0) {
602 if (rinfo->length < 1) {
603 return -EINVAL;
604 }
605 }
606
607 pref = rinfo->route_pref;
608 if (pref == ICMPV6_ROUTER_PREF_INVALID)
609 return -EINVAL;
610
611 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
612
613 if (rinfo->length == 3)
614 prefix = (struct in6_addr *)rinfo->prefix;
615 else {
616 /* this function is safe */
617 ipv6_addr_prefix(&prefix_buf,
618 (struct in6_addr *)rinfo->prefix,
619 rinfo->prefix_len);
620 prefix = &prefix_buf;
621 }
622
623 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
624 dev->ifindex);
625
626 if (rt && !lifetime) {
627 ip6_del_rt(rt);
628 rt = NULL;
629 }
630
631 if (!rt && lifetime)
632 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
633 pref);
634 else if (rt)
635 rt->rt6i_flags = RTF_ROUTEINFO |
636 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
637
638 if (rt) {
639 if (!addrconf_finite_timeout(lifetime))
640 rt6_clean_expires(rt);
641 else
642 rt6_set_expires(rt, jiffies + HZ * lifetime);
643
644 dst_release(&rt->dst);
645 }
646 return 0;
647 }
648 #endif
649
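/* Backtracking helper for the lookup loops below: when the match is the null
 * entry, walk back up the fib6 tree (re-descending into a parent's source
 * subtree where one exists) and retry until a node carrying route info is
 * found or the tree root is reached. */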
650 #define BACKTRACK(__net, saddr) \
651 do { \
652 if (rt == __net->ipv6.ip6_null_entry) { \
653 struct fib6_node *pn; \
654 while (1) { \
655 if (fn->fn_flags & RTN_TL_ROOT) \
656 goto out; \
657 pn = fn->parent; \
658 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
659 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
660 else \
661 fn = pn; \
662 if (fn->fn_flags & RTN_RTINFO) \
663 goto restart; \
664 } \
665 } \
666 } while (0)
667
668 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
669 struct fib6_table *table,
670 struct flowi6 *fl6, int flags)
671 {
672 struct fib6_node *fn;
673 struct rt6_info *rt;
674
675 read_lock_bh(&table->tb6_lock);
676 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
677 restart:
678 rt = fn->leaf;
679 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
680 BACKTRACK(net, &fl6->saddr);
681 out:
682 dst_use(&rt->dst, jiffies);
683 read_unlock_bh(&table->tb6_lock);
684 return rt;
685
686 }
687
688 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
689 int flags)
690 {
691 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
692 }
693 EXPORT_SYMBOL_GPL(ip6_route_lookup);
694
695 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
696 const struct in6_addr *saddr, int oif, int strict)
697 {
698 struct flowi6 fl6 = {
699 .flowi6_oif = oif,
700 .daddr = *daddr,
701 };
702 struct dst_entry *dst;
703 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
704
705 if (saddr) {
706 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
707 flags |= RT6_LOOKUP_F_HAS_SADDR;
708 }
709
710 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
711 if (dst->error == 0)
712 return (struct rt6_info *) dst;
713
714 dst_release(dst);
715
716 return NULL;
717 }
718
719 EXPORT_SYMBOL(rt6_lookup);
720
721 /* ip6_ins_rt is called with FREE table->tb6_lock.
 722    It takes a new route entry; if the addition fails for any reason, the
 723    route is freed. In any case, if the caller does not hold it, it may
 724    be destroyed.
725 */
726
727 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
728 {
729 int err;
730 struct fib6_table *table;
731
732 table = rt->rt6i_table;
733 write_lock_bh(&table->tb6_lock);
734 err = fib6_add(&table->tb6_root, rt, info);
735 write_unlock_bh(&table->tb6_lock);
736
737 return err;
738 }
739
740 int ip6_ins_rt(struct rt6_info *rt)
741 {
742 struct nl_info info = {
743 .nl_net = dev_net(rt->dst.dev),
744 };
745 return __ip6_ins_rt(rt, &info);
746 }
747
748 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
749 const struct in6_addr *daddr,
750 const struct in6_addr *saddr)
751 {
752 struct rt6_info *rt;
753
754 /*
755 * Clone the route.
756 */
757
758 rt = ip6_rt_copy(ort, daddr);
759
760 if (rt) {
761 int attempts = !in_softirq();
762
763 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
764 if (ort->rt6i_dst.plen != 128 &&
765 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
766 rt->rt6i_flags |= RTF_ANYCAST;
767 rt->rt6i_gateway = *daddr;
768 }
769
770 rt->rt6i_flags |= RTF_CACHE;
771
772 #ifdef CONFIG_IPV6_SUBTREES
773 if (rt->rt6i_src.plen && saddr) {
774 rt->rt6i_src.addr = *saddr;
775 rt->rt6i_src.plen = 128;
776 }
777 #endif
778
779 retry:
780 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
781 struct net *net = dev_net(rt->dst.dev);
782 int saved_rt_min_interval =
783 net->ipv6.sysctl.ip6_rt_gc_min_interval;
784 int saved_rt_elasticity =
785 net->ipv6.sysctl.ip6_rt_gc_elasticity;
786
787 if (attempts-- > 0) {
788 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
789 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
790
791 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
792
793 net->ipv6.sysctl.ip6_rt_gc_elasticity =
794 saved_rt_elasticity;
795 net->ipv6.sysctl.ip6_rt_gc_min_interval =
796 saved_rt_min_interval;
797 goto retry;
798 }
799
800 net_warn_ratelimited("Neighbour table overflow\n");
801 dst_free(&rt->dst);
802 return NULL;
803 }
804 }
805
806 return rt;
807 }
808
809 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
810 const struct in6_addr *daddr)
811 {
812 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
813
814 if (rt) {
815 rt->rt6i_flags |= RTF_CACHE;
816 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
817 }
818 return rt;
819 }
820
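/* Core policy routing lookup: select the best route (preferring reachable
 * routers), then turn it into a per-destination RTF_CACHE entry via
 * rt6_alloc_cow()/rt6_alloc_clone() and insert it, retrying a few times if
 * insertion races with another CPU. */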
821 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
822 struct flowi6 *fl6, int flags)
823 {
824 struct fib6_node *fn;
825 struct rt6_info *rt, *nrt;
826 int strict = 0;
827 int attempts = 3;
828 int err;
829 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
830
831 strict |= flags & RT6_LOOKUP_F_IFACE;
832
833 relookup:
834 read_lock_bh(&table->tb6_lock);
835
836 restart_2:
837 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
838
839 restart:
840 rt = rt6_select(fn, oif, strict | reachable);
841
842 BACKTRACK(net, &fl6->saddr);
843 if (rt == net->ipv6.ip6_null_entry ||
844 rt->rt6i_flags & RTF_CACHE)
845 goto out;
846
847 dst_hold(&rt->dst);
848 read_unlock_bh(&table->tb6_lock);
849
850 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
851 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
852 else if (!(rt->dst.flags & DST_HOST))
853 nrt = rt6_alloc_clone(rt, &fl6->daddr);
854 else
855 goto out2;
856
857 dst_release(&rt->dst);
858 rt = nrt ? : net->ipv6.ip6_null_entry;
859
860 dst_hold(&rt->dst);
861 if (nrt) {
862 err = ip6_ins_rt(nrt);
863 if (!err)
864 goto out2;
865 }
866
867 if (--attempts <= 0)
868 goto out2;
869
870 /*
871 * Race condition! In the gap, when table->tb6_lock was
872 * released someone could insert this route. Relookup.
873 */
874 dst_release(&rt->dst);
875 goto relookup;
876
877 out:
878 if (reachable) {
879 reachable = 0;
880 goto restart_2;
881 }
882 dst_hold(&rt->dst);
883 read_unlock_bh(&table->tb6_lock);
884 out2:
885 rt->dst.lastuse = jiffies;
886 rt->dst.__use++;
887
888 return rt;
889 }
890
891 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
892 struct flowi6 *fl6, int flags)
893 {
894 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
895 }
896
897 static struct dst_entry *ip6_route_input_lookup(struct net *net,
898 struct net_device *dev,
899 struct flowi6 *fl6, int flags)
900 {
901 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
902 flags |= RT6_LOOKUP_F_IFACE;
903
904 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
905 }
906
907 void ip6_route_input(struct sk_buff *skb)
908 {
909 const struct ipv6hdr *iph = ipv6_hdr(skb);
910 struct net *net = dev_net(skb->dev);
911 int flags = RT6_LOOKUP_F_HAS_SADDR;
912 struct flowi6 fl6 = {
913 .flowi6_iif = skb->dev->ifindex,
914 .daddr = iph->daddr,
915 .saddr = iph->saddr,
916 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
917 .flowi6_mark = skb->mark,
918 .flowi6_proto = iph->nexthdr,
919 };
920
921 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
922 }
923
924 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
925 struct flowi6 *fl6, int flags)
926 {
927 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
928 }
929
930 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
931 struct flowi6 *fl6)
932 {
933 int flags = 0;
934
935 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
936 flags |= RT6_LOOKUP_F_IFACE;
937
938 if (!ipv6_addr_any(&fl6->saddr))
939 flags |= RT6_LOOKUP_F_HAS_SADDR;
940 else if (sk)
941 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
942
943 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
944 }
945
946 EXPORT_SYMBOL(ip6_route_output);
947
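/* Build a copy of dst_orig backed by ip6_dst_blackhole_ops: addressing and
 * metrics are preserved, but packets sent through it are simply discarded;
 * the original dst is released. */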
948 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
949 {
950 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
951 struct dst_entry *new = NULL;
952
953 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
954 if (rt) {
955 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
956
957 new = &rt->dst;
958
959 new->__use = 1;
960 new->input = dst_discard;
961 new->output = dst_discard;
962
963 if (dst_metrics_read_only(&ort->dst))
964 new->_metrics = ort->dst._metrics;
965 else
966 dst_copy_metrics(new, &ort->dst);
967 rt->rt6i_idev = ort->rt6i_idev;
968 if (rt->rt6i_idev)
969 in6_dev_hold(rt->rt6i_idev);
970
971 rt->rt6i_gateway = ort->rt6i_gateway;
972 rt->rt6i_flags = ort->rt6i_flags;
973 rt6_clean_expires(rt);
974 rt->rt6i_metric = 0;
975
976 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
977 #ifdef CONFIG_IPV6_SUBTREES
978 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
979 #endif
980
981 dst_free(new);
982 }
983
984 dst_release(dst_orig);
985 return new ? new : ERR_PTR(-ENOMEM);
986 }
987
988 /*
989 * Destination cache support functions
990 */
991
992 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
993 {
994 struct rt6_info *rt;
995
996 rt = (struct rt6_info *) dst;
997
998 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
999 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1000 if (!rt->rt6i_peer)
1001 rt6_bind_peer(rt, 0);
1002 rt->rt6i_peer_genid = rt6_peer_genid();
1003 }
1004 return dst;
1005 }
1006 return NULL;
1007 }
1008
1009 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1010 {
1011 struct rt6_info *rt = (struct rt6_info *) dst;
1012
1013 if (rt) {
1014 if (rt->rt6i_flags & RTF_CACHE) {
1015 if (rt6_check_expired(rt)) {
1016 ip6_del_rt(rt);
1017 dst = NULL;
1018 }
1019 } else {
1020 dst_release(dst);
1021 dst = NULL;
1022 }
1023 }
1024 return dst;
1025 }
1026
1027 static void ip6_link_failure(struct sk_buff *skb)
1028 {
1029 struct rt6_info *rt;
1030
1031 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1032
1033 rt = (struct rt6_info *) skb_dst(skb);
1034 if (rt) {
1035 if (rt->rt6i_flags & RTF_CACHE)
1036 rt6_update_expires(rt, 0);
1037 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1038 rt->rt6i_node->fn_sernum = -1;
1039 }
1040 }
1041
1042 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1043 {
1044 struct rt6_info *rt6 = (struct rt6_info*)dst;
1045
1046 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1047 rt6->rt6i_flags |= RTF_MODIFIED;
1048 if (mtu < IPV6_MIN_MTU) {
1049 u32 features = dst_metric(dst, RTAX_FEATURES);
1050 mtu = IPV6_MIN_MTU;
1051 features |= RTAX_FEATURE_ALLFRAG;
1052 dst_metric_set(dst, RTAX_FEATURES, features);
1053 }
1054 dst_metric_set(dst, RTAX_MTU, mtu);
1055 }
1056 }
1057
1058 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1059 {
1060 struct net_device *dev = dst->dev;
1061 unsigned int mtu = dst_mtu(dst);
1062 struct net *net = dev_net(dev);
1063
1064 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1065
1066 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1067 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1068
1069 /*
1070 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1071 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1072 * IPV6_MAXPLEN is also valid and means: "any MSS,
1073 * rely only on pmtu discovery"
1074 */
1075 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1076 mtu = IPV6_MAXPLEN;
1077 return mtu;
1078 }
1079
1080 static unsigned int ip6_mtu(const struct dst_entry *dst)
1081 {
1082 struct inet6_dev *idev;
1083 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1084
1085 if (mtu)
1086 return mtu;
1087
1088 mtu = IPV6_MIN_MTU;
1089
1090 rcu_read_lock();
1091 idev = __in6_dev_get(dst->dev);
1092 if (idev)
1093 mtu = idev->cnf.mtu6;
1094 rcu_read_unlock();
1095
1096 return mtu;
1097 }
1098
1099 static struct dst_entry *icmp6_dst_gc_list;
1100 static DEFINE_SPINLOCK(icmp6_dst_lock);
1101
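/* dsts allocated here (for ndisc/ICMPv6 output) are not inserted into the
 * fib; they are chained on icmp6_dst_gc_list and freed by icmp6_dst_gc()
 * once their refcount drops to zero. */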
1102 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1103 struct neighbour *neigh,
1104 struct flowi6 *fl6)
1105 {
1106 struct dst_entry *dst;
1107 struct rt6_info *rt;
1108 struct inet6_dev *idev = in6_dev_get(dev);
1109 struct net *net = dev_net(dev);
1110
1111 if (unlikely(!idev))
1112 return ERR_PTR(-ENODEV);
1113
1114 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1115 if (unlikely(!rt)) {
1116 in6_dev_put(idev);
1117 dst = ERR_PTR(-ENOMEM);
1118 goto out;
1119 }
1120
1121 if (neigh)
1122 neigh_hold(neigh);
1123 else {
1124 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
1125 if (IS_ERR(neigh)) {
1126 in6_dev_put(idev);
1127 dst_free(&rt->dst);
1128 return ERR_CAST(neigh);
1129 }
1130 }
1131
1132 rt->dst.flags |= DST_HOST;
1133 rt->dst.output = ip6_output;
1134 dst_set_neighbour(&rt->dst, neigh);
1135 atomic_set(&rt->dst.__refcnt, 1);
1136 rt->rt6i_dst.addr = fl6->daddr;
1137 rt->rt6i_dst.plen = 128;
1138 rt->rt6i_idev = idev;
1139 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1140
1141 spin_lock_bh(&icmp6_dst_lock);
1142 rt->dst.next = icmp6_dst_gc_list;
1143 icmp6_dst_gc_list = &rt->dst;
1144 spin_unlock_bh(&icmp6_dst_lock);
1145
1146 fib6_force_start_gc(net);
1147
1148 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1149
1150 out:
1151 return dst;
1152 }
1153
1154 int icmp6_dst_gc(void)
1155 {
1156 struct dst_entry *dst, **pprev;
1157 int more = 0;
1158
1159 spin_lock_bh(&icmp6_dst_lock);
1160 pprev = &icmp6_dst_gc_list;
1161
1162 while ((dst = *pprev) != NULL) {
1163 if (!atomic_read(&dst->__refcnt)) {
1164 *pprev = dst->next;
1165 dst_free(dst);
1166 } else {
1167 pprev = &dst->next;
1168 ++more;
1169 }
1170 }
1171
1172 spin_unlock_bh(&icmp6_dst_lock);
1173
1174 return more;
1175 }
1176
1177 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1178 void *arg)
1179 {
1180 struct dst_entry *dst, **pprev;
1181
1182 spin_lock_bh(&icmp6_dst_lock);
1183 pprev = &icmp6_dst_gc_list;
1184 while ((dst = *pprev) != NULL) {
1185 struct rt6_info *rt = (struct rt6_info *) dst;
1186 if (func(rt, arg)) {
1187 *pprev = dst->next;
1188 dst_free(dst);
1189 } else {
1190 pprev = &dst->next;
1191 }
1192 }
1193 spin_unlock_bh(&icmp6_dst_lock);
1194 }
1195
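/* Routing cache garbage collection: skipped while within
 * ip6_rt_gc_min_interval unless the entry count exceeds ip6_rt_max_size;
 * ip6_rt_gc_expire grows under repeated pressure and decays by
 * ip6_rt_gc_elasticity after each run. */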
1196 static int ip6_dst_gc(struct dst_ops *ops)
1197 {
1198 unsigned long now = jiffies;
1199 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1200 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1201 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1202 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1203 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1204 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1205 int entries;
1206
1207 entries = dst_entries_get_fast(ops);
1208 if (time_after(rt_last_gc + rt_min_interval, now) &&
1209 entries <= rt_max_size)
1210 goto out;
1211
1212 net->ipv6.ip6_rt_gc_expire++;
1213 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1214 net->ipv6.ip6_rt_last_gc = now;
1215 entries = dst_entries_get_slow(ops);
1216 if (entries < ops->gc_thresh)
1217 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1218 out:
1219 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1220 return entries > rt_max_size;
1221 }
1222
1223 /* Clean host part of a prefix. Not necessary in radix tree,
1224 but results in cleaner routing tables.
1225
1226 Remove it only when all the things will work!
1227 */
1228
1229 int ip6_dst_hoplimit(struct dst_entry *dst)
1230 {
1231 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1232 if (hoplimit == 0) {
1233 struct net_device *dev = dst->dev;
1234 struct inet6_dev *idev;
1235
1236 rcu_read_lock();
1237 idev = __in6_dev_get(dev);
1238 if (idev)
1239 hoplimit = idev->cnf.hop_limit;
1240 else
1241 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1242 rcu_read_unlock();
1243 }
1244 return hoplimit;
1245 }
1246 EXPORT_SYMBOL(ip6_dst_hoplimit);
1247
1248 /*
1249 *
1250 */
1251
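/* Create and insert a route described by a fib6_config. The ioctl handler
 * and the RA-driven helpers below build a config and call this as well. */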
1252 int ip6_route_add(struct fib6_config *cfg)
1253 {
1254 int err;
1255 struct net *net = cfg->fc_nlinfo.nl_net;
1256 struct rt6_info *rt = NULL;
1257 struct net_device *dev = NULL;
1258 struct inet6_dev *idev = NULL;
1259 struct fib6_table *table;
1260 int addr_type;
1261
1262 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1263 return -EINVAL;
1264 #ifndef CONFIG_IPV6_SUBTREES
1265 if (cfg->fc_src_len)
1266 return -EINVAL;
1267 #endif
1268 if (cfg->fc_ifindex) {
1269 err = -ENODEV;
1270 dev = dev_get_by_index(net, cfg->fc_ifindex);
1271 if (!dev)
1272 goto out;
1273 idev = in6_dev_get(dev);
1274 if (!idev)
1275 goto out;
1276 }
1277
1278 if (cfg->fc_metric == 0)
1279 cfg->fc_metric = IP6_RT_PRIO_USER;
1280
1281 err = -ENOBUFS;
1282 if (cfg->fc_nlinfo.nlh &&
1283 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1284 table = fib6_get_table(net, cfg->fc_table);
1285 if (!table) {
1286 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1287 table = fib6_new_table(net, cfg->fc_table);
1288 }
1289 } else {
1290 table = fib6_new_table(net, cfg->fc_table);
1291 }
1292
1293 if (!table)
1294 goto out;
1295
1296 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1297
1298 if (!rt) {
1299 err = -ENOMEM;
1300 goto out;
1301 }
1302
1303 rt->dst.obsolete = -1;
1304
1305 if (cfg->fc_flags & RTF_EXPIRES)
1306 rt6_set_expires(rt, jiffies +
1307 clock_t_to_jiffies(cfg->fc_expires));
1308 else
1309 rt6_clean_expires(rt);
1310
1311 if (cfg->fc_protocol == RTPROT_UNSPEC)
1312 cfg->fc_protocol = RTPROT_BOOT;
1313 rt->rt6i_protocol = cfg->fc_protocol;
1314
1315 addr_type = ipv6_addr_type(&cfg->fc_dst);
1316
1317 if (addr_type & IPV6_ADDR_MULTICAST)
1318 rt->dst.input = ip6_mc_input;
1319 else if (cfg->fc_flags & RTF_LOCAL)
1320 rt->dst.input = ip6_input;
1321 else
1322 rt->dst.input = ip6_forward;
1323
1324 rt->dst.output = ip6_output;
1325
1326 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1327 rt->rt6i_dst.plen = cfg->fc_dst_len;
1328 if (rt->rt6i_dst.plen == 128)
1329 rt->dst.flags |= DST_HOST;
1330
1331 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1332 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1333 if (!metrics) {
1334 err = -ENOMEM;
1335 goto out;
1336 }
1337 dst_init_metrics(&rt->dst, metrics, 0);
1338 }
1339 #ifdef CONFIG_IPV6_SUBTREES
1340 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1341 rt->rt6i_src.plen = cfg->fc_src_len;
1342 #endif
1343
1344 rt->rt6i_metric = cfg->fc_metric;
1345
1346 /* We cannot add true routes via loopback here,
1347 they would result in kernel looping; promote them to reject routes
1348 */
1349 if ((cfg->fc_flags & RTF_REJECT) ||
1350 (dev && (dev->flags & IFF_LOOPBACK) &&
1351 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1352 !(cfg->fc_flags & RTF_LOCAL))) {
1353 /* hold loopback dev/idev if we haven't done so. */
1354 if (dev != net->loopback_dev) {
1355 if (dev) {
1356 dev_put(dev);
1357 in6_dev_put(idev);
1358 }
1359 dev = net->loopback_dev;
1360 dev_hold(dev);
1361 idev = in6_dev_get(dev);
1362 if (!idev) {
1363 err = -ENODEV;
1364 goto out;
1365 }
1366 }
1367 rt->dst.output = ip6_pkt_discard_out;
1368 rt->dst.input = ip6_pkt_discard;
1369 rt->dst.error = -ENETUNREACH;
1370 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1371 goto install_route;
1372 }
1373
1374 if (cfg->fc_flags & RTF_GATEWAY) {
1375 const struct in6_addr *gw_addr;
1376 int gwa_type;
1377
1378 gw_addr = &cfg->fc_gateway;
1379 rt->rt6i_gateway = *gw_addr;
1380 gwa_type = ipv6_addr_type(gw_addr);
1381
1382 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1383 struct rt6_info *grt;
1384
1385                    /* IPv6 strictly prohibits using non-link-local
1386                       addresses as the nexthop address.
1387                       Otherwise, the router will not be able to send redirects.
1388 It is very good, but in some (rare!) circumstances
1389 (SIT, PtP, NBMA NOARP links) it is handy to allow
1390 some exceptions. --ANK
1391 */
1392 err = -EINVAL;
1393 if (!(gwa_type & IPV6_ADDR_UNICAST))
1394 goto out;
1395
1396 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1397
1398 err = -EHOSTUNREACH;
1399 if (!grt)
1400 goto out;
1401 if (dev) {
1402 if (dev != grt->dst.dev) {
1403 dst_release(&grt->dst);
1404 goto out;
1405 }
1406 } else {
1407 dev = grt->dst.dev;
1408 idev = grt->rt6i_idev;
1409 dev_hold(dev);
1410 in6_dev_hold(grt->rt6i_idev);
1411 }
1412 if (!(grt->rt6i_flags & RTF_GATEWAY))
1413 err = 0;
1414 dst_release(&grt->dst);
1415
1416 if (err)
1417 goto out;
1418 }
1419 err = -EINVAL;
1420 if (!dev || (dev->flags & IFF_LOOPBACK))
1421 goto out;
1422 }
1423
1424 err = -ENODEV;
1425 if (!dev)
1426 goto out;
1427
1428 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1429 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1430 err = -EINVAL;
1431 goto out;
1432 }
1433 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1434 rt->rt6i_prefsrc.plen = 128;
1435 } else
1436 rt->rt6i_prefsrc.plen = 0;
1437
1438 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1439 err = rt6_bind_neighbour(rt, dev);
1440 if (err)
1441 goto out;
1442 }
1443
1444 rt->rt6i_flags = cfg->fc_flags;
1445
1446 install_route:
1447 if (cfg->fc_mx) {
1448 struct nlattr *nla;
1449 int remaining;
1450
1451 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1452 int type = nla_type(nla);
1453
1454 if (type) {
1455 if (type > RTAX_MAX) {
1456 err = -EINVAL;
1457 goto out;
1458 }
1459
1460 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1461 }
1462 }
1463 }
1464
1465 rt->dst.dev = dev;
1466 rt->rt6i_idev = idev;
1467 rt->rt6i_table = table;
1468
1469 cfg->fc_nlinfo.nl_net = dev_net(dev);
1470
1471 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1472
1473 out:
1474 if (dev)
1475 dev_put(dev);
1476 if (idev)
1477 in6_dev_put(idev);
1478 if (rt)
1479 dst_free(&rt->dst);
1480 return err;
1481 }
1482
1483 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1484 {
1485 int err;
1486 struct fib6_table *table;
1487 struct net *net = dev_net(rt->dst.dev);
1488
1489 if (rt == net->ipv6.ip6_null_entry)
1490 return -ENOENT;
1491
1492 table = rt->rt6i_table;
1493 write_lock_bh(&table->tb6_lock);
1494
1495 err = fib6_del(rt, info);
1496 dst_release(&rt->dst);
1497
1498 write_unlock_bh(&table->tb6_lock);
1499
1500 return err;
1501 }
1502
1503 int ip6_del_rt(struct rt6_info *rt)
1504 {
1505 struct nl_info info = {
1506 .nl_net = dev_net(rt->dst.dev),
1507 };
1508 return __ip6_del_rt(rt, &info);
1509 }
1510
1511 static int ip6_route_del(struct fib6_config *cfg)
1512 {
1513 struct fib6_table *table;
1514 struct fib6_node *fn;
1515 struct rt6_info *rt;
1516 int err = -ESRCH;
1517
1518 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1519 if (!table)
1520 return err;
1521
1522 read_lock_bh(&table->tb6_lock);
1523
1524 fn = fib6_locate(&table->tb6_root,
1525 &cfg->fc_dst, cfg->fc_dst_len,
1526 &cfg->fc_src, cfg->fc_src_len);
1527
1528 if (fn) {
1529 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1530 if (cfg->fc_ifindex &&
1531 (!rt->dst.dev ||
1532 rt->dst.dev->ifindex != cfg->fc_ifindex))
1533 continue;
1534 if (cfg->fc_flags & RTF_GATEWAY &&
1535 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1536 continue;
1537 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1538 continue;
1539 dst_hold(&rt->dst);
1540 read_unlock_bh(&table->tb6_lock);
1541
1542 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1543 }
1544 }
1545 read_unlock_bh(&table->tb6_lock);
1546
1547 return err;
1548 }
1549
1550 /*
1551 * Handle redirects
1552 */
1553 struct ip6rd_flowi {
1554 struct flowi6 fl6;
1555 struct in6_addr gateway;
1556 };
1557
1558 static struct rt6_info *__ip6_route_redirect(struct net *net,
1559 struct fib6_table *table,
1560 struct flowi6 *fl6,
1561 int flags)
1562 {
1563 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1564 struct rt6_info *rt;
1565 struct fib6_node *fn;
1566
1567 /*
1568 * Get the "current" route for this destination and
1569     * check if the redirect has come from the appropriate router.
1570 *
1571 * RFC 2461 specifies that redirects should only be
1572 * accepted if they come from the nexthop to the target.
1573 * Due to the way the routes are chosen, this notion
1574 * is a bit fuzzy and one might need to check all possible
1575 * routes.
1576 */
1577
1578 read_lock_bh(&table->tb6_lock);
1579 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1580 restart:
1581 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1582 /*
1583 * Current route is on-link; redirect is always invalid.
1584 *
1585             * It seems the previous statement is not true. It could
1586             * be a node which regards us as on-link (e.g. proxy ndisc).
1587             * But then the router serving it might decide that we should
1588             * know the truth 8)8) --ANK (980726).
1589 */
1590 if (rt6_check_expired(rt))
1591 continue;
1592 if (!(rt->rt6i_flags & RTF_GATEWAY))
1593 continue;
1594 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1595 continue;
1596 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1597 continue;
1598 break;
1599 }
1600
1601 if (!rt)
1602 rt = net->ipv6.ip6_null_entry;
1603 BACKTRACK(net, &fl6->saddr);
1604 out:
1605 dst_hold(&rt->dst);
1606
1607 read_unlock_bh(&table->tb6_lock);
1608
1609 return rt;
1610 };
1611
1612 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1613 const struct in6_addr *src,
1614 const struct in6_addr *gateway,
1615 struct net_device *dev)
1616 {
1617 int flags = RT6_LOOKUP_F_HAS_SADDR;
1618 struct net *net = dev_net(dev);
1619 struct ip6rd_flowi rdfl = {
1620 .fl6 = {
1621 .flowi6_oif = dev->ifindex,
1622 .daddr = *dest,
1623 .saddr = *src,
1624 },
1625 };
1626
1627 rdfl.gateway = *gateway;
1628
1629 if (rt6_need_strict(dest))
1630 flags |= RT6_LOOKUP_F_IFACE;
1631
1632 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1633 flags, __ip6_route_redirect);
1634 }
1635
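/* Handle an accepted ICMPv6 redirect: update the neighbour entry with the
 * new link-layer address, then install a cloned RTF_CACHE|RTF_DYNAMIC host
 * route through the new next hop. */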
1636 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1637 const struct in6_addr *saddr,
1638 struct neighbour *neigh, u8 *lladdr, int on_link)
1639 {
1640 struct rt6_info *rt, *nrt = NULL;
1641 struct netevent_redirect netevent;
1642 struct net *net = dev_net(neigh->dev);
1643
1644 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1645
1646 if (rt == net->ipv6.ip6_null_entry) {
1647 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1648 goto out;
1649 }
1650
1651 /*
1652 * We have finally decided to accept it.
1653 */
1654
1655 neigh_update(neigh, lladdr, NUD_STALE,
1656 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1657 NEIGH_UPDATE_F_OVERRIDE|
1658 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1659 NEIGH_UPDATE_F_ISROUTER))
1660 );
1661
1662 /*
1663 * Redirect received -> path was valid.
1664 * Look, redirects are sent only in response to data packets,
1665 * so that this nexthop apparently is reachable. --ANK
1666 */
1667 dst_confirm(&rt->dst);
1668
1669 /* Duplicate redirect: silently ignore. */
1670 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1671 goto out;
1672
1673 nrt = ip6_rt_copy(rt, dest);
1674 if (!nrt)
1675 goto out;
1676
1677 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1678 if (on_link)
1679 nrt->rt6i_flags &= ~RTF_GATEWAY;
1680
1681 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1682 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1683
1684 if (ip6_ins_rt(nrt))
1685 goto out;
1686
1687 netevent.old = &rt->dst;
1688 netevent.new = &nrt->dst;
1689 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1690
1691 if (rt->rt6i_flags & RTF_CACHE) {
1692 ip6_del_rt(rt);
1693 return;
1694 }
1695
1696 out:
1697 dst_release(&rt->dst);
1698 }
1699
1700 /*
1701 * Handle ICMP "packet too big" messages
1702 * i.e. Path MTU discovery
1703 */
1704
1705 static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1706 struct net *net, u32 pmtu, int ifindex)
1707 {
1708 struct rt6_info *rt, *nrt;
1709 int allfrag = 0;
1710 again:
1711 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1712 if (!rt)
1713 return;
1714
1715 if (rt6_check_expired(rt)) {
1716 ip6_del_rt(rt);
1717 goto again;
1718 }
1719
1720 if (pmtu >= dst_mtu(&rt->dst))
1721 goto out;
1722
1723 if (pmtu < IPV6_MIN_MTU) {
1724 /*
1725 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1726 * MTU (1280) and a fragment header should always be included
1727 * after a node receiving Too Big message reporting PMTU is
1728 * less than the IPv6 Minimum Link MTU.
1729 */
1730 pmtu = IPV6_MIN_MTU;
1731 allfrag = 1;
1732 }
1733
1734 /* New mtu received -> path was valid.
1735 They are sent only in response to data packets,
1736 so that this nexthop apparently is reachable. --ANK
1737 */
1738 dst_confirm(&rt->dst);
1739
1740 /* Host route. If it is static, it would be better
1741       not to override it but to add a new one, so that
1742       when the cache entry expires the old pmtu
1743       is restored automatically.
1744 */
1745 if (rt->rt6i_flags & RTF_CACHE) {
1746 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1747 if (allfrag) {
1748 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1749 features |= RTAX_FEATURE_ALLFRAG;
1750 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1751 }
1752 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1753 rt->rt6i_flags |= RTF_MODIFIED;
1754 goto out;
1755 }
1756
1757 /* Network route.
1758 Two cases are possible:
1759       1. It is a connected route. Action: COW.
1760       2. It is a gatewayed or NONEXTHOP route. Action: clone it.
1761 */
1762 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1763 nrt = rt6_alloc_cow(rt, daddr, saddr);
1764 else
1765 nrt = rt6_alloc_clone(rt, daddr);
1766
1767 if (nrt) {
1768 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1769 if (allfrag) {
1770 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1771 features |= RTAX_FEATURE_ALLFRAG;
1772 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1773 }
1774
1775            /* According to RFC 1981, detecting a PMTU increase shouldn't
1776             * happen within 5 mins; the recommended timer is 10 mins.
1777             * Here this route's expiration time is set to ip6_rt_mtu_expires,
1778             * which is 10 mins. After 10 mins the decreased pmtu expires
1779             * and PMTU increase detection happens automatically.
1780 */
1781 rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1782 nrt->rt6i_flags |= RTF_DYNAMIC;
1783 ip6_ins_rt(nrt);
1784 }
1785 out:
1786 dst_release(&rt->dst);
1787 }
1788
1789 void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1790 struct net_device *dev, u32 pmtu)
1791 {
1792 struct net *net = dev_net(dev);
1793
1794 /*
1795 * RFC 1981 states that a node "MUST reduce the size of the packets it
1796 * is sending along the path" that caused the Packet Too Big message.
1797 * Since it's not possible in the general case to determine which
1798 * interface was used to send the original packet, we update the MTU
1799 * on the interface that will be used to send future packets. We also
1800 * update the MTU on the interface that received the Packet Too Big in
1801 * case the original packet was forced out that interface with
1802 * SO_BINDTODEVICE or similar. This is the next best thing to the
1803 * correct behaviour, which would be to update the MTU on all
1804 * interfaces.
1805 */
1806 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1807 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1808 }
1809
1810 /*
1811 * Misc support functions
1812 */
1813
1814 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1815 const struct in6_addr *dest)
1816 {
1817 struct net *net = dev_net(ort->dst.dev);
1818 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1819 ort->dst.dev, 0);
1820
1821 if (rt) {
1822 rt->dst.input = ort->dst.input;
1823 rt->dst.output = ort->dst.output;
1824 rt->dst.flags |= DST_HOST;
1825
1826 rt->rt6i_dst.addr = *dest;
1827 rt->rt6i_dst.plen = 128;
1828 dst_copy_metrics(&rt->dst, &ort->dst);
1829 rt->dst.error = ort->dst.error;
1830 rt->rt6i_idev = ort->rt6i_idev;
1831 if (rt->rt6i_idev)
1832 in6_dev_hold(rt->rt6i_idev);
1833 rt->dst.lastuse = jiffies;
1834
1835 rt->rt6i_gateway = ort->rt6i_gateway;
1836 rt->rt6i_flags = ort->rt6i_flags;
1837 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1838 (RTF_DEFAULT | RTF_ADDRCONF))
1839 rt6_set_from(rt, ort);
1840 else
1841 rt6_clean_expires(rt);
1842 rt->rt6i_metric = 0;
1843
1844 #ifdef CONFIG_IPV6_SUBTREES
1845 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1846 #endif
1847 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1848 rt->rt6i_table = ort->rt6i_table;
1849 }
1850 return rt;
1851 }
1852
1853 #ifdef CONFIG_IPV6_ROUTE_INFO
1854 static struct rt6_info *rt6_get_route_info(struct net *net,
1855 const struct in6_addr *prefix, int prefixlen,
1856 const struct in6_addr *gwaddr, int ifindex)
1857 {
1858 struct fib6_node *fn;
1859 struct rt6_info *rt = NULL;
1860 struct fib6_table *table;
1861
1862 table = fib6_get_table(net, RT6_TABLE_INFO);
1863 if (!table)
1864 return NULL;
1865
1866 write_lock_bh(&table->tb6_lock);
1867    fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1868 if (!fn)
1869 goto out;
1870
1871 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1872 if (rt->dst.dev->ifindex != ifindex)
1873 continue;
1874 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1875 continue;
1876 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1877 continue;
1878 dst_hold(&rt->dst);
1879 break;
1880 }
1881 out:
1882 write_unlock_bh(&table->tb6_lock);
1883 return rt;
1884 }
1885
1886 static struct rt6_info *rt6_add_route_info(struct net *net,
1887 const struct in6_addr *prefix, int prefixlen,
1888 const struct in6_addr *gwaddr, int ifindex,
1889 unsigned int pref)
1890 {
1891 struct fib6_config cfg = {
1892 .fc_table = RT6_TABLE_INFO,
1893 .fc_metric = IP6_RT_PRIO_USER,
1894 .fc_ifindex = ifindex,
1895 .fc_dst_len = prefixlen,
1896 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1897 RTF_UP | RTF_PREF(pref),
1898 .fc_nlinfo.pid = 0,
1899 .fc_nlinfo.nlh = NULL,
1900 .fc_nlinfo.nl_net = net,
1901 };
1902
1903 cfg.fc_dst = *prefix;
1904 cfg.fc_gateway = *gwaddr;
1905
1906 /* We should treat it as a default route if prefix length is 0. */
1907 if (!prefixlen)
1908 cfg.fc_flags |= RTF_DEFAULT;
1909
1910 ip6_route_add(&cfg);
1911
1912 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1913 }
1914 #endif
1915
1916 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1917 {
1918 struct rt6_info *rt;
1919 struct fib6_table *table;
1920
1921 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1922 if (!table)
1923 return NULL;
1924
1925 write_lock_bh(&table->tb6_lock);
1926 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1927 if (dev == rt->dst.dev &&
1928 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1929 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1930 break;
1931 }
1932 if (rt)
1933 dst_hold(&rt->dst);
1934 write_unlock_bh(&table->tb6_lock);
1935 return rt;
1936 }
1937
1938 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1939 struct net_device *dev,
1940 unsigned int pref)
1941 {
1942 struct fib6_config cfg = {
1943 .fc_table = RT6_TABLE_DFLT,
1944 .fc_metric = IP6_RT_PRIO_USER,
1945 .fc_ifindex = dev->ifindex,
1946 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1947 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1948 .fc_nlinfo.pid = 0,
1949 .fc_nlinfo.nlh = NULL,
1950 .fc_nlinfo.nl_net = dev_net(dev),
1951 };
1952
1953 cfg.fc_gateway = *gwaddr;
1954
1955 ip6_route_add(&cfg);
1956
1957 return rt6_get_dflt_router(gwaddr, dev);
1958 }
1959
1960 void rt6_purge_dflt_routers(struct net *net)
1961 {
1962 struct rt6_info *rt;
1963 struct fib6_table *table;
1964
1965 /* NOTE: Keep consistent with rt6_get_dflt_router */
1966 table = fib6_get_table(net, RT6_TABLE_DFLT);
1967 if (!table)
1968 return;
1969
1970 restart:
1971 read_lock_bh(&table->tb6_lock);
1972 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1973 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1974 dst_hold(&rt->dst);
1975 read_unlock_bh(&table->tb6_lock);
1976 ip6_del_rt(rt);
1977 goto restart;
1978 }
1979 }
1980 read_unlock_bh(&table->tb6_lock);
1981 }
1982
1983 static void rtmsg_to_fib6_config(struct net *net,
1984 struct in6_rtmsg *rtmsg,
1985 struct fib6_config *cfg)
1986 {
1987 memset(cfg, 0, sizeof(*cfg));
1988
1989 cfg->fc_table = RT6_TABLE_MAIN;
1990 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1991 cfg->fc_metric = rtmsg->rtmsg_metric;
1992 cfg->fc_expires = rtmsg->rtmsg_info;
1993 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1994 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1995 cfg->fc_flags = rtmsg->rtmsg_flags;
1996
1997 cfg->fc_nlinfo.nl_net = net;
1998
1999 cfg->fc_dst = rtmsg->rtmsg_dst;
2000 cfg->fc_src = rtmsg->rtmsg_src;
2001 cfg->fc_gateway = rtmsg->rtmsg_gateway;
2002 }
2003
2004 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2005 {
2006 struct fib6_config cfg;
2007 struct in6_rtmsg rtmsg;
2008 int err;
2009
2010 switch(cmd) {
2011 case SIOCADDRT: /* Add a route */
2012 case SIOCDELRT: /* Delete a route */
2013 if (!capable(CAP_NET_ADMIN))
2014 return -EPERM;
2015 err = copy_from_user(&rtmsg, arg,
2016 sizeof(struct in6_rtmsg));
2017 if (err)
2018 return -EFAULT;
2019
2020 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2021
2022 rtnl_lock();
2023 switch (cmd) {
2024 case SIOCADDRT:
2025 err = ip6_route_add(&cfg);
2026 break;
2027 case SIOCDELRT:
2028 err = ip6_route_del(&cfg);
2029 break;
2030 default:
2031 err = -EINVAL;
2032 }
2033 rtnl_unlock();
2034
2035 return err;
2036 }
2037
2038 return -EINVAL;
2039 }
2040
2041 /*
2042 * Drop the packet on the floor
2043 */
2044
2045 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2046 {
2047 int type;
2048 struct dst_entry *dst = skb_dst(skb);
2049 switch (ipstats_mib_noroutes) {
2050 case IPSTATS_MIB_INNOROUTES:
2051 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2052 if (type == IPV6_ADDR_ANY) {
2053 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2054 IPSTATS_MIB_INADDRERRORS);
2055 break;
2056 }
2057 /* FALLTHROUGH */
2058 case IPSTATS_MIB_OUTNOROUTES:
2059 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2060 ipstats_mib_noroutes);
2061 break;
2062 }
2063 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2064 kfree_skb(skb);
2065 return 0;
2066 }
2067
2068 static int ip6_pkt_discard(struct sk_buff *skb)
2069 {
2070 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2071 }
2072
2073 static int ip6_pkt_discard_out(struct sk_buff *skb)
2074 {
2075 skb->dev = skb_dst(skb)->dev;
2076 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2077 }
2078
2079 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2080
2081 static int ip6_pkt_prohibit(struct sk_buff *skb)
2082 {
2083 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2084 }
2085
2086 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2087 {
2088 skb->dev = skb_dst(skb)->dev;
2089 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2090 }
2091
2092 #endif
2093
2094 /*
2095 * Allocate a dst for local (unicast / anycast) address.
2096 */
2097
2098 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2099 const struct in6_addr *addr,
2100 bool anycast)
2101 {
2102 struct net *net = dev_net(idev->dev);
2103 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2104 net->loopback_dev, 0);
2105 int err;
2106
2107 if (!rt) {
2108 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2109 return ERR_PTR(-ENOMEM);
2110 }
2111
2112 in6_dev_hold(idev);
2113
2114 rt->dst.flags |= DST_HOST;
2115 rt->dst.input = ip6_input;
2116 rt->dst.output = ip6_output;
2117 rt->rt6i_idev = idev;
2118 rt->dst.obsolete = -1;
2119
2120 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2121 if (anycast)
2122 rt->rt6i_flags |= RTF_ANYCAST;
2123 else
2124 rt->rt6i_flags |= RTF_LOCAL;
2125 err = rt6_bind_neighbour(rt, rt->dst.dev);
2126 if (err) {
2127 dst_free(&rt->dst);
2128 return ERR_PTR(err);
2129 }
2130
2131 rt->rt6i_dst.addr = *addr;
2132 rt->rt6i_dst.plen = 128;
2133 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2134
2135 atomic_set(&rt->dst.__refcnt, 1);
2136
2137 return rt;
2138 }
2139
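/* Pick the source address for replies about this route: the route's
 * preferred source if one is set, otherwise fall back to normal
 * source address selection on the route's device.
 */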
2140 int ip6_route_get_saddr(struct net *net,
2141 struct rt6_info *rt,
2142 const struct in6_addr *daddr,
2143 unsigned int prefs,
2144 struct in6_addr *saddr)
2145 {
2146 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2147 int err = 0;
2148 if (rt->rt6i_prefsrc.plen)
2149 *saddr = rt->rt6i_prefsrc.addr;
2150 else
2151 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2152 daddr, prefs, saddr);
2153 return err;
2154 }
2155
2156 /* Remove a deleted address from prefsrc entries */
2157 struct arg_dev_net_ip {
2158 struct net_device *dev;
2159 struct net *net;
2160 struct in6_addr *addr;
2161 };
2162
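/* fib6_clean_all() callback: clear the preferred-source entry on
 * routes that reference the removed address (optionally restricted
 * to a single device). Always returns 0 so no route is deleted.
 */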
2163 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2164 {
2165 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2166 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2167 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2168
2169 if (((void *)rt->dst.dev == dev || !dev) &&
2170 rt != net->ipv6.ip6_null_entry &&
2171 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2172 /* remove prefsrc entry */
2173 rt->rt6i_prefsrc.plen = 0;
2174 }
2175 return 0;
2176 }
2177
2178 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2179 {
2180 struct net *net = dev_net(ifp->idev->dev);
2181 struct arg_dev_net_ip adni = {
2182 .dev = ifp->idev->dev,
2183 .net = net,
2184 .addr = &ifp->addr,
2185 };
2186 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2187 }
2188
2189 struct arg_dev_net {
2190 struct net_device *dev;
2191 struct net *net;
2192 };
2193
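/* fib6_clean_all()/icmp6_clean_all() callback: a non-zero return asks
 * the walker to drop the route, so this removes every route bound to
 * the departing device (or all routes when dev is NULL), except the
 * per-namespace null entry.
 */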
2194 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2195 {
2196 const struct arg_dev_net *adn = arg;
2197 const struct net_device *dev = adn->dev;
2198
2199 if ((rt->dst.dev == dev || !dev) &&
2200 rt != adn->net->ipv6.ip6_null_entry)
2201 return -1;
2202
2203 return 0;
2204 }
2205
2206 void rt6_ifdown(struct net *net, struct net_device *dev)
2207 {
2208 struct arg_dev_net adn = {
2209 .dev = dev,
2210 .net = net,
2211 };
2212
2213 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2214 icmp6_clean_all(fib6_ifdown, &adn);
2215 }
2216
2217 struct rt6_mtu_change_arg {
2218 struct net_device *dev;
2219 unsigned int mtu;
2220 };
2221
2222 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2223 {
2224 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2225 struct inet6_dev *idev;
2226
2227 /* In IPv6, PMTU discovery is not optional,
2228 so the RTAX_MTU lock cannot disable it.
2229 We still use this lock to block changes
2230 caused by addrconf/ndisc.
2231 */
2232
2233 idev = __in6_dev_get(arg->dev);
2234 if (!idev)
2235 return 0;
2236
2237 /* There is no way to discover an administrative MTU increase
2238 via IPv6 PMTU discovery, so such an increase must be applied here.
2239 Since RFC 1981 does not cover administrative MTU increases,
2240 updating the PMTU on increase is a MUST (e.g. jumbo frames).
2241 */
2242 /*
2243 If the new MTU is less than the route PMTU, the new MTU is the
2244 lowest MTU in the path; update the route PMTU to reflect the
2245 decrease. If the new MTU is greater than the route PMTU, and the
2246 old MTU was the lowest MTU in the path, update the route PMTU
2247 to reflect the increase. In that case, if another node on the
2248 path still has the lowest MTU, a Packet Too Big message will
2249 trigger PMTU discovery again.
2250 */
2251 if (rt->dst.dev == arg->dev &&
2252 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2253 (dst_mtu(&rt->dst) >= arg->mtu ||
2254 (dst_mtu(&rt->dst) < arg->mtu &&
2255 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2256 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2257 }
2258 return 0;
2259 }
2260
2261 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2262 {
2263 struct rt6_mtu_change_arg arg = {
2264 .dev = dev,
2265 .mtu = mtu,
2266 };
2267
2268 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2269 }
2270
2271 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2272 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2273 [RTA_OIF] = { .type = NLA_U32 },
2274 [RTA_IIF] = { .type = NLA_U32 },
2275 [RTA_PRIORITY] = { .type = NLA_U32 },
2276 [RTA_METRICS] = { .type = NLA_NESTED },
2277 };
2278
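/* Translate an RTM_NEWROUTE/RTM_DELROUTE netlink request into a
 * fib6_config, validating attribute lengths against rtm_ipv6_policy.
 */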
2279 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2280 struct fib6_config *cfg)
2281 {
2282 struct rtmsg *rtm;
2283 struct nlattr *tb[RTA_MAX+1];
2284 int err;
2285
2286 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2287 if (err < 0)
2288 goto errout;
2289
2290 err = -EINVAL;
2291 rtm = nlmsg_data(nlh);
2292 memset(cfg, 0, sizeof(*cfg));
2293
2294 cfg->fc_table = rtm->rtm_table;
2295 cfg->fc_dst_len = rtm->rtm_dst_len;
2296 cfg->fc_src_len = rtm->rtm_src_len;
2297 cfg->fc_flags = RTF_UP;
2298 cfg->fc_protocol = rtm->rtm_protocol;
2299
2300 if (rtm->rtm_type == RTN_UNREACHABLE)
2301 cfg->fc_flags |= RTF_REJECT;
2302
2303 if (rtm->rtm_type == RTN_LOCAL)
2304 cfg->fc_flags |= RTF_LOCAL;
2305
2306 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2307 cfg->fc_nlinfo.nlh = nlh;
2308 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2309
2310 if (tb[RTA_GATEWAY]) {
2311 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2312 cfg->fc_flags |= RTF_GATEWAY;
2313 }
2314
2315 if (tb[RTA_DST]) {
2316 int plen = (rtm->rtm_dst_len + 7) >> 3;
2317
2318 if (nla_len(tb[RTA_DST]) < plen)
2319 goto errout;
2320
2321 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2322 }
2323
2324 if (tb[RTA_SRC]) {
2325 int plen = (rtm->rtm_src_len + 7) >> 3;
2326
2327 if (nla_len(tb[RTA_SRC]) < plen)
2328 goto errout;
2329
2330 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2331 }
2332
2333 if (tb[RTA_PREFSRC])
2334 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2335
2336 if (tb[RTA_OIF])
2337 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2338
2339 if (tb[RTA_PRIORITY])
2340 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2341
2342 if (tb[RTA_METRICS]) {
2343 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2344 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2345 }
2346
2347 if (tb[RTA_TABLE])
2348 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2349
2350 err = 0;
2351 errout:
2352 return err;
2353 }
2354
2355 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2356 {
2357 struct fib6_config cfg;
2358 int err;
2359
2360 err = rtm_to_fib6_config(skb, nlh, &cfg);
2361 if (err < 0)
2362 return err;
2363
2364 return ip6_route_del(&cfg);
2365 }
2366
2367 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2368 {
2369 struct fib6_config cfg;
2370 int err;
2371
2372 err = rtm_to_fib6_config(skb, nlh, &cfg);
2373 if (err < 0)
2374 return err;
2375
2376 return ip6_route_add(&cfg);
2377 }
2378
2379 static inline size_t rt6_nlmsg_size(void)
2380 {
2381 return NLMSG_ALIGN(sizeof(struct rtmsg))
2382 + nla_total_size(16) /* RTA_SRC */
2383 + nla_total_size(16) /* RTA_DST */
2384 + nla_total_size(16) /* RTA_GATEWAY */
2385 + nla_total_size(16) /* RTA_PREFSRC */
2386 + nla_total_size(4) /* RTA_TABLE */
2387 + nla_total_size(4) /* RTA_IIF */
2388 + nla_total_size(4) /* RTA_OIF */
2389 + nla_total_size(4) /* RTA_PRIORITY */
2390 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2391 + nla_total_size(sizeof(struct rta_cacheinfo));
2392 }
2393
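/* Build one rtnetlink route message (rtmsg plus attributes) for @rt
 * into @skb. Returns a positive length on success, 1 when a prefix-only
 * dump skips a non-prefix route, or -EMSGSIZE if the skb is full.
 */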
2394 static int rt6_fill_node(struct net *net,
2395 struct sk_buff *skb, struct rt6_info *rt,
2396 struct in6_addr *dst, struct in6_addr *src,
2397 int iif, int type, u32 pid, u32 seq,
2398 int prefix, int nowait, unsigned int flags)
2399 {
2400 const struct inet_peer *peer;
2401 struct rtmsg *rtm;
2402 struct nlmsghdr *nlh;
2403 long expires;
2404 u32 table;
2405 struct neighbour *n;
2406 u32 ts, tsage;
2407
2408 if (prefix) { /* user wants prefix routes only */
2409 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2410 /* success since this is not a prefix route */
2411 return 1;
2412 }
2413 }
2414
2415 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2416 if (!nlh)
2417 return -EMSGSIZE;
2418
2419 rtm = nlmsg_data(nlh);
2420 rtm->rtm_family = AF_INET6;
2421 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2422 rtm->rtm_src_len = rt->rt6i_src.plen;
2423 rtm->rtm_tos = 0;
2424 if (rt->rt6i_table)
2425 table = rt->rt6i_table->tb6_id;
2426 else
2427 table = RT6_TABLE_UNSPEC;
2428 rtm->rtm_table = table;
2429 if (nla_put_u32(skb, RTA_TABLE, table))
2430 goto nla_put_failure;
2431 if (rt->rt6i_flags & RTF_REJECT)
2432 rtm->rtm_type = RTN_UNREACHABLE;
2433 else if (rt->rt6i_flags & RTF_LOCAL)
2434 rtm->rtm_type = RTN_LOCAL;
2435 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2436 rtm->rtm_type = RTN_LOCAL;
2437 else
2438 rtm->rtm_type = RTN_UNICAST;
2439 rtm->rtm_flags = 0;
2440 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2441 rtm->rtm_protocol = rt->rt6i_protocol;
2442 if (rt->rt6i_flags & RTF_DYNAMIC)
2443 rtm->rtm_protocol = RTPROT_REDIRECT;
2444 else if (rt->rt6i_flags & RTF_ADDRCONF)
2445 rtm->rtm_protocol = RTPROT_KERNEL;
2446 else if (rt->rt6i_flags & RTF_DEFAULT)
2447 rtm->rtm_protocol = RTPROT_RA;
2448
2449 if (rt->rt6i_flags & RTF_CACHE)
2450 rtm->rtm_flags |= RTM_F_CLONED;
2451
2452 if (dst) {
2453 if (nla_put(skb, RTA_DST, 16, dst))
2454 goto nla_put_failure;
2455 rtm->rtm_dst_len = 128;
2456 } else if (rtm->rtm_dst_len)
2457 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2458 goto nla_put_failure;
2459 #ifdef CONFIG_IPV6_SUBTREES
2460 if (src) {
2461 if (nla_put(skb, RTA_SRC, 16, src))
2462 goto nla_put_failure;
2463 rtm->rtm_src_len = 128;
2464 } else if (rtm->rtm_src_len &&
2465 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2466 goto nla_put_failure;
2467 #endif
2468 if (iif) {
2469 #ifdef CONFIG_IPV6_MROUTE
2470 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2471 int err = ip6mr_get_route(net, skb, rtm, nowait);
2472 if (err <= 0) {
2473 if (!nowait) {
2474 if (err == 0)
2475 return 0;
2476 goto nla_put_failure;
2477 } else {
2478 if (err == -EMSGSIZE)
2479 goto nla_put_failure;
2480 }
2481 }
2482 } else
2483 #endif
2484 if (nla_put_u32(skb, RTA_IIF, iif))
2485 goto nla_put_failure;
2486 } else if (dst) {
2487 struct in6_addr saddr_buf;
2488 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2489 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2490 goto nla_put_failure;
2491 }
2492
2493 if (rt->rt6i_prefsrc.plen) {
2494 struct in6_addr saddr_buf;
2495 saddr_buf = rt->rt6i_prefsrc.addr;
2496 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2497 goto nla_put_failure;
2498 }
2499
2500 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2501 goto nla_put_failure;
2502
2503 rcu_read_lock();
2504 n = dst_get_neighbour_noref(&rt->dst);
2505 if (n) {
2506 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2507 rcu_read_unlock();
2508 goto nla_put_failure;
2509 }
2510 }
2511 rcu_read_unlock();
2512
2513 if (rt->dst.dev &&
2514 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2515 goto nla_put_failure;
2516 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2517 goto nla_put_failure;
2518 if (!(rt->rt6i_flags & RTF_EXPIRES))
2519 expires = 0;
2520 else if (rt->dst.expires - jiffies < INT_MAX)
2521 expires = rt->dst.expires - jiffies;
2522 else
2523 expires = INT_MAX;
2524
2525 peer = rt->rt6i_peer;
2526 ts = tsage = 0;
2527 if (peer && peer->tcp_ts_stamp) {
2528 ts = peer->tcp_ts;
2529 tsage = get_seconds() - peer->tcp_ts_stamp;
2530 }
2531
2532 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
2533 expires, rt->dst.error) < 0)
2534 goto nla_put_failure;
2535
2536 return nlmsg_end(skb, nlh);
2537
2538 nla_put_failure:
2539 nlmsg_cancel(skb, nlh);
2540 return -EMSGSIZE;
2541 }
2542
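/* Per-route dump callback used for route dumps; honours RTM_F_PREFIX
 * in the request so userspace can ask for prefix routes only.
 */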
2543 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2544 {
2545 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2546 int prefix;
2547
2548 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2549 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2550 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2551 } else
2552 prefix = 0;
2553
2554 return rt6_fill_node(arg->net,
2555 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2556 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2557 prefix, 0, NLM_F_MULTI);
2558 }
2559
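/* RTM_GETROUTE handler for a single lookup: build a flow from the
 * request attributes, resolve it through the input path (when an
 * input interface is given) or the output path, and reply with one
 * rt6_fill_node() message.
 */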
2560 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2561 {
2562 struct net *net = sock_net(in_skb->sk);
2563 struct nlattr *tb[RTA_MAX+1];
2564 struct rt6_info *rt;
2565 struct sk_buff *skb;
2566 struct rtmsg *rtm;
2567 struct flowi6 fl6;
2568 int err, iif = 0, oif = 0;
2569
2570 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2571 if (err < 0)
2572 goto errout;
2573
2574 err = -EINVAL;
2575 memset(&fl6, 0, sizeof(fl6));
2576
2577 if (tb[RTA_SRC]) {
2578 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2579 goto errout;
2580
2581 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2582 }
2583
2584 if (tb[RTA_DST]) {
2585 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2586 goto errout;
2587
2588 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2589 }
2590
2591 if (tb[RTA_IIF])
2592 iif = nla_get_u32(tb[RTA_IIF]);
2593
2594 if (tb[RTA_OIF])
2595 oif = nla_get_u32(tb[RTA_OIF]);
2596
2597 if (iif) {
2598 struct net_device *dev;
2599 int flags = 0;
2600
2601 dev = __dev_get_by_index(net, iif);
2602 if (!dev) {
2603 err = -ENODEV;
2604 goto errout;
2605 }
2606
2607 fl6.flowi6_iif = iif;
2608
2609 if (!ipv6_addr_any(&fl6.saddr))
2610 flags |= RT6_LOOKUP_F_HAS_SADDR;
2611
2612 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2613 flags);
2614 } else {
2615 fl6.flowi6_oif = oif;
2616
2617 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2618 }
2619
2620 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2621 if (!skb) {
2622 dst_release(&rt->dst);
2623 err = -ENOBUFS;
2624 goto errout;
2625 }
2626
2627 /* Reserve room for dummy headers; this skb can pass
2628 through a good chunk of the routing engine.
2629 */
2630 skb_reset_mac_header(skb);
2631 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2632
2633 skb_dst_set(skb, &rt->dst);
2634
2635 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2636 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2637 nlh->nlmsg_seq, 0, 0, 0);
2638 if (err < 0) {
2639 kfree_skb(skb);
2640 goto errout;
2641 }
2642
2643 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2644 errout:
2645 return err;
2646 }
2647
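/* Notify RTNLGRP_IPV6_ROUTE listeners about a route change; on
 * allocation or fill failure, record the error on the group instead.
 */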
2648 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2649 {
2650 struct sk_buff *skb;
2651 struct net *net = info->nl_net;
2652 u32 seq;
2653 int err;
2654
2655 err = -ENOBUFS;
2656 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2657
2658 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2659 if (!skb)
2660 goto errout;
2661
2662 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2663 event, info->pid, seq, 0, 0, 0);
2664 if (err < 0) {
2665 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2666 WARN_ON(err == -EMSGSIZE);
2667 kfree_skb(skb);
2668 goto errout;
2669 }
2670 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2671 info->nlh, gfp_any());
2672 return;
2673 errout:
2674 if (err < 0)
2675 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2676 }
2677
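/* Netdevice notifier: once the per-namespace loopback device is
 * registered, point the special null (and, with multiple tables,
 * prohibit and blackhole) route entries at it.
 */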
2678 static int ip6_route_dev_notify(struct notifier_block *this,
2679 unsigned long event, void *data)
2680 {
2681 struct net_device *dev = (struct net_device *)data;
2682 struct net *net = dev_net(dev);
2683
2684 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2685 net->ipv6.ip6_null_entry->dst.dev = dev;
2686 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2687 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2688 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2689 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2690 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2691 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2692 #endif
2693 }
2694
2695 return NOTIFY_OK;
2696 }
2697
2698 /*
2699 * /proc
2700 */
2701
2702 #ifdef CONFIG_PROC_FS
2703
2704 struct rt6_proc_arg
2705 {
2706 char *buffer;
2707 int offset;
2708 int length;
2709 int skip;
2710 int len;
2711 };
2712
2713 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2714 {
2715 struct seq_file *m = p_arg;
2716 struct neighbour *n;
2717
2718 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2719
2720 #ifdef CONFIG_IPV6_SUBTREES
2721 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2722 #else
2723 seq_puts(m, "00000000000000000000000000000000 00 ");
2724 #endif
2725 rcu_read_lock();
2726 n = dst_get_neighbour_noref(&rt->dst);
2727 if (n) {
2728 seq_printf(m, "%pi6", n->primary_key);
2729 } else {
2730 seq_puts(m, "00000000000000000000000000000000");
2731 }
2732 rcu_read_unlock();
2733 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2734 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2735 rt->dst.__use, rt->rt6i_flags,
2736 rt->dst.dev ? rt->dst.dev->name : "");
2737 return 0;
2738 }
2739
2740 static int ipv6_route_show(struct seq_file *m, void *v)
2741 {
2742 struct net *net = (struct net *)m->private;
2743 fib6_clean_all_ro(net, rt6_info_route, 0, m);
2744 return 0;
2745 }
2746
2747 static int ipv6_route_open(struct inode *inode, struct file *file)
2748 {
2749 return single_open_net(inode, file, ipv6_route_show);
2750 }
2751
2752 static const struct file_operations ipv6_route_proc_fops = {
2753 .owner = THIS_MODULE,
2754 .open = ipv6_route_open,
2755 .read = seq_read,
2756 .llseek = seq_lseek,
2757 .release = single_release_net,
2758 };
2759
2760 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2761 {
2762 struct net *net = (struct net *)seq->private;
2763 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2764 net->ipv6.rt6_stats->fib_nodes,
2765 net->ipv6.rt6_stats->fib_route_nodes,
2766 net->ipv6.rt6_stats->fib_rt_alloc,
2767 net->ipv6.rt6_stats->fib_rt_entries,
2768 net->ipv6.rt6_stats->fib_rt_cache,
2769 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2770 net->ipv6.rt6_stats->fib_discarded_routes);
2771
2772 return 0;
2773 }
2774
2775 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2776 {
2777 return single_open_net(inode, file, rt6_stats_seq_show);
2778 }
2779
2780 static const struct file_operations rt6_stats_seq_fops = {
2781 .owner = THIS_MODULE,
2782 .open = rt6_stats_seq_open,
2783 .read = seq_read,
2784 .llseek = seq_lseek,
2785 .release = single_release_net,
2786 };
2787 #endif /* CONFIG_PROC_FS */
2788
2789 #ifdef CONFIG_SYSCTL
2790
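/* Handler for the write-only "flush" sysctl: a write triggers an
 * immediate garbage-collection pass over the IPv6 routing tables.
 */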
2791 static
2792 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2793 void __user *buffer, size_t *lenp, loff_t *ppos)
2794 {
2795 struct net *net;
2796 int delay;
2797 if (!write)
2798 return -EINVAL;
2799
2800 net = (struct net *)ctl->extra1;
2801 delay = net->ipv6.sysctl.flush_delay;
2802 proc_dointvec(ctl, write, buffer, lenp, ppos);
2803 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2804 return 0;
2805 }
2806
2807 ctl_table ipv6_route_table_template[] = {
2808 {
2809 .procname = "flush",
2810 .data = &init_net.ipv6.sysctl.flush_delay,
2811 .maxlen = sizeof(int),
2812 .mode = 0200,
2813 .proc_handler = ipv6_sysctl_rtcache_flush
2814 },
2815 {
2816 .procname = "gc_thresh",
2817 .data = &ip6_dst_ops_template.gc_thresh,
2818 .maxlen = sizeof(int),
2819 .mode = 0644,
2820 .proc_handler = proc_dointvec,
2821 },
2822 {
2823 .procname = "max_size",
2824 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2825 .maxlen = sizeof(int),
2826 .mode = 0644,
2827 .proc_handler = proc_dointvec,
2828 },
2829 {
2830 .procname = "gc_min_interval",
2831 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2832 .maxlen = sizeof(int),
2833 .mode = 0644,
2834 .proc_handler = proc_dointvec_jiffies,
2835 },
2836 {
2837 .procname = "gc_timeout",
2838 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2839 .maxlen = sizeof(int),
2840 .mode = 0644,
2841 .proc_handler = proc_dointvec_jiffies,
2842 },
2843 {
2844 .procname = "gc_interval",
2845 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2846 .maxlen = sizeof(int),
2847 .mode = 0644,
2848 .proc_handler = proc_dointvec_jiffies,
2849 },
2850 {
2851 .procname = "gc_elasticity",
2852 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2853 .maxlen = sizeof(int),
2854 .mode = 0644,
2855 .proc_handler = proc_dointvec,
2856 },
2857 {
2858 .procname = "mtu_expires",
2859 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2860 .maxlen = sizeof(int),
2861 .mode = 0644,
2862 .proc_handler = proc_dointvec_jiffies,
2863 },
2864 {
2865 .procname = "min_adv_mss",
2866 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2867 .maxlen = sizeof(int),
2868 .mode = 0644,
2869 .proc_handler = proc_dointvec,
2870 },
2871 {
2872 .procname = "gc_min_interval_ms",
2873 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2874 .maxlen = sizeof(int),
2875 .mode = 0644,
2876 .proc_handler = proc_dointvec_ms_jiffies,
2877 },
2878 { }
2879 };
2880
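/* Clone the sysctl template for a namespace and point each entry's
 * .data at that namespace's own fields; the table ends up under
 * /proc/sys/net/ipv6/route/ when the caller registers it.
 */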
2881 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2882 {
2883 struct ctl_table *table;
2884
2885 table = kmemdup(ipv6_route_table_template,
2886 sizeof(ipv6_route_table_template),
2887 GFP_KERNEL);
2888
2889 if (table) {
2890 table[0].data = &net->ipv6.sysctl.flush_delay;
2891 table[0].extra1 = net;
2892 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2893 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2894 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2895 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2896 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2897 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2898 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2899 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2900 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2901 }
2902
2903 return table;
2904 }
2905 #endif
2906
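/* Per-namespace initialisation: copy the dst_ops template, allocate the
 * null (and optional prohibit/blackhole) route entries, set default
 * sysctl values and create the /proc files.
 */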
2907 static int __net_init ip6_route_net_init(struct net *net)
2908 {
2909 int ret = -ENOMEM;
2910
2911 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2912 sizeof(net->ipv6.ip6_dst_ops));
2913
2914 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2915 goto out_ip6_dst_ops;
2916
2917 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2918 sizeof(*net->ipv6.ip6_null_entry),
2919 GFP_KERNEL);
2920 if (!net->ipv6.ip6_null_entry)
2921 goto out_ip6_dst_entries;
2922 net->ipv6.ip6_null_entry->dst.path =
2923 (struct dst_entry *)net->ipv6.ip6_null_entry;
2924 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2925 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2926 ip6_template_metrics, true);
2927
2928 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2929 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2930 sizeof(*net->ipv6.ip6_prohibit_entry),
2931 GFP_KERNEL);
2932 if (!net->ipv6.ip6_prohibit_entry)
2933 goto out_ip6_null_entry;
2934 net->ipv6.ip6_prohibit_entry->dst.path =
2935 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2936 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2937 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2938 ip6_template_metrics, true);
2939
2940 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2941 sizeof(*net->ipv6.ip6_blk_hole_entry),
2942 GFP_KERNEL);
2943 if (!net->ipv6.ip6_blk_hole_entry)
2944 goto out_ip6_prohibit_entry;
2945 net->ipv6.ip6_blk_hole_entry->dst.path =
2946 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2947 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2948 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2949 ip6_template_metrics, true);
2950 #endif
2951
2952 net->ipv6.sysctl.flush_delay = 0;
2953 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2954 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2955 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2956 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2957 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2958 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2959 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2960
2961 #ifdef CONFIG_PROC_FS
2962 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2963 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2964 #endif
2965 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2966
2967 ret = 0;
2968 out:
2969 return ret;
2970
2971 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2972 out_ip6_prohibit_entry:
2973 kfree(net->ipv6.ip6_prohibit_entry);
2974 out_ip6_null_entry:
2975 kfree(net->ipv6.ip6_null_entry);
2976 #endif
2977 out_ip6_dst_entries:
2978 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2979 out_ip6_dst_ops:
2980 goto out;
2981 }
2982
2983 static void __net_exit ip6_route_net_exit(struct net *net)
2984 {
2985 #ifdef CONFIG_PROC_FS
2986 proc_net_remove(net, "ipv6_route");
2987 proc_net_remove(net, "rt6_stats");
2988 #endif
2989 kfree(net->ipv6.ip6_null_entry);
2990 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2991 kfree(net->ipv6.ip6_prohibit_entry);
2992 kfree(net->ipv6.ip6_blk_hole_entry);
2993 #endif
2994 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2995 }
2996
2997 static struct pernet_operations ip6_route_net_ops = {
2998 .init = ip6_route_net_init,
2999 .exit = ip6_route_net_exit,
3000 };
3001
3002 static struct notifier_block ip6_route_dev_notifier = {
3003 .notifier_call = ip6_route_dev_notify,
3004 .priority = 0,
3005 };
3006
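/* Boot-time initialisation: create the rt6_info slab cache, register
 * the per-namespace ops, rtnetlink handlers and the netdevice notifier;
 * on failure, unwind in reverse order.
 */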
3007 int __init ip6_route_init(void)
3008 {
3009 int ret;
3010
3011 ret = -ENOMEM;
3012 ip6_dst_ops_template.kmem_cachep =
3013 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3014 SLAB_HWCACHE_ALIGN, NULL);
3015 if (!ip6_dst_ops_template.kmem_cachep)
3016 goto out;
3017
3018 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3019 if (ret)
3020 goto out_kmem_cache;
3021
3022 ret = register_pernet_subsys(&ip6_route_net_ops);
3023 if (ret)
3024 goto out_dst_entries;
3025
3026 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3027
3028 /* The loopback device is registered before this code runs, so the
3029 * loopback reference in rt6_info is not taken there; take it
3030 * manually for init_net. */
3031 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3032 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3033 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3034 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3035 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3036 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3037 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3038 #endif
3039 ret = fib6_init();
3040 if (ret)
3041 goto out_register_subsys;
3042
3043 ret = xfrm6_init();
3044 if (ret)
3045 goto out_fib6_init;
3046
3047 ret = fib6_rules_init();
3048 if (ret)
3049 goto xfrm6_init;
3050
3051 ret = -ENOBUFS;
3052 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3053 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3054 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3055 goto fib6_rules_init;
3056
3057 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3058 if (ret)
3059 goto fib6_rules_init;
3060
3061 out:
3062 return ret;
3063
3064 fib6_rules_init:
3065 fib6_rules_cleanup();
3066 xfrm6_init:
3067 xfrm6_fini();
3068 out_fib6_init:
3069 fib6_gc_cleanup();
3070 out_register_subsys:
3071 unregister_pernet_subsys(&ip6_route_net_ops);
3072 out_dst_entries:
3073 dst_entries_destroy(&ip6_dst_blackhole_ops);
3074 out_kmem_cache:
3075 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3076 goto out;
3077 }
3078
3079 void ip6_route_cleanup(void)
3080 {
3081 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3082 fib6_rules_cleanup();
3083 xfrm6_fini();
3084 fib6_gc_cleanup();
3085 unregister_pernet_subsys(&ip6_route_net_ops);
3086 dst_entries_destroy(&ip6_dst_blackhole_ops);
3087 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3088 }