net: ipv4: Don't crash if passing a null sk to ip_rt_update_pmtu.
LineageOS/android_kernel_samsung_universal7580.git: net/ipv4/route.c

/*
 * INET An implementation of the TCP/IP protocol suite for the LINUX
 * operating system. INET is implemented using the BSD Socket
 * interface as the means of communication with the user level.
 *
 * ROUTE - implementation of the IP router.
 *
 * Authors: Ross Biro
 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 * Alan Cox, <gw4pts@gw4pts.ampr.org>
 * Linus Torvalds, <Linus.Torvalds@helsinki.fi>
 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 * Alan Cox : Verify area fixes.
 * Alan Cox : cli() protects routing changes
 * Rui Oliveira : ICMP routing table updates
 * (rco@di.uminho.pt) Routing table insertion and update
 * Linus Torvalds : Rewrote bits to be sensible
 * Alan Cox : Added BSD route gw semantics
 * Alan Cox : Super /proc >4K
 * Alan Cox : MTU in route table
 * Alan Cox : MSS actually. Also added the window
 * clamper.
 * Sam Lantinga : Fixed route matching in rt_del()
 * Alan Cox : Routing cache support.
 * Alan Cox : Removed compatibility cruft.
 * Alan Cox : RTF_REJECT support.
 * Alan Cox : TCP irtt support.
 * Jonathan Naylor : Added Metric support.
 * Miquel van Smoorenburg : BSD API fixes.
 * Miquel van Smoorenburg : Metrics.
 * Alan Cox : Use __u32 properly
 * Alan Cox : Aligned routing errors more closely with BSD
 * our system is still very different.
 * Alan Cox : Faster /proc handling
 * Alexey Kuznetsov : Massive rework to support tree based routing,
 * routing caches and better behaviour.
 *
 * Olaf Erb : irtt wasn't being copied right.
 * Bjorn Ekwall : Kerneld route support.
 * Alan Cox : Multicast fixed (I hope)
 * Pavel Krauz : Limited broadcast fixed
 * Mike McLagan : Routing by source
 * Alexey Kuznetsov : End of old history. Split to fib.c and
 * route.c and rewritten from scratch.
 * Andi Kleen : Load-limit warning messages.
 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
 * Vitaly E. Lavrov : Race condition in ip_route_input_slow.
 * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow.
 * Vladimir V. Ivanov : IP rule info (flowid) is really useful.
 * Marc Boucher : routing by fwmark
 * Robert Olsson : Added rt_cache statistics
 * Arnaldo C. Melo : Convert proc stuff to seq_file
 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
 * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect
 * Ilia Sotnikov : Removed TOS from hash calculations
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) "IPv4: " fmt

#include <linux/module.h>
#include <asm/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/pkt_sched.h>
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <net/dst.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/arp.h>
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#include <linux/kmemleak.h>
#endif
#include <net/secure_seq.h>

#define RT_FL_TOS(oldflp4) \
	((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))

#define IP_MAX_MTU 0xFFF0

#define RT_GC_TIMEOUT (300*HZ)

static int ip_rt_max_size;
static int ip_rt_redirect_number __read_mostly = 9;
static int ip_rt_redirect_load __read_mostly = HZ / 50;
static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly = HZ;
static int ip_rt_error_burst __read_mostly = 5 * HZ;
static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;

/*
 * Interface to generic destination cache.
 */

static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
static unsigned int ipv4_mtu(const struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			      struct sk_buff *skb, u32 mtu);
static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
			   struct sk_buff *skb);
static void ipv4_dst_destroy(struct dst_entry *dst);

static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			    int how)
{
}

static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	WARN_ON(1);
	return NULL;
}

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr);

static struct dst_ops ipv4_dst_ops = {
	.family = AF_INET,
	.protocol = cpu_to_be16(ETH_P_IP),
	.check = ipv4_dst_check,
	.default_advmss = ipv4_default_advmss,
	.mtu = ipv4_mtu,
	.cow_metrics = ipv4_cow_metrics,
	.destroy = ipv4_dst_destroy,
	.ifdown = ipv4_dst_ifdown,
	.negative_advice = ipv4_negative_advice,
	.link_failure = ipv4_link_failure,
	.update_pmtu = ip_rt_update_pmtu,
	.redirect = ip_do_redirect,
	.local_out = __ip_local_out,
	.neigh_lookup = ipv4_neigh_lookup,
};

#define ECN_OR_COST(class) TC_PRIO_##class

const __u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);

static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)

#ifdef CONFIG_PROC_FS
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (*pos)
		return NULL;
	return SEQ_START_TOKEN;
}

static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return NULL;
}

static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}

static int rt_cache_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-127s\n",
			   "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
			   "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
			   "HHUptod\tSpecDst");
	return 0;
}

static const struct seq_operations rt_cache_seq_ops = {
	.start = rt_cache_seq_start,
	.next  = rt_cache_seq_next,
	.stop  = rt_cache_seq_stop,
	.show  = rt_cache_seq_show,
};

static int rt_cache_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cache_seq_ops);
}

static const struct file_operations rt_cache_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = rt_cache_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};


static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;
}

static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;

}

static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{

}

static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
	struct rt_cache_stat *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
		return 0;
	}

	seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
		   dst_entries_get_slow(&ipv4_dst_ops),
		   st->in_hit,
		   st->in_slow_tot,
		   st->in_slow_mc,
		   st->in_no_route,
		   st->in_brd,
		   st->in_martian_dst,
		   st->in_martian_src,

		   st->out_hit,
		   st->out_slow_tot,
		   st->out_slow_mc,

		   st->gc_total,
		   st->gc_ignored,
		   st->gc_goal_miss,
		   st->gc_dst_overflow,
		   st->in_hlist_search,
		   st->out_hlist_search
	);
	return 0;
}

static const struct seq_operations rt_cpu_seq_ops = {
	.start = rt_cpu_seq_start,
	.next  = rt_cpu_seq_next,
	.stop  = rt_cpu_seq_stop,
	.show  = rt_cpu_seq_show,
};


static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cpu_seq_ops);
}

static const struct file_operations rt_cpu_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = rt_cpu_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

#ifdef CONFIG_IP_ROUTE_CLASSID
static int rt_acct_proc_show(struct seq_file *m, void *v)
{
	struct ip_rt_acct *dst, *src;
	unsigned int i, j;

	dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
	if (!dst)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
		for (j = 0; j < 256; j++) {
			dst[j].o_bytes   += src[j].o_bytes;
			dst[j].o_packets += src[j].o_packets;
			dst[j].i_bytes   += src[j].i_bytes;
			dst[j].i_packets += src[j].i_packets;
		}
	}

	seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
	kfree(dst);
	return 0;
}

static int rt_acct_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt_acct_proc_show, NULL);
}

static const struct file_operations rt_acct_proc_fops = {
	.owner   = THIS_MODULE,
	.open    = rt_acct_proc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};
#endif

static int __net_init ip_rt_do_proc_init(struct net *net)
{
	struct proc_dir_entry *pde;

	pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
			  &rt_cache_seq_fops);
	if (!pde)
		goto err1;

	pde = proc_create("rt_cache", S_IRUGO,
			  net->proc_net_stat, &rt_cpu_seq_fops);
	if (!pde)
		goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
	pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
	if (!pde)
		goto err3;
#endif
	return 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
	remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
	remove_proc_entry("rt_cache", net->proc_net);
err1:
	return -ENOMEM;
}

static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
	remove_proc_entry("rt_cache", net->proc_net_stat);
	remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
	remove_proc_entry("rt_acct", net->proc_net);
#endif
}

static struct pernet_operations ip_rt_proc_ops __net_initdata = {
	.init = ip_rt_do_proc_init,
	.exit = ip_rt_do_proc_exit,
};

static int __init ip_rt_proc_init(void)
{
	return register_pernet_subsys(&ip_rt_proc_ops);
}

#else
static inline int ip_rt_proc_init(void)
{
	return 0;
}
#endif /* CONFIG_PROC_FS */

static inline bool rt_is_expired(const struct rtable *rth)
{
	return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
}

void rt_cache_flush(struct net *net)
{
	rt_genid_bump(net);
}

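/* Neighbour lookup for an IPv4 dst: prefer the route's cached gateway as
 * the ARP key, fall back to the destination address from the skb (if any)
 * or the caller-supplied daddr, and create the neighbour entry on demand.
 */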
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	struct net_device *dev = dst->dev;
	const __be32 *pkey = daddr;
	const struct rtable *rt;
	struct neighbour *n;

	rt = (const struct rtable *) dst;
	if (rt->rt_gateway)
		pkey = (const __be32 *) &rt->rt_gateway;
	else if (skb)
		pkey = &ip_hdr(skb)->daddr;

	n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
	if (n)
		return n;
	return neigh_create(&arp_tbl, pkey, dev);
}

#define IP_IDENTS_SZ 2048u
struct ip_ident_bucket {
	atomic_t id;
	u32 stamp32;
};

static struct ip_ident_bucket *ip_idents __read_mostly;

/* In order to protect privacy, we add a perturbation to identifiers
 * if one generator is seldom used. This makes hard for an attacker
 * to infer how many packets were sent between two points in time.
 */
u32 ip_idents_reserve(u32 hash, int segs)
{
	struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ;
	u32 old = ACCESS_ONCE(bucket->stamp32);
	u32 now = (u32)jiffies;
	u32 delta = 0;

	if (old != now && cmpxchg(&bucket->stamp32, old, now) == old) {
		u64 x = prandom_u32();

		x *= (now - old);
		delta = (u32)(x >> 32);
	}

	return atomic_add_return(segs + delta, &bucket->id) - segs;
}
EXPORT_SYMBOL(ip_idents_reserve);

void __ip_select_ident(struct iphdr *iph, int segs)
{
	static u32 ip_idents_hashrnd __read_mostly;
	static bool hashrnd_initialized = false;
	u32 hash, id;

	if (unlikely(!hashrnd_initialized)) {
		hashrnd_initialized = true;
		get_random_bytes(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
	}

	hash = jhash_3words((__force u32)iph->daddr,
			    (__force u32)iph->saddr,
			    iph->protocol,
			    ip_idents_hashrnd);
	id = ip_idents_reserve(hash, segs);
	iph->id = htons(id);
}
EXPORT_SYMBOL(__ip_select_ident);

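/* Build a flow key from an IP header. When a socket is supplied, its bound
 * device, mark, TOS and protocol take precedence over the values derived
 * from the packet itself.
 */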
static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
			     const struct sock *sk,
			     const struct iphdr *iph,
			     int oif, u8 tos,
			     u8 prot, u32 mark, int flow_flags)
{
	if (sk) {
		const struct inet_sock *inet = inet_sk(sk);

		oif = sk->sk_bound_dev_if;
		mark = sk->sk_mark;
		tos = RT_CONN_FLAGS(sk);
		prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
	}
	flowi4_init_output(fl4, oif, mark, tos,
			   RT_SCOPE_UNIVERSE, prot,
			   flow_flags,
			   iph->daddr, iph->saddr, 0, 0,
			   sock_net_uid(net, sk));
}

static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
			       const struct sock *sk)
{
	const struct net *net = dev_net(skb->dev);
	const struct iphdr *iph = ip_hdr(skb);
	int oif = skb->dev->ifindex;
	u8 tos = RT_TOS(iph->tos);
	u8 prot = iph->protocol;
	u32 mark = skb->mark;

	__build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
}

static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
			   inet_sk_flowi_flags(sk),
			   daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
	rcu_read_unlock();
}

static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (skb)
		build_skb_flow_key(fl4, skb, sk);
	else
		build_sk_flow_key(fl4, sk);
}

static inline void rt_free(struct rtable *rt)
{
	call_rcu(&rt->dst.rcu_head, dst_rcu_free);
}

static DEFINE_SPINLOCK(fnhe_lock);

static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
	struct fib_nh_exception *fnhe, *oldest;
	struct rtable *orig;

	oldest = rcu_dereference(hash->chain);
	for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
			oldest = fnhe;
	}
	orig = rcu_dereference(oldest->fnhe_rth);
	if (orig) {
		RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
		rt_free(orig);
	}
	return oldest;
}

static inline u32 fnhe_hashfun(__be32 daddr)
{
	u32 hval;

	hval = (__force u32) daddr;
	hval ^= (hval >> 11) ^ (hval >> 22);

	return hval & (FNHE_HASH_SIZE - 1);
}

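/* Record learned PMTU/redirect state as a fib_nh exception keyed by
 * destination. The per-nexthop hash is allocated lazily; when a chain grows
 * past FNHE_RECLAIM_DEPTH the stalest entry is recycled via fnhe_oldest().
 */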
static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
				  u32 pmtu, unsigned long expires)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
	int depth;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = nh->nh_exceptions;
	if (!hash) {
		hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
		if (!hash)
			goto out_unlock;
		nh->nh_exceptions = hash;
	}

	hash += hval;

	depth = 0;
	for (fnhe = rcu_dereference(hash->chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			break;
		depth++;
	}

	if (fnhe) {
		if (gw)
			fnhe->fnhe_gw = gw;
		if (pmtu) {
			fnhe->fnhe_pmtu = pmtu;
			fnhe->fnhe_expires = expires;
		}
	} else {
		if (depth > FNHE_RECLAIM_DEPTH)
			fnhe = fnhe_oldest(hash);
		else {
			fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
			if (!fnhe)
				goto out_unlock;

			fnhe->fnhe_next = hash->chain;
			rcu_assign_pointer(hash->chain, fnhe);
		}
		fnhe->fnhe_daddr = daddr;
		fnhe->fnhe_gw = gw;
		fnhe->fnhe_pmtu = pmtu;
		fnhe->fnhe_expires = expires;
	}

	fnhe->fnhe_stamp = jiffies;

out_unlock:
	spin_unlock_bh(&fnhe_lock);
	return;
}

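/* Core ICMP redirect handling: sanity-check the advertised gateway, and once
 * its neighbour entry is valid record it as a nexthop exception; with
 * kill_route the current dst is marked DST_OBSOLETE_KILL so ipv4_dst_check()
 * forces a fresh lookup.
 */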
static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{
	__be32 new_gw = icmp_hdr(skb)->un.gateway;
	__be32 old_gw = ip_hdr(skb)->saddr;
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct neighbour *n;
	struct net *net;

	switch (icmp_hdr(skb)->code & 7) {
	case ICMP_REDIR_NET:
	case ICMP_REDIR_NETTOS:
	case ICMP_REDIR_HOST:
	case ICMP_REDIR_HOSTTOS:
		break;

	default:
		return;
	}

	if (rt->rt_gateway != old_gw)
		return;

	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		return;

	net = dev_net(dev);
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
	if (!n)
		n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
	if (!IS_ERR(n)) {
		if (!(n->nud_state & NUD_VALID)) {
			neigh_event_send(n, NULL);
		} else {
			if (fib_lookup(net, fl4, &res) == 0) {
				struct fib_nh *nh = &FIB_RES_NH(res);

				update_or_create_fnhe(nh, fl4->daddr, new_gw,
						      0, 0);
			}
			if (kill_route)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
		}
		neigh_release(n);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev)) {
		const struct iphdr *iph = (const struct iphdr *) skb->data;
		__be32 daddr = iph->daddr;
		__be32 saddr = iph->saddr;

		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
				     " Advised path = %pI4 -> %pI4\n",
				     &old_gw, dev->name, &new_gw,
				     &saddr, &daddr);
	}
#endif
	;
}

static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct rtable *rt;
	struct flowi4 fl4;
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	int oif = skb->dev->ifindex;
	u8 tos = RT_TOS(iph->tos);
	u8 prot = iph->protocol;
	u32 mark = skb->mark;

	rt = (struct rtable *) dst;

	__build_flow_key(sock_net(sk), &fl4, sk, iph, oif, tos, prot, mark, 0);
	__ip_do_redirect(rt, skb, &fl4, true);
}

static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *)dst;
	struct dst_entry *ret = dst;

	if (rt) {
		if (dst->obsolete > 0) {
			ip_rt_put(rt);
			ret = NULL;
		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
			   rt->dst.expires) {
			ip_rt_put(rt);
			ret = NULL;
		}
	}
	return ret;
}

/*
 * Algorithm:
 * 1. The first ip_rt_redirect_number redirects are sent
 *    with exponential backoff, then we stop sending them at all,
 *    assuming that the host ignores our redirects.
 * 2. If we did not see packets requiring redirects
 *    during ip_rt_redirect_silence, we assume that the host
 *    forgot redirected route and start to send redirects again.
 *
 * This algorithm is much cheaper and more intelligent than dumb load limiting
 * in icmp.c.
 *
 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
 * and "frag. need" (breaks PMTU discovery) in icmp.c.
 */

void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct in_device *in_dev;
	struct inet_peer *peer;
	struct net *net;
	int log_martians;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(rt->dst.dev);
	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
		rcu_read_unlock();
		return;
	}
	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
	rcu_read_unlock();

	net = dev_net(rt->dst.dev);
	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
	if (!peer) {
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
			  rt_nexthop(rt, ip_hdr(skb)->daddr));
		return;
	}

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
		peer->rate_tokens = 0;

	/* Too many ignored redirects; do not send anything
	 * set dst.rate_last to the last seen redirected packet.
	 */
	if (peer->rate_tokens >= ip_rt_redirect_number) {
		peer->rate_last = jiffies;
		goto out_put_peer;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.
	 */
	if (peer->rate_tokens == 0 ||
	    time_after(jiffies,
		       (peer->rate_last +
			(ip_rt_redirect_load << peer->rate_tokens)))) {
		__be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);

		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
		peer->rate_last = jiffies;
		++peer->rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		if (log_martians &&
		    peer->rate_tokens == ip_rt_redirect_number)
			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
					     &ip_hdr(skb)->saddr, inet_iif(skb),
					     &ip_hdr(skb)->daddr, &gw);
#endif
	}
out_put_peer:
	inet_putpeer(peer);
}

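/* Input-path error handler: translate dst->error into an ICMP unreachable
 * code and send it, rate-limited per source address via the inetpeer
 * token bucket (ip_rt_error_cost / ip_rt_error_burst).
 */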
static int ip_error(struct sk_buff *skb)
{
	struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
	struct rtable *rt = skb_rtable(skb);
	struct inet_peer *peer;
	unsigned long now;
	struct net *net;
	bool send;
	int code;

	/* IP on this device is disabled. */
	if (!in_dev)
		goto out;

	net = dev_net(rt->dst.dev);
	if (!IN_DEV_FORWARD(in_dev)) {
		switch (rt->dst.error) {
		case EHOSTUNREACH:
			IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS);
			break;

		case ENETUNREACH:
			IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
			break;
		}
		goto out;
	}

	switch (rt->dst.error) {
	case EINVAL:
	default:
		goto out;
	case EHOSTUNREACH:
		code = ICMP_HOST_UNREACH;
		break;
	case ENETUNREACH:
		code = ICMP_NET_UNREACH;
		IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
		break;
	case EACCES:
		code = ICMP_PKT_FILTERED;
		break;
	}

	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);

	send = true;
	if (peer) {
		now = jiffies;
		peer->rate_tokens += now - peer->rate_last;
		if (peer->rate_tokens > ip_rt_error_burst)
			peer->rate_tokens = ip_rt_error_burst;
		peer->rate_last = now;
		if (peer->rate_tokens >= ip_rt_error_cost)
			peer->rate_tokens -= ip_rt_error_cost;
		else
			send = false;
		inet_putpeer(peer);
	}
	if (send)
		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);

out:	kfree_skb(skb);
	return 0;
}

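/* Apply a learned path MTU to a route: honour a locked MTU metric, ignore
 * values above the device MTU, clamp to ip_rt_min_pmtu, and store the value
 * as a nexthop exception with an ip_rt_mtu_expires lifetime so it ages out.
 */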
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
	struct dst_entry *dst = &rt->dst;
	struct fib_result res;

	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	if (dst->dev->mtu < mtu)
		return;

	if (mtu < ip_rt_min_pmtu)
		mtu = ip_rt_min_pmtu;

	if (!rt->rt_pmtu) {
		dst->obsolete = DST_OBSOLETE_KILL;
	} else {
		rt->rt_pmtu = mtu;
		dst->expires = max(1UL, jiffies + ip_rt_mtu_expires);
	}

	rcu_read_lock();
	if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) {
		struct fib_nh *nh = &FIB_RES_NH(res);

		update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
				      jiffies + ip_rt_mtu_expires);
	}
	rcu_read_unlock();
}

static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			      struct sk_buff *skb, u32 mtu)
{
	struct rtable *rt = (struct rtable *) dst;
	struct flowi4 fl4;

	ip_rt_build_flow_key(&fl4, sk, skb);
	__ip_rt_update_pmtu(rt, &fl4, mtu);
}

void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
		      int oif, u32 mark, u8 protocol, int flow_flags)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	if (!mark)
		mark = IP4_REPLY_MARK(net, skb->mark);

	__build_flow_key(net, &fl4, NULL, iph, oif,
			 RT_TOS(iph->tos), protocol, mark, flow_flags);
	rt = __ip_route_output_key(net, &fl4);
	if (!IS_ERR(rt)) {
		__ip_rt_update_pmtu(rt, &fl4, mtu);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_update_pmtu);

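/* Illustrative use only (not taken from a real caller in this tree): a
 * tunnel driver that received a "fragmentation needed" ICMP for a packet it
 * emitted could propagate the new MTU roughly like
 *
 *	ipv4_update_pmtu(skb, dev_net(skb->dev), new_mtu, 0,
 *			 skb->mark, IPPROTO_IPIP, 0);
 *
 * i.e. without a socket, identifying the flow purely from the header at
 * skb->data.
 */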
static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);

	if (!fl4.flowi4_mark)
		fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);

	rt = __ip_route_output_key(sock_net(sk), &fl4);
	if (!IS_ERR(rt)) {
		__ip_rt_update_pmtu(rt, &fl4, mtu);
		ip_rt_put(rt);
	}
}

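/* Socket-aware PMTU update. If the socket is owned by user context (or has
 * no cached dst) fall back to a plain flow lookup; otherwise update the
 * cached route in place, relooking it up and re-attaching it with
 * sk_dst_set() when the old entry has been invalidated.
 */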
void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	struct dst_entry *odst = NULL;
	bool new = false;
	struct net *net = sock_net(sk);

	bh_lock_sock(sk);
	odst = sk_dst_get(sk);

	if (sock_owned_by_user(sk) || !odst) {
		__ipv4_sk_update_pmtu(skb, sk, mtu);
		goto out;
	}

	__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);

	rt = (struct rtable *)odst;
	if (odst->obsolete && odst->ops->check(odst, 0) == NULL) {
		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);

	if (!dst_check(&rt->dst, 0)) {
		if (new)
			dst_release(&rt->dst);

		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	if (new)
		sk_dst_set(sk, &rt->dst);

out:
	bh_unlock_sock(sk);
	dst_release(odst);
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);

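/* Redirect helpers for callers outside the normal dst framework: build the
 * flow either from explicit parameters (ipv4_redirect) or from a socket
 * (ipv4_sk_redirect), look the route up and apply __ip_do_redirect()
 * without killing the current dst.
 */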
void ipv4_redirect(struct sk_buff *skb, struct net *net,
		   int oif, u32 mark, u8 protocol, int flow_flags)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(net, &fl4, NULL, iph, oif,
			 RT_TOS(iph->tos), protocol, mark, flow_flags);
	rt = __ip_route_output_key(net, &fl4);
	if (!IS_ERR(rt)) {
		__ip_do_redirect(rt, skb, &fl4, false);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_redirect);

void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	struct net *net = sock_net(sk);

	__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
	rt = __ip_route_output_key(net, &fl4);
	if (!IS_ERR(rt)) {
		__ip_do_redirect(rt, skb, &fl4, false);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_sk_redirect);

static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rtable *rt = (struct rtable *) dst;

	/* All IPV4 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 *
	 * When a PMTU/redirect information update invalidates a
	 * route, this is indicated by setting obsolete to
	 * DST_OBSOLETE_KILL.
	 */
	if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt))
		return NULL;
	return dst;
}

static void ipv4_link_failure(struct sk_buff *skb)
{
	struct rtable *rt;

	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);

	rt = skb_rtable(skb);
	if (rt)
		dst_set_expires(&rt->dst, 0);
}

static int ip_rt_bug(struct sk_buff *skb)
{
	pr_debug("%s: %pI4 -> %pI4, %s\n",
		 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
		 skb->dev ? skb->dev->name : "?");
	kfree_skb(skb);
	WARN_ON(1);
	return 0;
}

/*
   We do not cache source address of outgoing interface,
   because it is used only by IP RR, TS and SRR options,
   so that it out of fast path.

   BTW remember: "addr" is allowed to be not aligned
   in IP options!
 */

void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
	__be32 src;

	if (rt_is_output_route(rt))
		src = ip_hdr(skb)->saddr;
	else {
		struct fib_result res;
		struct flowi4 fl4;
		struct iphdr *iph;

		iph = ip_hdr(skb);

		memset(&fl4, 0, sizeof(fl4));
		fl4.daddr = iph->daddr;
		fl4.saddr = iph->saddr;
		fl4.flowi4_tos = RT_TOS(iph->tos);
		fl4.flowi4_oif = rt->dst.dev->ifindex;
		fl4.flowi4_iif = skb->dev->ifindex;
		fl4.flowi4_mark = skb->mark;

		rcu_read_lock();
		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
		else
			src = inet_select_addr(rt->dst.dev,
					       rt_nexthop(rt, iph->daddr),
					       RT_SCOPE_UNIVERSE);
		rcu_read_unlock();
	}
	memcpy(addr, &src, 4);
}

#ifdef CONFIG_IP_ROUTE_CLASSID
static void set_class_tag(struct rtable *rt, u32 tag)
{
	if (!(rt->dst.tclassid & 0xFFFF))
		rt->dst.tclassid |= tag & 0xFFFF;
	if (!(rt->dst.tclassid & 0xFFFF0000))
		rt->dst.tclassid |= tag & 0xFFFF0000;
}
#endif

static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
	unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);

	if (advmss == 0) {
		advmss = max_t(unsigned int, dst->dev->mtu - 40,
			       ip_rt_min_advmss);
		if (advmss > 65535 - 40)
			advmss = 65535 - 40;
	}
	return advmss;
}

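/* Effective MTU of a route: a still-valid learned PMTU wins, then the MTU
 * metric, then the device MTU (clamped to 576 for locked-MTU gateway routes
 * and to IP_MAX_MTU overall).
 */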
static unsigned int ipv4_mtu(const struct dst_entry *dst)
{
	const struct rtable *rt = (const struct rtable *) dst;
	unsigned int mtu = rt->rt_pmtu;

	if (!mtu || time_after_eq(jiffies, rt->dst.expires))
		mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu)
		return mtu;

	mtu = dst->dev->mtu;

	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
		if (rt->rt_uses_gateway && mtu > 576)
			mtu = 576;
	}

	if (mtu > IP_MAX_MTU)
		mtu = IP_MAX_MTU;

	return mtu;
}

static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
{
	struct fnhe_hash_bucket *hash = nh->nh_exceptions;
	struct fib_nh_exception *fnhe;
	u32 hval;

	if (!hash)
		return NULL;

	hval = fnhe_hashfun(daddr);

	for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			return fnhe;
	}
	return NULL;
}

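/* Attach a route to a matching nexthop exception under fnhe_lock, copying
 * any unexpired PMTU and redirect gateway into the rtable and caching the
 * route in fnhe_rth so later lookups can reuse it.
 */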
static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
			      __be32 daddr)
{
	bool ret = false;

	spin_lock_bh(&fnhe_lock);

	if (daddr == fnhe->fnhe_daddr) {
		struct rtable *orig = rcu_dereference(fnhe->fnhe_rth);
		if (orig && rt_is_expired(orig)) {
			fnhe->fnhe_gw = 0;
			fnhe->fnhe_pmtu = 0;
			fnhe->fnhe_expires = 0;
		}
		if (fnhe->fnhe_pmtu) {
			unsigned long expires = fnhe->fnhe_expires;
			unsigned long diff = expires - jiffies;

			if (time_before(jiffies, expires)) {
				rt->rt_pmtu = fnhe->fnhe_pmtu;
				dst_set_expires(&rt->dst, diff);
			}
		}
		if (fnhe->fnhe_gw) {
			rt->rt_flags |= RTCF_REDIRECTED;
			rt->rt_gateway = fnhe->fnhe_gw;
			rt->rt_uses_gateway = 1;
		} else if (!rt->rt_gateway)
			rt->rt_gateway = daddr;

		rcu_assign_pointer(fnhe->fnhe_rth, rt);
		if (orig)
			rt_free(orig);

		fnhe->fnhe_stamp = jiffies;
		ret = true;
	}
	spin_unlock_bh(&fnhe_lock);

	return ret;
}

d26b3a7c 1287 struct rtable *orig, *prev, **p;
caacf05e 1288 bool ret = true;
f2bb4bed 1289
d26b3a7c 1290 if (rt_is_input_route(rt)) {
54764bb6 1291 p = (struct rtable **)&nh->nh_rth_input;
d26b3a7c 1292 } else {
d26b3a7c
ED
1293 p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
1294 }
f2bb4bed
DM
1295 orig = *p;
1296
1297 prev = cmpxchg(p, orig, rt);
1298 if (prev == orig) {
f2bb4bed 1299 if (orig)
54764bb6 1300 rt_free(orig);
155e8336 1301 } else
caacf05e 1302 ret = false;
caacf05e
DM
1303
1304 return ret;
1305}
1306
1307static DEFINE_SPINLOCK(rt_uncached_lock);
1308static LIST_HEAD(rt_uncached_list);
1309
1310static void rt_add_uncached_list(struct rtable *rt)
1311{
1312 spin_lock_bh(&rt_uncached_lock);
1313 list_add_tail(&rt->rt_uncached, &rt_uncached_list);
1314 spin_unlock_bh(&rt_uncached_lock);
1315}
1316
1317static void ipv4_dst_destroy(struct dst_entry *dst)
1318{
1319 struct rtable *rt = (struct rtable *) dst;
1320
78df76a0 1321 if (!list_empty(&rt->rt_uncached)) {
caacf05e
DM
1322 spin_lock_bh(&rt_uncached_lock);
1323 list_del(&rt->rt_uncached);
1324 spin_unlock_bh(&rt_uncached_lock);
1325 }
1326}
1327
1328void rt_flush_dev(struct net_device *dev)
1329{
1330 if (!list_empty(&rt_uncached_list)) {
1331 struct net *net = dev_net(dev);
1332 struct rtable *rt;
1333
1334 spin_lock_bh(&rt_uncached_lock);
1335 list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
1336 if (rt->dst.dev != dev)
1337 continue;
1338 rt->dst.dev = net->loopback_dev;
1339 dev_hold(rt->dst.dev);
1340 dev_put(dev);
1341 }
1342 spin_unlock_bh(&rt_uncached_lock);
4895c771
DM
1343 }
1344}
1345
4331debc 1346static bool rt_cache_valid(const struct rtable *rt)
d2d68ba9 1347{
4331debc
ED
1348 return rt &&
1349 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1350 !rt_is_expired(rt);
d2d68ba9
DM
1351}
1352
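/* Finish constructing a route from a FIB result: inherit gateway and metrics
 * from the nexthop, bind it to a nexthop exception or the nexthop cache, and
 * fall back to the uncached list (DST_NOCACHE) when caching is not possible.
 */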
static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
			   const struct fib_result *res,
			   struct fib_nh_exception *fnhe,
			   struct fib_info *fi, u16 type, u32 itag)
{
	bool cached = false;

	if (fi) {
		struct fib_nh *nh = &FIB_RES_NH(*res);

		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
			rt->rt_gateway = nh->nh_gw;
			rt->rt_uses_gateway = 1;
		}
		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
		rt->dst.tclassid = nh->nh_tclassid;
#endif
		if (unlikely(fnhe))
			cached = rt_bind_exception(rt, fnhe, daddr);
		else if (!(rt->dst.flags & DST_NOCACHE))
			cached = rt_cache_route(nh, rt);
		if (unlikely(!cached)) {
			/* Routes we intend to cache in nexthop exception or
			 * FIB nexthop have the DST_NOCACHE bit clear.
			 * However, if we are unsuccessful at storing this
			 * route into the cache we really need to set it.
			 */
			rt->dst.flags |= DST_NOCACHE;
			if (!rt->rt_gateway)
				rt->rt_gateway = daddr;
			rt_add_uncached_list(rt);
		}
	} else
		rt_add_uncached_list(rt);

#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, res->tclassid);
#endif
	set_class_tag(rt, itag);
#endif
}

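/* Central rtable allocator: every IPv4 dst starts with
 * DST_OBSOLETE_FORCE_CHK so ipv4_dst_check() always runs, and routes we do
 * not intend to cache are flagged DST_HOST | DST_NOCACHE up front.
 */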
static struct rtable *rt_dst_alloc(struct net_device *dev,
				   bool nopolicy, bool noxfrm, bool will_cache)
{
	return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
			 (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
			 (nopolicy ? DST_NOPOLICY : 0) |
			 (noxfrm ? DST_NOXFRM : 0));
}

/* called in rcu_read_lock() section */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	struct rtable *rth;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	u32 itag = 0;
	int err;

	/* Primary sanity checks. */

	if (in_dev == NULL)
		return -EINVAL;

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(saddr))
			goto e_inval;

	if (ipv4_is_zeronet(saddr)) {
		if (!ipv4_is_local_multicast(daddr))
			goto e_inval;
	} else {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto e_err;
	}
	rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
	if (!rth)
		goto e_nobufs;

#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	rth->dst.output = ip_rt_bug;

	rth->rt_genid = rt_genid(dev_net(dev));
	rth->rt_flags = RTCF_MULTICAST;
	rth->rt_type = RTN_MULTICAST;
	rth->rt_is_input = 1;
	rth->rt_iif = 0;
	rth->rt_pmtu = 0;
	rth->rt_gateway = 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);
	if (our) {
		rth->dst.input = ip_local_deliver;
		rth->rt_flags |= RTCF_LOCAL;
	}

#ifdef CONFIG_IP_MROUTE
	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	skb_dst_set(skb, &rth->dst);
	return 0;

e_nobufs:
	return -ENOBUFS;
e_inval:
	return -EINVAL;
e_err:
	return err;
}


static void ip_handle_martian_source(struct net_device *dev,
				     struct in_device *in_dev,
				     struct sk_buff *skb,
				     __be32 daddr,
				     __be32 saddr)
{
	RT_CACHE_STAT_INC(in_martian_src);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
		/*
		 * RFC1812 recommendation, if source is martian,
		 * the only hint is MAC header.
		 */
		pr_warn("martian source %pI4 from %pI4, on dev %s\n",
			&daddr, &saddr, dev->name);
		if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
			print_hex_dump(KERN_WARNING, "ll header: ",
				       DUMP_PREFIX_OFFSET, 16, 1,
				       skb_mac_header(skb),
				       dev->hard_header_len, true);
		}
	}
#endif
}

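/* Forwarding-path route construction: validate the source, decide whether an
 * ICMP redirect should be suggested (IPSKB_DOREDIRECT), reuse the cached
 * nexthop input route when valid, otherwise allocate and cache a new one.
 */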
/* called in rcu_read_lock() section */
static int __mkroute_input(struct sk_buff *skb,
			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos)
{
	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	unsigned int flags = 0;
	bool do_cache;
	u32 itag = 0;

	/* get a working reference to the output device */
	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
	if (out_dev == NULL) {
		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
		return -EINVAL;
	}

	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, in_dev, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		goto cleanup;
	}

	do_cache = res->fi && !itag;
	if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
	    skb->protocol == htons(ETH_P_IP) &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
		IPCB(skb)->flags |= IPSKB_DOREDIRECT;

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create route, if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 *
		 * Proxy arp feature have been extended to allow, ARP
		 * replies back to the same interface, to support
		 * Private VLAN switch technologies. See arp.c.
		 */
		if (out_dev == in_dev &&
		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
			err = -EINVAL;
			goto cleanup;
		}
	}

	if (do_cache) {
		rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
		if (rt_cache_valid(rth)) {
			skb_dst_set_noref(skb, &rth->dst);
			goto out;
		}
	}

	rth = rt_dst_alloc(out_dev->dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
	rth->rt_flags = flags;
	rth->rt_type = res->type;
	rth->rt_is_input = 1;
	rth->rt_iif = 0;
	rth->rt_pmtu = 0;
	rth->rt_gateway = 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);
	RT_CACHE_STAT_INC(in_slow_tot);

	rth->dst.input = ip_forward;
	rth->dst.output = ip_output;

	rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag);
	skb_dst_set(skb, &rth->dst);
out:
	err = 0;
 cleanup:
	return err;
}

static int ip_mkroute_input(struct sk_buff *skb,
			    struct fib_result *res,
			    const struct flowi4 *fl4,
			    struct in_device *in_dev,
			    __be32 daddr, __be32 saddr, u32 tos)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res->fi && res->fi->fib_nhs > 1)
		fib_select_multipath(res);
#endif

	/* create a routing cache entry */
	return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
}

/*
 * NOTE. We drop all the packets that has local source
 * addresses, because every properly looped back packet
 * must have correct destination already attached by output routine.
 *
 * Such approach solves two big problems:
 * 1. Not simplex devices are handled properly.
 * 2. IP spoofing attempts are filtered with 100% of guarantee.
 * called with rcu_read_lock()
 */

static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct flowi4 fl4;
	unsigned int flags = 0;
	u32 itag = 0;
	struct rtable *rth;
	int err = -EINVAL;
	struct net *net = dev_net(dev);
	bool do_cache;

	/* IP on this device is disabled. */

	if (!in_dev)
		goto out;

	/* Check for the most weird martians, which can be not detected
	   by fib_lookup.
	 */

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
		goto martian_source;

	res.fi = NULL;
	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* Accept zero addresses only to limited broadcast;
	 * I even do not know to fix it or not. Waiting for complains :-)
	 */
	if (ipv4_is_zeronet(saddr))
		goto martian_source;

	if (ipv4_is_zeronet(daddr))
		goto martian_destination;

	/* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
	 * and call it once if daddr or/and saddr are loopback addresses
	 */
	if (ipv4_is_loopback(daddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_destination;
	} else if (ipv4_is_loopback(saddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_source;
	}

	/*
	 * Now we are ready to route packet.
	 */
	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = dev->ifindex;
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.daddr = daddr;
	fl4.saddr = saddr;
	err = fib_lookup(net, &fl4, &res);
	if (err != 0)
		goto no_route;

	if (res.type == RTN_BROADCAST)
		goto brd_input;

	if (res.type == RTN_LOCAL) {
		err = fib_validate_source(skb, saddr, daddr, tos,
					  LOOPBACK_IFINDEX,
					  dev, in_dev, &itag);
		if (err < 0)
			goto martian_source_keep_err;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev))
		goto no_route;
	if (res.type != RTN_UNICAST)
		goto martian_destination;

	err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
out:	return err;

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (!ipv4_is_zeronet(saddr)) {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto martian_source_keep_err;
	}
	flags |= RTCF_BROADCAST;
	res.type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:
	do_cache = false;
	if (res.fi) {
		if (!itag) {
			rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
			if (rt_cache_valid(rth)) {
				skb_dst_set_noref(skb, &rth->dst);
				err = 0;
				goto out;
			}
			do_cache = true;
		}
	}

	rth = rt_dst_alloc(net->loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
	if (!rth)
		goto e_nobufs;

	rth->dst.input = ip_local_deliver;
	rth->dst.output = ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif

	rth->rt_genid = rt_genid(net);
	rth->rt_flags = flags|RTCF_LOCAL;
	rth->rt_type = res.type;
	rth->rt_is_input = 1;
	rth->rt_iif = 0;
	rth->rt_pmtu = 0;
	rth->rt_gateway = 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);
	RT_CACHE_STAT_INC(in_slow_tot);
	if (res.type == RTN_UNREACHABLE) {
		rth->dst.input = ip_error;
		rth->dst.error = -err;
		rth->rt_flags &= ~RTCF_LOCAL;
	}
	if (do_cache) {
		if (unlikely(!rt_cache_route(&FIB_RES_NH(res), rth))) {
			rth->dst.flags |= DST_NOCACHE;
			rt_add_uncached_list(rth);
		}
	}
	skb_dst_set(skb, &rth->dst);
	err = 0;
	goto out;

no_route:
	RT_CACHE_STAT_INC(in_no_route);
	res.type = RTN_UNREACHABLE;
	if (err == -ESRCH)
		err = -ENETUNREACH;
	goto local_input;

	/*
	 * Do not cache martian addresses: they should be logged (RFC1812)
	 */
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev))
		net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
				     &daddr, &saddr, dev->name);
#endif

e_inval:
	err = -EINVAL;
	goto out;

e_nobufs:
	err = -ENOBUFS;
	goto out;

martian_source:
	err = -EINVAL;
martian_source_keep_err:
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto out;
}

c6cffba4
DM
1799int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1800 u8 tos, struct net_device *dev)
1da177e4 1801{
96d36220 1802 int res;
1da177e4 1803
f77b0d11 1804 tos &= IPTOS_RT_MASK;
96d36220
ED
1805 rcu_read_lock();
1806
1da177e4
LT
1807	/* Multicast recognition logic was moved from the route cache to here.
1808	   The problem was that too many Ethernet cards have broken or missing
1809	   hardware multicast filters :-( As a result, a host on a multicast
1810	   network would acquire a lot of useless route cache entries, e.g. for
1811	   SDR messages from all over the world. Now we try to get rid of them.
1812	   Really, provided the software IP multicast filter is organized
1813	   reasonably (at least, hashed), this does not cause a slowdown
1814	   compared with route cache reject entries.
1815	   Note that multicast routers are not affected, because a
1816	   route cache entry is created eventually.
1817	 */
f97c1e0c 1818 if (ipv4_is_multicast(daddr)) {
96d36220 1819 struct in_device *in_dev = __in_dev_get_rcu(dev);
1da177e4 1820
96d36220 1821 if (in_dev) {
dbdd9a52
DM
1822 int our = ip_check_mc_rcu(in_dev, daddr, saddr,
1823 ip_hdr(skb)->protocol);
1da177e4
LT
1824 if (our
1825#ifdef CONFIG_IP_MROUTE
9d4fb27d
JP
1826 ||
1827 (!ipv4_is_local_multicast(daddr) &&
1828 IN_DEV_MFORWARD(in_dev))
1da177e4 1829#endif
9d4fb27d 1830 ) {
96d36220
ED
1831 int res = ip_route_input_mc(skb, daddr, saddr,
1832 tos, dev, our);
1da177e4 1833 rcu_read_unlock();
96d36220 1834 return res;
1da177e4
LT
1835 }
1836 }
1837 rcu_read_unlock();
1838 return -EINVAL;
1839 }
c10237e0 1840 res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
96d36220
ED
1841 rcu_read_unlock();
1842 return res;
1da177e4 1843}
c6cffba4 1844EXPORT_SYMBOL(ip_route_input_noref);
1da177e4 1845
ebc0ffae 1846/* called with rcu_read_lock() */
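/* Build (and, when do_cache permits, cache) an output rtable for an already
 * resolved fib_result.  Cached copies live either in the nexthop's per-cpu
 * output slot or in a fib_nh_exception that matches fl4->daddr.
 */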
982721f3 1847static struct rtable *__mkroute_output(const struct fib_result *res,
1a00fee4 1848 const struct flowi4 *fl4, int orig_oif,
f61759e6 1849 struct net_device *dev_out,
5ada5527 1850 unsigned int flags)
1da177e4 1851{
982721f3 1852 struct fib_info *fi = res->fi;
f2bb4bed 1853 struct fib_nh_exception *fnhe;
5ada5527 1854 struct in_device *in_dev;
982721f3 1855 u16 type = res->type;
5ada5527 1856 struct rtable *rth;
c92b9655 1857 bool do_cache;
1da177e4 1858
d0daebc3
TG
1859 in_dev = __in_dev_get_rcu(dev_out);
1860 if (!in_dev)
5ada5527 1861 return ERR_PTR(-EINVAL);
1da177e4 1862
d0daebc3
TG
1863 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
1864 if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
1865 return ERR_PTR(-EINVAL);
1866
68a5e3dd 1867 if (ipv4_is_lbcast(fl4->daddr))
982721f3 1868 type = RTN_BROADCAST;
68a5e3dd 1869 else if (ipv4_is_multicast(fl4->daddr))
982721f3 1870 type = RTN_MULTICAST;
68a5e3dd 1871 else if (ipv4_is_zeronet(fl4->daddr))
5ada5527 1872 return ERR_PTR(-EINVAL);
1da177e4
LT
1873
1874 if (dev_out->flags & IFF_LOOPBACK)
1875 flags |= RTCF_LOCAL;
1876
63617421 1877 do_cache = true;
982721f3 1878 if (type == RTN_BROADCAST) {
1da177e4 1879 flags |= RTCF_BROADCAST | RTCF_LOCAL;
982721f3
DM
1880 fi = NULL;
1881 } else if (type == RTN_MULTICAST) {
dd28d1a0 1882 flags |= RTCF_MULTICAST | RTCF_LOCAL;
813b3b5d
DM
1883 if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
1884 fl4->flowi4_proto))
1da177e4 1885 flags &= ~RTCF_LOCAL;
63617421
JA
1886 else
1887 do_cache = false;
1da177e4 1888		/* If a multicast route does not exist, use the
dd28d1a0
ED
1889		 * default one, but do not use a gateway in this case.
1890		 * Yes, it is a hack.
1da177e4 1891 */
982721f3
DM
1892 if (fi && res->prefixlen < 4)
1893 fi = NULL;
6bb1c458
CF
1894 } else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
1895 (orig_oif != dev_out->ifindex)) {
1896 /* For local routes that require a particular output interface
1897 * we do not want to cache the result. Caching the result
1898 * causes incorrect behaviour when there are multiple source
1899		 * addresses on the interface: if the intended recipient is
1900		 * waiting on that interface for the packet, it never arrives
1901		 * there, because the packet will instead be delivered on
1902 * the loopback interface and the IP_PKTINFO ipi_ifindex will
1903 * be set to the loopback interface as well.
1904 */
1905 fi = NULL;
1da177e4
LT
1906 }
1907
f2bb4bed 1908 fnhe = NULL;
63617421
JA
1909 do_cache &= fi != NULL;
1910 if (do_cache) {
c5038a83 1911 struct rtable __rcu **prth;
c92b9655 1912 struct fib_nh *nh = &FIB_RES_NH(*res);
d26b3a7c 1913
c92b9655 1914 fnhe = find_exception(nh, fl4->daddr);
c5038a83
DM
1915 if (fnhe)
1916 prth = &fnhe->fnhe_rth;
c92b9655
JA
1917 else {
1918 if (unlikely(fl4->flowi4_flags &
1919 FLOWI_FLAG_KNOWN_NH &&
1920 !(nh->nh_gw &&
1921 nh->nh_scope == RT_SCOPE_LINK))) {
1922 do_cache = false;
1923 goto add;
1924 }
1925 prth = __this_cpu_ptr(nh->nh_pcpu_rth_output);
1926 }
c5038a83
DM
1927 rth = rcu_dereference(*prth);
1928 if (rt_cache_valid(rth)) {
1929 dst_hold(&rth->dst);
1930 return rth;
f2bb4bed
DM
1931 }
1932 }
c92b9655
JA
1933
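	/* No usable cached route was found: allocate a fresh rtable.  If
	 * do_cache is still true, rt_set_nexthop() below stores it in the
	 * nexthop cache (or in the matching exception entry); otherwise it
	 * goes on the uncached list so it can be flushed later.
	 */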
1934add:
5c1e6aa3
DM
1935 rth = rt_dst_alloc(dev_out,
1936 IN_DEV_CONF_GET(in_dev, NOPOLICY),
f2bb4bed 1937 IN_DEV_CONF_GET(in_dev, NOXFRM),
c92b9655 1938 do_cache);
8391d07b 1939 if (!rth)
5ada5527 1940 return ERR_PTR(-ENOBUFS);
8391d07b 1941
cf911662
DM
1942 rth->dst.output = ip_output;
1943
cf911662
DM
1944 rth->rt_genid = rt_genid(dev_net(dev_out));
1945 rth->rt_flags = flags;
1946 rth->rt_type = type;
9917e1e8 1947 rth->rt_is_input = 0;
13378cad 1948 rth->rt_iif = orig_oif ? : 0;
5943634f 1949 rth->rt_pmtu = 0;
f8126f1d 1950 rth->rt_gateway = 0;
155e8336 1951 rth->rt_uses_gateway = 0;
caacf05e 1952 INIT_LIST_HEAD(&rth->rt_uncached);
1da177e4
LT
1953
1954 RT_CACHE_STAT_INC(out_slow_tot);
1955
41347dcd 1956 if (flags & RTCF_LOCAL)
d8d1f30b 1957 rth->dst.input = ip_local_deliver;
1da177e4 1958 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
e905a9ed 1959 if (flags & RTCF_LOCAL &&
1da177e4 1960 !(dev_out->flags & IFF_LOOPBACK)) {
d8d1f30b 1961 rth->dst.output = ip_mc_output;
1da177e4
LT
1962 RT_CACHE_STAT_INC(out_slow_mc);
1963 }
1964#ifdef CONFIG_IP_MROUTE
982721f3 1965 if (type == RTN_MULTICAST) {
1da177e4 1966 if (IN_DEV_MFORWARD(in_dev) &&
813b3b5d 1967 !ipv4_is_local_multicast(fl4->daddr)) {
d8d1f30b
CG
1968 rth->dst.input = ip_mr_input;
1969 rth->dst.output = ip_mc_output;
1da177e4
LT
1970 }
1971 }
1972#endif
1973 }
1974
f2bb4bed 1975 rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
1da177e4 1976
5ada5527 1977 return rth;
1da177e4
LT
1978}
1979
1da177e4
LT
1980/*
1981 * Major route resolver routine.
1982 */
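/*
 * Resolution order: validate any caller-supplied source address, resolve the
 * output device (either from an explicit oif or from the FIB lookup), handle
 * local and loopback destinations specially, and finally let
 * __mkroute_output() build the rtable.
 */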
1983
89aef892 1984struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
1da177e4 1985{
1da177e4 1986 struct net_device *dev_out = NULL;
f61759e6 1987 __u8 tos = RT_FL_TOS(fl4);
813b3b5d
DM
1988 unsigned int flags = 0;
1989 struct fib_result res;
5ada5527 1990 struct rtable *rth;
813b3b5d 1991 int orig_oif;
1da177e4 1992
85b91b03 1993 res.tclassid = 0;
1da177e4 1994 res.fi = NULL;
8b96d22d 1995 res.table = NULL;
1da177e4 1996
813b3b5d
DM
1997 orig_oif = fl4->flowi4_oif;
1998
1fb9489b 1999 fl4->flowi4_iif = LOOPBACK_IFINDEX;
813b3b5d
DM
2000 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
2001 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
2002 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
44713b67 2003
010c2708 2004 rcu_read_lock();
813b3b5d 2005 if (fl4->saddr) {
b23dd4fe 2006 rth = ERR_PTR(-EINVAL);
813b3b5d
DM
2007 if (ipv4_is_multicast(fl4->saddr) ||
2008 ipv4_is_lbcast(fl4->saddr) ||
2009 ipv4_is_zeronet(fl4->saddr))
1da177e4
LT
2010 goto out;
2011
1da177e4
LT
2012		/* I removed the check for oif == dev_out->oif here.
2013 It was wrong for two reasons:
1ab35276
DL
2014		   1. ip_dev_find(net, saddr) can return the wrong iface if saddr
2015 is assigned to multiple interfaces.
1da177e4
LT
2016		   2. Moreover, we are allowed to send packets with a saddr
2017		      belonging to another iface. --ANK
2018 */
2019
813b3b5d
DM
2020 if (fl4->flowi4_oif == 0 &&
2021 (ipv4_is_multicast(fl4->daddr) ||
2022 ipv4_is_lbcast(fl4->daddr))) {
a210d01a 2023 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
813b3b5d 2024 dev_out = __ip_dev_find(net, fl4->saddr, false);
a210d01a
JA
2025 if (dev_out == NULL)
2026 goto out;
2027
1da177e4
LT
2028			/* Special hack: the user can direct multicasts
2029			   and limited broadcasts via the necessary interface
2030			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
2031			   This hack is not just for fun, it allows
2032			   vic, vat and friends to work.
2033			   They bind a socket to loopback, set ttl to zero
2034			   and expect that it will work.
2035			   From the viewpoint of the routing cache they are broken,
2036			   because we are not allowed to build a multicast path
2037			   with a loopback source addr (look, the routing cache
2038			   cannot know that ttl is zero, so that the packet
2039			   will not leave this host and the route is valid).
2040			   Luckily, this hack is a good workaround.
2041			*/
2042
813b3b5d 2043 fl4->flowi4_oif = dev_out->ifindex;
1da177e4
LT
2044 goto make_route;
2045 }
a210d01a 2046
813b3b5d 2047 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
a210d01a 2048 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
813b3b5d 2049 if (!__ip_dev_find(net, fl4->saddr, false))
a210d01a 2050 goto out;
a210d01a 2051 }
1da177e4
LT
2052 }
2053
2054
813b3b5d
DM
2055 if (fl4->flowi4_oif) {
2056 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
b23dd4fe 2057 rth = ERR_PTR(-ENODEV);
1da177e4
LT
2058 if (dev_out == NULL)
2059 goto out;
e5ed6399
HX
2060
2061 /* RACE: Check return value of inet_select_addr instead. */
fc75fc83 2062 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
b23dd4fe 2063 rth = ERR_PTR(-ENETUNREACH);
fc75fc83
ED
2064 goto out;
2065 }
813b3b5d
DM
2066 if (ipv4_is_local_multicast(fl4->daddr) ||
2067 ipv4_is_lbcast(fl4->daddr)) {
2068 if (!fl4->saddr)
2069 fl4->saddr = inet_select_addr(dev_out, 0,
2070 RT_SCOPE_LINK);
1da177e4
LT
2071 goto make_route;
2072 }
b15e22da 2073 if (!fl4->saddr) {
813b3b5d
DM
2074 if (ipv4_is_multicast(fl4->daddr))
2075 fl4->saddr = inet_select_addr(dev_out, 0,
2076 fl4->flowi4_scope);
2077 else if (!fl4->daddr)
2078 fl4->saddr = inet_select_addr(dev_out, 0,
2079 RT_SCOPE_HOST);
1da177e4
LT
2080 }
2081 }
2082
813b3b5d
DM
2083 if (!fl4->daddr) {
2084 fl4->daddr = fl4->saddr;
2085 if (!fl4->daddr)
2086 fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
b40afd0e 2087 dev_out = net->loopback_dev;
1fb9489b 2088 fl4->flowi4_oif = LOOPBACK_IFINDEX;
1da177e4
LT
2089 res.type = RTN_LOCAL;
2090 flags |= RTCF_LOCAL;
2091 goto make_route;
2092 }
2093
813b3b5d 2094 if (fib_lookup(net, fl4, &res)) {
1da177e4 2095 res.fi = NULL;
8b96d22d 2096 res.table = NULL;
813b3b5d 2097 if (fl4->flowi4_oif) {
1da177e4
LT
2098			/* Apparently, the routing tables are wrong. Assume
2099			   that the destination is on-link.
2100
2101			   WHY? DW.
2102			   Because we are allowed to send to an iface
2103			   even if it has NO routes and NO assigned
2104			   addresses. When oif is specified, the routing
2105			   tables are looked up with only one purpose:
2106			   to catch if the destination is gatewayed, rather than
2107			   direct. Moreover, if MSG_DONTROUTE is set,
2108			   we send the packet, ignoring both the routing tables
2109			   and the ifaddr state. --ANK
2110
2111
2112			   We could do this even if oif is unknown,
2113			   as IPv6 likely does, but we do not.
2114			*/
2115
813b3b5d
DM
2116 if (fl4->saddr == 0)
2117 fl4->saddr = inet_select_addr(dev_out, 0,
2118 RT_SCOPE_LINK);
1da177e4
LT
2119 res.type = RTN_UNICAST;
2120 goto make_route;
2121 }
b23dd4fe 2122 rth = ERR_PTR(-ENETUNREACH);
1da177e4
LT
2123 goto out;
2124 }
1da177e4
LT
2125
2126 if (res.type == RTN_LOCAL) {
813b3b5d 2127 if (!fl4->saddr) {
9fc3bbb4 2128 if (res.fi->fib_prefsrc)
813b3b5d 2129 fl4->saddr = res.fi->fib_prefsrc;
9fc3bbb4 2130 else
813b3b5d 2131 fl4->saddr = fl4->daddr;
9fc3bbb4 2132 }
b40afd0e 2133 dev_out = net->loopback_dev;
813b3b5d 2134 fl4->flowi4_oif = dev_out->ifindex;
1da177e4
LT
2135 flags |= RTCF_LOCAL;
2136 goto make_route;
2137 }
2138
2139#ifdef CONFIG_IP_ROUTE_MULTIPATH
813b3b5d 2140 if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
1b7fe593 2141 fib_select_multipath(&res);
1da177e4
LT
2142 else
2143#endif
21d8c49e
DM
2144 if (!res.prefixlen &&
2145 res.table->tb_num_default > 1 &&
813b3b5d 2146 res.type == RTN_UNICAST && !fl4->flowi4_oif)
0c838ff1 2147 fib_select_default(&res);
1da177e4 2148
813b3b5d
DM
2149 if (!fl4->saddr)
2150 fl4->saddr = FIB_RES_PREFSRC(net, res);
1da177e4 2151
1da177e4 2152 dev_out = FIB_RES_DEV(res);
813b3b5d 2153 fl4->flowi4_oif = dev_out->ifindex;
1da177e4
LT
2154
2155
2156make_route:
1a00fee4 2157 rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
1da177e4 2158
010c2708
DM
2159out:
2160 rcu_read_unlock();
b23dd4fe 2161 return rth;
1da177e4 2162}
d8c97a94
ACM
2163EXPORT_SYMBOL_GPL(__ip_route_output_key);
2164
ae2688d5
JW
2165static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
2166{
2167 return NULL;
2168}
2169
ebb762f2 2170static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 2171{
618f9bc7
SK
2172 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2173
2174 return mtu ? : dst->dev->mtu;
ec831ea7
RD
2175}
2176
6700c270
DM
2177static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
2178 struct sk_buff *skb, u32 mtu)
14e50e57
DM
2179{
2180}
2181
6700c270
DM
2182static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
2183 struct sk_buff *skb)
b587ee3b
DM
2184{
2185}
2186
0972ddb2
HB
2187static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
2188 unsigned long old)
2189{
2190 return NULL;
2191}
2192
14e50e57
DM
2193static struct dst_ops ipv4_dst_blackhole_ops = {
2194 .family = AF_INET,
09640e63 2195 .protocol = cpu_to_be16(ETH_P_IP),
ae2688d5 2196 .check = ipv4_blackhole_dst_check,
ebb762f2 2197 .mtu = ipv4_blackhole_mtu,
214f45c9 2198 .default_advmss = ipv4_default_advmss,
14e50e57 2199 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
b587ee3b 2200 .redirect = ipv4_rt_blackhole_redirect,
0972ddb2 2201 .cow_metrics = ipv4_rt_blackhole_cow_metrics,
d3aaeb38 2202 .neigh_lookup = ipv4_neigh_lookup,
14e50e57
DM
2203};
2204
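/*
 * Clone an existing route into a "blackhole" dst: it keeps the original
 * device, flags and gateway, but its input/output handlers are dst_discard
 * and the ipv4_dst_blackhole_ops callbacks above are no-ops, so packets
 * routed through it are silently dropped.  xfrm uses this to hand back a
 * route that must not transmit, e.g. while security associations are still
 * being resolved.
 */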
2774c131 2205struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2206{
2774c131 2207 struct rtable *ort = (struct rtable *) dst_orig;
f5b0a874 2208 struct rtable *rt;
14e50e57 2209
f5b0a874 2210 rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
14e50e57 2211 if (rt) {
d8d1f30b 2212 struct dst_entry *new = &rt->dst;
14e50e57 2213
14e50e57 2214 new->__use = 1;
352e512c
HX
2215 new->input = dst_discard;
2216 new->output = dst_discard;
14e50e57 2217
d8d1f30b 2218 new->dev = ort->dst.dev;
14e50e57
DM
2219 if (new->dev)
2220 dev_hold(new->dev);
2221
9917e1e8 2222 rt->rt_is_input = ort->rt_is_input;
5e2b61f7 2223 rt->rt_iif = ort->rt_iif;
5943634f 2224 rt->rt_pmtu = ort->rt_pmtu;
14e50e57 2225
e84f84f2 2226 rt->rt_genid = rt_genid(net);
14e50e57
DM
2227 rt->rt_flags = ort->rt_flags;
2228 rt->rt_type = ort->rt_type;
14e50e57 2229 rt->rt_gateway = ort->rt_gateway;
155e8336 2230 rt->rt_uses_gateway = ort->rt_uses_gateway;
14e50e57 2231
caacf05e
DM
2232 INIT_LIST_HEAD(&rt->rt_uncached);
2233
14e50e57
DM
2234 dst_free(new);
2235 }
2236
2774c131
DM
2237 dst_release(dst_orig);
2238
2239 return rt ? &rt->dst : ERR_PTR(-ENOMEM);
14e50e57
DM
2240}
2241
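/*
 * Like __ip_route_output_key(), but when the flow specifies a protocol the
 * result is additionally passed through xfrm_lookup(), giving IPsec policy
 * a chance to transform the route or replace it with a blackhole.
 */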
9d6ec938 2242struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
b23dd4fe 2243 struct sock *sk)
1da177e4 2244{
9d6ec938 2245 struct rtable *rt = __ip_route_output_key(net, flp4);
1da177e4 2246
b23dd4fe
DM
2247 if (IS_ERR(rt))
2248 return rt;
1da177e4 2249
56157872 2250 if (flp4->flowi4_proto)
9d6ec938
DM
2251 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
2252 flowi4_to_flowi(flp4),
2253 sk, 0);
1da177e4 2254
b23dd4fe 2255 return rt;
1da177e4 2256}
d8c97a94
ACM
2257EXPORT_SYMBOL_GPL(ip_route_output_flow);
2258
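/*
 * Encode @rt and the flow that produced it as an RTM_NEWROUTE netlink
 * message: destination/source, oif or iif, gateway, metrics (including any
 * live PMTU exception) and cache information such as the expiry time.
 */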
f1ce3062 2259static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
15e47304 2260 struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
f1ce3062 2261 u32 seq, int event, int nowait, unsigned int flags)
1da177e4 2262{
511c3f92 2263 struct rtable *rt = skb_rtable(skb);
1da177e4 2264 struct rtmsg *r;
be403ea1 2265 struct nlmsghdr *nlh;
2bc8ca40 2266 unsigned long expires = 0;
f185071d 2267 u32 error;
521f5490 2268 u32 metrics[RTAX_MAX];
be403ea1 2269
15e47304 2270 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
be403ea1 2271 if (nlh == NULL)
26932566 2272 return -EMSGSIZE;
be403ea1
TG
2273
2274 r = nlmsg_data(nlh);
1da177e4
LT
2275 r->rtm_family = AF_INET;
2276 r->rtm_dst_len = 32;
2277 r->rtm_src_len = 0;
d6c0a4f6 2278 r->rtm_tos = fl4->flowi4_tos;
1da177e4 2279 r->rtm_table = RT_TABLE_MAIN;
f3756b79
DM
2280 if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
2281 goto nla_put_failure;
1da177e4
LT
2282 r->rtm_type = rt->rt_type;
2283 r->rtm_scope = RT_SCOPE_UNIVERSE;
2284 r->rtm_protocol = RTPROT_UNSPEC;
2285 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
2286 if (rt->rt_flags & RTCF_NOTIFY)
2287 r->rtm_flags |= RTM_F_NOTIFY;
3c2a0909
S
2288 if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
2289 r->rtm_flags |= RTCF_DOREDIRECT;
be403ea1 2290
f1ce3062 2291 if (nla_put_be32(skb, RTA_DST, dst))
f3756b79 2292 goto nla_put_failure;
1a00fee4 2293 if (src) {
1da177e4 2294 r->rtm_src_len = 32;
1a00fee4 2295 if (nla_put_be32(skb, RTA_SRC, src))
f3756b79 2296 goto nla_put_failure;
1da177e4 2297 }
f3756b79
DM
2298 if (rt->dst.dev &&
2299 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2300 goto nla_put_failure;
c7066f70 2301#ifdef CONFIG_IP_ROUTE_CLASSID
f3756b79
DM
2302 if (rt->dst.tclassid &&
2303 nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
2304 goto nla_put_failure;
1da177e4 2305#endif
41347dcd 2306 if (!rt_is_input_route(rt) &&
d6c0a4f6
DM
2307 fl4->saddr != src) {
2308 if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr))
f3756b79
DM
2309 goto nla_put_failure;
2310 }
155e8336 2311 if (rt->rt_uses_gateway &&
f3756b79
DM
2312 nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
2313 goto nla_put_failure;
be403ea1 2314
ee9a8f7a
SK
2315 expires = rt->dst.expires;
2316 if (expires) {
2317 unsigned long now = jiffies;
2318
2319 if (time_before(now, expires))
2320 expires -= now;
2321 else
2322 expires = 0;
2323 }
2324
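	/* Report a learned PMTU only while its exception is still valid, and
	 * do it by overriding RTAX_MTU in the local copy of the metrics
	 * rather than touching the shared dst metrics.
	 */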
521f5490 2325 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
ee9a8f7a 2326 if (rt->rt_pmtu && expires)
521f5490
JA
2327 metrics[RTAX_MTU - 1] = rt->rt_pmtu;
2328 if (rtnetlink_put_metrics(skb, metrics) < 0)
be403ea1
TG
2329 goto nla_put_failure;
2330
b4869889 2331 if (fl4->flowi4_mark &&
68aaed54 2332 nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
f3756b79 2333 goto nla_put_failure;
963bfeee 2334
092f27a6
LC
2335 if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
2336 nla_put_u32(skb, RTA_UID,
2337 from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
2338 goto nla_put_failure;
2339
d8d1f30b 2340 error = rt->dst.error;
be403ea1 2341
c7537967 2342 if (rt_is_input_route(rt)) {
8caaf7b6
ND
2343#ifdef CONFIG_IP_MROUTE
2344 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
2345 IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
2346 int err = ipmr_get_route(net, skb,
2347 fl4->saddr, fl4->daddr,
c980ece0
NA
2348 r, nowait, portid);
2349
8caaf7b6
ND
2350 if (err <= 0) {
2351 if (!nowait) {
2352 if (err == 0)
2353 return 0;
2354 goto nla_put_failure;
2355 } else {
2356 if (err == -EMSGSIZE)
2357 goto nla_put_failure;
2358 error = err;
2359 }
2360 }
2361 } else
2362#endif
da1bba1f 2363 if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex))
8caaf7b6 2364 goto nla_put_failure;
1da177e4
LT
2365 }
2366
f185071d 2367 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
e3703b3d 2368 goto nla_put_failure;
be403ea1
TG
2369
2370 return nlmsg_end(skb, nlh);
1da177e4 2371
be403ea1 2372nla_put_failure:
26932566
PM
2373 nlmsg_cancel(skb, nlh);
2374 return -EMSGSIZE;
1da177e4
LT
2375}
2376
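/*
 * RTM_GETROUTE handler.  Builds a dummy skb, then either replays an input
 * route lookup on the requested interface (when RTA_IIF is supplied) or
 * performs an output lookup, and unicasts the result back to the requester
 * via rt_fill_info().
 */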
661d2967 2377static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1da177e4 2378{
3b1e0a65 2379 struct net *net = sock_net(in_skb->sk);
d889ce3b
TG
2380 struct rtmsg *rtm;
2381 struct nlattr *tb[RTA_MAX+1];
1da177e4 2382 struct rtable *rt = NULL;
d6c0a4f6 2383 struct flowi4 fl4;
9e12bb22
AV
2384 __be32 dst = 0;
2385 __be32 src = 0;
2386 u32 iif;
d889ce3b 2387 int err;
963bfeee 2388 int mark;
1da177e4 2389 struct sk_buff *skb;
092f27a6 2390 kuid_t uid;
1da177e4 2391
d889ce3b
TG
2392 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
2393 if (err < 0)
2394 goto errout;
2395
2396 rtm = nlmsg_data(nlh);
2397
1da177e4 2398 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
d889ce3b
TG
2399 if (skb == NULL) {
2400 err = -ENOBUFS;
2401 goto errout;
2402 }
1da177e4
LT
2403
2404	/* Reserve room for dummy headers; this skb can pass
2405	   through a good chunk of the routing engine.
2406 */
459a98ed 2407 skb_reset_mac_header(skb);
c1d2bbe1 2408 skb_reset_network_header(skb);
d2c962b8
SH
2409
2410 /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
eddc9ec5 2411 ip_hdr(skb)->protocol = IPPROTO_ICMP;
1da177e4
LT
2412 skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
2413
17fb2c64
AV
2414 src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
2415 dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
d889ce3b 2416 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
963bfeee 2417 mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
092f27a6
LC
2418 if (tb[RTA_UID])
2419 uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
2420 else
2421 uid = (iif ? INVALID_UID : current_uid());
1da177e4 2422
d6c0a4f6
DM
2423 memset(&fl4, 0, sizeof(fl4));
2424 fl4.daddr = dst;
2425 fl4.saddr = src;
2426 fl4.flowi4_tos = rtm->rtm_tos;
2427 fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
2428 fl4.flowi4_mark = mark;
092f27a6 2429 fl4.flowi4_uid = uid;
d6c0a4f6 2430
1da177e4 2431 if (iif) {
d889ce3b
TG
2432 struct net_device *dev;
2433
1937504d 2434 dev = __dev_get_by_index(net, iif);
d889ce3b
TG
2435 if (dev == NULL) {
2436 err = -ENODEV;
2437 goto errout_free;
2438 }
2439
1da177e4
LT
2440 skb->protocol = htons(ETH_P_IP);
2441 skb->dev = dev;
963bfeee 2442 skb->mark = mark;
1da177e4
LT
2443 local_bh_disable();
2444 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
2445 local_bh_enable();
d889ce3b 2446
511c3f92 2447 rt = skb_rtable(skb);
d8d1f30b
CG
2448 if (err == 0 && rt->dst.error)
2449 err = -rt->dst.error;
1da177e4 2450 } else {
9d6ec938 2451 rt = ip_route_output_key(net, &fl4);
b23dd4fe
DM
2452
2453 err = 0;
2454 if (IS_ERR(rt))
2455 err = PTR_ERR(rt);
1da177e4 2456 }
d889ce3b 2457
1da177e4 2458 if (err)
d889ce3b 2459 goto errout_free;
1da177e4 2460
d8d1f30b 2461 skb_dst_set(skb, &rt->dst);
1da177e4
LT
2462 if (rtm->rtm_flags & RTM_F_NOTIFY)
2463 rt->rt_flags |= RTCF_NOTIFY;
2464
f1ce3062 2465 err = rt_fill_info(net, dst, src, &fl4, skb,
15e47304 2466 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
1937504d 2467 RTM_NEWROUTE, 0, 0);
d889ce3b
TG
2468 if (err <= 0)
2469 goto errout_free;
1da177e4 2470
15e47304 2471 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
d889ce3b 2472errout:
2942e900 2473 return err;
1da177e4 2474
d889ce3b 2475errout_free:
1da177e4 2476 kfree_skb(skb);
d889ce3b 2477 goto errout;
1da177e4
LT
2478}
2479
2480int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2481{
1da177e4
LT
2482 return skb->len;
2483}
2484
2485void ip_rt_multicast_event(struct in_device *in_dev)
2486{
4ccfe6d4 2487 rt_cache_flush(dev_net(in_dev->dev));
1da177e4
LT
2488}
2489
2490#ifdef CONFIG_SYSCTL
082c7ca4
G
2491static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
2492static int ip_rt_gc_interval __read_mostly = 60 * HZ;
2493static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
2494static int ip_rt_gc_elasticity __read_mostly = 8;
2495
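/* Writing anything to net.ipv4.route.flush flushes the cached routes of the
 * namespace stored in ->extra1; reading the file is rejected with -EINVAL.
 */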
81c684d1 2496static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
8d65af78 2497 void __user *buffer,
1da177e4
LT
2498 size_t *lenp, loff_t *ppos)
2499{
2500 if (write) {
4ccfe6d4 2501 rt_cache_flush((struct net *)__ctl->extra1);
1da177e4 2502 return 0;
e905a9ed 2503 }
1da177e4
LT
2504
2505 return -EINVAL;
2506}
2507
eeb61f71 2508static ctl_table ipv4_route_table[] = {
1da177e4 2509 {
1da177e4
LT
2510 .procname = "gc_thresh",
2511 .data = &ipv4_dst_ops.gc_thresh,
2512 .maxlen = sizeof(int),
2513 .mode = 0644,
6d9f239a 2514 .proc_handler = proc_dointvec,
1da177e4
LT
2515 },
2516 {
1da177e4
LT
2517 .procname = "max_size",
2518 .data = &ip_rt_max_size,
2519 .maxlen = sizeof(int),
2520 .mode = 0644,
6d9f239a 2521 .proc_handler = proc_dointvec,
1da177e4
LT
2522 },
2523 {
2524 /* Deprecated. Use gc_min_interval_ms */
e905a9ed 2525
1da177e4
LT
2526 .procname = "gc_min_interval",
2527 .data = &ip_rt_gc_min_interval,
2528 .maxlen = sizeof(int),
2529 .mode = 0644,
6d9f239a 2530 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2531 },
2532 {
1da177e4
LT
2533 .procname = "gc_min_interval_ms",
2534 .data = &ip_rt_gc_min_interval,
2535 .maxlen = sizeof(int),
2536 .mode = 0644,
6d9f239a 2537 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4
LT
2538 },
2539 {
1da177e4
LT
2540 .procname = "gc_timeout",
2541 .data = &ip_rt_gc_timeout,
2542 .maxlen = sizeof(int),
2543 .mode = 0644,
6d9f239a 2544 .proc_handler = proc_dointvec_jiffies,
1da177e4 2545 },
9f28a2fc
ED
2546 {
2547 .procname = "gc_interval",
2548 .data = &ip_rt_gc_interval,
2549 .maxlen = sizeof(int),
2550 .mode = 0644,
2551 .proc_handler = proc_dointvec_jiffies,
2552 },
1da177e4 2553 {
1da177e4
LT
2554 .procname = "redirect_load",
2555 .data = &ip_rt_redirect_load,
2556 .maxlen = sizeof(int),
2557 .mode = 0644,
6d9f239a 2558 .proc_handler = proc_dointvec,
1da177e4
LT
2559 },
2560 {
1da177e4
LT
2561 .procname = "redirect_number",
2562 .data = &ip_rt_redirect_number,
2563 .maxlen = sizeof(int),
2564 .mode = 0644,
6d9f239a 2565 .proc_handler = proc_dointvec,
1da177e4
LT
2566 },
2567 {
1da177e4
LT
2568 .procname = "redirect_silence",
2569 .data = &ip_rt_redirect_silence,
2570 .maxlen = sizeof(int),
2571 .mode = 0644,
6d9f239a 2572 .proc_handler = proc_dointvec,
1da177e4
LT
2573 },
2574 {
1da177e4
LT
2575 .procname = "error_cost",
2576 .data = &ip_rt_error_cost,
2577 .maxlen = sizeof(int),
2578 .mode = 0644,
6d9f239a 2579 .proc_handler = proc_dointvec,
1da177e4
LT
2580 },
2581 {
1da177e4
LT
2582 .procname = "error_burst",
2583 .data = &ip_rt_error_burst,
2584 .maxlen = sizeof(int),
2585 .mode = 0644,
6d9f239a 2586 .proc_handler = proc_dointvec,
1da177e4
LT
2587 },
2588 {
1da177e4
LT
2589 .procname = "gc_elasticity",
2590 .data = &ip_rt_gc_elasticity,
2591 .maxlen = sizeof(int),
2592 .mode = 0644,
6d9f239a 2593 .proc_handler = proc_dointvec,
1da177e4
LT
2594 },
2595 {
1da177e4
LT
2596 .procname = "mtu_expires",
2597 .data = &ip_rt_mtu_expires,
2598 .maxlen = sizeof(int),
2599 .mode = 0644,
6d9f239a 2600 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2601 },
2602 {
1da177e4
LT
2603 .procname = "min_pmtu",
2604 .data = &ip_rt_min_pmtu,
2605 .maxlen = sizeof(int),
2606 .mode = 0644,
6d9f239a 2607 .proc_handler = proc_dointvec,
1da177e4
LT
2608 },
2609 {
1da177e4
LT
2610 .procname = "min_adv_mss",
2611 .data = &ip_rt_min_advmss,
2612 .maxlen = sizeof(int),
2613 .mode = 0644,
6d9f239a 2614 .proc_handler = proc_dointvec,
1da177e4 2615 },
f8572d8f 2616 { }
1da177e4 2617};
39a23e75 2618
39a23e75
DL
2619static struct ctl_table ipv4_route_flush_table[] = {
2620 {
39a23e75
DL
2621 .procname = "flush",
2622 .maxlen = sizeof(int),
2623 .mode = 0200,
6d9f239a 2624 .proc_handler = ipv4_sysctl_rtcache_flush,
39a23e75 2625 },
f8572d8f 2626 { },
39a23e75
DL
2627};
2628
2629static __net_init int sysctl_route_net_init(struct net *net)
2630{
2631 struct ctl_table *tbl;
2632
2633 tbl = ipv4_route_flush_table;
09ad9bc7 2634 if (!net_eq(net, &init_net)) {
39a23e75
DL
2635 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
2636 if (tbl == NULL)
2637 goto err_dup;
464dc801
EB
2638
2639 /* Don't export sysctls to unprivileged users */
2640 if (net->user_ns != &init_user_ns)
2641 tbl[0].procname = NULL;
39a23e75
DL
2642 }
2643 tbl[0].extra1 = net;
2644
ec8f23ce 2645 net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
39a23e75
DL
2646 if (net->ipv4.route_hdr == NULL)
2647 goto err_reg;
2648 return 0;
2649
2650err_reg:
2651 if (tbl != ipv4_route_flush_table)
2652 kfree(tbl);
2653err_dup:
2654 return -ENOMEM;
2655}
2656
2657static __net_exit void sysctl_route_net_exit(struct net *net)
2658{
2659 struct ctl_table *tbl;
2660
2661 tbl = net->ipv4.route_hdr->ctl_table_arg;
2662 unregister_net_sysctl_table(net->ipv4.route_hdr);
2663 BUG_ON(tbl == ipv4_route_flush_table);
2664 kfree(tbl);
2665}
2666
2667static __net_initdata struct pernet_operations sysctl_route_ops = {
2668 .init = sysctl_route_net_init,
2669 .exit = sysctl_route_net_exit,
2670};
1da177e4
LT
2671#endif
2672
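/* Per-namespace init: start the route generation counter at zero and seed
 * dev_addr_genid with random bytes.
 */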
3ee94372 2673static __net_init int rt_genid_init(struct net *net)
9f5e97e5 2674{
b42664f8 2675 atomic_set(&net->rt_genid, 0);
436c3b66
DM
2676 get_random_bytes(&net->ipv4.dev_addr_genid,
2677 sizeof(net->ipv4.dev_addr_genid));
9f5e97e5
DL
2678 return 0;
2679}
2680
3ee94372
NH
2681static __net_initdata struct pernet_operations rt_genid_ops = {
2682 .init = rt_genid_init,
9f5e97e5
DL
2683};
2684
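/* Each namespace owns an inet_peer_base; the tree it anchors keeps
 * long-lived per-destination state and is invalidated and freed when the
 * namespace exits.
 */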
c3426b47
DM
2685static int __net_init ipv4_inetpeer_init(struct net *net)
2686{
2687 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2688
2689 if (!bp)
2690 return -ENOMEM;
2691 inet_peer_base_init(bp);
2692 net->ipv4.peers = bp;
2693 return 0;
2694}
2695
2696static void __net_exit ipv4_inetpeer_exit(struct net *net)
2697{
2698 struct inet_peer_base *bp = net->ipv4.peers;
2699
2700 net->ipv4.peers = NULL;
56a6b248 2701 inetpeer_invalidate_tree(bp);
c3426b47
DM
2702 kfree(bp);
2703}
2704
2705static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
2706 .init = ipv4_inetpeer_init,
2707 .exit = ipv4_inetpeer_exit,
2708};
9f5e97e5 2709
c7066f70 2710#ifdef CONFIG_IP_ROUTE_CLASSID
7d720c3e 2711struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
c7066f70 2712#endif /* CONFIG_IP_ROUTE_CLASSID */
1da177e4 2713
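/*
 * Boot-time setup: allocate and randomize the ip_idents array, create the
 * dst kmem cache and entry counters, initialize devinet and the FIB,
 * register the proc and sysctl hooks, and wire up the RTM_GETROUTE handler.
 */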
1da177e4
LT
2714int __init ip_rt_init(void)
2715{
424c4b70 2716 int rc = 0;
1da177e4 2717
ff1f69a8
ED
2718 ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
2719 if (!ip_idents)
2720 panic("IP: failed to allocate ip_idents\n");
2721
2722 prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
2723
c7066f70 2724#ifdef CONFIG_IP_ROUTE_CLASSID
0dcec8c2 2725 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
1da177e4
LT
2726 if (!ip_rt_acct)
2727 panic("IP: failed to allocate ip_rt_acct\n");
1da177e4
LT
2728#endif
2729
e5d679f3
AD
2730 ipv4_dst_ops.kmem_cachep =
2731 kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
20c2df83 2732 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1da177e4 2733
14e50e57
DM
2734 ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
2735
fc66f95c
ED
2736 if (dst_entries_init(&ipv4_dst_ops) < 0)
2737 panic("IP: failed to allocate ipv4_dst_ops counter\n");
2738
2739 if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
2740 panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
2741
89aef892
DM
2742 ipv4_dst_ops.gc_thresh = ~0;
2743 ip_rt_max_size = INT_MAX;
1da177e4 2744
1da177e4
LT
2745 devinet_init();
2746 ip_fib_init();
2747
73b38711 2748 if (ip_rt_proc_init())
058bd4d2 2749 pr_err("Unable to create route proc files\n");
1da177e4
LT
2750#ifdef CONFIG_XFRM
2751 xfrm_init();
703fb94e 2752 xfrm4_init();
1da177e4 2753#endif
c7ac8679 2754 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);
63f3444f 2755
39a23e75
DL
2756#ifdef CONFIG_SYSCTL
2757 register_pernet_subsys(&sysctl_route_ops);
2758#endif
3ee94372 2759 register_pernet_subsys(&rt_genid_ops);
c3426b47 2760 register_pernet_subsys(&ipv4_inetpeer_ops);
1da177e4
LT
2761 return rc;
2762}
2763
a1bc6eb4 2764#ifdef CONFIG_SYSCTL
eeb61f71
AV
2765/*
2766 * We really need to sanitize the damn ipv4 init order, then all
2767 * this nonsense will go away.
2768 */
2769void __init ip_static_sysctl_init(void)
2770{
4e5ca785 2771 register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
eeb61f71 2772}
a1bc6eb4 2773#endif