/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);

#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}

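/* Decide whether a TIME-WAIT socket for the same port pair can be reused
 * by a new outgoing connection.  Returns 1 when reuse is safe (the new
 * write_seq and timestamp state are seeded from the old bucket), 0 otherwise.
 */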
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk, true);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
		tcp_fetch_timewait_stamp(sk, &rt->dst);

	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);
	printk(KERN_INFO "[socket_conn]IPV4 socket[%lu] sport:%u \n", SOCK_INODE(sk->sk_socket)->i_ino, ntohs(inet->inet_sport));
	if (!tp->write_seq && likely(!tp->repair))
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);

	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 * It can be called through tcp_release_cb() if socket was owned by user
 * at the time tcp_v4_err() was called to handle ICMP message.
 */
void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);
	u32 mtu = tcp_sk(sk)->mtu_info;

	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
EXPORT_SYMBOL(tcp_v4_mtu_reduced);

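/* Propagate an ICMP redirect to the socket's cached route, if one exists. */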
static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);

	if (dst)
		dst->ops->redirect(dst, sk, skb);
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition. If err < 0 then the socket should
 * be closed and the error returned to the user. If err > 0
 * it's just the icmp type << 8 | icmp code. After adjustment
 * header points to the first 8 bytes of the tcp header. We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	struct request_sock *req;
	__u32 seq;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	if (icmp_skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of PMTU discovery (RFC1191) special case :
	 * we can receive locally generated ICMP messages while socket is held.
	 */
	if (sock_owned_by_user(sk)) {
		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
			NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
	}
	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	req = tp->fastopen_rsk;
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt) &&
	    (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
		/* For a Fast Open socket, allow seq to be snt_isn. */
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			/* We are not interested in TCP_LISTEN and open_requests
			 * (SYN-ACKs sent out by Linux are always <576bytes so
			 * they should go through unfragmented).
			 */
			if (sk->sk_state == TCP_LISTEN)
				goto out;

			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff)
			break;

		/* XXX (TFO) - revisit the following logic for TFO */

		if (sock_owned_by_user(sk))
			break;

		icsk->icsk_backoff--;
		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				tcp_time_stamp - TCP_SKB_CB(skb)->when);

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, sysctl_tcp_rto_max);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	/* XXX (TFO) - if it's a TFO socket and has been accepted, rather
	 * than following the TCP_SYN_RECV case and closing the socket,
	 * we ignore the ICMP error and keep trying like a fully established
	 * socket. Is this the right thing to do?
	 */
	if (req && req->sk == NULL)
		goto out;

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can f.e. if SYNs crossed,
			       or Fast Open.
			     */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even these two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

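/* Fill in the TCP checksum for an outgoing segment.  With CHECKSUM_PARTIAL
 * only the pseudo-header sum is stored and the device completes the
 * checksum; otherwise the full checksum is computed in software here.
 */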
static void __tcp_v4_send_check(struct sk_buff *skb,
				__be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

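/* Prepare the TCP header of a GSO skb for checksum offload: clear the
 * checksum field and store the pseudo-header sum via __tcp_v4_send_check().
 */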
int tcp_v4_gso_send_check(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	th->check = 0;
	skb->ip_summed = CHECKSUM_PARTIAL;
	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
	return 0;
}

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So that we build reply only basing on parameters
 *		arrived with segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest = th->source;
	rep.th.source = th->dest;
	rep.th.doff = sizeof(struct tcphdr) / 4;
	rep.th.rst = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	hash_location = tcp_parse_md5sig_option(th);
	if (!sk && hash_location) {
		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we do not lose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
					     &tcp_hashinfo, ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			return;
		rcu_read_lock();
		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto release_sk1;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto release_sk1;
	} else {
		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					     &ip_hdr(skb)->saddr,
					     AF_INET) : NULL;
	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. No choice here, if we choose to force
	 * input interface, we will misroute in case of asymmetric route.
	 */
	if (sk)
		arg.bound_dev_if = sk->sk_bound_dev_if;

	net = dev_net(skb_dst(skb)->dev);
	arg.tos = ip_hdr(skb)->tos;
	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);

#ifdef CONFIG_TCP_MD5SIG
release_sk1:
	if (sk1) {
		rcu_read_unlock();
		sock_put(sk1);
	}
#endif
}

/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb_dst(skb)->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (tsecr) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tsval);
		rep.opt[2] = htonl(tsecr);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest = th->source;
	rep.th.source = th->dest;
	rep.th.doff = arg.iov[0].iov_len / 4;
	rep.th.seq = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack = 1;
	rep.th.window = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (tsecr) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}

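/* Send an ACK on behalf of a TIME-WAIT socket, echoing the window,
 * timestamps and (if configured) the MD5 signature recorded in the
 * timewait bucket.
 */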
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
			tcp_time_stamp,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
					  AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct request_sock *req,
			      u16 queue_mapping,
			      bool nocache)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, NULL);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);

		skb_set_queue_mapping(skb, queue_mapping);
		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
		if (!tcp_rsk(req)->snt_synack && !err)
			tcp_rsk(req)->snt_synack = tcp_time_stamp;
	}

	return err;
}

static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
{
	int res = tcp_v4_send_synack(sk, NULL, req, 0, false);

	if (!res)
		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
	return res;
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

/*
 * Return true if a syncookie should be sent
 */
bool tcp_syn_flood_action(struct sock *sk,
			  const struct sk_buff *skb,
			  const char *proto)
{
	const char *msg = "Dropping request";
	bool want_cookie = false;
	struct listen_sock *lopt;

#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies) {
		msg = "Sending cookies";
		want_cookie = true;
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
	} else
#endif
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);

	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
	if (!lopt->synflood_warned) {
		lopt->synflood_warned = 1;
		pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
			proto, ntohs(tcp_hdr(skb)->dest), msg);
	}
	return want_cookie;
}
EXPORT_SYMBOL(tcp_syn_flood_action);

/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
{
	const struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options_rcu *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = sizeof(*dopt) + opt->optlen;

		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(&dopt->opt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	unsigned int size = sizeof(struct in_addr);
	struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       sock_owned_by_user(sk) ||
				       lockdep_is_held(&sk->sk_lock.slock));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size))
			return key;
	}
	return NULL;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}

/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk));
	if (!md5sig) {
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);

int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (!key)
		return -ENOENT;
	hlist_del_rcu(&key->node);
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk));
	if (hlist_empty(&md5sig->head))
		tcp_free_md5sig_pool();
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);

static void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *n;
	struct tcp_md5sig_info *md5sig;

	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	if (!hlist_empty(&md5sig->head))
		tcp_free_md5sig_pool();
	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}

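/* TCP_MD5SIG setsockopt() handler.  For illustration only (not code in this
 * file), a userspace caller is expected to pass a struct tcp_md5sig roughly
 * as follows; a zero tcpm_keylen deletes the key for that peer:
 *
 *	struct tcp_md5sig md5 = { };
 *	struct sockaddr_in *sin = (struct sockaddr_in *)&md5.tcpm_addr;
 *	sin->sin_family = AF_INET;
 *	sin->sin_addr.s_addr = peer_ip;            // hypothetical variable
 *	md5.tcpm_keylen = strlen(secret);          // hypothetical variable
 *	memcpy(md5.tcpm_key, secret, md5.tcpm_keylen);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 */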
static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				      AF_INET);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
			      GFP_KERNEL);
}

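/* Mix the IPv4 pseudo-header (saddr, daddr, protocol, length) into the MD5
 * hash in progress; used by both the header-only and full-skb signers below.
 */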
static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			const struct sock *sk, const struct request_sock *req,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->inet_saddr;
		daddr = inet_sk(sk)->inet_daddr;
	} else if (req) {
		saddr = inet_rsk(req)->loc_addr;
		daddr = inet_rsk(req)->rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				     &iph->saddr, ntohs(th->source),
				     &iph->daddr, ntohs(th->dest),
				     genhash ? " tcp_v4_calc_md5_hash failed"
					     : "");
		return true;
	}
	return false;
}

#endif

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		= PF_INET,
	.obj_size	= sizeof(struct tcp_request_sock),
	.rtx_syn_ack	= tcp_v4_rtx_synack,
	.send_ack	= tcp_v4_reqsk_send_ack,
	.destructor	= tcp_v4_reqsk_destructor,
	.send_reset	= tcp_v4_send_reset,
	.syn_ack_timeout = tcp_syn_ack_timeout,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup	= tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
};
#endif

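/* Decide whether an incoming SYN may use TCP Fast Open: check the server
 * sysctl knobs and the listener's TFO queue limit, validate or generate a
 * Fast Open cookie, and return true only when the SYN's data can be
 * accepted right away.
 */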
static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
			       struct request_sock *req,
			       struct tcp_fastopen_cookie *foc,
			       struct tcp_fastopen_cookie *valid_foc)
{
	bool skip_cookie = false;
	struct fastopen_queue *fastopenq;

	if (likely(!fastopen_cookie_present(foc))) {
		/* See include/net/tcp.h for the meaning of these knobs */
		if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
		    ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
		     (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
			skip_cookie = true; /* no cookie to validate */
		else
			return false;
	}
	fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
	/* A FO option is present; bump the counter. */
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);

	/* Make sure the listener has enabled fastopen, and we don't
	 * exceed the max # of pending TFO requests allowed before trying
	 * to validate the cookie, in order to avoid burning CPU cycles
	 * unnecessarily.
	 *
	 * XXX (TFO) - The implication of checking the max_qlen before
	 * processing a cookie request is that clients can't differentiate
	 * between qlen overflow causing Fast Open to be disabled
	 * temporarily vs a server not supporting Fast Open at all.
	 */
	if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
	    fastopenq == NULL || fastopenq->max_qlen == 0)
		return false;

	if (fastopenq->qlen >= fastopenq->max_qlen) {
		struct request_sock *req1;
		spin_lock(&fastopenq->lock);
		req1 = fastopenq->rskq_rst_head;
		if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
			spin_unlock(&fastopenq->lock);
			NET_INC_STATS_BH(sock_net(sk),
					 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
			/* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL */
			foc->len = -1;
			return false;
		}
		fastopenq->rskq_rst_head = req1->dl_next;
		fastopenq->qlen--;
		spin_unlock(&fastopenq->lock);
		reqsk_free(req1);
	}
	if (skip_cookie) {
		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		return true;
	}
	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
		if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
			tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
			if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
			    memcmp(&foc->val[0], &valid_foc->val[0],
				   TCP_FASTOPEN_COOKIE_SIZE) != 0)
				return false;
			valid_foc->len = -1;
		}
		/* Acknowledge the data received from the peer. */
		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		return true;
	} else if (foc->len == 0) { /* Client requesting a cookie */
		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
		NET_INC_STATS_BH(sock_net(sk),
				 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
	} else {
		/* Client sent a cookie with wrong size. Treat it
		 * the same as invalid and return a valid one.
		 */
		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
	}
	return false;
}

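/* Complete a passive Fast Open: create the child socket, transmit the
 * SYN-ACK, queue any data that arrived with the SYN and place the child
 * directly on the listener's accept queue.
 */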
static int tcp_v4_conn_req_fastopen(struct sock *sk,
				    struct sk_buff *skb,
				    struct sk_buff *skb_synack,
				    struct request_sock *req)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct sock *child;
	int err;

	req->num_retrans = 0;
	req->num_timeout = 0;
	req->sk = NULL;

	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
	if (child == NULL) {
		NET_INC_STATS_BH(sock_net(sk),
				 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
		kfree_skb(skb_synack);
		return -1;
	}
	err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
				    ireq->rmt_addr, ireq->opt);
	err = net_xmit_eval(err);
	if (!err)
		tcp_rsk(req)->snt_synack = tcp_time_stamp;
	/* XXX (TFO) - is it ok to ignore error and continue? */

	spin_lock(&queue->fastopenq->lock);
	queue->fastopenq->qlen++;
	spin_unlock(&queue->fastopenq->lock);

	/* Initialize the child socket. Have to fix some values to take
	 * into account the child is a Fast Open socket and is created
	 * only out of the bits carried in the SYN packet.
	 */
	tp = tcp_sk(child);

	tp->fastopen_rsk = req;
	/* Do a hold on the listener sk so that if the listener is being
	 * closed, the child that has been accepted can live on and still
	 * access listen_lock.
	 */
	sock_hold(sk);
	tcp_rsk(req)->listener = sk;

	/* RFC1323: The window in SYN & SYN/ACK segments is never
	 * scaled. So correct it appropriately.
	 */
	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);

	/* Activate the retrans timer so that SYNACK can be retransmitted.
	 * The request socket is not added to the SYN table of the parent
	 * because it's been added to the accept queue directly.
	 */
	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
				  TCP_TIMEOUT_INIT, sysctl_tcp_rto_max);

	/* Add the child socket directly into the accept queue */
	inet_csk_reqsk_queue_add(sk, req, child);

	/* Now finish processing the fastopen child socket. */
	inet_csk(child)->icsk_af_ops->rebuild_header(child);
	tcp_init_congestion_control(child);
	tcp_mtup_init(child);
	tcp_init_buffer_space(child);
	tcp_init_metrics(child);

	/* Queue the data carried in the SYN packet. We need to first
	 * bump skb's refcnt because the caller will attempt to free it.
	 *
	 * XXX (TFO) - we honor a zero-payload TFO request for now.
	 * (Any reason not to?)
	 */
	if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
		/* Don't queue the skb if there is no payload in SYN.
		 * XXX (TFO) - How about SYN+FIN?
		 */
		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
	} else {
		skb = skb_get(skb);
		skb_dst_drop(skb);
		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
		skb_set_owner_r(skb, child);
		__skb_queue_tail(&child->sk_receive_queue, skb);
		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		tp->syn_data_acked = 1;
	}
	sk->sk_data_ready(sk, 0);
	bh_unlock_sock(child);
	sock_put(child);
	WARN_ON(req->sk == NULL);
	return 0;
}

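/* Handle an incoming SYN on a listening socket: allocate and fill a
 * request_sock, fall back to syncookies when the SYN queue overflows,
 * optionally take the Fast Open path, and send the SYN-ACK.
 */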
1da177e4
LT
1466int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1467{
1468 struct tcp_options_received tmp_opt;
60236fdd 1469 struct request_sock *req;
e6b4d113 1470 struct inet_request_sock *ireq;
4957faad 1471 struct tcp_sock *tp = tcp_sk(sk);
e6b4d113 1472 struct dst_entry *dst = NULL;
eddc9ec5
ACM
1473 __be32 saddr = ip_hdr(skb)->saddr;
1474 __be32 daddr = ip_hdr(skb)->daddr;
1da177e4 1475 __u32 isn = TCP_SKB_CB(skb)->when;
a2a385d6 1476 bool want_cookie = false;
168a8f58
JC
1477 struct flowi4 fl4;
1478 struct tcp_fastopen_cookie foc = { .len = -1 };
1479 struct tcp_fastopen_cookie valid_foc = { .len = -1 };
1480 struct sk_buff *skb_synack;
1481 int do_fastopen;
1da177e4
LT
1482
1483 /* Never answer to SYNs send to broadcast or multicast */
511c3f92 1484 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1da177e4
LT
1485 goto drop;
1486
1487 /* TW buckets are converted to open requests without
1488 * limitations, they conserve resources and peer is
1489 * evidently real one.
1490 */
463c84b9 1491 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
946cedcc
ED
1492 want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1493 if (!want_cookie)
1494 goto drop;
1da177e4
LT
1495 }
1496
1497 /* Accept backlog is full. If we have already queued enough
1498 * of warm entries in syn queue, drop request. It is better than
1499 * clogging syn queue with openreqs with exponentially increasing
1500 * timeout.
1501 */
2aeef18d
NS
1502 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
1503 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1da177e4 1504 goto drop;
2aeef18d 1505 }
1da177e4 1506
ce4a7d0d 1507 req = inet_reqsk_alloc(&tcp_request_sock_ops);
1da177e4
LT
1508 if (!req)
1509 goto drop;
1510
cfb6eeb4
YH
1511#ifdef CONFIG_TCP_MD5SIG
1512 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1513#endif
1514
1da177e4 1515 tcp_clear_options(&tmp_opt);
bee7ca9e 1516 tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
4957faad 1517 tmp_opt.user_mss = tp->rx_opt.user_mss;
1a2c6181 1518 tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
1da177e4 1519
4dfc2817 1520 if (want_cookie && !tmp_opt.saw_tstamp)
1da177e4 1521 tcp_clear_options(&tmp_opt);
1da177e4 1522
1da177e4 1523 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1da177e4
LT
1524 tcp_openreq_init(req, &tmp_opt, skb);
1525
bb5b7c11
DM
1526 ireq = inet_rsk(req);
1527 ireq->loc_addr = daddr;
1528 ireq->rmt_addr = saddr;
1529 ireq->no_srccheck = inet_sk(sk)->transparent;
5dff747b 1530 ireq->opt = tcp_v4_save_options(skb);
6fa3eb70 1531 ireq->ir_mark = inet_request_mark(sk, skb);
bb5b7c11 1532
284904aa 1533 if (security_inet_conn_request(sk, skb, req))
bb5b7c11 1534 goto drop_and_free;
284904aa 1535
172d69e6 1536 if (!want_cookie || tmp_opt.tstamp_ok)
5d134f1c 1537 TCP_ECN_create_request(req, skb, sock_net(sk));
1da177e4
LT
1538
1539 if (want_cookie) {
1da177e4 1540 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
172d69e6 1541 req->cookie_ts = tmp_opt.tstamp_ok;
1da177e4 1542 } else if (!isn) {
1da177e4
LT
1543 /* VJ's idea. We save last timestamp seen
1544 * from the destination in peer table, when entering
1545 * state TIME-WAIT, and check against it before
1546 * accepting new connection request.
1547 *
1548 * If "isn" is not zero, this request hit alive
1549 * timewait bucket, so that all the necessary checks
1550 * are made in the function processing timewait state.
1551 */
1552 if (tmp_opt.saw_tstamp &&
295ff7ed 1553 tcp_death_row.sysctl_tw_recycle &&
ba3f7f04 1554 (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
81166dd6
DM
1555 fl4.daddr == saddr) {
1556 if (!tcp_peer_is_proven(req, dst, true)) {
de0744af 1557 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
7cd04fa7 1558 goto drop_and_release;
1da177e4
LT
1559 }
1560 }
1561 /* Kill the following clause, if you dislike this way. */
1562 else if (!sysctl_tcp_syncookies &&
463c84b9 1563 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1da177e4 1564 (sysctl_max_syn_backlog >> 2)) &&
81166dd6 1565 !tcp_peer_is_proven(req, dst, false)) {
1da177e4
LT
1566 /* Without syncookies last quarter of
1567 * backlog is filled with destinations,
1568 * proven to be alive.
1569 * It means that we continue to communicate
1570 * to destinations, already remembered
1571 * to the moment of synflood.
1572 */
afd46503 1573 LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
673d57e7 1574 &saddr, ntohs(tcp_hdr(skb)->source));
7cd04fa7 1575 goto drop_and_release;
1da177e4
LT
1576 }
1577
a94f723d 1578 isn = tcp_v4_init_sequence(skb);
1da177e4 1579 }
2e6599cb 1580 tcp_rsk(req)->snt_isn = isn;
1da177e4 1581
168a8f58
JC
1582 if (dst == NULL) {
1583 dst = inet_csk_route_req(sk, &fl4, req);
1584 if (dst == NULL)
1585 goto drop_and_free;
1586 }
1587 do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
1588
1589 /* We don't call tcp_v4_send_synack() directly because we need
1590 * to make sure a child socket can be created successfully before
1591 * sending back synack!
1592 *
1593 * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
1594 * (or better yet, call tcp_send_synack() in the child context
1595 * directly, but will have to fix bunch of other code first)
1596 * after syn_recv_sock() except one will need to first fix the
1597 * latter to remove its dependency on the current implementation
1598 * of tcp_v4_send_synack()->tcp_select_initial_window().
1599 */
1600 skb_synack = tcp_make_synack(sk, dst, req,
168a8f58
JC
1601 fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
1602
1603 if (skb_synack) {
1604 __tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr);
1605 skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
1606 } else
1607 goto drop_and_free;
1608
1609 if (likely(!do_fastopen)) {
1610 int err;
1611 err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
1612 ireq->rmt_addr, ireq->opt);
1613 err = net_xmit_eval(err);
1614 if (err || want_cookie)
1615 goto drop_and_free;
1616
016818d0 1617 tcp_rsk(req)->snt_synack = tcp_time_stamp;
168a8f58
JC
1618 tcp_rsk(req)->listener = NULL;
1619 /* Add the request_sock to the SYN table */
1620 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1621 if (fastopen_cookie_present(&foc) && foc.len != 0)
1622 NET_INC_STATS_BH(sock_net(sk),
1623 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
1a2c6181 1624 } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))
1da177e4
LT
1625 goto drop_and_free;
1626
1da177e4
LT
1627 return 0;
1628
7cd04fa7
DL
1629drop_and_release:
1630 dst_release(dst);
1da177e4 1631drop_and_free:
60236fdd 1632 reqsk_free(req);
1da177e4 1633drop:
848bf15f 1634 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1da177e4
LT
1635 return 0;
1636}
4bc2f18b 1637EXPORT_SYMBOL(tcp_v4_conn_request);
1da177e4
LT
1638
1639
1640/*
1641 * The three way handshake has completed - we got a valid synack -
1642 * now create the new socket.
1643 */
1644struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
60236fdd 1645 struct request_sock *req,
1da177e4
LT
1646 struct dst_entry *dst)
1647{
2e6599cb 1648 struct inet_request_sock *ireq;
1da177e4
LT
1649 struct inet_sock *newinet;
1650 struct tcp_sock *newtp;
1651 struct sock *newsk;
cfb6eeb4
YH
1652#ifdef CONFIG_TCP_MD5SIG
1653 struct tcp_md5sig_key *key;
1654#endif
f6d8bd05 1655 struct ip_options_rcu *inet_opt;
1da177e4
LT
1656
1657 if (sk_acceptq_is_full(sk))
1658 goto exit_overflow;
1659
1da177e4
LT
1660 newsk = tcp_create_openreq_child(sk, req, skb);
1661 if (!newsk)
093d2823 1662 goto exit_nonewsk;
1da177e4 1663
bcd76111 1664 newsk->sk_gso_type = SKB_GSO_TCPV4;
fae6ef87 1665 inet_sk_rx_dst_set(newsk, skb);
1da177e4
LT
1666
1667 newtp = tcp_sk(newsk);
1668 newinet = inet_sk(newsk);
2e6599cb 1669 ireq = inet_rsk(req);
c720c7e8
ED
1670 newinet->inet_daddr = ireq->rmt_addr;
1671 newinet->inet_rcv_saddr = ireq->loc_addr;
1672 newinet->inet_saddr = ireq->loc_addr;
f6d8bd05
ED
1673 inet_opt = ireq->opt;
1674 rcu_assign_pointer(newinet->inet_opt, inet_opt);
2e6599cb 1675 ireq->opt = NULL;
463c84b9 1676 newinet->mc_index = inet_iif(skb);
eddc9ec5 1677 newinet->mc_ttl = ip_hdr(skb)->ttl;
4c507d28 1678 newinet->rcv_tos = ip_hdr(skb)->tos;
d83d8461 1679 inet_csk(newsk)->icsk_ext_hdr_len = 0;
f6d8bd05
ED
1680 if (inet_opt)
1681 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
c720c7e8 1682 newinet->inet_id = newtp->write_seq ^ jiffies;
1da177e4 1683
dfd25fff
ED
1684 if (!dst) {
1685 dst = inet_csk_route_child_sock(sk, newsk, req);
1686 if (!dst)
1687 goto put_and_exit;
1688 } else {
1689 /* syncookie case : see end of cookie_v4_check() */
1690 }
0e734419
DM
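	/* Attach the chosen route to the child socket; sk_setup_caps() also
	 * derives the socket's offload capabilities (GSO etc.) from the
	 * route's output device.
	 */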
1691 sk_setup_caps(newsk, dst);
1692
5d424d5a 1693 tcp_mtup_init(newsk);
1da177e4 1694 tcp_sync_mss(newsk, dst_mtu(dst));
0dbaee3b 1695 newtp->advmss = dst_metric_advmss(dst);
f5fff5dc
TQ
1696 if (tcp_sk(sk)->rx_opt.user_mss &&
1697 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1698 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1699
1da177e4 1700 tcp_initialize_rcv_mss(newsk);
623df484 1701 tcp_synack_rtt_meas(newsk, req);
e6c022a4 1702 newtp->total_retrans = req->num_retrans;
1da177e4 1703
cfb6eeb4
YH
1704#ifdef CONFIG_TCP_MD5SIG
1705 /* Copy over the MD5 key from the original socket */
a915da9b
ED
1706 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1707 AF_INET);
c720c7e8 1708 if (key != NULL) {
cfb6eeb4
YH
1709 /*
1710 * We're using one, so create a matching key
1711 * on the newsk structure. If we fail to get
1712 * memory, then we end up not copying the key
1713 * across. Shucks.
1714 */
a915da9b
ED
1715 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1716 AF_INET, key->key, key->keylen, GFP_ATOMIC);
a465419b 1717 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
cfb6eeb4
YH
1718 }
1719#endif
1720
0e734419
DM
1721 if (__inet_inherit_port(sk, newsk) < 0)
1722 goto put_and_exit;
9327f705 1723 __inet_hash_nolisten(newsk, NULL);
1da177e4
LT
1724
1725 return newsk;
1726
1727exit_overflow:
de0744af 1728 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
093d2823
BS
1729exit_nonewsk:
1730 dst_release(dst);
1da177e4 1731exit:
de0744af 1732 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1da177e4 1733 return NULL;
0e734419 1734put_and_exit:
e337e24d
CP
1735 inet_csk_prepare_forced_close(newsk);
1736 tcp_done(newsk);
0e734419 1737 goto exit;
1da177e4 1738}
4bc2f18b 1739EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1da177e4
LT
1740
1741static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1742{
aa8223c7 1743 struct tcphdr *th = tcp_hdr(skb);
eddc9ec5 1744 const struct iphdr *iph = ip_hdr(skb);
1da177e4 1745 struct sock *nsk;
60236fdd 1746 struct request_sock **prev;
1da177e4 1747 /* Find possible connection requests. */
463c84b9
ACM
1748 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1749 iph->saddr, iph->daddr);
1da177e4 1750 if (req)
8336886f 1751 return tcp_check_req(sk, skb, req, prev, false);
1da177e4 1752
3b1e0a65 1753 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
c67499c0 1754 th->source, iph->daddr, th->dest, inet_iif(skb));
1da177e4
LT
1755
1756 if (nsk) {
1757 if (nsk->sk_state != TCP_TIME_WAIT) {
1758 bh_lock_sock(nsk);
1759 return nsk;
1760 }
9469c7b4 1761 inet_twsk_put(inet_twsk(nsk));
1da177e4
LT
1762 return NULL;
1763 }
1764
1765#ifdef CONFIG_SYN_COOKIES
af9b4738 1766 if (!th->syn)
1da177e4
LT
1767 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1768#endif
1769 return sk;
1770}
1771
b51655b9 1772static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1da177e4 1773{
eddc9ec5
ACM
1774 const struct iphdr *iph = ip_hdr(skb);
1775
84fa7933 1776 if (skb->ip_summed == CHECKSUM_COMPLETE) {
eddc9ec5
ACM
1777 if (!tcp_v4_check(skb->len, iph->saddr,
1778 iph->daddr, skb->csum)) {
fb286bb2 1779 skb->ip_summed = CHECKSUM_UNNECESSARY;
1da177e4 1780 return 0;
fb286bb2 1781 }
1da177e4 1782 }
fb286bb2 1783
eddc9ec5 1784 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
fb286bb2
HX
1785 skb->len, IPPROTO_TCP, 0);
1786
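	/* Short packets are cheap to verify right away; for longer ones we
	 * leave skb->csum primed with the pseudo-header sum and defer the
	 * full check until the data is actually consumed.
	 */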
1da177e4 1787 if (skb->len <= 76) {
fb286bb2 1788 return __skb_checksum_complete(skb);
1da177e4
LT
1789 }
1790 return 0;
1791}
1792
1793
 1794/* The socket must have its spinlock held when we get
1795 * here.
1796 *
1797 * We have a potential double-lock case here, so even when
1798 * doing backlog processing we use the BH locking scheme.
1799 * This is because we cannot sleep with the original spinlock
1800 * held.
1801 */
1802int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1803{
cfb6eeb4
YH
1804 struct sock *rsk;
1805#ifdef CONFIG_TCP_MD5SIG
1806 /*
1807 * We really want to reject the packet as early as possible
1808 * if:
 1809	 * o We're expecting an MD5'd packet and there is no MD5 TCP option
1810 * o There is an MD5 option and we're not expecting one
1811 */
7174259e 1812 if (tcp_v4_inbound_md5_hash(sk, skb))
cfb6eeb4
YH
1813 goto discard;
1814#endif
1815
1da177e4 1816 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
404e0a8b
ED
1817 struct dst_entry *dst = sk->sk_rx_dst;
1818
bdeab991 1819 sock_rps_save_rxhash(sk, skb);
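		/* Revalidate the cached input route: if the incoming device
		 * changed or the route has been obsoleted, drop it so the
		 * slow path can install a fresh one.
		 */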
404e0a8b 1820 if (dst) {
505fbcf0
ED
1821 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1822 dst->ops->check(dst, 0) == NULL) {
92101b3b
DM
1823 dst_release(dst);
1824 sk->sk_rx_dst = NULL;
1825 }
1826 }
aa8223c7 1827 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
cfb6eeb4 1828 rsk = sk;
1da177e4 1829 goto reset;
cfb6eeb4 1830 }
1da177e4
LT
1831 return 0;
1832 }
1833
ab6a5bb6 1834 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1da177e4
LT
1835 goto csum_err;
1836
1837 if (sk->sk_state == TCP_LISTEN) {
1838 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1839 if (!nsk)
1840 goto discard;
1841
1842 if (nsk != sk) {
bdeab991 1843 sock_rps_save_rxhash(nsk, skb);
cfb6eeb4
YH
1844 if (tcp_child_process(sk, nsk, skb)) {
1845 rsk = nsk;
1da177e4 1846 goto reset;
cfb6eeb4 1847 }
1da177e4
LT
1848 return 0;
1849 }
ca55158c 1850 } else
bdeab991 1851 sock_rps_save_rxhash(sk, skb);
ca55158c 1852
aa8223c7 1853 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
cfb6eeb4 1854 rsk = sk;
1da177e4 1855 goto reset;
cfb6eeb4 1856 }
1da177e4
LT
1857 return 0;
1858
1859reset:
cfb6eeb4 1860 tcp_v4_send_reset(rsk, skb);
1da177e4
LT
1861discard:
1862 kfree_skb(skb);
1863 /* Be careful here. If this function gets more complicated and
1864 * gcc suffers from register pressure on the x86, sk (in %ebx)
1865 * might be destroyed here. This current version compiles correctly,
1866 * but you have been warned.
1867 */
1868 return 0;
1869
1870csum_err:
6a5dc9e5 1871 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
63231bdd 1872 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1da177e4
LT
1873 goto discard;
1874}
4bc2f18b 1875EXPORT_SYMBOL(tcp_v4_do_rcv);
1da177e4 1876
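/* Early demux: while still in the IP receive path, try to find the
 * established socket this segment belongs to and attach it (and its cached
 * input route) to the skb, so the full socket lookup later in tcp_v4_rcv()
 * can be short-circuited. Purely an optimisation; on any doubt just return.
 */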
160eb5a6 1877void tcp_v4_early_demux(struct sk_buff *skb)
41063e9d 1878{
41063e9d
DM
1879 const struct iphdr *iph;
1880 const struct tcphdr *th;
1881 struct sock *sk;
41063e9d 1882
41063e9d 1883 if (skb->pkt_type != PACKET_HOST)
160eb5a6 1884 return;
41063e9d 1885
45f00f99 1886 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
160eb5a6 1887 return;
41063e9d
DM
1888
1889 iph = ip_hdr(skb);
45f00f99 1890 th = tcp_hdr(skb);
41063e9d
DM
1891
1892 if (th->doff < sizeof(struct tcphdr) / 4)
160eb5a6 1893 return;
41063e9d 1894
45f00f99 1895 sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
41063e9d 1896 iph->saddr, th->source,
7011d085 1897 iph->daddr, ntohs(th->dest),
9cb429d6 1898 skb->skb_iif);
41063e9d
DM
1899 if (sk) {
1900 skb->sk = sk;
1901 skb->destructor = sock_edemux;
1902 if (sk->sk_state != TCP_TIME_WAIT) {
1903 struct dst_entry *dst = sk->sk_rx_dst;
505fbcf0 1904
41063e9d
DM
1905 if (dst)
1906 dst = dst_check(dst, 0);
92101b3b 1907 if (dst &&
505fbcf0 1908 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
92101b3b 1909 skb_dst_set_noref(skb, dst);
41063e9d
DM
1910 }
1911 }
41063e9d
DM
1912}
1913
b2fb4f54
ED
1914/* Packet is added to VJ-style prequeue for processing in process
1915 * context, if a reader task is waiting. Apparently, this exciting
1916 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1917 * failed somewhere. Latency? Burstiness? Well, at least now we will
 1918 * see why it failed. 8)8) --ANK
1919 *
1920 */
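/* Queueing policy, as implemented below: segments are appended to
 * ucopy.prequeue while a reader task is waiting; once the queued truesize
 * exceeds sk_rcvbuf the prequeue is drained synchronously via
 * sk_backlog_rcv(), and the very first queued segment wakes the reader and,
 * if no ACK is already scheduled, arms a shortened delayed-ACK timer.
 */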
1921bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1922{
1923 struct tcp_sock *tp = tcp_sk(sk);
1924
1925 if (sysctl_tcp_low_latency || !tp->ucopy.task)
1926 return false;
1927
1928 if (skb->len <= tcp_hdrlen(skb) &&
1929 skb_queue_len(&tp->ucopy.prequeue) == 0)
1930 return false;
1931
58717686 1932 skb_dst_force(skb);
b2fb4f54
ED
1933 __skb_queue_tail(&tp->ucopy.prequeue, skb);
1934 tp->ucopy.memory += skb->truesize;
1935 if (tp->ucopy.memory > sk->sk_rcvbuf) {
1936 struct sk_buff *skb1;
1937
1938 BUG_ON(sock_owned_by_user(sk));
1939
1940 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1941 sk_backlog_rcv(sk, skb1);
1942 NET_INC_STATS_BH(sock_net(sk),
1943 LINUX_MIB_TCPPREQUEUEDROPPED);
1944 }
1945
1946 tp->ucopy.memory = 0;
1947 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1948 wake_up_interruptible_sync_poll(sk_sleep(sk),
1949 POLLIN | POLLRDNORM | POLLRDBAND);
1950 if (!inet_csk_ack_scheduled(sk))
1951 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1952 (3 * tcp_rto_min(sk)) / 4,
6fa3eb70 1953 sysctl_tcp_rto_max);
b2fb4f54
ED
1954 }
1955 return true;
1956}
1957EXPORT_SYMBOL(tcp_prequeue);
1958
1da177e4
LT
1959/*
1960 * From tcp_input.c
1961 */
1962
1963int tcp_v4_rcv(struct sk_buff *skb)
1964{
eddc9ec5 1965 const struct iphdr *iph;
cf533ea5 1966 const struct tcphdr *th;
1da177e4
LT
1967 struct sock *sk;
1968 int ret;
a86b1e30 1969 struct net *net = dev_net(skb->dev);
1da177e4
LT
1970
1971 if (skb->pkt_type != PACKET_HOST)
1972 goto discard_it;
1973
1974 /* Count it even if it's bad */
63231bdd 1975 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1da177e4
LT
1976
1977 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1978 goto discard_it;
1979
aa8223c7 1980 th = tcp_hdr(skb);
1da177e4
LT
1981
1982 if (th->doff < sizeof(struct tcphdr) / 4)
1983 goto bad_packet;
1984 if (!pskb_may_pull(skb, th->doff * 4))
1985 goto discard_it;
1986
1987 /* An explanation is required here, I think.
1988 * Packet length and doff are validated by header prediction,
caa20d9a 1989	 * provided the th->doff==0 case is eliminated.
1da177e4 1990 * So, we defer the checks. */
60476372 1991 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
6a5dc9e5 1992 goto csum_error;
1da177e4 1993
aa8223c7 1994 th = tcp_hdr(skb);
eddc9ec5 1995 iph = ip_hdr(skb);
1da177e4
LT
1996 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1997 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1998 skb->len - th->doff * 4);
1999 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
2000 TCP_SKB_CB(skb)->when = 0;
b82d1bb4 2001 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1da177e4
LT
2002 TCP_SKB_CB(skb)->sacked = 0;
2003
9a1f27c4 2004 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1da177e4
LT
2005 if (!sk)
2006 goto no_tcp_socket;
2007
bb134d5d
ED
2008process:
2009 if (sk->sk_state == TCP_TIME_WAIT)
2010 goto do_time_wait;
2011
6cce09f8
ED
2012 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
2013 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
d218d111 2014 goto discard_and_relse;
6cce09f8 2015 }
d218d111 2016
1da177e4
LT
2017 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
2018 goto discard_and_relse;
b59c2701 2019 nf_reset(skb);
1da177e4 2020
fda9ef5d 2021 if (sk_filter(sk, skb))
1da177e4
LT
2022 goto discard_and_relse;
2023
2024 skb->dev = NULL;
2025
c6366184 2026 bh_lock_sock_nested(sk);
1da177e4
LT
2027 ret = 0;
2028 if (!sock_owned_by_user(sk)) {
1a2449a8
CL
2029#ifdef CONFIG_NET_DMA
2030 struct tcp_sock *tp = tcp_sk(sk);
2031 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
a2bd1140 2032 tp->ucopy.dma_chan = net_dma_find_channel();
1a2449a8 2033 if (tp->ucopy.dma_chan)
1da177e4 2034 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8
CL
2035 else
2036#endif
2037 {
2038 if (!tcp_prequeue(sk, skb))
ae8d7f88 2039 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8 2040 }
da882c1f
ED
2041 } else if (unlikely(sk_add_backlog(sk, skb,
2042 sk->sk_rcvbuf + sk->sk_sndbuf))) {
6b03a53a 2043 bh_unlock_sock(sk);
6cce09f8 2044 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
6b03a53a
ZY
2045 goto discard_and_relse;
2046 }
1da177e4
LT
2047 bh_unlock_sock(sk);
2048
2049 sock_put(sk);
2050
2051 return ret;
2052
2053no_tcp_socket:
2054 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
2055 goto discard_it;
2056
2057 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
6a5dc9e5
ED
2058csum_error:
2059 TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
1da177e4 2060bad_packet:
63231bdd 2061 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1da177e4 2062 } else {
cfb6eeb4 2063 tcp_v4_send_reset(NULL, skb);
1da177e4
LT
2064 }
2065
2066discard_it:
2067 /* Discard frame. */
2068 kfree_skb(skb);
e905a9ed 2069 return 0;
1da177e4
LT
2070
2071discard_and_relse:
2072 sock_put(sk);
2073 goto discard_it;
2074
2075do_time_wait:
2076 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
9469c7b4 2077 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
2078 goto discard_it;
2079 }
2080
6a5dc9e5 2081 if (skb->len < (th->doff << 2)) {
9469c7b4 2082 inet_twsk_put(inet_twsk(sk));
6a5dc9e5
ED
2083 goto bad_packet;
2084 }
2085 if (tcp_checksum_complete(skb)) {
2086 inet_twsk_put(inet_twsk(sk));
2087 goto csum_error;
1da177e4 2088 }
9469c7b4 2089 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1da177e4 2090 case TCP_TW_SYN: {
c346dca1 2091 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
c67499c0 2092 &tcp_hashinfo,
da5e3630 2093 iph->saddr, th->source,
eddc9ec5 2094 iph->daddr, th->dest,
463c84b9 2095 inet_iif(skb));
1da177e4 2096 if (sk2) {
9469c7b4
YH
2097 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
2098 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
2099 sk = sk2;
2100 goto process;
2101 }
2102 /* Fall through to ACK */
2103 }
2104 case TCP_TW_ACK:
2105 tcp_v4_timewait_ack(sk, skb);
2106 break;
2107 case TCP_TW_RST:
2108 goto no_tcp_socket;
2109 case TCP_TW_SUCCESS:;
2110 }
2111 goto discard_it;
2112}
2113
ccb7c410
DM
2114static struct timewait_sock_ops tcp_timewait_sock_ops = {
2115 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
2116 .twsk_unique = tcp_twsk_unique,
2117 .twsk_destructor= tcp_twsk_destructor,
ccb7c410 2118};
1da177e4 2119
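/* Cache the input route on the socket so subsequent packets from this peer
 * can reuse it (see the early demux and established fast path above).
 */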
63d02d15 2120void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
5d299f3d
ED
2121{
2122 struct dst_entry *dst = skb_dst(skb);
2123
2124 dst_hold(dst);
2125 sk->sk_rx_dst = dst;
2126 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
2127}
63d02d15 2128EXPORT_SYMBOL(inet_sk_rx_dst_set);
5d299f3d 2129
3b401a81 2130const struct inet_connection_sock_af_ops ipv4_specific = {
543d9cfe
ACM
2131 .queue_xmit = ip_queue_xmit,
2132 .send_check = tcp_v4_send_check,
2133 .rebuild_header = inet_sk_rebuild_header,
5d299f3d 2134 .sk_rx_dst_set = inet_sk_rx_dst_set,
543d9cfe
ACM
2135 .conn_request = tcp_v4_conn_request,
2136 .syn_recv_sock = tcp_v4_syn_recv_sock,
543d9cfe
ACM
2137 .net_header_len = sizeof(struct iphdr),
2138 .setsockopt = ip_setsockopt,
2139 .getsockopt = ip_getsockopt,
2140 .addr2sockaddr = inet_csk_addr2sockaddr,
2141 .sockaddr_len = sizeof(struct sockaddr_in),
ab1e0a13 2142 .bind_conflict = inet_csk_bind_conflict,
3fdadf7d 2143#ifdef CONFIG_COMPAT
543d9cfe
ACM
2144 .compat_setsockopt = compat_ip_setsockopt,
2145 .compat_getsockopt = compat_ip_getsockopt,
3fdadf7d 2146#endif
5f80f4d8 2147 .mtu_reduced = tcp_v4_mtu_reduced,
1da177e4 2148};
4bc2f18b 2149EXPORT_SYMBOL(ipv4_specific);
1da177e4 2150
cfb6eeb4 2151#ifdef CONFIG_TCP_MD5SIG
b2e4b3de 2152static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
cfb6eeb4 2153 .md5_lookup = tcp_v4_md5_lookup,
49a72dfb 2154 .calc_md5_hash = tcp_v4_md5_hash_skb,
cfb6eeb4 2155 .md5_parse = tcp_v4_parse_md5_keys,
cfb6eeb4 2156};
b6332e6c 2157#endif
cfb6eeb4 2158
1da177e4
LT
 2159/* NOTE: A lot of things are set to zero explicitly by the call to
 2160 * sk_alloc(), so they need not be done here.
2161 */
2162static int tcp_v4_init_sock(struct sock *sk)
2163{
6687e988 2164 struct inet_connection_sock *icsk = inet_csk(sk);
1da177e4 2165
900f65d3 2166 tcp_init_sock(sk);
6fa3eb70 2167 icsk->icsk_MMSRB = 0;
1da177e4 2168
8292a17a 2169 icsk->icsk_af_ops = &ipv4_specific;
900f65d3 2170
cfb6eeb4 2171#ifdef CONFIG_TCP_MD5SIG
ac807fa8 2172 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
cfb6eeb4 2173#endif
1da177e4 2174
1da177e4
LT
2175 return 0;
2176}
2177
7d06b2e0 2178void tcp_v4_destroy_sock(struct sock *sk)
1da177e4
LT
2179{
2180 struct tcp_sock *tp = tcp_sk(sk);
2181
2182 tcp_clear_xmit_timers(sk);
2183
6687e988 2184 tcp_cleanup_congestion_control(sk);
317a76f9 2185
1da177e4 2186	/* Clean up the write buffer. */
fe067e8a 2187 tcp_write_queue_purge(sk);
1da177e4
LT
2188
2189 /* Cleans up our, hopefully empty, out_of_order_queue. */
e905a9ed 2190 __skb_queue_purge(&tp->out_of_order_queue);
1da177e4 2191
cfb6eeb4
YH
2192#ifdef CONFIG_TCP_MD5SIG
2193 /* Clean up the MD5 key list, if any */
2194 if (tp->md5sig_info) {
a915da9b 2195 tcp_clear_md5_list(sk);
a8afca03 2196 kfree_rcu(tp->md5sig_info, rcu);
cfb6eeb4
YH
2197 tp->md5sig_info = NULL;
2198 }
2199#endif
2200
1a2449a8
CL
2201#ifdef CONFIG_NET_DMA
2202 /* Cleans up our sk_async_wait_queue */
e905a9ed 2203 __skb_queue_purge(&sk->sk_async_wait_queue);
1a2449a8
CL
2204#endif
2205
1da177e4
LT
 2206	/* Clean up the prequeue; it really must be empty by now */
2207 __skb_queue_purge(&tp->ucopy.prequeue);
2208
2209 /* Clean up a referenced TCP bind bucket. */
463c84b9 2210 if (inet_csk(sk)->icsk_bind_hash)
ab1e0a13 2211 inet_put_port(sk);
1da177e4 2212
168a8f58 2213 BUG_ON(tp->fastopen_rsk != NULL);
435cf559 2214
cf60af03
YC
 2215	/* If the socket was aborted during the connect operation */
2216 tcp_free_fastopen_req(tp);
2217
180d8cd9 2218 sk_sockets_allocated_dec(sk);
d1a4c0b3 2219 sock_release_memcg(sk);
1da177e4 2220}
1da177e4
LT
2221EXPORT_SYMBOL(tcp_v4_destroy_sock);
2222
6fa3eb70
S
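/*
 * MTK-specific helper: walk the established hash table and, for every socket
 * owned by the given uid, kick the retransmit timer so it fires almost
 * immediately, stretch icsk_rto and flag the socket with icsk_MMSRB.
 */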
2223void tcp_v4_handle_retrans_time_by_uid(struct uid_err uid_e)
2224{
2225 unsigned int bucket;
2226 uid_t skuid = (uid_t)(uid_e.appuid);
 2227	struct inet_connection_sock *icsk = NULL;	/* set per matching socket below */
2228
2229
2230 for (bucket = 0; bucket < tcp_hashinfo.ehash_mask; bucket++) {
2231 struct hlist_nulls_node *node;
2232 struct sock *sk;
2233 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, bucket);
2234
2235 spin_lock_bh(lock);
2236 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[bucket].chain) {
2237
2238 if (sysctl_ip_dynaddr && sk->sk_state == TCP_SYN_SENT)
2239 continue;
2240 if (sock_flag(sk, SOCK_DEAD))
2241 continue;
2242
 2243			if (sk->sk_socket) {
 2244				if (SOCK_INODE(sk->sk_socket)->i_uid != skuid)
 2245					continue;
 2246				else
 2247					printk(KERN_INFO "[mmspb] tcp_v4_handle_retrans_time_by_uid socket uid(%d) match!\n",
 2248						SOCK_INODE(sk->sk_socket)->i_uid);
 2249			} else {
 2250				continue;
 2251			}
2252
2253 sock_hold(sk);
2254 spin_unlock_bh(lock);
2255
2256 local_bh_disable();
2257 bh_lock_sock(sk);
2258
 2259			/* update the socket's retransmit timeout */
 2260			icsk = inet_csk(sk);
 2261			printk(KERN_INFO "[mmspb] tcp_v4_handle_retrans_time_by_uid update timer\n");
2262
2263 sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + 2);
2264 icsk->icsk_rto = sysctl_tcp_rto_min * 30;
2265 icsk->icsk_MMSRB = 1;
2266
2267 bh_unlock_sock(sk);
2268 local_bh_enable();
2269 spin_lock_bh(lock);
2270 sock_put(sk);
2271
2272 }
2273 spin_unlock_bh(lock);
2274 }
2275
2276}
2277
2278
2279/*
 2280 * tcp_v4_reset_connections_by_uid - destroy all sockets owned by the given uid
2281 */
2282void tcp_v4_reset_connections_by_uid(struct uid_err uid_e)
2283{
2284 unsigned int bucket;
2285 uid_t skuid = (uid_t)(uid_e.appuid);
2286
2287 for (bucket = 0; bucket < tcp_hashinfo.ehash_mask; bucket++) {
2288 struct hlist_nulls_node *node;
2289 struct sock *sk;
2290 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, bucket);
2291
2292restart:
2293 spin_lock_bh(lock);
2294 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[bucket].chain) {
2295
2296 if (sysctl_ip_dynaddr && sk->sk_state == TCP_SYN_SENT)
2297 continue;
2298 if (sock_flag(sk, SOCK_DEAD))
2299 continue;
2300
 2301			if (sk->sk_socket) {
 2302				if (SOCK_INODE(sk->sk_socket)->i_uid != skuid)
 2303					continue;
 2304				else
 2305					printk(KERN_INFO "SIOCKILLSOCK socket uid(%d) match!\n",
 2306						SOCK_INODE(sk->sk_socket)->i_uid);
 2307			} else {
 2308				continue;
 2309			}
2310
2311 sock_hold(sk);
2312 spin_unlock_bh(lock);
2313
2314 local_bh_disable();
2315 bh_lock_sock(sk);
2316 sk->sk_err = uid_e.errNum;
 2317			printk(KERN_INFO "SIOCKILLSOCK set sk_err == %d!\n", sk->sk_err);
2318 sk->sk_error_report(sk);
2319
2320 tcp_done(sk);
2321 bh_unlock_sock(sk);
2322 local_bh_enable();
2323 sock_put(sk);
2324
2325 goto restart;
2326 }
2327 spin_unlock_bh(lock);
2328 }
2329}
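/*
 * Illustrative sketch only (not part of this file): how a caller such as an
 * MTK-private ioctl handler might use the helper above. The errno value and
 * the wrapper name are assumptions for the example.
 */
#if 0
static void example_kill_uid_sockets(uid_t uid)
{
	struct uid_err e;

	e.appuid = uid;
	e.errNum = ECONNABORTED;	/* reported to the app via sk_err */
	tcp_v4_reset_connections_by_uid(e);
}
#endif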
2330
2331
1da177e4
LT
2332#ifdef CONFIG_PROC_FS
2333/* Proc filesystem TCP sock list dumping. */
2334
3ab5aee7 2335static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1da177e4 2336{
3ab5aee7 2337 return hlist_nulls_empty(head) ? NULL :
8feaf0c0 2338 list_entry(head->first, struct inet_timewait_sock, tw_node);
1da177e4
LT
2339}
2340
8feaf0c0 2341static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1da177e4 2342{
3ab5aee7
ED
2343 return !is_a_nulls(tw->tw_node.next) ?
2344 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1da177e4
LT
2345}
2346
a8b690f9
TH
2347/*
 2348 * Get the next listening socket after cur. If cur is NULL, get the first socket
2349 * starting from bucket given in st->bucket; when st->bucket is zero the
2350 * very first socket in the hash table is returned.
2351 */
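/* Note: besides the listening sockets themselves, this iterator also walks
 * each listener's SYN table (TCP_SEQ_STATE_OPENREQ), returning the pending
 * request_socks so /proc/net/tcp can show embryonic connections too.
 */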
1da177e4
LT
2352static void *listening_get_next(struct seq_file *seq, void *cur)
2353{
463c84b9 2354 struct inet_connection_sock *icsk;
c25eb3bf 2355 struct hlist_nulls_node *node;
1da177e4 2356 struct sock *sk = cur;
5caea4ea 2357 struct inet_listen_hashbucket *ilb;
5799de0b 2358 struct tcp_iter_state *st = seq->private;
a4146b1b 2359 struct net *net = seq_file_net(seq);
1da177e4
LT
2360
2361 if (!sk) {
a8b690f9 2362 ilb = &tcp_hashinfo.listening_hash[st->bucket];
5caea4ea 2363 spin_lock_bh(&ilb->lock);
c25eb3bf 2364 sk = sk_nulls_head(&ilb->head);
a8b690f9 2365 st->offset = 0;
1da177e4
LT
2366 goto get_sk;
2367 }
5caea4ea 2368 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1da177e4 2369 ++st->num;
a8b690f9 2370 ++st->offset;
1da177e4
LT
2371
2372 if (st->state == TCP_SEQ_STATE_OPENREQ) {
60236fdd 2373 struct request_sock *req = cur;
1da177e4 2374
72a3effa 2375 icsk = inet_csk(st->syn_wait_sk);
1da177e4
LT
2376 req = req->dl_next;
2377 while (1) {
2378 while (req) {
bdccc4ca 2379 if (req->rsk_ops->family == st->family) {
1da177e4
LT
2380 cur = req;
2381 goto out;
2382 }
2383 req = req->dl_next;
2384 }
72a3effa 2385 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1da177e4
LT
2386 break;
2387get_req:
463c84b9 2388 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1da177e4 2389 }
1bde5ac4 2390 sk = sk_nulls_next(st->syn_wait_sk);
1da177e4 2391 st->state = TCP_SEQ_STATE_LISTENING;
463c84b9 2392 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2393 } else {
e905a9ed 2394 icsk = inet_csk(sk);
463c84b9
ACM
2395 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2396 if (reqsk_queue_len(&icsk->icsk_accept_queue))
1da177e4 2397 goto start_req;
463c84b9 2398 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1bde5ac4 2399 sk = sk_nulls_next(sk);
1da177e4
LT
2400 }
2401get_sk:
c25eb3bf 2402 sk_nulls_for_each_from(sk, node) {
8475ef9f
PE
2403 if (!net_eq(sock_net(sk), net))
2404 continue;
2405 if (sk->sk_family == st->family) {
1da177e4
LT
2406 cur = sk;
2407 goto out;
2408 }
e905a9ed 2409 icsk = inet_csk(sk);
463c84b9
ACM
2410 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2411 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1da177e4
LT
2412start_req:
2413 st->uid = sock_i_uid(sk);
2414 st->syn_wait_sk = sk;
2415 st->state = TCP_SEQ_STATE_OPENREQ;
2416 st->sbucket = 0;
2417 goto get_req;
2418 }
463c84b9 2419 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2420 }
5caea4ea 2421 spin_unlock_bh(&ilb->lock);
a8b690f9 2422 st->offset = 0;
0f7ff927 2423 if (++st->bucket < INET_LHTABLE_SIZE) {
5caea4ea
ED
2424 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2425 spin_lock_bh(&ilb->lock);
c25eb3bf 2426 sk = sk_nulls_head(&ilb->head);
1da177e4
LT
2427 goto get_sk;
2428 }
2429 cur = NULL;
2430out:
2431 return cur;
2432}
2433
2434static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2435{
a8b690f9
TH
2436 struct tcp_iter_state *st = seq->private;
2437 void *rc;
2438
2439 st->bucket = 0;
2440 st->offset = 0;
2441 rc = listening_get_next(seq, NULL);
1da177e4
LT
2442
2443 while (rc && *pos) {
2444 rc = listening_get_next(seq, rc);
2445 --*pos;
2446 }
2447 return rc;
2448}
2449
a2a385d6 2450static inline bool empty_bucket(struct tcp_iter_state *st)
6eac5604 2451{
3ab5aee7
ED
2452 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2453 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
6eac5604
AK
2454}
2455
a8b690f9
TH
2456/*
2457 * Get first established socket starting from bucket given in st->bucket.
2458 * If st->bucket is zero, the very first socket in the hash is returned.
2459 */
1da177e4
LT
2460static void *established_get_first(struct seq_file *seq)
2461{
5799de0b 2462 struct tcp_iter_state *st = seq->private;
a4146b1b 2463 struct net *net = seq_file_net(seq);
1da177e4
LT
2464 void *rc = NULL;
2465
a8b690f9
TH
2466 st->offset = 0;
2467 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1da177e4 2468 struct sock *sk;
3ab5aee7 2469 struct hlist_nulls_node *node;
8feaf0c0 2470 struct inet_timewait_sock *tw;
9db66bdc 2471 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1da177e4 2472
6eac5604
AK
2473 /* Lockless fast path for the common case of empty buckets */
2474 if (empty_bucket(st))
2475 continue;
2476
9db66bdc 2477 spin_lock_bh(lock);
3ab5aee7 2478 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
f40c8174 2479 if (sk->sk_family != st->family ||
878628fb 2480 !net_eq(sock_net(sk), net)) {
1da177e4
LT
2481 continue;
2482 }
2483 rc = sk;
2484 goto out;
2485 }
2486 st->state = TCP_SEQ_STATE_TIME_WAIT;
8feaf0c0 2487 inet_twsk_for_each(tw, node,
dbca9b27 2488 &tcp_hashinfo.ehash[st->bucket].twchain) {
28518fc1 2489 if (tw->tw_family != st->family ||
878628fb 2490 !net_eq(twsk_net(tw), net)) {
1da177e4
LT
2491 continue;
2492 }
2493 rc = tw;
2494 goto out;
2495 }
9db66bdc 2496 spin_unlock_bh(lock);
1da177e4
LT
2497 st->state = TCP_SEQ_STATE_ESTABLISHED;
2498 }
2499out:
2500 return rc;
2501}
2502
2503static void *established_get_next(struct seq_file *seq, void *cur)
2504{
2505 struct sock *sk = cur;
8feaf0c0 2506 struct inet_timewait_sock *tw;
3ab5aee7 2507 struct hlist_nulls_node *node;
5799de0b 2508 struct tcp_iter_state *st = seq->private;
a4146b1b 2509 struct net *net = seq_file_net(seq);
1da177e4
LT
2510
2511 ++st->num;
a8b690f9 2512 ++st->offset;
1da177e4
LT
2513
2514 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2515 tw = cur;
2516 tw = tw_next(tw);
2517get_tw:
878628fb 2518 while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
1da177e4
LT
2519 tw = tw_next(tw);
2520 }
2521 if (tw) {
2522 cur = tw;
2523 goto out;
2524 }
9db66bdc 2525 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2526 st->state = TCP_SEQ_STATE_ESTABLISHED;
2527
6eac5604 2528		/* Look for the next non-empty bucket */
a8b690f9 2529 st->offset = 0;
f373b53b 2530 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
6eac5604
AK
2531 empty_bucket(st))
2532 ;
f373b53b 2533 if (st->bucket > tcp_hashinfo.ehash_mask)
6eac5604
AK
2534 return NULL;
2535
9db66bdc 2536 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
3ab5aee7 2537 sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
1da177e4 2538 } else
3ab5aee7 2539 sk = sk_nulls_next(sk);
1da177e4 2540
3ab5aee7 2541 sk_nulls_for_each_from(sk, node) {
878628fb 2542 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
1da177e4
LT
2543 goto found;
2544 }
2545
2546 st->state = TCP_SEQ_STATE_TIME_WAIT;
dbca9b27 2547 tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
1da177e4
LT
2548 goto get_tw;
2549found:
2550 cur = sk;
2551out:
2552 return cur;
2553}
2554
2555static void *established_get_idx(struct seq_file *seq, loff_t pos)
2556{
a8b690f9
TH
2557 struct tcp_iter_state *st = seq->private;
2558 void *rc;
2559
2560 st->bucket = 0;
2561 rc = established_get_first(seq);
1da177e4
LT
2562
2563 while (rc && pos) {
2564 rc = established_get_next(seq, rc);
2565 --pos;
7174259e 2566 }
1da177e4
LT
2567 return rc;
2568}
2569
2570static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2571{
2572 void *rc;
5799de0b 2573 struct tcp_iter_state *st = seq->private;
1da177e4 2574
1da177e4
LT
2575 st->state = TCP_SEQ_STATE_LISTENING;
2576 rc = listening_get_idx(seq, &pos);
2577
2578 if (!rc) {
1da177e4
LT
2579 st->state = TCP_SEQ_STATE_ESTABLISHED;
2580 rc = established_get_idx(seq, pos);
2581 }
2582
2583 return rc;
2584}
2585
a8b690f9
TH
2586static void *tcp_seek_last_pos(struct seq_file *seq)
2587{
2588 struct tcp_iter_state *st = seq->private;
2589 int offset = st->offset;
2590 int orig_num = st->num;
2591 void *rc = NULL;
2592
2593 switch (st->state) {
2594 case TCP_SEQ_STATE_OPENREQ:
2595 case TCP_SEQ_STATE_LISTENING:
2596 if (st->bucket >= INET_LHTABLE_SIZE)
2597 break;
2598 st->state = TCP_SEQ_STATE_LISTENING;
2599 rc = listening_get_next(seq, NULL);
2600 while (offset-- && rc)
2601 rc = listening_get_next(seq, rc);
2602 if (rc)
2603 break;
2604 st->bucket = 0;
2605 /* Fallthrough */
2606 case TCP_SEQ_STATE_ESTABLISHED:
2607 case TCP_SEQ_STATE_TIME_WAIT:
2608 st->state = TCP_SEQ_STATE_ESTABLISHED;
2609 if (st->bucket > tcp_hashinfo.ehash_mask)
2610 break;
2611 rc = established_get_first(seq);
2612 while (offset-- && rc)
2613 rc = established_get_next(seq, rc);
2614 }
2615
2616 st->num = orig_num;
2617
2618 return rc;
2619}
2620
1da177e4
LT
2621static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2622{
5799de0b 2623 struct tcp_iter_state *st = seq->private;
a8b690f9
TH
2624 void *rc;
2625
2626 if (*pos && *pos == st->last_pos) {
2627 rc = tcp_seek_last_pos(seq);
2628 if (rc)
2629 goto out;
2630 }
2631
1da177e4
LT
2632 st->state = TCP_SEQ_STATE_LISTENING;
2633 st->num = 0;
a8b690f9
TH
2634 st->bucket = 0;
2635 st->offset = 0;
2636 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2637
2638out:
2639 st->last_pos = *pos;
2640 return rc;
1da177e4
LT
2641}
2642
2643static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2644{
a8b690f9 2645 struct tcp_iter_state *st = seq->private;
1da177e4 2646 void *rc = NULL;
1da177e4
LT
2647
2648 if (v == SEQ_START_TOKEN) {
2649 rc = tcp_get_idx(seq, 0);
2650 goto out;
2651 }
1da177e4
LT
2652
2653 switch (st->state) {
2654 case TCP_SEQ_STATE_OPENREQ:
2655 case TCP_SEQ_STATE_LISTENING:
2656 rc = listening_get_next(seq, v);
2657 if (!rc) {
1da177e4 2658 st->state = TCP_SEQ_STATE_ESTABLISHED;
a8b690f9
TH
2659 st->bucket = 0;
2660 st->offset = 0;
1da177e4
LT
2661 rc = established_get_first(seq);
2662 }
2663 break;
2664 case TCP_SEQ_STATE_ESTABLISHED:
2665 case TCP_SEQ_STATE_TIME_WAIT:
2666 rc = established_get_next(seq, v);
2667 break;
2668 }
2669out:
2670 ++*pos;
a8b690f9 2671 st->last_pos = *pos;
1da177e4
LT
2672 return rc;
2673}
2674
2675static void tcp_seq_stop(struct seq_file *seq, void *v)
2676{
5799de0b 2677 struct tcp_iter_state *st = seq->private;
1da177e4
LT
2678
2679 switch (st->state) {
2680 case TCP_SEQ_STATE_OPENREQ:
2681 if (v) {
463c84b9
ACM
2682 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2683 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4
LT
2684 }
2685 case TCP_SEQ_STATE_LISTENING:
2686 if (v != SEQ_START_TOKEN)
5caea4ea 2687 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
1da177e4
LT
2688 break;
2689 case TCP_SEQ_STATE_TIME_WAIT:
2690 case TCP_SEQ_STATE_ESTABLISHED:
2691 if (v)
9db66bdc 2692 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2693 break;
2694 }
2695}
2696
73cb88ec 2697int tcp_seq_open(struct inode *inode, struct file *file)
1da177e4 2698{
d9dda78b 2699 struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
1da177e4 2700 struct tcp_iter_state *s;
52d6f3f1 2701 int err;
1da177e4 2702
52d6f3f1
DL
2703 err = seq_open_net(inode, file, &afinfo->seq_ops,
2704 sizeof(struct tcp_iter_state));
2705 if (err < 0)
2706 return err;
f40c8174 2707
52d6f3f1 2708 s = ((struct seq_file *)file->private_data)->private;
1da177e4 2709 s->family = afinfo->family;
a8b690f9 2710 s->last_pos = 0;
f40c8174
DL
2711 return 0;
2712}
73cb88ec 2713EXPORT_SYMBOL(tcp_seq_open);
f40c8174 2714
6f8b13bc 2715int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4
LT
2716{
2717 int rc = 0;
2718 struct proc_dir_entry *p;
2719
9427c4b3
DL
2720 afinfo->seq_ops.start = tcp_seq_start;
2721 afinfo->seq_ops.next = tcp_seq_next;
2722 afinfo->seq_ops.stop = tcp_seq_stop;
2723
84841c3c 2724 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
73cb88ec 2725 afinfo->seq_fops, afinfo);
84841c3c 2726 if (!p)
1da177e4
LT
2727 rc = -ENOMEM;
2728 return rc;
2729}
4bc2f18b 2730EXPORT_SYMBOL(tcp_proc_register);
1da177e4 2731
6f8b13bc 2732void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4 2733{
ece31ffd 2734 remove_proc_entry(afinfo->name, net->proc_net);
1da177e4 2735}
4bc2f18b 2736EXPORT_SYMBOL(tcp_proc_unregister);
1da177e4 2737
cf533ea5 2738static void get_openreq4(const struct sock *sk, const struct request_sock *req,
a7cb5a49 2739 struct seq_file *f, int i, kuid_t uid, int *len)
1da177e4 2740{
2e6599cb 2741 const struct inet_request_sock *ireq = inet_rsk(req);
a399a805 2742 long delta = req->expires - jiffies;
1da177e4 2743
5e659e4c 2744 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
71338aa7 2745 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
1da177e4 2746 i,
2e6599cb 2747 ireq->loc_addr,
c720c7e8 2748 ntohs(inet_sk(sk)->inet_sport),
2e6599cb
ACM
2749 ireq->rmt_addr,
2750 ntohs(ireq->rmt_port),
1da177e4
LT
2751 TCP_SYN_RECV,
2752 0, 0, /* could print option size, but that is af dependent. */
2753 1, /* timers active (only the expire timer) */
a399a805 2754 jiffies_delta_to_clock_t(delta),
e6c022a4 2755 req->num_timeout,
a7cb5a49 2756 from_kuid_munged(seq_user_ns(f), uid),
1da177e4
LT
2757 0, /* non standard timer */
2758 0, /* open_requests have no inode */
2759 atomic_read(&sk->sk_refcnt),
5e659e4c
PE
2760 req,
2761 len);
1da177e4
LT
2762}
2763
5e659e4c 2764static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
1da177e4
LT
2765{
2766 int timer_active;
2767 unsigned long timer_expires;
cf533ea5 2768 const struct tcp_sock *tp = tcp_sk(sk);
cf4c6bf8 2769 const struct inet_connection_sock *icsk = inet_csk(sk);
cf533ea5 2770 const struct inet_sock *inet = inet_sk(sk);
168a8f58 2771 struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
c720c7e8
ED
2772 __be32 dest = inet->inet_daddr;
2773 __be32 src = inet->inet_rcv_saddr;
2774 __u16 destp = ntohs(inet->inet_dport);
2775 __u16 srcp = ntohs(inet->inet_sport);
49d09007 2776 int rx_queue;
1da177e4 2777
6ba8a3b1
ND
2778 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2779 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2780 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1da177e4 2781 timer_active = 1;
463c84b9
ACM
2782 timer_expires = icsk->icsk_timeout;
2783 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1da177e4 2784 timer_active = 4;
463c84b9 2785 timer_expires = icsk->icsk_timeout;
cf4c6bf8 2786 } else if (timer_pending(&sk->sk_timer)) {
1da177e4 2787 timer_active = 2;
cf4c6bf8 2788 timer_expires = sk->sk_timer.expires;
1da177e4
LT
2789 } else {
2790 timer_active = 0;
2791 timer_expires = jiffies;
2792 }
2793
49d09007
ED
2794 if (sk->sk_state == TCP_LISTEN)
2795 rx_queue = sk->sk_ack_backlog;
2796 else
2797 /*
 2798		 * Because we don't lock the socket, we might find a transient negative value.
2799 */
2800 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2801
5e659e4c 2802 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
71338aa7 2803 "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
cf4c6bf8 2804 i, src, srcp, dest, destp, sk->sk_state,
47da8ee6 2805 tp->write_seq - tp->snd_una,
49d09007 2806 rx_queue,
1da177e4 2807 timer_active,
a399a805 2808 jiffies_delta_to_clock_t(timer_expires - jiffies),
463c84b9 2809 icsk->icsk_retransmits,
a7cb5a49 2810 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
6687e988 2811 icsk->icsk_probes_out,
cf4c6bf8
IJ
2812 sock_i_ino(sk),
2813 atomic_read(&sk->sk_refcnt), sk,
7be87351
SH
2814 jiffies_to_clock_t(icsk->icsk_rto),
2815 jiffies_to_clock_t(icsk->icsk_ack.ato),
463c84b9 2816 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1da177e4 2817 tp->snd_cwnd,
168a8f58
JC
2818 sk->sk_state == TCP_LISTEN ?
2819 (fastopenq ? fastopenq->max_qlen : 0) :
2820 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh),
5e659e4c 2821 len);
1da177e4
LT
2822}
2823
cf533ea5 2824static void get_timewait4_sock(const struct inet_timewait_sock *tw,
5e659e4c 2825 struct seq_file *f, int i, int *len)
1da177e4 2826{
23f33c2d 2827 __be32 dest, src;
1da177e4 2828 __u16 destp, srcp;
a399a805 2829 long delta = tw->tw_ttd - jiffies;
1da177e4
LT
2830
2831 dest = tw->tw_daddr;
2832 src = tw->tw_rcv_saddr;
2833 destp = ntohs(tw->tw_dport);
2834 srcp = ntohs(tw->tw_sport);
2835
5e659e4c 2836 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
71338aa7 2837 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
1da177e4 2838 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
a399a805 2839 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
5e659e4c 2840 atomic_read(&tw->tw_refcnt), tw, len);
1da177e4
LT
2841}
2842
2843#define TMPSZ 150
2844
2845static int tcp4_seq_show(struct seq_file *seq, void *v)
2846{
5799de0b 2847 struct tcp_iter_state *st;
5e659e4c 2848 int len;
1da177e4
LT
2849
2850 if (v == SEQ_START_TOKEN) {
2851 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2852 " sl local_address rem_address st tx_queue "
2853 "rx_queue tr tm->when retrnsmt uid timeout "
2854 "inode");
2855 goto out;
2856 }
2857 st = seq->private;
2858
2859 switch (st->state) {
2860 case TCP_SEQ_STATE_LISTENING:
2861 case TCP_SEQ_STATE_ESTABLISHED:
5e659e4c 2862 get_tcp4_sock(v, seq, st->num, &len);
1da177e4
LT
2863 break;
2864 case TCP_SEQ_STATE_OPENREQ:
5e659e4c 2865 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
1da177e4
LT
2866 break;
2867 case TCP_SEQ_STATE_TIME_WAIT:
5e659e4c 2868 get_timewait4_sock(v, seq, st->num, &len);
1da177e4
LT
2869 break;
2870 }
5e659e4c 2871 seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
1da177e4
LT
2872out:
2873 return 0;
2874}
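/* Illustrative only -- the shape of a /proc/net/tcp entry produced by
 * get_tcp4_sock() above (all field values here are made up):
 *
 *   0: 0100007F:0CEA 00000000:0000 0A 00000000:00000000 00:00000000 00000000  1000        0 12345 1 ffff880012345678 100 0 0 10 -1
 */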
2875
73cb88ec
AV
2876static const struct file_operations tcp_afinfo_seq_fops = {
2877 .owner = THIS_MODULE,
2878 .open = tcp_seq_open,
2879 .read = seq_read,
2880 .llseek = seq_lseek,
2881 .release = seq_release_net
2882};
2883
1da177e4 2884static struct tcp_seq_afinfo tcp4_seq_afinfo = {
1da177e4
LT
2885 .name = "tcp",
2886 .family = AF_INET,
73cb88ec 2887 .seq_fops = &tcp_afinfo_seq_fops,
9427c4b3
DL
2888 .seq_ops = {
2889 .show = tcp4_seq_show,
2890 },
1da177e4
LT
2891};
2892
2c8c1e72 2893static int __net_init tcp4_proc_init_net(struct net *net)
757764f6
PE
2894{
2895 return tcp_proc_register(net, &tcp4_seq_afinfo);
2896}
2897
2c8c1e72 2898static void __net_exit tcp4_proc_exit_net(struct net *net)
757764f6
PE
2899{
2900 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2901}
2902
2903static struct pernet_operations tcp4_net_ops = {
2904 .init = tcp4_proc_init_net,
2905 .exit = tcp4_proc_exit_net,
2906};
2907
1da177e4
LT
2908int __init tcp4_proc_init(void)
2909{
757764f6 2910 return register_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2911}
2912
2913void tcp4_proc_exit(void)
2914{
757764f6 2915 unregister_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2916}
2917#endif /* CONFIG_PROC_FS */
2918
bf296b12
HX
2919struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2920{
b71d1d42 2921 const struct iphdr *iph = skb_gro_network_header(skb);
861b6501
ED
2922 __wsum wsum;
2923 __sum16 sum;
bf296b12
HX
2924
2925 switch (skb->ip_summed) {
2926 case CHECKSUM_COMPLETE:
86911732 2927 if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
bf296b12
HX
2928 skb->csum)) {
2929 skb->ip_summed = CHECKSUM_UNNECESSARY;
2930 break;
2931 }
861b6501 2932flush:
bf296b12
HX
2933 NAPI_GRO_CB(skb)->flush = 1;
2934 return NULL;
861b6501
ED
2935
2936 case CHECKSUM_NONE:
2937 wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
2938 skb_gro_len(skb), IPPROTO_TCP, 0);
2939 sum = csum_fold(skb_checksum(skb,
2940 skb_gro_offset(skb),
2941 skb_gro_len(skb),
2942 wsum));
2943 if (sum)
2944 goto flush;
2945
2946 skb->ip_summed = CHECKSUM_UNNECESSARY;
2947 break;
bf296b12
HX
2948 }
2949
2950 return tcp_gro_receive(head, skb);
2951}
bf296b12
HX
2952
2953int tcp4_gro_complete(struct sk_buff *skb)
2954{
b71d1d42 2955 const struct iphdr *iph = ip_hdr(skb);
bf296b12
HX
2956 struct tcphdr *th = tcp_hdr(skb);
2957
2958 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2959 iph->saddr, iph->daddr, 0);
2960 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2961
2962 return tcp_gro_complete(skb);
2963}
bf296b12 2964
1da177e4
LT
2965struct proto tcp_prot = {
2966 .name = "TCP",
2967 .owner = THIS_MODULE,
2968 .close = tcp_close,
2969 .connect = tcp_v4_connect,
2970 .disconnect = tcp_disconnect,
463c84b9 2971 .accept = inet_csk_accept,
1da177e4
LT
2972 .ioctl = tcp_ioctl,
2973 .init = tcp_v4_init_sock,
2974 .destroy = tcp_v4_destroy_sock,
2975 .shutdown = tcp_shutdown,
2976 .setsockopt = tcp_setsockopt,
2977 .getsockopt = tcp_getsockopt,
1da177e4 2978 .recvmsg = tcp_recvmsg,
7ba42910
CG
2979 .sendmsg = tcp_sendmsg,
2980 .sendpage = tcp_sendpage,
1da177e4 2981 .backlog_rcv = tcp_v4_do_rcv,
46d3ceab 2982 .release_cb = tcp_release_cb,
ab1e0a13
ACM
2983 .hash = inet_hash,
2984 .unhash = inet_unhash,
2985 .get_port = inet_csk_get_port,
1da177e4
LT
2986 .enter_memory_pressure = tcp_enter_memory_pressure,
2987 .sockets_allocated = &tcp_sockets_allocated,
0a5578cf 2988 .orphan_count = &tcp_orphan_count,
1da177e4
LT
2989 .memory_allocated = &tcp_memory_allocated,
2990 .memory_pressure = &tcp_memory_pressure,
1da177e4
LT
2991 .sysctl_wmem = sysctl_tcp_wmem,
2992 .sysctl_rmem = sysctl_tcp_rmem,
2993 .max_header = MAX_TCP_HEADER,
2994 .obj_size = sizeof(struct tcp_sock),
3ab5aee7 2995 .slab_flags = SLAB_DESTROY_BY_RCU,
6d6ee43e 2996 .twsk_prot = &tcp_timewait_sock_ops,
60236fdd 2997 .rsk_prot = &tcp_request_sock_ops,
39d8cda7 2998 .h.hashinfo = &tcp_hashinfo,
7ba42910 2999 .no_autobind = true,
543d9cfe
ACM
3000#ifdef CONFIG_COMPAT
3001 .compat_setsockopt = compat_tcp_setsockopt,
3002 .compat_getsockopt = compat_tcp_getsockopt,
3003#endif
c255a458 3004#ifdef CONFIG_MEMCG_KMEM
d1a4c0b3
GC
3005 .init_cgroup = tcp_init_cgroup,
3006 .destroy_cgroup = tcp_destroy_cgroup,
3007 .proto_cgroup = tcp_proto_cgroup,
3008#endif
1da177e4 3009};
4bc2f18b 3010EXPORT_SYMBOL(tcp_prot);
1da177e4 3011
046ee902
DL
3012static int __net_init tcp_sk_init(struct net *net)
3013{
5d134f1c 3014 net->ipv4.sysctl_tcp_ecn = 2;
be9f4a44 3015 return 0;
046ee902
DL
3016}
3017
3018static void __net_exit tcp_sk_exit(struct net *net)
3019{
b099ce26
EB
3020}
3021
3022static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
3023{
3024 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
046ee902
DL
3025}
3026
3027static struct pernet_operations __net_initdata tcp_sk_ops = {
b099ce26
EB
3028 .init = tcp_sk_init,
3029 .exit = tcp_sk_exit,
3030 .exit_batch = tcp_sk_exit_batch,
046ee902
DL
3031};
3032
9b0f976f 3033void __init tcp_v4_init(void)
1da177e4 3034{
5caea4ea 3035 inet_hashinfo_init(&tcp_hashinfo);
6a1b3054 3036 if (register_pernet_subsys(&tcp_sk_ops))
1da177e4 3037 panic("Failed to create the TCP control socket.\n");
1da177e4 3038}