/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *	See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);

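/* Editorial note: these two knobs are the ones exposed through procfs as
 * /proc/sys/net/ipv4/tcp_tw_reuse and /proc/sys/net/ipv4/tcp_low_latency
 * (i.e. sysctl net.ipv4.tcp_tw_reuse and net.ipv4.tcp_low_latency).
 */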

#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's: only the timestamp cache is
	   held not per host but per port pair, and the TW bucket is used as
	   the state holder.

	   If the TW bucket has already been destroyed we fall back to VJ's
	   scheme and use the initial timestamp retrieved from the peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

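/* Editorial note (worked example, not from the original source): if the
 * TIME-WAIT socket on this port pair had tw_snd_nxt == 1000, the reused
 * connection above starts at write_seq = 1000 + 65535 + 2 = 66537, i.e. a
 * full 64K window past anything the old incarnation could still have in
 * flight, so stale duplicates cannot be mistaken for new data.
 */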
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk, true);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq	   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
		tcp_fetch_timewait_stamp(sk, &rt->dst);

	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set the state to SYN-SENT and, without releasing the
	 * socket lock, select a source port, enter ourselves into the hash
	 * tables and complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket.  */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);
	printk(KERN_INFO "[socket_conn]IPV4 socket[%lu] sport:%u \n",
	       SOCK_INODE(sk->sk_socket)->i_ino, ntohs(inet->inet_sport));
	if (!tp->write_seq && likely(!tp->repair))
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);

	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);
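
/* Editorial note: an illustrative (hypothetical) user-space sequence that
 * ends up in tcp_v4_connect() via inet_stream_connect(); not part of the
 * kernel source, shown only to make the entry point concrete.
 *
 *	int fd = socket(AF_INET, SOCK_STREAM, 0);
 *	struct sockaddr_in dst = {
 *		.sin_family = AF_INET,
 *		.sin_port   = htons(80),
 *	};
 *	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);
 *	connect(fd, (struct sockaddr *)&dst, sizeof(dst));
 */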

/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 * It can be called through tcp_release_cb() if the socket was owned by user
 * at the time tcp_v4_err() was called to handle the ICMP message.
 */
void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);
	u32 mtu = tcp_sk(sk)->mtu_info;

	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
EXPORT_SYMBOL(tcp_v4_mtu_reduced);

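/* Editorial note (worked example, not from the original source): assume the
 * path MTU drops from 1500 to 1400 and an ICMP_FRAG_NEEDED arrives.  With
 * icsk_pmtu_cookie still at 1500 and pmtudisc enabled, tcp_sync_mss() above
 * shrinks the MSS to fit a 1400-byte datagram and tcp_simple_retransmit()
 * resends the queued segments immediately instead of waiting for the RTO --
 * that is the "fast path mtu discovery" the comment refers to.
 */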
static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);

	if (dst)
		dst->ops->redirect(dst, sk, skb);
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	struct request_sock *req;
	__u32 seq;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	if (icmp_skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of PMTU discovery (RFC1191) special case :
	 * we can receive locally generated ICMP messages while socket is held.
	 */
	if (sock_owned_by_user(sk)) {
		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
			NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
	}
	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	req = tp->fastopen_rsk;
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt) &&
	    (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
		/* For a Fast Open socket, allow seq to be snt_isn. */
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			/* We are not interested in TCP_LISTEN and open_requests
			 * (SYN-ACKs sent out by Linux are always <576 bytes so
			 * they should go through unfragmented).
			 */
			if (sk->sk_state == TCP_LISTEN)
				goto out;

			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff)
			break;

		/* XXX (TFO) - revisit the following logic for TFO */

		if (sock_owned_by_user(sk))
			break;

		icsk->icsk_backoff--;
		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				tcp_time_stamp - TCP_SKB_CB(skb)->when);

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, sysctl_tcp_rto_max);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	/* XXX (TFO) - if it's a TFO socket and has been accepted, rather
	 * than following the TCP_SYN_RECV case and closing the socket,
	 * we ignore the ICMP error and keep trying like a fully established
	 * socket. Is this the right thing to do?
	 */
	if (req && req->sk == NULL)
		goto out;

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, e.g., if SYNs crossed,
			       or with Fast Open.
			     */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters, even these two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

static void __tcp_v4_send_check(struct sk_buff *skb,
				__be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

int tcp_v4_gso_send_check(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	th->check = 0;
	skb->ip_summed = CHECKSUM_PARTIAL;
	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
	return 0;
}

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
 *		      for the reset?
 *	Answer: if a packet caused the RST, it is not for a socket
 *		existing in our system; if it is matched to a socket,
 *		it is just a duplicate segment or a bug in the other side's TCP.
 *		So we build the reply based only on parameters that
 *		arrived with the segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	hash_location = tcp_parse_md5sig_option(th);
	if (!sk && hash_location) {
		/*
		 * The active side is lost. Try to find the listening socket
		 * through the source port, and then find the md5 key through
		 * the listening socket.
		 * We are not loosening security here:
		 * the incoming packet is checked against the md5 hash of the
		 * key found; no RST is generated if the md5 hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
					     &tcp_hashinfo, ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			return;
		rcu_read_lock();
		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto release_sk1;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto release_sk1;
	} else {
		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					     &ip_hdr(skb)->saddr,
					     AF_INET) : NULL;
	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
	/* When the socket is gone, all binding information is lost.
	 * Routing might fail in this case. No choice here: if we choose to force
	 * the input interface, we will misroute in case of an asymmetric route.
	 */
	if (sk)
		arg.bound_dev_if = sk->sk_bound_dev_if;

	net = dev_net(skb_dst(skb)->dev);
	arg.tos = ip_hdr(skb)->tos;
	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
			      skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);

#ifdef CONFIG_TCP_MD5SIG
release_sk1:
	if (sk1) {
		rcu_read_unlock();
		sock_put(sk1);
	}
#endif
}

/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
   outside socket context, is certainly ugly. What can I do?
 */

static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb_dst(skb)->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (tsecr) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tsval);
		rep.opt[2] = htonl(tsecr);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (tsecr) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
			      skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}

static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
			tcp_time_stamp,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
					  AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct request_sock *req,
			      u16 queue_mapping,
			      bool nocache)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, NULL);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);

		skb_set_queue_mapping(skb, queue_mapping);
		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
		if (!tcp_rsk(req)->snt_synack && !err)
			tcp_rsk(req)->snt_synack = tcp_time_stamp;
	}

	return err;
}

static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
{
	int res = tcp_v4_send_synack(sk, NULL, req, 0, false);

	if (!res)
		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
	return res;
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

/*
 * Return true if a syncookie should be sent
 */
bool tcp_syn_flood_action(struct sock *sk,
			  const struct sk_buff *skb,
			  const char *proto)
{
	const char *msg = "Dropping request";
	bool want_cookie = false;
	struct listen_sock *lopt;

#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies) {
		msg = "Sending cookies";
		want_cookie = true;
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
	} else
#endif
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);

	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
	if (!lopt->synflood_warned) {
		lopt->synflood_warned = 1;
		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
			proto, ntohs(tcp_hdr(skb)->dest), msg);
	}
	return want_cookie;
}
EXPORT_SYMBOL(tcp_syn_flood_action);

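/* Editorial note: the sysctl_tcp_syncookies knob consulted above is the
 * familiar /proc/sys/net/ipv4/tcp_syncookies (net.ipv4.tcp_syncookies);
 * when it is 0, a full request queue simply drops new SYNs instead of
 * answering them with cookies.
 */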
/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
{
	const struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options_rcu *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = sizeof(*dopt) + opt->optlen;

		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(&dopt->opt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
					  const union tcp_md5_addr *addr,
					  int family)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	unsigned int size = sizeof(struct in_addr);
	struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       sock_owned_by_user(sk) ||
				       lockdep_is_held(&sk->sk_lock.slock));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size))
			return key;
	}
	return NULL;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}

/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk) ||
					   lockdep_is_held(&sk->sk_lock.slock));
	if (!md5sig) {
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);

int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (!key)
		return -ENOENT;
	hlist_del_rcu(&key->node);
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk));
	if (hlist_empty(&md5sig->head))
		tcp_free_md5sig_pool();
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);

static void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *n;
	struct tcp_md5sig_info *md5sig;

	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	if (!hlist_empty(&md5sig->head))
		tcp_free_md5sig_pool();
	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				      AF_INET);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
			      GFP_KERNEL);
}

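/* Editorial note: an illustrative (hypothetical) user-space call that lands
 * in tcp_v4_parse_md5_keys() above via setsockopt(TCP_MD5SIG); field names
 * follow struct tcp_md5sig from <linux/tcp.h>, the peer address and key are
 * assumed example values.
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in *peer = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *	peer->sin_family = AF_INET;
 *	inet_pton(AF_INET, "192.0.2.1", &peer->sin_addr);
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 */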
static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			const struct sock *sk, const struct request_sock *req,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->inet_saddr;
		daddr = inet_sk(sk)->inet_daddr;
	} else if (req) {
		saddr = inet_rsk(req)->loc_addr;
		daddr = inet_rsk(req)->rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				     &iph->saddr, ntohs(th->source),
				     &iph->daddr, ntohs(th->dest),
				     genhash ? " tcp_v4_calc_md5_hash failed"
				     : "");
		return true;
	}
	return false;
}

#endif

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
};
#endif

static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
			       struct request_sock *req,
			       struct tcp_fastopen_cookie *foc,
			       struct tcp_fastopen_cookie *valid_foc)
{
	bool skip_cookie = false;
	struct fastopen_queue *fastopenq;

	if (likely(!fastopen_cookie_present(foc))) {
		/* See include/net/tcp.h for the meaning of these knobs */
		if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
		    ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
		    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
			skip_cookie = true; /* no cookie to validate */
		else
			return false;
	}
	fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
	/* A FO option is present; bump the counter. */
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);

	/* Make sure the listener has enabled fastopen, and we don't
	 * exceed the max # of pending TFO requests allowed before trying
	 * to validate the cookie, in order to avoid burning CPU cycles
	 * unnecessarily.
	 *
	 * XXX (TFO) - The implication of checking the max_qlen before
	 * processing a cookie request is that clients can't differentiate
	 * between qlen overflow causing Fast Open to be disabled
	 * temporarily vs a server not supporting Fast Open at all.
	 */
	if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
	    fastopenq == NULL || fastopenq->max_qlen == 0)
		return false;

	if (fastopenq->qlen >= fastopenq->max_qlen) {
		struct request_sock *req1;
		spin_lock(&fastopenq->lock);
		req1 = fastopenq->rskq_rst_head;
		if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
			spin_unlock(&fastopenq->lock);
			NET_INC_STATS_BH(sock_net(sk),
					 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
			/* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL */
			foc->len = -1;
			return false;
		}
		fastopenq->rskq_rst_head = req1->dl_next;
		fastopenq->qlen--;
		spin_unlock(&fastopenq->lock);
		reqsk_free(req1);
	}
	if (skip_cookie) {
		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		return true;
	}
	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
		if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
			tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
			if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
			    memcmp(&foc->val[0], &valid_foc->val[0],
				   TCP_FASTOPEN_COOKIE_SIZE) != 0)
				return false;
			valid_foc->len = -1;
		}
		/* Acknowledge the data received from the peer. */
		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		return true;
	} else if (foc->len == 0) { /* Client requesting a cookie */
		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
		NET_INC_STATS_BH(sock_net(sk),
				 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
	} else {
		/* Client sent a cookie with wrong size. Treat it
		 * the same as invalid and return a valid one.
		 */
		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
	}
	return false;
}

static int tcp_v4_conn_req_fastopen(struct sock *sk,
				    struct sk_buff *skb,
				    struct sk_buff *skb_synack,
				    struct request_sock *req)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct sock *child;
	int err;

	req->num_retrans = 0;
	req->num_timeout = 0;
	req->sk = NULL;

	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
	if (child == NULL) {
		NET_INC_STATS_BH(sock_net(sk),
				 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
		kfree_skb(skb_synack);
		return -1;
	}
	err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
				    ireq->rmt_addr, ireq->opt);
	err = net_xmit_eval(err);
	if (!err)
		tcp_rsk(req)->snt_synack = tcp_time_stamp;
	/* XXX (TFO) - is it ok to ignore error and continue? */

	spin_lock(&queue->fastopenq->lock);
	queue->fastopenq->qlen++;
	spin_unlock(&queue->fastopenq->lock);

	/* Initialize the child socket. Have to fix some values to take
	 * into account the child is a Fast Open socket and is created
	 * only out of the bits carried in the SYN packet.
	 */
	tp = tcp_sk(child);

	tp->fastopen_rsk = req;
	/* Do a hold on the listener sk so that if the listener is being
	 * closed, the child that has been accepted can live on and still
	 * access listen_lock.
	 */
	sock_hold(sk);
	tcp_rsk(req)->listener = sk;

	/* RFC1323: The window in SYN & SYN/ACK segments is never
	 * scaled. So correct it appropriately.
	 */
	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);

	/* Activate the retrans timer so that SYNACK can be retransmitted.
	 * The request socket is not added to the SYN table of the parent
	 * because it's been added to the accept queue directly.
	 */
	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
				  TCP_TIMEOUT_INIT, sysctl_tcp_rto_max);

	/* Add the child socket directly into the accept queue */
	inet_csk_reqsk_queue_add(sk, req, child);

	/* Now finish processing the fastopen child socket. */
	inet_csk(child)->icsk_af_ops->rebuild_header(child);
	tcp_init_congestion_control(child);
	tcp_mtup_init(child);
	tcp_init_buffer_space(child);
	tcp_init_metrics(child);

	/* Queue the data carried in the SYN packet. We need to first
	 * bump skb's refcnt because the caller will attempt to free it.
	 *
	 * XXX (TFO) - we honor a zero-payload TFO request for now.
	 * (Any reason not to?)
	 */
	if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
		/* Don't queue the skb if there is no payload in SYN.
		 * XXX (TFO) - How about SYN+FIN?
		 */
		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
	} else {
		skb = skb_get(skb);
		skb_dst_drop(skb);
		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
		skb_set_owner_r(skb, child);
		__skb_queue_tail(&child->sk_receive_queue, skb);
		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		tp->syn_data_acked = 1;
	}
	sk->sk_data_ready(sk, 0);
	bh_unlock_sock(child);
	sock_put(child);
	WARN_ON(req->sk == NULL);
	return 0;
}

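/* Editorial note: an illustrative (hypothetical) user-space pairing for the
 * Fast Open server path above; the TCP_FASTOPEN and MSG_FASTOPEN constants
 * are the standard Linux ones, the queue length and payload are assumed
 * example values.
 *
 *	Server side (enables the TFO queue handled here):
 *		int qlen = 16;
 *		setsockopt(listen_fd, IPPROTO_TCP, TCP_FASTOPEN,
 *			   &qlen, sizeof(qlen));
 *
 *	Client side (sends data in the SYN):
 *		sendto(fd, "hello", 5, MSG_FASTOPEN,
 *		       (struct sockaddr *)&dst, sizeof(dst));
 */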
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_options_received tmp_opt;
	struct request_sock *req;
	struct inet_request_sock *ireq;
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = NULL;
	__be32 saddr = ip_hdr(skb)->saddr;
	__be32 daddr = ip_hdr(skb)->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	bool want_cookie = false;
	struct flowi4 fl4;
	struct tcp_fastopen_cookie foc = { .len = -1 };
	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
	struct sk_buff *skb_synack;
	int do_fastopen;

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
		if (!want_cookie)
			goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
		goto drop;
	}

	req = inet_reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
	tmp_opt.user_mss  = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	ireq->no_srccheck = inet_sk(sk)->transparent;
	ireq->opt = tcp_v4_save_options(skb);
	ireq->ir_mark = inet_request_mark(sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, skb, sock_net(sk));

	if (want_cookie) {
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;
	} else if (!isn) {
		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
		    fl4.daddr == saddr) {
			if (!tcp_peer_is_proven(req, dst, true)) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 !tcp_peer_is_proven(req, dst, false)) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
				       &saddr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	tcp_rsk(req)->snt_isn = isn;

	if (dst == NULL) {
		dst = inet_csk_route_req(sk, &fl4, req);
		if (dst == NULL)
			goto drop_and_free;
	}
	do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);

	/* We don't call tcp_v4_send_synack() directly because we need
	 * to make sure a child socket can be created successfully before
	 * sending back synack!
	 *
	 * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
	 * (or better yet, call tcp_send_synack() in the child context
	 * directly, but will have to fix bunch of other code first)
	 * after syn_recv_sock() except one will need to first fix the
	 * latter to remove its dependency on the current implementation
	 * of tcp_v4_send_synack()->tcp_select_initial_window().
	 */
	skb_synack = tcp_make_synack(sk, dst, req,
	    fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);

	if (skb_synack) {
		__tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr);
		skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
	} else
		goto drop_and_free;

	if (likely(!do_fastopen)) {
		int err;
		err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
					    ireq->rmt_addr, ireq->opt);
		err = net_xmit_eval(err);
		if (err || want_cookie)
			goto drop_and_free;

		tcp_rsk(req)->snt_synack = tcp_time_stamp;
		tcp_rsk(req)->listener = NULL;
		/* Add the request_sock to the SYN table */
		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
		if (fastopen_cookie_present(&foc) && foc.len != 0)
			NET_INC_STATS_BH(sock_net(sk),
					 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
	} else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))
		goto drop_and_free;

	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);

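/* Editorial note: an illustrative (hypothetical) user-space passive open
 * whose incoming SYNs are handled by tcp_v4_conn_request() above and whose
 * accept() completes with the socket built by tcp_v4_syn_recv_sock() below.
 *
 *	int lfd = socket(AF_INET, SOCK_STREAM, 0);
 *	struct sockaddr_in addr = {
 *		.sin_family      = AF_INET,
 *		.sin_port        = htons(8080),
 *		.sin_addr.s_addr = htonl(INADDR_ANY),
 *	};
 *	bind(lfd, (struct sockaddr *)&addr, sizeof(addr));
 *	listen(lfd, 128);
 *	int cfd = accept(lfd, NULL, NULL);
 */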
1642
1643/*
1644 * The three way handshake has completed - we got a valid synack -
1645 * now create the new socket.
1646 */
1647struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
60236fdd 1648 struct request_sock *req,
1649 struct dst_entry *dst)
1650{
2e6599cb 1651 struct inet_request_sock *ireq;
1652 struct inet_sock *newinet;
1653 struct tcp_sock *newtp;
1654 struct sock *newsk;
1655#ifdef CONFIG_TCP_MD5SIG
1656 struct tcp_md5sig_key *key;
1657#endif
f6d8bd05 1658 struct ip_options_rcu *inet_opt;
1659
1660 if (sk_acceptq_is_full(sk))
1661 goto exit_overflow;
1662
1663 newsk = tcp_create_openreq_child(sk, req, skb);
1664 if (!newsk)
093d2823 1665 goto exit_nonewsk;
1da177e4 1666
bcd76111 1667 newsk->sk_gso_type = SKB_GSO_TCPV4;
fae6ef87 1668 inet_sk_rx_dst_set(newsk, skb);
1669
1670 newtp = tcp_sk(newsk);
1671 newinet = inet_sk(newsk);
2e6599cb 1672 ireq = inet_rsk(req);
1673 newinet->inet_daddr = ireq->rmt_addr;
1674 newinet->inet_rcv_saddr = ireq->loc_addr;
1675 newinet->inet_saddr = ireq->loc_addr;
1676 inet_opt = ireq->opt;
1677 rcu_assign_pointer(newinet->inet_opt, inet_opt);
2e6599cb 1678 ireq->opt = NULL;
463c84b9 1679 newinet->mc_index = inet_iif(skb);
eddc9ec5 1680 newinet->mc_ttl = ip_hdr(skb)->ttl;
4c507d28 1681 newinet->rcv_tos = ip_hdr(skb)->tos;
d83d8461 1682 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1683 if (inet_opt)
1684 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
c720c7e8 1685 newinet->inet_id = newtp->write_seq ^ jiffies;
1da177e4 1686
1687 if (!dst) {
1688 dst = inet_csk_route_child_sock(sk, newsk, req);
1689 if (!dst)
1690 goto put_and_exit;
1691 } else {
1692 /* syncookie case : see end of cookie_v4_check() */
1693 }
1694 sk_setup_caps(newsk, dst);
1695
5d424d5a 1696 tcp_mtup_init(newsk);
1da177e4 1697 tcp_sync_mss(newsk, dst_mtu(dst));
0dbaee3b 1698 newtp->advmss = dst_metric_advmss(dst);
1699 if (tcp_sk(sk)->rx_opt.user_mss &&
1700 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1701 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1702
1da177e4 1703 tcp_initialize_rcv_mss(newsk);
623df484 1704 tcp_synack_rtt_meas(newsk, req);
e6c022a4 1705 newtp->total_retrans = req->num_retrans;
1da177e4 1706
1707#ifdef CONFIG_TCP_MD5SIG
1708 /* Copy over the MD5 key from the original socket */
1709 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1710 AF_INET);
c720c7e8 1711 if (key != NULL) {
1712 /*
1713 * We're using one, so create a matching key
1714 * on the newsk structure. If we fail to get
1715 * memory, then we end up not copying the key
1716 * across. Shucks.
1717 */
1718 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1719 AF_INET, key->key, key->keylen, GFP_ATOMIC);
a465419b 1720 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1721 }
1722#endif
1723
1724 if (__inet_inherit_port(sk, newsk) < 0)
1725 goto put_and_exit;
9327f705 1726 __inet_hash_nolisten(newsk, NULL);
1727
1728 return newsk;
1729
1730exit_overflow:
de0744af 1731 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1732exit_nonewsk:
1733 dst_release(dst);
1da177e4 1734exit:
de0744af 1735 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1da177e4 1736 return NULL;
0e734419 1737put_and_exit:
1738 inet_csk_prepare_forced_close(newsk);
1739 tcp_done(newsk);
0e734419 1740 goto exit;
1da177e4 1741}
4bc2f18b 1742EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1743
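/* Lookup helper for the listen path: first search the listener's SYN table
 * for a pending request_sock, then the established hash (which also holds
 * TIME_WAIT entries), and finally, with CONFIG_SYN_COOKIES, let
 * cookie_v4_check() validate a bare ACK that may carry a syncookie.
 */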
1744static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1745{
aa8223c7 1746 struct tcphdr *th = tcp_hdr(skb);
eddc9ec5 1747 const struct iphdr *iph = ip_hdr(skb);
1da177e4 1748 struct sock *nsk;
60236fdd 1749 struct request_sock **prev;
1da177e4 1750 /* Find possible connection requests. */
1751 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1752 iph->saddr, iph->daddr);
1da177e4 1753 if (req)
8336886f 1754 return tcp_check_req(sk, skb, req, prev, false);
1da177e4 1755
3b1e0a65 1756 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
c67499c0 1757 th->source, iph->daddr, th->dest, inet_iif(skb));
1758
1759 if (nsk) {
1760 if (nsk->sk_state != TCP_TIME_WAIT) {
1761 bh_lock_sock(nsk);
1762 return nsk;
1763 }
9469c7b4 1764 inet_twsk_put(inet_twsk(nsk));
1765 return NULL;
1766 }
1767
1768#ifdef CONFIG_SYN_COOKIES
af9b4738 1769 if (!th->syn)
1770 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1771#endif
1772 return sk;
1773}
1774
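/* Checksum helper: CHECKSUM_COMPLETE frames are verified against the
 * pseudo-header immediately; otherwise the pseudo-header sum is seeded into
 * skb->csum and full verification is deferred, except for short segments
 * (<= 76 bytes), which are checked right away via __skb_checksum_complete().
 */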
b51655b9 1775static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1da177e4 1776{
1777 const struct iphdr *iph = ip_hdr(skb);
1778
84fa7933 1779 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1780 if (!tcp_v4_check(skb->len, iph->saddr,
1781 iph->daddr, skb->csum)) {
fb286bb2 1782 skb->ip_summed = CHECKSUM_UNNECESSARY;
1da177e4 1783 return 0;
fb286bb2 1784 }
1da177e4 1785 }
fb286bb2 1786
eddc9ec5 1787 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1788 skb->len, IPPROTO_TCP, 0);
1789
1da177e4 1790 if (skb->len <= 76) {
fb286bb2 1791 return __skb_checksum_complete(skb);
1792 }
1793 return 0;
1794}
1795
1796
1797/* The socket must have its spinlock held when we get
1798 * here.
1799 *
1800 * We have a potential double-lock case here, so even when
1801 * doing backlog processing we use the BH locking scheme.
1802 * This is because we cannot sleep with the original spinlock
1803 * held.
1804 */
1805int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1806{
1807 struct sock *rsk;
1808#ifdef CONFIG_TCP_MD5SIG
1809 /*
1810 * We really want to reject the packet as early as possible
1811 * if:
1812 * o We're expecting an MD5-signed packet and there is no MD5 TCP option
1813 * o There is an MD5 option and we're not expecting one
1814 */
7174259e 1815 if (tcp_v4_inbound_md5_hash(sk, skb))
1816 goto discard;
1817#endif
1818
1da177e4 1819 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1820 struct dst_entry *dst = sk->sk_rx_dst;
1821
bdeab991 1822 sock_rps_save_rxhash(sk, skb);
404e0a8b 1823 if (dst) {
1824 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1825 dst->ops->check(dst, 0) == NULL) {
1826 dst_release(dst);
1827 sk->sk_rx_dst = NULL;
1828 }
1829 }
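		/* The check above discards the cached input route when the
		 * incoming interface changed or dst->ops->check() reports it
		 * stale, so a stale route is never reused on the fast path.
		 */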
aa8223c7 1830 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
cfb6eeb4 1831 rsk = sk;
1da177e4 1832 goto reset;
cfb6eeb4 1833 }
1834 return 0;
1835 }
1836
ab6a5bb6 1837 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1838 goto csum_err;
1839
1840 if (sk->sk_state == TCP_LISTEN) {
1841 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1842 if (!nsk)
1843 goto discard;
1844
1845 if (nsk != sk) {
bdeab991 1846 sock_rps_save_rxhash(nsk, skb);
1847 if (tcp_child_process(sk, nsk, skb)) {
1848 rsk = nsk;
1da177e4 1849 goto reset;
cfb6eeb4 1850 }
1851 return 0;
1852 }
ca55158c 1853 } else
bdeab991 1854 sock_rps_save_rxhash(sk, skb);
ca55158c 1855
aa8223c7 1856 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
cfb6eeb4 1857 rsk = sk;
1da177e4 1858 goto reset;
cfb6eeb4 1859 }
1860 return 0;
1861
1862reset:
cfb6eeb4 1863 tcp_v4_send_reset(rsk, skb);
1864discard:
1865 kfree_skb(skb);
1866 /* Be careful here. If this function gets more complicated and
1867 * gcc suffers from register pressure on the x86, sk (in %ebx)
1868 * might be destroyed here. This current version compiles correctly,
1869 * but you have been warned.
1870 */
1871 return 0;
1872
1873csum_err:
6a5dc9e5 1874 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
63231bdd 1875 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1876 goto discard;
1877}
4bc2f18b 1878EXPORT_SYMBOL(tcp_v4_do_rcv);
1da177e4 1879
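/* Early demux: before the IP layer does its route lookup, try to match the
 * packet to an already-established socket using only the IP/TCP headers and,
 * if that socket still holds a valid cached input route for this interface,
 * attach it with skb_dst_set_noref() so the routing step can be skipped.
 */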
160eb5a6 1880void tcp_v4_early_demux(struct sk_buff *skb)
41063e9d 1881{
1882 const struct iphdr *iph;
1883 const struct tcphdr *th;
1884 struct sock *sk;
41063e9d 1885
41063e9d 1886 if (skb->pkt_type != PACKET_HOST)
160eb5a6 1887 return;
41063e9d 1888
45f00f99 1889 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
160eb5a6 1890 return;
1891
1892 iph = ip_hdr(skb);
45f00f99 1893 th = tcp_hdr(skb);
1894
1895 if (th->doff < sizeof(struct tcphdr) / 4)
160eb5a6 1896 return;
41063e9d 1897
45f00f99 1898 sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
41063e9d 1899 iph->saddr, th->source,
7011d085 1900 iph->daddr, ntohs(th->dest),
9cb429d6 1901 skb->skb_iif);
1902 if (sk) {
1903 skb->sk = sk;
1904 skb->destructor = sock_edemux;
1905 if (sk->sk_state != TCP_TIME_WAIT) {
1b946e38 1906 struct dst_entry *dst = ACCESS_ONCE(sk->sk_rx_dst);
505fbcf0 1907
1908 if (dst)
1909 dst = dst_check(dst, 0);
92101b3b 1910 if (dst &&
505fbcf0 1911 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
92101b3b 1912 skb_dst_set_noref(skb, dst);
1913 }
1914 }
1915}
1916
1917/* Packet is added to VJ-style prequeue for processing in process
1918 * context, if a reader task is waiting. Apparently, this exciting
1919 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1920 * failed somewhere. Latency? Burstiness? Well, at least now we will
1921 * see, why it failed. 8)8) --ANK
1922 *
1923 */
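/* What the code below does: if a reader is blocked in recvmsg() and
 * tcp_low_latency is off, data segments are parked on tp->ucopy.prequeue
 * instead of being fully processed in softirq context.  If the queued memory
 * would exceed sk_rcvbuf the prequeue is flushed through sk_backlog_rcv();
 * the first queued segment wakes the reader and arms a delayed-ACK timer so
 * the ACK is not postponed for too long.
 */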
1924bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1925{
1926 struct tcp_sock *tp = tcp_sk(sk);
1927
1928 if (sysctl_tcp_low_latency || !tp->ucopy.task)
1929 return false;
1930
1931 if (skb->len <= tcp_hdrlen(skb) &&
1932 skb_queue_len(&tp->ucopy.prequeue) == 0)
1933 return false;
1934
58717686 1935 skb_dst_force(skb);
1936 __skb_queue_tail(&tp->ucopy.prequeue, skb);
1937 tp->ucopy.memory += skb->truesize;
1938 if (tp->ucopy.memory > sk->sk_rcvbuf) {
1939 struct sk_buff *skb1;
1940
1941 BUG_ON(sock_owned_by_user(sk));
1942
1943 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1944 sk_backlog_rcv(sk, skb1);
1945 NET_INC_STATS_BH(sock_net(sk),
1946 LINUX_MIB_TCPPREQUEUEDROPPED);
1947 }
1948
1949 tp->ucopy.memory = 0;
1950 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1951 wake_up_interruptible_sync_poll(sk_sleep(sk),
1952 POLLIN | POLLRDNORM | POLLRDBAND);
1953 if (!inet_csk_ack_scheduled(sk))
1954 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1955 (3 * tcp_rto_min(sk)) / 4,
6fa3eb70 1956 sysctl_tcp_rto_max);
1957 }
1958 return true;
1959}
1960EXPORT_SYMBOL(tcp_prequeue);
1961
1962/*
1963 * From tcp_input.c
1964 */
1965
1966int tcp_v4_rcv(struct sk_buff *skb)
1967{
eddc9ec5 1968 const struct iphdr *iph;
cf533ea5 1969 const struct tcphdr *th;
1da177e4
LT
1970 struct sock *sk;
1971 int ret;
a86b1e30 1972 struct net *net = dev_net(skb->dev);
1da177e4
LT
1973
1974 if (skb->pkt_type != PACKET_HOST)
1975 goto discard_it;
1976
1977 /* Count it even if it's bad */
63231bdd 1978 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1da177e4
LT
1979
1980 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1981 goto discard_it;
1982
aa8223c7 1983 th = tcp_hdr(skb);
1da177e4
LT
1984
1985 if (th->doff < sizeof(struct tcphdr) / 4)
1986 goto bad_packet;
1987 if (!pskb_may_pull(skb, th->doff * 4))
1988 goto discard_it;
1989
1990 /* An explanation is required here, I think.
1991 * Packet length and doff are validated by header prediction,
caa20d9a 1992 * provided case of th->doff==0 is eliminated.
1da177e4 1993 * So, we defer the checks. */
60476372 1994 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
6a5dc9e5 1995 goto csum_error;
1da177e4 1996
aa8223c7 1997 th = tcp_hdr(skb);
eddc9ec5 1998 iph = ip_hdr(skb);
1da177e4
LT
1999 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
2000 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
2001 skb->len - th->doff * 4);
2002 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
2003 TCP_SKB_CB(skb)->when = 0;
b82d1bb4 2004 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
2005 TCP_SKB_CB(skb)->sacked = 0;
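	/* Note: end_seq above counts SYN and FIN as one sequence number each,
	 * matching how TCP consumes sequence space.
	 */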
2006
9a1f27c4 2007 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1da177e4
LT
2008 if (!sk)
2009 goto no_tcp_socket;
2010
bb134d5d
ED
2011process:
2012 if (sk->sk_state == TCP_TIME_WAIT)
2013 goto do_time_wait;
2014
6cce09f8
ED
2015 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
2016 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
d218d111 2017 goto discard_and_relse;
6cce09f8 2018 }
d218d111 2019
1da177e4
LT
2020 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
2021 goto discard_and_relse;
b59c2701 2022 nf_reset(skb);
1da177e4 2023
fda9ef5d 2024 if (sk_filter(sk, skb))
1da177e4
LT
2025 goto discard_and_relse;
2026
2027 skb->dev = NULL;
2028
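	/* From here a segment takes one of three paths: immediate processing
	 * via tcp_v4_do_rcv(), the prequeue (see tcp_prequeue() above) when a
	 * reader is waiting, or the socket backlog, bounded by
	 * sk_rcvbuf + sk_sndbuf, when the socket is owned by a process.
	 */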
c6366184 2029 bh_lock_sock_nested(sk);
2030 ret = 0;
2031 if (!sock_owned_by_user(sk)) {
1a2449a8
CL
2032#ifdef CONFIG_NET_DMA
2033 struct tcp_sock *tp = tcp_sk(sk);
2034 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
a2bd1140 2035 tp->ucopy.dma_chan = net_dma_find_channel();
1a2449a8 2036 if (tp->ucopy.dma_chan)
1da177e4 2037 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8
CL
2038 else
2039#endif
2040 {
2041 if (!tcp_prequeue(sk, skb))
ae8d7f88 2042 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8 2043 }
da882c1f
ED
2044 } else if (unlikely(sk_add_backlog(sk, skb,
2045 sk->sk_rcvbuf + sk->sk_sndbuf))) {
6b03a53a 2046 bh_unlock_sock(sk);
6cce09f8 2047 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
6b03a53a
ZY
2048 goto discard_and_relse;
2049 }
1da177e4
LT
2050 bh_unlock_sock(sk);
2051
2052 sock_put(sk);
2053
2054 return ret;
2055
2056no_tcp_socket:
2057 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
2058 goto discard_it;
2059
2060 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
6a5dc9e5
ED
2061csum_error:
2062 TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
1da177e4 2063bad_packet:
63231bdd 2064 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1da177e4 2065 } else {
cfb6eeb4 2066 tcp_v4_send_reset(NULL, skb);
1da177e4
LT
2067 }
2068
2069discard_it:
2070 /* Discard frame. */
2071 kfree_skb(skb);
e905a9ed 2072 return 0;
1da177e4
LT
2073
2074discard_and_relse:
2075 sock_put(sk);
2076 goto discard_it;
2077
2078do_time_wait:
2079 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
9469c7b4 2080 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
2081 goto discard_it;
2082 }
2083
6a5dc9e5 2084 if (skb->len < (th->doff << 2)) {
9469c7b4 2085 inet_twsk_put(inet_twsk(sk));
6a5dc9e5
ED
2086 goto bad_packet;
2087 }
2088 if (tcp_checksum_complete(skb)) {
2089 inet_twsk_put(inet_twsk(sk));
2090 goto csum_error;
1da177e4 2091 }
9469c7b4 2092 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1da177e4 2093 case TCP_TW_SYN: {
c346dca1 2094 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
c67499c0 2095 &tcp_hashinfo,
da5e3630 2096 iph->saddr, th->source,
eddc9ec5 2097 iph->daddr, th->dest,
463c84b9 2098 inet_iif(skb));
1da177e4 2099 if (sk2) {
9469c7b4
YH
2100 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
2101 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
2102 sk = sk2;
2103 goto process;
2104 }
2105 /* Fall through to ACK */
2106 }
2107 case TCP_TW_ACK:
2108 tcp_v4_timewait_ack(sk, skb);
2109 break;
2110 case TCP_TW_RST:
2111 goto no_tcp_socket;
2112 case TCP_TW_SUCCESS:;
2113 }
2114 goto discard_it;
2115}
2116
ccb7c410
DM
2117static struct timewait_sock_ops tcp_timewait_sock_ops = {
2118 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
2119 .twsk_unique = tcp_twsk_unique,
2120 .twsk_destructor= tcp_twsk_destructor,
ccb7c410 2121};
1da177e4 2122
63d02d15 2123void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
5d299f3d
ED
2124{
2125 struct dst_entry *dst = skb_dst(skb);
2126
2127 dst_hold(dst);
2128 sk->sk_rx_dst = dst;
2129 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
2130}
63d02d15 2131EXPORT_SYMBOL(inet_sk_rx_dst_set);
5d299f3d 2132
3b401a81 2133const struct inet_connection_sock_af_ops ipv4_specific = {
543d9cfe
ACM
2134 .queue_xmit = ip_queue_xmit,
2135 .send_check = tcp_v4_send_check,
2136 .rebuild_header = inet_sk_rebuild_header,
5d299f3d 2137 .sk_rx_dst_set = inet_sk_rx_dst_set,
543d9cfe
ACM
2138 .conn_request = tcp_v4_conn_request,
2139 .syn_recv_sock = tcp_v4_syn_recv_sock,
543d9cfe
ACM
2140 .net_header_len = sizeof(struct iphdr),
2141 .setsockopt = ip_setsockopt,
2142 .getsockopt = ip_getsockopt,
2143 .addr2sockaddr = inet_csk_addr2sockaddr,
2144 .sockaddr_len = sizeof(struct sockaddr_in),
ab1e0a13 2145 .bind_conflict = inet_csk_bind_conflict,
3fdadf7d 2146#ifdef CONFIG_COMPAT
543d9cfe
ACM
2147 .compat_setsockopt = compat_ip_setsockopt,
2148 .compat_getsockopt = compat_ip_getsockopt,
3fdadf7d 2149#endif
5f80f4d8 2150 .mtu_reduced = tcp_v4_mtu_reduced,
1da177e4 2151};
4bc2f18b 2152EXPORT_SYMBOL(ipv4_specific);
1da177e4 2153
cfb6eeb4 2154#ifdef CONFIG_TCP_MD5SIG
b2e4b3de 2155static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
cfb6eeb4 2156 .md5_lookup = tcp_v4_md5_lookup,
49a72dfb 2157 .calc_md5_hash = tcp_v4_md5_hash_skb,
cfb6eeb4 2158 .md5_parse = tcp_v4_parse_md5_keys,
cfb6eeb4 2159};
b6332e6c 2160#endif
cfb6eeb4 2161
1da177e4
LT
2162/* NOTE: A lot of things set to zero explicitly by call to
2163 * sk_alloc() so need not be done here.
2164 */
2165static int tcp_v4_init_sock(struct sock *sk)
2166{
6687e988 2167 struct inet_connection_sock *icsk = inet_csk(sk);
1da177e4 2168
900f65d3 2169 tcp_init_sock(sk);
6fa3eb70 2170 icsk->icsk_MMSRB = 0;
1da177e4 2171
8292a17a 2172 icsk->icsk_af_ops = &ipv4_specific;
900f65d3 2173
cfb6eeb4 2174#ifdef CONFIG_TCP_MD5SIG
ac807fa8 2175 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
cfb6eeb4 2176#endif
1da177e4 2177
1da177e4
LT
2178 return 0;
2179}
2180
7d06b2e0 2181void tcp_v4_destroy_sock(struct sock *sk)
1da177e4
LT
2182{
2183 struct tcp_sock *tp = tcp_sk(sk);
2184
2185 tcp_clear_xmit_timers(sk);
2186
6687e988 2187 tcp_cleanup_congestion_control(sk);
317a76f9 2188
1da177e4 2189 /* Cleanup up the write buffer. */
fe067e8a 2190 tcp_write_queue_purge(sk);
1da177e4
LT
2191
2192 /* Cleans up our, hopefully empty, out_of_order_queue. */
e905a9ed 2193 __skb_queue_purge(&tp->out_of_order_queue);
1da177e4 2194
cfb6eeb4
YH
2195#ifdef CONFIG_TCP_MD5SIG
2196 /* Clean up the MD5 key list, if any */
2197 if (tp->md5sig_info) {
a915da9b 2198 tcp_clear_md5_list(sk);
a8afca03 2199 kfree_rcu(tp->md5sig_info, rcu);
cfb6eeb4
YH
2200 tp->md5sig_info = NULL;
2201 }
2202#endif
2203
1a2449a8
CL
2204#ifdef CONFIG_NET_DMA
2205 /* Cleans up our sk_async_wait_queue */
e905a9ed 2206 __skb_queue_purge(&sk->sk_async_wait_queue);
1a2449a8
CL
2207#endif
2208
1da177e4
LT
2209 /* Clean prequeue, it must be empty really */
2210 __skb_queue_purge(&tp->ucopy.prequeue);
2211
2212 /* Clean up a referenced TCP bind bucket. */
463c84b9 2213 if (inet_csk(sk)->icsk_bind_hash)
ab1e0a13 2214 inet_put_port(sk);
1da177e4 2215
168a8f58 2216 BUG_ON(tp->fastopen_rsk != NULL);
435cf559 2217
cf60af03
YC
2218 /* If socket is aborted during connect operation */
2219 tcp_free_fastopen_req(tp);
2220
180d8cd9 2221 sk_sockets_allocated_dec(sk);
d1a4c0b3 2222 sock_release_memcg(sk);
1da177e4 2223}
2224EXPORT_SYMBOL(tcp_v4_destroy_sock);
2225
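/* Vendor-specific hook (the "[mmspb]" printk tag suggests a MediaTek
 * feature): walk the established hash and, for every socket owned by the
 * given uid, rearm the retransmit timer almost immediately, stretch
 * icsk_rto to 30 * sysctl_tcp_rto_min and mark the socket with icsk_MMSRB.
 */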
2226void tcp_v4_handle_retrans_time_by_uid(struct uid_err uid_e)
2227{
2228 unsigned int bucket;
2229 uid_t skuid = (uid_t)(uid_e.appuid);
2230 struct inet_connection_sock *icsk = NULL; /* set per socket inside the loop */
2231
2232
2233 for (bucket = 0; bucket < tcp_hashinfo.ehash_mask; bucket++) {
2234 struct hlist_nulls_node *node;
2235 struct sock *sk;
2236 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, bucket);
2237
2238 spin_lock_bh(lock);
2239 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[bucket].chain) {
2240
2241 if (sysctl_ip_dynaddr && sk->sk_state == TCP_SYN_SENT)
2242 continue;
2243 if (sock_flag(sk, SOCK_DEAD))
2244 continue;
2245
2246 if(sk->sk_socket){
2247 if(SOCK_INODE(sk->sk_socket)->i_uid != skuid)
2248 continue;
2249 else
2250 printk("[mmspb] tcp_v4_handle_retrans_time_by_uid socket uid(%d) match!",
2251 SOCK_INODE(sk->sk_socket)->i_uid);
2252 } else{
2253 continue;
2254 }
2255
2256 sock_hold(sk);
2257 spin_unlock_bh(lock);
2258
2259 local_bh_disable();
2260 bh_lock_sock(sk);
2261
2262 // update sk time out value
2263 icsk = inet_csk(sk);
2264 printk("[mmspb] tcp_v4_handle_retrans_time_by_uid update timer\n");
2265
2266 sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + 2);
2267 icsk->icsk_rto = sysctl_tcp_rto_min * 30;
2268 icsk->icsk_MMSRB = 1;
2269
2270 bh_unlock_sock(sk);
2271 local_bh_enable();
2272 spin_lock_bh(lock);
2273 sock_put(sk);
2274
2275 }
2276 spin_unlock_bh(lock);
2277 }
2278
2279}
2280
2281
2282/*
2283 * tcp_v4_reset_connections_by_uid - abort all sockets owned by the given uid
2284 */
2285void tcp_v4_reset_connections_by_uid(struct uid_err uid_e)
2286{
2287 unsigned int bucket;
2288 uid_t skuid = (uid_t)(uid_e.appuid);
2289
2290 for (bucket = 0; bucket < tcp_hashinfo.ehash_mask; bucket++) {
2291 struct hlist_nulls_node *node;
2292 struct sock *sk;
2293 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, bucket);
2294
2295restart:
2296 spin_lock_bh(lock);
2297 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[bucket].chain) {
2298
2299 if (sysctl_ip_dynaddr && sk->sk_state == TCP_SYN_SENT)
2300 continue;
2301 if (sock_flag(sk, SOCK_DEAD))
2302 continue;
2303
2304 if(sk->sk_socket){
2305 if(SOCK_INODE(sk->sk_socket)->i_uid != skuid)
2306 continue;
2307 else
2308 printk(KERN_INFO "SIOCKILLSOCK socket uid(%d) match!\n",
2309 SOCK_INODE(sk->sk_socket)->i_uid);
2310 } else{
2311 continue;
2312 }
2313
2314 sock_hold(sk);
2315 spin_unlock_bh(lock);
2316
2317 local_bh_disable();
2318 bh_lock_sock(sk);
2319 sk->sk_err = uid_e.errNum;
2320 printk(KERN_INFO "SIOCKILLSOCK set sk err == %d!! \n", sk->sk_err);
2321 sk->sk_error_report(sk);
2322
2323 tcp_done(sk);
2324 bh_unlock_sock(sk);
2325 local_bh_enable();
2326 sock_put(sk);
2327
2328 goto restart;
2329 }
2330 spin_unlock_bh(lock);
2331 }
2332}
2333
2334
1da177e4
LT
2335#ifdef CONFIG_PROC_FS
2336/* Proc filesystem TCP sock list dumping. */
2337
3ab5aee7 2338static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1da177e4 2339{
3ab5aee7 2340 return hlist_nulls_empty(head) ? NULL :
8feaf0c0 2341 list_entry(head->first, struct inet_timewait_sock, tw_node);
1da177e4
LT
2342}
2343
8feaf0c0 2344static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1da177e4 2345{
3ab5aee7
ED
2346 return !is_a_nulls(tw->tw_node.next) ?
2347 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1da177e4
LT
2348}
2349
a8b690f9
TH
2350/*
2351 * Get the next listening socket following cur. If cur is NULL, get the first socket
2352 * starting from bucket given in st->bucket; when st->bucket is zero the
2353 * very first socket in the hash table is returned.
2354 */
1da177e4
LT
2355static void *listening_get_next(struct seq_file *seq, void *cur)
2356{
463c84b9 2357 struct inet_connection_sock *icsk;
c25eb3bf 2358 struct hlist_nulls_node *node;
1da177e4 2359 struct sock *sk = cur;
5caea4ea 2360 struct inet_listen_hashbucket *ilb;
5799de0b 2361 struct tcp_iter_state *st = seq->private;
a4146b1b 2362 struct net *net = seq_file_net(seq);
1da177e4
LT
2363
2364 if (!sk) {
a8b690f9 2365 ilb = &tcp_hashinfo.listening_hash[st->bucket];
5caea4ea 2366 spin_lock_bh(&ilb->lock);
c25eb3bf 2367 sk = sk_nulls_head(&ilb->head);
a8b690f9 2368 st->offset = 0;
1da177e4
LT
2369 goto get_sk;
2370 }
5caea4ea 2371 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1da177e4 2372 ++st->num;
a8b690f9 2373 ++st->offset;
1da177e4
LT
2374
2375 if (st->state == TCP_SEQ_STATE_OPENREQ) {
60236fdd 2376 struct request_sock *req = cur;
1da177e4 2377
72a3effa 2378 icsk = inet_csk(st->syn_wait_sk);
1da177e4
LT
2379 req = req->dl_next;
2380 while (1) {
2381 while (req) {
bdccc4ca 2382 if (req->rsk_ops->family == st->family) {
1da177e4
LT
2383 cur = req;
2384 goto out;
2385 }
2386 req = req->dl_next;
2387 }
72a3effa 2388 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1da177e4
LT
2389 break;
2390get_req:
463c84b9 2391 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1da177e4 2392 }
1bde5ac4 2393 sk = sk_nulls_next(st->syn_wait_sk);
1da177e4 2394 st->state = TCP_SEQ_STATE_LISTENING;
463c84b9 2395 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2396 } else {
e905a9ed 2397 icsk = inet_csk(sk);
463c84b9
ACM
2398 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2399 if (reqsk_queue_len(&icsk->icsk_accept_queue))
1da177e4 2400 goto start_req;
463c84b9 2401 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1bde5ac4 2402 sk = sk_nulls_next(sk);
1da177e4
LT
2403 }
2404get_sk:
c25eb3bf 2405 sk_nulls_for_each_from(sk, node) {
8475ef9f
PE
2406 if (!net_eq(sock_net(sk), net))
2407 continue;
2408 if (sk->sk_family == st->family) {
1da177e4
LT
2409 cur = sk;
2410 goto out;
2411 }
e905a9ed 2412 icsk = inet_csk(sk);
463c84b9
ACM
2413 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2414 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1da177e4
LT
2415start_req:
2416 st->uid = sock_i_uid(sk);
2417 st->syn_wait_sk = sk;
2418 st->state = TCP_SEQ_STATE_OPENREQ;
2419 st->sbucket = 0;
2420 goto get_req;
2421 }
463c84b9 2422 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2423 }
5caea4ea 2424 spin_unlock_bh(&ilb->lock);
a8b690f9 2425 st->offset = 0;
0f7ff927 2426 if (++st->bucket < INET_LHTABLE_SIZE) {
5caea4ea
ED
2427 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2428 spin_lock_bh(&ilb->lock);
c25eb3bf 2429 sk = sk_nulls_head(&ilb->head);
1da177e4
LT
2430 goto get_sk;
2431 }
2432 cur = NULL;
2433out:
2434 return cur;
2435}
2436
2437static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2438{
a8b690f9
TH
2439 struct tcp_iter_state *st = seq->private;
2440 void *rc;
2441
2442 st->bucket = 0;
2443 st->offset = 0;
2444 rc = listening_get_next(seq, NULL);
1da177e4
LT
2445
2446 while (rc && *pos) {
2447 rc = listening_get_next(seq, rc);
2448 --*pos;
2449 }
2450 return rc;
2451}
2452
a2a385d6 2453static inline bool empty_bucket(struct tcp_iter_state *st)
6eac5604 2454{
3ab5aee7
ED
2455 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2456 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
6eac5604
AK
2457}
2458
a8b690f9
TH
2459/*
2460 * Get first established socket starting from bucket given in st->bucket.
2461 * If st->bucket is zero, the very first socket in the hash is returned.
2462 */
1da177e4
LT
2463static void *established_get_first(struct seq_file *seq)
2464{
5799de0b 2465 struct tcp_iter_state *st = seq->private;
a4146b1b 2466 struct net *net = seq_file_net(seq);
1da177e4
LT
2467 void *rc = NULL;
2468
a8b690f9
TH
2469 st->offset = 0;
2470 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1da177e4 2471 struct sock *sk;
3ab5aee7 2472 struct hlist_nulls_node *node;
8feaf0c0 2473 struct inet_timewait_sock *tw;
9db66bdc 2474 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1da177e4 2475
6eac5604
AK
2476 /* Lockless fast path for the common case of empty buckets */
2477 if (empty_bucket(st))
2478 continue;
2479
9db66bdc 2480 spin_lock_bh(lock);
3ab5aee7 2481 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
f40c8174 2482 if (sk->sk_family != st->family ||
878628fb 2483 !net_eq(sock_net(sk), net)) {
1da177e4
LT
2484 continue;
2485 }
2486 rc = sk;
2487 goto out;
2488 }
2489 st->state = TCP_SEQ_STATE_TIME_WAIT;
8feaf0c0 2490 inet_twsk_for_each(tw, node,
dbca9b27 2491 &tcp_hashinfo.ehash[st->bucket].twchain) {
28518fc1 2492 if (tw->tw_family != st->family ||
878628fb 2493 !net_eq(twsk_net(tw), net)) {
1da177e4
LT
2494 continue;
2495 }
2496 rc = tw;
2497 goto out;
2498 }
9db66bdc 2499 spin_unlock_bh(lock);
1da177e4
LT
2500 st->state = TCP_SEQ_STATE_ESTABLISHED;
2501 }
2502out:
2503 return rc;
2504}
2505
2506static void *established_get_next(struct seq_file *seq, void *cur)
2507{
2508 struct sock *sk = cur;
8feaf0c0 2509 struct inet_timewait_sock *tw;
3ab5aee7 2510 struct hlist_nulls_node *node;
5799de0b 2511 struct tcp_iter_state *st = seq->private;
a4146b1b 2512 struct net *net = seq_file_net(seq);
1da177e4
LT
2513
2514 ++st->num;
a8b690f9 2515 ++st->offset;
1da177e4
LT
2516
2517 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2518 tw = cur;
2519 tw = tw_next(tw);
2520get_tw:
878628fb 2521 while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
1da177e4
LT
2522 tw = tw_next(tw);
2523 }
2524 if (tw) {
2525 cur = tw;
2526 goto out;
2527 }
9db66bdc 2528 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2529 st->state = TCP_SEQ_STATE_ESTABLISHED;
2530
6eac5604 2531 /* Look for next non empty bucket */
a8b690f9 2532 st->offset = 0;
f373b53b 2533 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
6eac5604
AK
2534 empty_bucket(st))
2535 ;
f373b53b 2536 if (st->bucket > tcp_hashinfo.ehash_mask)
6eac5604
AK
2537 return NULL;
2538
9db66bdc 2539 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
3ab5aee7 2540 sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
1da177e4 2541 } else
3ab5aee7 2542 sk = sk_nulls_next(sk);
1da177e4 2543
3ab5aee7 2544 sk_nulls_for_each_from(sk, node) {
878628fb 2545 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
1da177e4
LT
2546 goto found;
2547 }
2548
2549 st->state = TCP_SEQ_STATE_TIME_WAIT;
dbca9b27 2550 tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
1da177e4
LT
2551 goto get_tw;
2552found:
2553 cur = sk;
2554out:
2555 return cur;
2556}
2557
2558static void *established_get_idx(struct seq_file *seq, loff_t pos)
2559{
a8b690f9
TH
2560 struct tcp_iter_state *st = seq->private;
2561 void *rc;
2562
2563 st->bucket = 0;
2564 rc = established_get_first(seq);
1da177e4
LT
2565
2566 while (rc && pos) {
2567 rc = established_get_next(seq, rc);
2568 --pos;
7174259e 2569 }
1da177e4
LT
2570 return rc;
2571}
2572
2573static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2574{
2575 void *rc;
5799de0b 2576 struct tcp_iter_state *st = seq->private;
1da177e4 2577
1da177e4
LT
2578 st->state = TCP_SEQ_STATE_LISTENING;
2579 rc = listening_get_idx(seq, &pos);
2580
2581 if (!rc) {
1da177e4
LT
2582 st->state = TCP_SEQ_STATE_ESTABLISHED;
2583 rc = established_get_idx(seq, pos);
2584 }
2585
2586 return rc;
2587}
2588
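/* Resume helper for the seq_file iterator: rather than re-walking every
 * bucket from the start on each read(), pick up again at the bucket and
 * in-bucket offset remembered in st->bucket / st->offset, restoring st->num
 * afterwards so the printed "sl" numbering stays continuous.
 */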
2589static void *tcp_seek_last_pos(struct seq_file *seq)
2590{
2591 struct tcp_iter_state *st = seq->private;
2592 int offset = st->offset;
2593 int orig_num = st->num;
2594 void *rc = NULL;
2595
2596 switch (st->state) {
2597 case TCP_SEQ_STATE_OPENREQ:
2598 case TCP_SEQ_STATE_LISTENING:
2599 if (st->bucket >= INET_LHTABLE_SIZE)
2600 break;
2601 st->state = TCP_SEQ_STATE_LISTENING;
2602 rc = listening_get_next(seq, NULL);
2603 while (offset-- && rc)
2604 rc = listening_get_next(seq, rc);
2605 if (rc)
2606 break;
2607 st->bucket = 0;
2608 /* Fallthrough */
2609 case TCP_SEQ_STATE_ESTABLISHED:
2610 case TCP_SEQ_STATE_TIME_WAIT:
2611 st->state = TCP_SEQ_STATE_ESTABLISHED;
2612 if (st->bucket > tcp_hashinfo.ehash_mask)
2613 break;
2614 rc = established_get_first(seq);
2615 while (offset-- && rc)
2616 rc = established_get_next(seq, rc);
2617 }
2618
2619 st->num = orig_num;
2620
2621 return rc;
2622}
2623
1da177e4
LT
2624static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2625{
5799de0b 2626 struct tcp_iter_state *st = seq->private;
a8b690f9
TH
2627 void *rc;
2628
2629 if (*pos && *pos == st->last_pos) {
2630 rc = tcp_seek_last_pos(seq);
2631 if (rc)
2632 goto out;
2633 }
2634
1da177e4
LT
2635 st->state = TCP_SEQ_STATE_LISTENING;
2636 st->num = 0;
a8b690f9
TH
2637 st->bucket = 0;
2638 st->offset = 0;
2639 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2640
2641out:
2642 st->last_pos = *pos;
2643 return rc;
1da177e4
LT
2644}
2645
2646static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2647{
a8b690f9 2648 struct tcp_iter_state *st = seq->private;
1da177e4 2649 void *rc = NULL;
1da177e4
LT
2650
2651 if (v == SEQ_START_TOKEN) {
2652 rc = tcp_get_idx(seq, 0);
2653 goto out;
2654 }
1da177e4
LT
2655
2656 switch (st->state) {
2657 case TCP_SEQ_STATE_OPENREQ:
2658 case TCP_SEQ_STATE_LISTENING:
2659 rc = listening_get_next(seq, v);
2660 if (!rc) {
1da177e4 2661 st->state = TCP_SEQ_STATE_ESTABLISHED;
a8b690f9
TH
2662 st->bucket = 0;
2663 st->offset = 0;
1da177e4
LT
2664 rc = established_get_first(seq);
2665 }
2666 break;
2667 case TCP_SEQ_STATE_ESTABLISHED:
2668 case TCP_SEQ_STATE_TIME_WAIT:
2669 rc = established_get_next(seq, v);
2670 break;
2671 }
2672out:
2673 ++*pos;
a8b690f9 2674 st->last_pos = *pos;
1da177e4
LT
2675 return rc;
2676}
2677
2678static void tcp_seq_stop(struct seq_file *seq, void *v)
2679{
5799de0b 2680 struct tcp_iter_state *st = seq->private;
1da177e4
LT
2681
2682 switch (st->state) {
2683 case TCP_SEQ_STATE_OPENREQ:
2684 if (v) {
463c84b9
ACM
2685 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2686 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4
LT
2687 }
2688 case TCP_SEQ_STATE_LISTENING:
2689 if (v != SEQ_START_TOKEN)
5caea4ea 2690 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
1da177e4
LT
2691 break;
2692 case TCP_SEQ_STATE_TIME_WAIT:
2693 case TCP_SEQ_STATE_ESTABLISHED:
2694 if (v)
9db66bdc 2695 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2696 break;
2697 }
2698}
2699
73cb88ec 2700int tcp_seq_open(struct inode *inode, struct file *file)
1da177e4 2701{
d9dda78b 2702 struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
1da177e4 2703 struct tcp_iter_state *s;
52d6f3f1 2704 int err;
1da177e4 2705
52d6f3f1
DL
2706 err = seq_open_net(inode, file, &afinfo->seq_ops,
2707 sizeof(struct tcp_iter_state));
2708 if (err < 0)
2709 return err;
f40c8174 2710
52d6f3f1 2711 s = ((struct seq_file *)file->private_data)->private;
1da177e4 2712 s->family = afinfo->family;
a8b690f9 2713 s->last_pos = 0;
f40c8174
DL
2714 return 0;
2715}
73cb88ec 2716EXPORT_SYMBOL(tcp_seq_open);
f40c8174 2717
6f8b13bc 2718int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4
LT
2719{
2720 int rc = 0;
2721 struct proc_dir_entry *p;
2722
9427c4b3
DL
2723 afinfo->seq_ops.start = tcp_seq_start;
2724 afinfo->seq_ops.next = tcp_seq_next;
2725 afinfo->seq_ops.stop = tcp_seq_stop;
2726
84841c3c 2727 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
73cb88ec 2728 afinfo->seq_fops, afinfo);
84841c3c 2729 if (!p)
1da177e4
LT
2730 rc = -ENOMEM;
2731 return rc;
2732}
4bc2f18b 2733EXPORT_SYMBOL(tcp_proc_register);
1da177e4 2734
6f8b13bc 2735void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4 2736{
ece31ffd 2737 remove_proc_entry(afinfo->name, net->proc_net);
1da177e4 2738}
4bc2f18b 2739EXPORT_SYMBOL(tcp_proc_unregister);
1da177e4 2740
cf533ea5 2741static void get_openreq4(const struct sock *sk, const struct request_sock *req,
a7cb5a49 2742 struct seq_file *f, int i, kuid_t uid, int *len)
1da177e4 2743{
2e6599cb 2744 const struct inet_request_sock *ireq = inet_rsk(req);
a399a805 2745 long delta = req->expires - jiffies;
1da177e4 2746
5e659e4c 2747 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
71338aa7 2748 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
1da177e4 2749 i,
2e6599cb 2750 ireq->loc_addr,
c720c7e8 2751 ntohs(inet_sk(sk)->inet_sport),
2e6599cb
ACM
2752 ireq->rmt_addr,
2753 ntohs(ireq->rmt_port),
1da177e4
LT
2754 TCP_SYN_RECV,
2755 0, 0, /* could print option size, but that is af dependent. */
2756 1, /* timers active (only the expire timer) */
a399a805 2757 jiffies_delta_to_clock_t(delta),
e6c022a4 2758 req->num_timeout,
a7cb5a49 2759 from_kuid_munged(seq_user_ns(f), uid),
1da177e4
LT
2760 0, /* non standard timer */
2761 0, /* open_requests have no inode */
2762 atomic_read(&sk->sk_refcnt),
5e659e4c
PE
2763 req,
2764 len);
1da177e4
LT
2765}
2766
5e659e4c 2767static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
1da177e4
LT
2768{
2769 int timer_active;
2770 unsigned long timer_expires;
cf533ea5 2771 const struct tcp_sock *tp = tcp_sk(sk);
cf4c6bf8 2772 const struct inet_connection_sock *icsk = inet_csk(sk);
cf533ea5 2773 const struct inet_sock *inet = inet_sk(sk);
168a8f58 2774 struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
c720c7e8
ED
2775 __be32 dest = inet->inet_daddr;
2776 __be32 src = inet->inet_rcv_saddr;
2777 __u16 destp = ntohs(inet->inet_dport);
2778 __u16 srcp = ntohs(inet->inet_sport);
49d09007 2779 int rx_queue;
1da177e4 2780
6ba8a3b1
ND
2781 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2782 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2783 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1da177e4 2784 timer_active = 1;
463c84b9
ACM
2785 timer_expires = icsk->icsk_timeout;
2786 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1da177e4 2787 timer_active = 4;
463c84b9 2788 timer_expires = icsk->icsk_timeout;
cf4c6bf8 2789 } else if (timer_pending(&sk->sk_timer)) {
1da177e4 2790 timer_active = 2;
cf4c6bf8 2791 timer_expires = sk->sk_timer.expires;
1da177e4
LT
2792 } else {
2793 timer_active = 0;
2794 timer_expires = jiffies;
2795 }
2796
49d09007
ED
2797 if (sk->sk_state == TCP_LISTEN)
2798 rx_queue = sk->sk_ack_backlog;
2799 else
2800 /*
2801 * because we don't lock the socket, we might find a transient negative value
2802 */
2803 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2804
5e659e4c 2805 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
71338aa7 2806 "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
cf4c6bf8 2807 i, src, srcp, dest, destp, sk->sk_state,
47da8ee6 2808 tp->write_seq - tp->snd_una,
49d09007 2809 rx_queue,
1da177e4 2810 timer_active,
a399a805 2811 jiffies_delta_to_clock_t(timer_expires - jiffies),
463c84b9 2812 icsk->icsk_retransmits,
a7cb5a49 2813 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
6687e988 2814 icsk->icsk_probes_out,
cf4c6bf8
IJ
2815 sock_i_ino(sk),
2816 atomic_read(&sk->sk_refcnt), sk,
7be87351
SH
2817 jiffies_to_clock_t(icsk->icsk_rto),
2818 jiffies_to_clock_t(icsk->icsk_ack.ato),
463c84b9 2819 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1da177e4 2820 tp->snd_cwnd,
168a8f58
JC
2821 sk->sk_state == TCP_LISTEN ?
2822 (fastopenq ? fastopenq->max_qlen : 0) :
2823 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh),
5e659e4c 2824 len);
1da177e4
LT
2825}
2826
cf533ea5 2827static void get_timewait4_sock(const struct inet_timewait_sock *tw,
5e659e4c 2828 struct seq_file *f, int i, int *len)
1da177e4 2829{
23f33c2d 2830 __be32 dest, src;
1da177e4 2831 __u16 destp, srcp;
a399a805 2832 long delta = tw->tw_ttd - jiffies;
1da177e4
LT
2833
2834 dest = tw->tw_daddr;
2835 src = tw->tw_rcv_saddr;
2836 destp = ntohs(tw->tw_dport);
2837 srcp = ntohs(tw->tw_sport);
2838
5e659e4c 2839 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
71338aa7 2840 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
1da177e4 2841 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
a399a805 2842 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
5e659e4c 2843 atomic_read(&tw->tw_refcnt), tw, len);
1da177e4
LT
2844}
2845
2846#define TMPSZ 150
2847
2848static int tcp4_seq_show(struct seq_file *seq, void *v)
2849{
5799de0b 2850 struct tcp_iter_state *st;
5e659e4c 2851 int len;
1da177e4
LT
2852
2853 if (v == SEQ_START_TOKEN) {
2854 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2855 " sl local_address rem_address st tx_queue "
2856 "rx_queue tr tm->when retrnsmt uid timeout "
2857 "inode");
2858 goto out;
2859 }
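	/* Illustrative row for a listening socket on port 22 (0x0016); all
	 * values are made up, see get_tcp4_sock() below for the real columns:
	 *
	 *    0: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 12345 1 ffff880012345678 100 0 0 10 0
	 */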
2860 st = seq->private;
2861
2862 switch (st->state) {
2863 case TCP_SEQ_STATE_LISTENING:
2864 case TCP_SEQ_STATE_ESTABLISHED:
5e659e4c 2865 get_tcp4_sock(v, seq, st->num, &len);
1da177e4
LT
2866 break;
2867 case TCP_SEQ_STATE_OPENREQ:
5e659e4c 2868 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
1da177e4
LT
2869 break;
2870 case TCP_SEQ_STATE_TIME_WAIT:
5e659e4c 2871 get_timewait4_sock(v, seq, st->num, &len);
1da177e4
LT
2872 break;
2873 }
5e659e4c 2874 seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
1da177e4
LT
2875out:
2876 return 0;
2877}
2878
73cb88ec
AV
2879static const struct file_operations tcp_afinfo_seq_fops = {
2880 .owner = THIS_MODULE,
2881 .open = tcp_seq_open,
2882 .read = seq_read,
2883 .llseek = seq_lseek,
2884 .release = seq_release_net
2885};
2886
1da177e4 2887static struct tcp_seq_afinfo tcp4_seq_afinfo = {
1da177e4
LT
2888 .name = "tcp",
2889 .family = AF_INET,
73cb88ec 2890 .seq_fops = &tcp_afinfo_seq_fops,
9427c4b3
DL
2891 .seq_ops = {
2892 .show = tcp4_seq_show,
2893 },
1da177e4
LT
2894};
2895
2c8c1e72 2896static int __net_init tcp4_proc_init_net(struct net *net)
757764f6
PE
2897{
2898 return tcp_proc_register(net, &tcp4_seq_afinfo);
2899}
2900
2c8c1e72 2901static void __net_exit tcp4_proc_exit_net(struct net *net)
757764f6
PE
2902{
2903 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2904}
2905
2906static struct pernet_operations tcp4_net_ops = {
2907 .init = tcp4_proc_init_net,
2908 .exit = tcp4_proc_exit_net,
2909};
2910
1da177e4
LT
2911int __init tcp4_proc_init(void)
2912{
757764f6 2913 return register_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2914}
2915
2916void tcp4_proc_exit(void)
2917{
757764f6 2918 unregister_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2919}
2920#endif /* CONFIG_PROC_FS */
2921
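/* GRO entry point for IPv4 TCP: verify the checksum before aggregation.
 * CHECKSUM_COMPLETE frames are checked against the pseudo-header,
 * CHECKSUM_NONE frames are summed in software over the GRO window, and any
 * failure sets the flush flag so the segment bypasses aggregation; only then
 * is it handed to the generic tcp_gro_receive().
 */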
2922struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2923{
b71d1d42 2924 const struct iphdr *iph = skb_gro_network_header(skb);
861b6501
ED
2925 __wsum wsum;
2926 __sum16 sum;
bf296b12
HX
2927
2928 switch (skb->ip_summed) {
2929 case CHECKSUM_COMPLETE:
86911732 2930 if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
bf296b12
HX
2931 skb->csum)) {
2932 skb->ip_summed = CHECKSUM_UNNECESSARY;
2933 break;
2934 }
861b6501 2935flush:
bf296b12
HX
2936 NAPI_GRO_CB(skb)->flush = 1;
2937 return NULL;
861b6501
ED
2938
2939 case CHECKSUM_NONE:
2940 wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
2941 skb_gro_len(skb), IPPROTO_TCP, 0);
2942 sum = csum_fold(skb_checksum(skb,
2943 skb_gro_offset(skb),
2944 skb_gro_len(skb),
2945 wsum));
2946 if (sum)
2947 goto flush;
2948
2949 skb->ip_summed = CHECKSUM_UNNECESSARY;
2950 break;
bf296b12
HX
2951 }
2952
2953 return tcp_gro_receive(head, skb);
2954}
bf296b12
HX
2955
2956int tcp4_gro_complete(struct sk_buff *skb)
2957{
b71d1d42 2958 const struct iphdr *iph = ip_hdr(skb);
bf296b12
HX
2959 struct tcphdr *th = tcp_hdr(skb);
2960
2961 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2962 iph->saddr, iph->daddr, 0);
2963 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2964
2965 return tcp_gro_complete(skb);
2966}
bf296b12 2967
1da177e4
LT
2968struct proto tcp_prot = {
2969 .name = "TCP",
2970 .owner = THIS_MODULE,
2971 .close = tcp_close,
2972 .connect = tcp_v4_connect,
2973 .disconnect = tcp_disconnect,
463c84b9 2974 .accept = inet_csk_accept,
1da177e4
LT
2975 .ioctl = tcp_ioctl,
2976 .init = tcp_v4_init_sock,
2977 .destroy = tcp_v4_destroy_sock,
2978 .shutdown = tcp_shutdown,
2979 .setsockopt = tcp_setsockopt,
2980 .getsockopt = tcp_getsockopt,
1da177e4 2981 .recvmsg = tcp_recvmsg,
7ba42910
CG
2982 .sendmsg = tcp_sendmsg,
2983 .sendpage = tcp_sendpage,
1da177e4 2984 .backlog_rcv = tcp_v4_do_rcv,
46d3ceab 2985 .release_cb = tcp_release_cb,
ab1e0a13
ACM
2986 .hash = inet_hash,
2987 .unhash = inet_unhash,
2988 .get_port = inet_csk_get_port,
1da177e4
LT
2989 .enter_memory_pressure = tcp_enter_memory_pressure,
2990 .sockets_allocated = &tcp_sockets_allocated,
0a5578cf 2991 .orphan_count = &tcp_orphan_count,
1da177e4
LT
2992 .memory_allocated = &tcp_memory_allocated,
2993 .memory_pressure = &tcp_memory_pressure,
1da177e4
LT
2994 .sysctl_wmem = sysctl_tcp_wmem,
2995 .sysctl_rmem = sysctl_tcp_rmem,
2996 .max_header = MAX_TCP_HEADER,
2997 .obj_size = sizeof(struct tcp_sock),
3ab5aee7 2998 .slab_flags = SLAB_DESTROY_BY_RCU,
6d6ee43e 2999 .twsk_prot = &tcp_timewait_sock_ops,
60236fdd 3000 .rsk_prot = &tcp_request_sock_ops,
39d8cda7 3001 .h.hashinfo = &tcp_hashinfo,
7ba42910 3002 .no_autobind = true,
543d9cfe
ACM
3003#ifdef CONFIG_COMPAT
3004 .compat_setsockopt = compat_tcp_setsockopt,
3005 .compat_getsockopt = compat_tcp_getsockopt,
3006#endif
c255a458 3007#ifdef CONFIG_MEMCG_KMEM
d1a4c0b3
GC
3008 .init_cgroup = tcp_init_cgroup,
3009 .destroy_cgroup = tcp_destroy_cgroup,
3010 .proto_cgroup = tcp_proto_cgroup,
3011#endif
1da177e4 3012};
4bc2f18b 3013EXPORT_SYMBOL(tcp_prot);
1da177e4 3014
6bed3166
ED
3015static void __net_exit tcp_sk_exit(struct net *net)
3016{
3017 int cpu;
3018
3019 for_each_possible_cpu(cpu)
3020 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
3021 free_percpu(net->ipv4.tcp_sk);
3022}
3023
046ee902
DL
3024static int __net_init tcp_sk_init(struct net *net)
3025{
6bed3166
ED
3026 int res, cpu;
3027
3028 net->ipv4.tcp_sk = alloc_percpu(struct sock *);
3029 if (!net->ipv4.tcp_sk)
3030 return -ENOMEM;
3031
3032 for_each_possible_cpu(cpu) {
3033 struct sock *sk;
3034
3035 res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
3036 IPPROTO_TCP, net);
3037 if (res)
3038 goto fail;
3039 *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
3040 }
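	/* tcp_ecn == 2: negotiate ECN when the peer requests it on an incoming
	 * connection, but do not request it on outgoing connections (the
	 * default described in Documentation/networking/ip-sysctl.txt).
	 */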
5d134f1c 3041 net->ipv4.sysctl_tcp_ecn = 2;
be9f4a44 3042 return 0;
046ee902 3043
6bed3166
ED
3044fail:
3045 tcp_sk_exit(net);
3046
3047 return res;
b099ce26
EB
3048}
3049
3050static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
3051{
3052 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
046ee902
DL
3053}
3054
3055static struct pernet_operations __net_initdata tcp_sk_ops = {
b099ce26
EB
3056 .init = tcp_sk_init,
3057 .exit = tcp_sk_exit,
3058 .exit_batch = tcp_sk_exit_batch,
046ee902
DL
3059};
3060
9b0f976f 3061void __init tcp_v4_init(void)
1da177e4 3062{
5caea4ea 3063 inet_hashinfo_init(&tcp_hashinfo);
6a1b3054 3064 if (register_pernet_subsys(&tcp_sk_ops))
1da177e4 3065 panic("Failed to create the TCP control socket.\n");
1da177e4 3066}