/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *	David S. Miller		:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *	David S. Miller		:	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *	Andi Kleen		:	Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *	Andi Kleen		:	Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *	Mike McLagan		:	Routing by source
 *	Juan Jose Ciarlante	:	ip_dynaddr bits
 *	Andi Kleen		:	various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);


#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

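/*
 * Worked example (ours, not from the original source): if the old
 * incarnation's TIME-WAIT bucket recorded tw_snd_nxt == 1000, the new
 * socket above starts at write_seq = 1000 + 65535 + 2 = 66537, i.e.
 * past anything the peer could still accept inside the old 64K window,
 * so the two incarnations' sequence spaces cannot overlap.
 */
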
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk, true);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq	   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
		tcp_fetch_timewait_stamp(sk, &rt->dst);

	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket.  */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	if (!tp->write_seq && likely(!tp->repair))
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);

	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

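/*
 * Illustrative sketch (ours, not part of this file): tcp_v4_connect()
 * is the kernel half of a userspace connect() on an AF_INET stream
 * socket. A minimal caller, with an example address, would look like:
 *
 *	#include <arpa/inet.h>
 *	#include <sys/socket.h>
 *	#include <unistd.h>
 *
 *	int open_tcp(void)
 *	{
 *		struct sockaddr_in dst = { .sin_family = AF_INET,
 *					   .sin_port   = htons(80) };
 *		int fd = socket(AF_INET, SOCK_STREAM, 0);
 *
 *		if (fd < 0)
 *			return -1;
 *		inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);
 *		if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0) {
 *			close(fd);
 *			return -1;
 *		}
 *		return fd;
 *	}
 */
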
/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 * It can be called through tcp_release_cb() if socket was owned by user
 * at the time tcp_v4_err() was called to handle ICMP message.
 */
void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;
	mtu = tcp_sk(sk)->mtu_info;
	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
EXPORT_SYMBOL(tcp_v4_mtu_reduced);

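/*
 * Illustrative sketch (ours): the inet->pmtudisc check above maps to the
 * Linux-specific IP_MTU_DISCOVER socket option. A userspace caller can
 * opt a socket out of path MTU discovery roughly like this:
 *
 *	#include <netinet/in.h>
 *	#include <sys/socket.h>
 *
 *	static int disable_pmtud(int fd)
 *	{
 *		int val = IP_PMTUDISC_DONT;	// never set the DF bit
 *
 *		return setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER,
 *				  &val, sizeof(val));
 *	}
 */
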
static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);

	if (dst)
		dst->ops->redirect(dst, sk, skb);
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	struct request_sock *req;
	__u32 seq;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	if (icmp_skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of PMTU discovery (RFC1191) special case :
	 * we can receive locally generated ICMP messages while socket is held.
	 */
	if (sock_owned_by_user(sk)) {
		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
			NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
	}
	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	req = tp->fastopen_rsk;
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt) &&
	    (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
		/* For a Fast Open socket, allow seq to be snt_isn. */
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		if (!sock_owned_by_user(sk))
			do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			/* We are not interested in TCP_LISTEN and open_requests
			 * (SYN-ACKs sent out by Linux are always <576bytes so
			 * they should go through unfragmented).
			 */
			if (sk->sk_state == TCP_LISTEN)
				goto out;

			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff)
			break;

		/* XXX (TFO) - revisit the following logic for TFO */

		if (sock_owned_by_user(sk))
			break;

		icsk->icsk_backoff--;
		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				tcp_time_stamp - TCP_SKB_CB(skb)->when);

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	/* XXX (TFO) - if it's a TFO socket and has been accepted, rather
	 * than following the TCP_SYN_RECV case and closing the socket,
	 * we ignore the ICMP error and keep trying like a fully established
	 * socket. Is this the right thing to do?
	 */
	if (req && req->sk == NULL)
		goto out;

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can f.e. if SYNs crossed,
			       or Fast Open.
			     */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even these two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

static void __tcp_v4_send_check(struct sk_buff *skb,
				__be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

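/*
 * Illustrative sketch (ours): when the device cannot offload (the else
 * branch above), the value written to th->check is the standard RFC 1071
 * ones'-complement sum over the pseudo-header plus segment. Written out
 * in plain C, that computation is roughly:
 *
 *	static unsigned short csum_fold_example(const unsigned char *data,
 *						unsigned long len,
 *						unsigned long sum)
 *	{
 *		while (len > 1) {
 *			sum += (data[0] << 8) | data[1];	// 16-bit words
 *			data += 2;
 *			len -= 2;
 *		}
 *		if (len)				// odd trailing byte
 *			sum += data[0] << 8;
 *		while (sum >> 16)			// fold carries back in
 *			sum = (sum & 0xffff) + (sum >> 16);
 *		return (unsigned short)~sum;		// ones' complement
 *	}
 */
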
int tcp_v4_gso_send_check(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	th->check = 0;
	skb->ip_summed = CHECKSUM_PARTIAL;
	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
	return 0;
}

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So that we build reply only basing on parameters
 *		arrived with segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	hash_location = tcp_parse_md5sig_option(th);
	if (!sk && hash_location) {
		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we do not loosen security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(net,
					     &tcp_hashinfo, ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			return;
		rcu_read_lock();
		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto release_sk1;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto release_sk1;
	} else {
		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					     &ip_hdr(skb)->saddr,
					     AF_INET) : NULL;
	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. No choice here, if we choose to force
	 * input interface, we will misroute in case of asymmetric route.
	 */
	if (sk)
		arg.bound_dev_if = sk->sk_bound_dev_if;

	arg.tos = ip_hdr(skb)->tos;
	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
			      skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);

#ifdef CONFIG_TCP_MD5SIG
release_sk1:
	if (sk1) {
		rcu_read_unlock();
		sock_put(sk1);
	}
#endif
}

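/*
 * Worked example (ours): for the non-ACK branch above, a SYN with
 * seq = 100, a 20-byte header (doff = 5) and 10 payload bytes gives
 * skb->len - (th->doff << 2) = 10, so the RST carries
 * ack_seq = 100 + 1 (syn) + 0 (fin) + 10 = 111, exactly one past the
 * last sequence number the offending segment occupied.
 */
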
/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(const struct sock *sk, struct sk_buff *skb,
			    u32 seq, u32 ack,
			    u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = sock_net(sk);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (tsecr) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tsval);
		rep.opt[2] = htonl(tsecr);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (tsecr) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
			      skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}

static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	tcp_v4_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			req->rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
					  AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct request_sock *req,
			      u16 queue_mapping,
			      bool nocache)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, NULL);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);

		skb_set_queue_mapping(skb, queue_mapping);
		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
		if (!tcp_rsk(req)->snt_synack && !err)
			tcp_rsk(req)->snt_synack = tcp_time_stamp;
	}

	return err;
}

static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
{
	int res = tcp_v4_send_synack(sk, NULL, req, 0, false);

	if (!res)
		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
	return res;
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

/*
 * Return true if a syncookie should be sent
 */
bool tcp_syn_flood_action(struct sock *sk,
			  const struct sk_buff *skb,
			  const char *proto)
{
	const char *msg = "Dropping request";
	bool want_cookie = false;
	struct listen_sock *lopt;

#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies) {
		msg = "Sending cookies";
		want_cookie = true;
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
	} else
#endif
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);

	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
	if (!lopt->synflood_warned) {
		lopt->synflood_warned = 1;
		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
			proto, ntohs(tcp_hdr(skb)->dest), msg);
	}
	return want_cookie;
}
EXPORT_SYMBOL(tcp_syn_flood_action);

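/*
 * Illustrative sketch (ours): sysctl_tcp_syncookies, consulted above, is
 * exposed through procfs. A userspace helper could enable cookies with:
 *
 *	#include <stdio.h>
 *
 *	static int enable_syncookies(void)
 *	{
 *		FILE *f = fopen("/proc/sys/net/ipv4/tcp_syncookies", "w");
 *
 *		if (!f)
 *			return -1;
 *		fputs("1", f);	// cookies only when the SYN queue overflows
 *		return fclose(f);
 *	}
 */
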
/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
{
	const struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options_rcu *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = sizeof(*dopt) + opt->optlen;

		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(&dopt->opt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	unsigned int size = sizeof(struct in_addr);
	struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       sock_owned_by_user(sk) ||
				       lockdep_is_held(&sk->sk_lock.slock));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size))
			return key;
	}
	return NULL;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}

/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk) ||
					   lockdep_is_held(&sk->sk_lock.slock));
	if (!md5sig) {
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);

int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (!key)
		return -ENOENT;
	hlist_del_rcu(&key->node);
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk));
	if (hlist_empty(&md5sig->head))
		tcp_free_md5sig_pool();
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);

static void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *n;
	struct tcp_md5sig_info *md5sig;

	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	if (!hlist_empty(&md5sig->head))
		tcp_free_md5sig_pool();
	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				      AF_INET);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
			      GFP_KERNEL);
}

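/*
 * Illustrative sketch (ours): the handler above backs the TCP_MD5SIG
 * socket option. A userspace peer would install an RFC 2385 key for a
 * given remote address roughly like this (key value is an example):
 *
 *	#include <linux/tcp.h>
 *	#include <netinet/in.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *
 *	static int install_md5_key(int fd, const struct sockaddr_in *peer)
 *	{
 *		struct tcp_md5sig md5;
 *		static const char key[] = "example-secret";	// shared secret
 *
 *		memset(&md5, 0, sizeof(md5));
 *		memcpy(&md5.tcpm_addr, peer, sizeof(*peer));
 *		md5.tcpm_keylen = sizeof(key) - 1;
 *		memcpy(md5.tcpm_key, key, md5.tcpm_keylen);
 *		return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG,
 *				  &md5, sizeof(md5));
 *	}
 */
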
static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			const struct sock *sk, const struct request_sock *req,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->inet_saddr;
		daddr = inet_sk(sk)->inet_daddr;
	} else if (req) {
		saddr = inet_rsk(req)->loc_addr;
		daddr = inet_rsk(req)->rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				     &iph->saddr, ntohs(th->source),
				     &iph->daddr, ntohs(th->dest),
				     genhash ? " tcp_v4_calc_md5_hash failed"
					     : "");
		return true;
	}
	return false;
}

#endif

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
};
#endif

static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
			       struct request_sock *req,
			       struct tcp_fastopen_cookie *foc,
			       struct tcp_fastopen_cookie *valid_foc)
{
	bool skip_cookie = false;
	struct fastopen_queue *fastopenq;

	if (likely(!fastopen_cookie_present(foc))) {
		/* See include/net/tcp.h for the meaning of these knobs */
		if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
		    ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
		    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
			skip_cookie = true; /* no cookie to validate */
		else
			return false;
	}
	fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
	/* A FO option is present; bump the counter. */
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);

	/* Make sure the listener has enabled fastopen, and we don't
	 * exceed the max # of pending TFO requests allowed before trying
	 * to validate the cookie, in order to avoid burning CPU cycles
	 * unnecessarily.
	 *
	 * XXX (TFO) - The implication of checking the max_qlen before
	 * processing a cookie request is that clients can't differentiate
	 * between qlen overflow causing Fast Open to be disabled
	 * temporarily vs a server not supporting Fast Open at all.
	 */
	if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
	    fastopenq == NULL || fastopenq->max_qlen == 0)
		return false;

	if (fastopenq->qlen >= fastopenq->max_qlen) {
		struct request_sock *req1;
		spin_lock(&fastopenq->lock);
		req1 = fastopenq->rskq_rst_head;
		if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
			spin_unlock(&fastopenq->lock);
			NET_INC_STATS_BH(sock_net(sk),
					 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
			/* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
			foc->len = -1;
			return false;
		}
		fastopenq->rskq_rst_head = req1->dl_next;
		fastopenq->qlen--;
		spin_unlock(&fastopenq->lock);
		reqsk_free(req1);
	}
	if (skip_cookie) {
		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		return true;
	}
	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
		if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
			tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
			if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
			    memcmp(&foc->val[0], &valid_foc->val[0],
				   TCP_FASTOPEN_COOKIE_SIZE) != 0)
				return false;
			valid_foc->len = -1;
		}
		/* Acknowledge the data received from the peer. */
		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		return true;
	} else if (foc->len == 0) { /* Client requesting a cookie */
		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
		NET_INC_STATS_BH(sock_net(sk),
				 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
	} else {
		/* Client sent a cookie with wrong size. Treat it
		 * the same as invalid and return a valid one.
		 */
		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
	}
	return false;
}

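/*
 * Illustrative sketch (ours): the cookie exchange above is driven from
 * userspace with TCP_FASTOPEN on the listener and MSG_FASTOPEN on the
 * client (error handling and address setup omitted):
 *
 *	// server side: allow a queue of pending TFO requests
 *	int qlen = 16;
 *	setsockopt(lfd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen));
 *
 *	// client side: data rides in the SYN; connect() is implicit
 *	sendto(fd, buf, len, MSG_FASTOPEN,
 *	       (struct sockaddr *)&dst, sizeof(dst));
 */
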
static int tcp_v4_conn_req_fastopen(struct sock *sk,
				    struct sk_buff *skb,
				    struct sk_buff *skb_synack,
				    struct request_sock *req)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct sock *child;
	int err;

	req->num_retrans = 0;
	req->num_timeout = 0;
	req->sk = NULL;

	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
	if (child == NULL) {
		NET_INC_STATS_BH(sock_net(sk),
				 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
		kfree_skb(skb_synack);
		return -1;
	}
	err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
				    ireq->rmt_addr, ireq->opt);
	err = net_xmit_eval(err);
	if (!err)
		tcp_rsk(req)->snt_synack = tcp_time_stamp;
	/* XXX (TFO) - is it ok to ignore error and continue? */

	spin_lock(&queue->fastopenq->lock);
	queue->fastopenq->qlen++;
	spin_unlock(&queue->fastopenq->lock);

	/* Initialize the child socket. Have to fix some values to take
	 * into account the child is a Fast Open socket and is created
	 * only out of the bits carried in the SYN packet.
	 */
	tp = tcp_sk(child);

	tp->fastopen_rsk = req;
	/* Do a hold on the listener sk so that if the listener is being
	 * closed, the child that has been accepted can live on and still
	 * access listen_lock.
	 */
	sock_hold(sk);
	tcp_rsk(req)->listener = sk;

	/* RFC1323: The window in SYN & SYN/ACK segments is never
	 * scaled. So correct it appropriately.
	 */
	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
	tp->max_window = tp->snd_wnd;

	/* Activate the retrans timer so that SYNACK can be retransmitted.
	 * The request socket is not added to the SYN table of the parent
	 * because it's been added to the accept queue directly.
	 */
	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
				  TCP_TIMEOUT_INIT, TCP_RTO_MAX);

	/* Add the child socket directly into the accept queue */
	inet_csk_reqsk_queue_add(sk, req, child);

	/* Now finish processing the fastopen child socket. */
	inet_csk(child)->icsk_af_ops->rebuild_header(child);
	tcp_init_congestion_control(child);
	tcp_mtup_init(child);
	tcp_init_buffer_space(child);
	tcp_init_metrics(child);

	/* Queue the data carried in the SYN packet. We need to first
	 * bump skb's refcnt because the caller will attempt to free it.
	 *
	 * XXX (TFO) - we honor a zero-payload TFO request for now.
	 * (Any reason not to?)
	 */
	if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
		/* Don't queue the skb if there is no payload in SYN.
		 * XXX (TFO) - How about SYN+FIN?
		 */
		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
	} else {
		skb = skb_get(skb);
		skb_dst_drop(skb);
		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
		skb_set_owner_r(skb, child);
		__skb_queue_tail(&child->sk_receive_queue, skb);
		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		tp->syn_data_acked = 1;
		tp->bytes_received = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq - 1;
	}
	sk->sk_data_ready(sk);
	bh_unlock_sock(child);
	sock_put(child);
	WARN_ON(req->sk == NULL);
	return 0;
}

1da177e4
LT
1479int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1480{
1481 struct tcp_options_received tmp_opt;
60236fdd 1482 struct request_sock *req;
e6b4d113 1483 struct inet_request_sock *ireq;
4957faad 1484 struct tcp_sock *tp = tcp_sk(sk);
e6b4d113 1485 struct dst_entry *dst = NULL;
eddc9ec5
ACM
1486 __be32 saddr = ip_hdr(skb)->saddr;
1487 __be32 daddr = ip_hdr(skb)->daddr;
1da177e4 1488 __u32 isn = TCP_SKB_CB(skb)->when;
a2a385d6 1489 bool want_cookie = false;
168a8f58
JC
1490 struct flowi4 fl4;
1491 struct tcp_fastopen_cookie foc = { .len = -1 };
1492 struct tcp_fastopen_cookie valid_foc = { .len = -1 };
1493 struct sk_buff *skb_synack;
1494 int do_fastopen;
1da177e4
LT
1495
1496 /* Never answer to SYNs send to broadcast or multicast */
511c3f92 1497 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1da177e4
LT
1498 goto drop;
1499
1500 /* TW buckets are converted to open requests without
1501 * limitations, they conserve resources and peer is
1502 * evidently real one.
1503 */
463c84b9 1504 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
946cedcc
ED
1505 want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1506 if (!want_cookie)
1507 goto drop;
1da177e4
LT
1508 }
1509
1510 /* Accept backlog is full. If we have already queued enough
1511 * of warm entries in syn queue, drop request. It is better than
1512 * clogging syn queue with openreqs with exponentially increasing
1513 * timeout.
1514 */
2aeef18d
NS
1515 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
1516 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1da177e4 1517 goto drop;
2aeef18d 1518 }
1da177e4 1519
ce4a7d0d 1520 req = inet_reqsk_alloc(&tcp_request_sock_ops);
1da177e4
LT
1521 if (!req)
1522 goto drop;
1523
cfb6eeb4
YH
1524#ifdef CONFIG_TCP_MD5SIG
1525 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1526#endif
1527
1da177e4 1528 tcp_clear_options(&tmp_opt);
bee7ca9e 1529 tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
4957faad 1530 tmp_opt.user_mss = tp->rx_opt.user_mss;
1a2c6181 1531 tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
1da177e4 1532
4dfc2817 1533 if (want_cookie && !tmp_opt.saw_tstamp)
1da177e4 1534 tcp_clear_options(&tmp_opt);
1da177e4 1535
1da177e4 1536 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1da177e4
LT
1537 tcp_openreq_init(req, &tmp_opt, skb);
1538
bb5b7c11
DM
1539 ireq = inet_rsk(req);
1540 ireq->loc_addr = daddr;
1541 ireq->rmt_addr = saddr;
1542 ireq->no_srccheck = inet_sk(sk)->transparent;
5dff747b 1543 ireq->opt = tcp_v4_save_options(skb);
3c2a0909 1544 ireq->ir_mark = inet_request_mark(sk, skb);
bb5b7c11 1545
284904aa 1546 if (security_inet_conn_request(sk, skb, req))
bb5b7c11 1547 goto drop_and_free;
284904aa 1548
172d69e6 1549 if (!want_cookie || tmp_opt.tstamp_ok)
5d134f1c 1550 TCP_ECN_create_request(req, skb, sock_net(sk));
1da177e4
LT
1551
1552 if (want_cookie) {
1da177e4 1553 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
172d69e6 1554 req->cookie_ts = tmp_opt.tstamp_ok;
1da177e4 1555 } else if (!isn) {
1da177e4
LT
1556 /* VJ's idea. We save last timestamp seen
1557 * from the destination in peer table, when entering
1558 * state TIME-WAIT, and check against it before
1559 * accepting new connection request.
1560 *
1561 * If "isn" is not zero, this request hit alive
1562 * timewait bucket, so that all the necessary checks
1563 * are made in the function processing timewait state.
1564 */
1565 if (tmp_opt.saw_tstamp &&
295ff7ed 1566 tcp_death_row.sysctl_tw_recycle &&
ba3f7f04 1567 (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
81166dd6
DM
1568 fl4.daddr == saddr) {
1569 if (!tcp_peer_is_proven(req, dst, true)) {
de0744af 1570 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
7cd04fa7 1571 goto drop_and_release;
1da177e4
LT
1572 }
1573 }
1574 /* Kill the following clause, if you dislike this way. */
1575 else if (!sysctl_tcp_syncookies &&
463c84b9 1576 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1da177e4 1577 (sysctl_max_syn_backlog >> 2)) &&
81166dd6 1578 !tcp_peer_is_proven(req, dst, false)) {
1da177e4
LT
1579 /* Without syncookies last quarter of
1580 * backlog is filled with destinations,
1581 * proven to be alive.
1582 * It means that we continue to communicate
1583 * to destinations, already remembered
1584 * to the moment of synflood.
1585 */
afd46503 1586 LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
673d57e7 1587 &saddr, ntohs(tcp_hdr(skb)->source));
7cd04fa7 1588 goto drop_and_release;
1da177e4
LT
1589 }
1590
a94f723d 1591 isn = tcp_v4_init_sequence(skb);
1da177e4 1592 }
2e6599cb 1593 tcp_rsk(req)->snt_isn = isn;
1da177e4 1594
168a8f58
JC
1595 if (dst == NULL) {
1596 dst = inet_csk_route_req(sk, &fl4, req);
1597 if (dst == NULL)
1598 goto drop_and_free;
1599 }
1600 do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
1601
1602 /* We don't call tcp_v4_send_synack() directly because we need
1603 * to make sure a child socket can be created successfully before
1604 * sending back synack!
1605 *
1606 * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
1607 * (or better yet, call tcp_send_synack() in the child context
1608 * directly, but will have to fix bunch of other code first)
1609 * after syn_recv_sock() except one will need to first fix the
1610 * latter to remove its dependency on the current implementation
1611 * of tcp_v4_send_synack()->tcp_select_initial_window().
1612 */
1613 skb_synack = tcp_make_synack(sk, dst, req,
168a8f58
JC
1614 fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
1615
1616 if (skb_synack) {
1617 __tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr);
1618 skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
1619 } else
1620 goto drop_and_free;
1621
1622 if (likely(!do_fastopen)) {
1623 int err;
1624 err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
1625 ireq->rmt_addr, ireq->opt);
1626 err = net_xmit_eval(err);
1627 if (err || want_cookie)
1628 goto drop_and_free;
1629
016818d0 1630 tcp_rsk(req)->snt_synack = tcp_time_stamp;
168a8f58
JC
1631 tcp_rsk(req)->listener = NULL;
1632 /* Add the request_sock to the SYN table */
1633 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1634 if (fastopen_cookie_present(&foc) && foc.len != 0)
1635 NET_INC_STATS_BH(sock_net(sk),
1636 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
1a2c6181 1637 } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))
1da177e4
LT
1638 goto drop_and_free;
1639
1da177e4
LT
1640 return 0;
1641
7cd04fa7
DL
1642drop_and_release:
1643 dst_release(dst);
1da177e4 1644drop_and_free:
60236fdd 1645 reqsk_free(req);
1da177e4 1646drop:
848bf15f 1647 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1da177e4
LT
1648 return 0;
1649}
4bc2f18b 1650EXPORT_SYMBOL(tcp_v4_conn_request);
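
/*
 * Sketch (illustrative helper, not kernel API): the "last quarter of the
 * backlog" pressure test used in tcp_v4_conn_request() above, restated on
 * its own.  With sysctl_max_syn_backlog == 256, the clause fires once
 * fewer than 64 SYN-queue slots remain free.
 */
static inline int syn_backlog_nearly_full(int max_backlog, int queue_len)
{
	/* true once the free slots drop below a quarter of the backlog */
	return max_backlog - queue_len < (max_backlog >> 2);
}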
1da177e4
LT
1651
1652
1653/*
1654 * The three way handshake has completed - we got a valid synack -
1655 * now create the new socket.
1656 */
1657struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
60236fdd 1658 struct request_sock *req,
1da177e4
LT
1659 struct dst_entry *dst)
1660{
2e6599cb 1661 struct inet_request_sock *ireq;
1da177e4
LT
1662 struct inet_sock *newinet;
1663 struct tcp_sock *newtp;
1664 struct sock *newsk;
cfb6eeb4
YH
1665#ifdef CONFIG_TCP_MD5SIG
1666 struct tcp_md5sig_key *key;
1667#endif
f6d8bd05 1668 struct ip_options_rcu *inet_opt;
1da177e4
LT
1669
1670 if (sk_acceptq_is_full(sk))
1671 goto exit_overflow;
1672
1da177e4
LT
1673 newsk = tcp_create_openreq_child(sk, req, skb);
1674 if (!newsk)
093d2823 1675 goto exit_nonewsk;
1da177e4 1676
bcd76111 1677 newsk->sk_gso_type = SKB_GSO_TCPV4;
fae6ef87 1678 inet_sk_rx_dst_set(newsk, skb);
1da177e4
LT
1679
1680 newtp = tcp_sk(newsk);
1681 newinet = inet_sk(newsk);
2e6599cb 1682 ireq = inet_rsk(req);
c720c7e8
ED
1683 newinet->inet_daddr = ireq->rmt_addr;
1684 newinet->inet_rcv_saddr = ireq->loc_addr;
1685 newinet->inet_saddr = ireq->loc_addr;
f6d8bd05
ED
1686 inet_opt = ireq->opt;
1687 rcu_assign_pointer(newinet->inet_opt, inet_opt);
2e6599cb 1688 ireq->opt = NULL;
463c84b9 1689 newinet->mc_index = inet_iif(skb);
eddc9ec5 1690 newinet->mc_ttl = ip_hdr(skb)->ttl;
4c507d28 1691 newinet->rcv_tos = ip_hdr(skb)->tos;
d83d8461 1692 inet_csk(newsk)->icsk_ext_hdr_len = 0;
f6d8bd05
ED
1693 if (inet_opt)
1694 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
c720c7e8 1695 newinet->inet_id = newtp->write_seq ^ jiffies;
1da177e4 1696
dfd25fff
ED
1697 if (!dst) {
1698 dst = inet_csk_route_child_sock(sk, newsk, req);
1699 if (!dst)
1700 goto put_and_exit;
1701 } else {
1702 /* syncookie case : see end of cookie_v4_check() */
1703 }
0e734419
DM
1704 sk_setup_caps(newsk, dst);
1705
5d424d5a 1706 tcp_mtup_init(newsk);
1da177e4 1707 tcp_sync_mss(newsk, dst_mtu(dst));
0dbaee3b 1708 newtp->advmss = dst_metric_advmss(dst);
f5fff5dc
TQ
1709 if (tcp_sk(sk)->rx_opt.user_mss &&
1710 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1711 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1712
1da177e4 1713 tcp_initialize_rcv_mss(newsk);
623df484 1714 tcp_synack_rtt_meas(newsk, req);
e6c022a4 1715 newtp->total_retrans = req->num_retrans;
1da177e4 1716
cfb6eeb4
YH
1717#ifdef CONFIG_TCP_MD5SIG
1718 /* Copy over the MD5 key from the original socket */
a915da9b
ED
1719 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1720 AF_INET);
c720c7e8 1721 if (key != NULL) {
cfb6eeb4
YH
1722 /*
1723 * We're using one, so create a matching key
1724 * on the newsk structure. If we fail to get
1725 * memory, then we end up not copying the key
1726 * across. Shucks.
1727 */
a915da9b
ED
1728 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1729 AF_INET, key->key, key->keylen, GFP_ATOMIC);
a465419b 1730 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
cfb6eeb4
YH
1731 }
1732#endif
1733
0e734419
DM
1734 if (__inet_inherit_port(sk, newsk) < 0)
1735 goto put_and_exit;
9327f705 1736 __inet_hash_nolisten(newsk, NULL);
1da177e4
LT
1737
1738 return newsk;
1739
1740exit_overflow:
de0744af 1741 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
093d2823
BS
1742exit_nonewsk:
1743 dst_release(dst);
1da177e4 1744exit:
de0744af 1745 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1da177e4 1746 return NULL;
0e734419 1747put_and_exit:
e337e24d
CP
1748 inet_csk_prepare_forced_close(newsk);
1749 tcp_done(newsk);
0e734419 1750 goto exit;
1da177e4 1751}
4bc2f18b 1752EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
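
/*
 * Sketch (illustrative helper): the advmss clamp applied above.  A
 * user-supplied MSS (set via setsockopt(TCP_MAXSEG) on the listener) can
 * only lower the MSS advertised from the route metric, never raise it.
 */
static inline u32 tcp_clamp_advmss(u32 route_advmss, u32 user_mss)
{
	return (user_mss && user_mss < route_advmss) ? user_mss
						     : route_advmss;
}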
1da177e4
LT
1753
1754static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1755{
aa8223c7 1756 struct tcphdr *th = tcp_hdr(skb);
eddc9ec5 1757 const struct iphdr *iph = ip_hdr(skb);
1da177e4 1758 struct sock *nsk;
60236fdd 1759 struct request_sock **prev;
1da177e4 1760 /* Find possible connection requests. */
463c84b9
ACM
1761 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1762 iph->saddr, iph->daddr);
1da177e4 1763 if (req)
8336886f 1764 return tcp_check_req(sk, skb, req, prev, false);
1da177e4 1765
3b1e0a65 1766 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
c67499c0 1767 th->source, iph->daddr, th->dest, inet_iif(skb));
1da177e4
LT
1768
1769 if (nsk) {
1770 if (nsk->sk_state != TCP_TIME_WAIT) {
1771 bh_lock_sock(nsk);
1772 return nsk;
1773 }
9469c7b4 1774 inet_twsk_put(inet_twsk(nsk));
1da177e4
LT
1775 return NULL;
1776 }
1777
1778#ifdef CONFIG_SYN_COOKIES
af9b4738 1779 if (!th->syn)
1da177e4
LT
1780 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1781#endif
1782 return sk;
1783}
1784
b51655b9 1785static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1da177e4 1786{
eddc9ec5
ACM
1787 const struct iphdr *iph = ip_hdr(skb);
1788
84fa7933 1789 if (skb->ip_summed == CHECKSUM_COMPLETE) {
eddc9ec5
ACM
1790 if (!tcp_v4_check(skb->len, iph->saddr,
1791 iph->daddr, skb->csum)) {
fb286bb2 1792 skb->ip_summed = CHECKSUM_UNNECESSARY;
1da177e4 1793 return 0;
fb286bb2 1794 }
1da177e4 1795 }
fb286bb2 1796
eddc9ec5 1797 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
fb286bb2
HX
1798 skb->len, IPPROTO_TCP, 0);
1799
1da177e4 1800 if (skb->len <= 76) {
fb286bb2 1801 return __skb_checksum_complete(skb);
1da177e4
LT
1802 }
1803 return 0;
1804}
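
/*
 * Sketch (illustrative, userspace analogue): the 16-bit one's-complement
 * sum that the csum_* helpers above ultimately compute, per RFC 1071.
 * The kernel versions operate on unfolded 32-bit partial sums and defer
 * folding; byte-order handling here is simplified for readability.
 */
static unsigned short csum16(const unsigned char *data, unsigned int len)
{
	unsigned long sum = 0;

	while (len > 1) {			/* sum 16-bit words */
		sum += (data[0] << 8) | data[1];
		data += 2;
		len -= 2;
	}
	if (len)				/* pad a trailing odd byte */
		sum += data[0] << 8;
	while (sum >> 16)			/* fold carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return (unsigned short)~sum;
}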
1805
1806
 1807/* The socket must have its spinlock held when we get
1808 * here.
1809 *
1810 * We have a potential double-lock case here, so even when
1811 * doing backlog processing we use the BH locking scheme.
1812 * This is because we cannot sleep with the original spinlock
1813 * held.
1814 */
1815int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1816{
cfb6eeb4
YH
1817 struct sock *rsk;
1818#ifdef CONFIG_TCP_MD5SIG
1819 /*
1820 * We really want to reject the packet as early as possible
1821 * if:
 1822 * o We're expecting an MD5-signed packet and there is no MD5 TCP option
1823 * o There is an MD5 option and we're not expecting one
1824 */
7174259e 1825 if (tcp_v4_inbound_md5_hash(sk, skb))
cfb6eeb4
YH
1826 goto discard;
1827#endif
1828
1da177e4 1829 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
404e0a8b
ED
1830 struct dst_entry *dst = sk->sk_rx_dst;
1831
bdeab991 1832 sock_rps_save_rxhash(sk, skb);
404e0a8b 1833 if (dst) {
505fbcf0
ED
1834 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1835 dst->ops->check(dst, 0) == NULL) {
92101b3b
DM
1836 dst_release(dst);
1837 sk->sk_rx_dst = NULL;
1838 }
1839 }
aa8223c7 1840 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
cfb6eeb4 1841 rsk = sk;
1da177e4 1842 goto reset;
cfb6eeb4 1843 }
1da177e4
LT
1844 return 0;
1845 }
1846
ab6a5bb6 1847 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1da177e4
LT
1848 goto csum_err;
1849
1850 if (sk->sk_state == TCP_LISTEN) {
1851 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1852 if (!nsk)
1853 goto discard;
1854
1855 if (nsk != sk) {
bdeab991 1856 sock_rps_save_rxhash(nsk, skb);
cfb6eeb4
YH
1857 if (tcp_child_process(sk, nsk, skb)) {
1858 rsk = nsk;
1da177e4 1859 goto reset;
cfb6eeb4 1860 }
1da177e4
LT
1861 return 0;
1862 }
ca55158c 1863 } else
bdeab991 1864 sock_rps_save_rxhash(sk, skb);
ca55158c 1865
aa8223c7 1866 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
cfb6eeb4 1867 rsk = sk;
1da177e4 1868 goto reset;
cfb6eeb4 1869 }
1da177e4
LT
1870 return 0;
1871
1872reset:
cfb6eeb4 1873 tcp_v4_send_reset(rsk, skb);
1da177e4
LT
1874discard:
1875 kfree_skb(skb);
1876 /* Be careful here. If this function gets more complicated and
1877 * gcc suffers from register pressure on the x86, sk (in %ebx)
1878 * might be destroyed here. This current version compiles correctly,
1879 * but you have been warned.
1880 */
1881 return 0;
1882
1883csum_err:
6a5dc9e5 1884 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
63231bdd 1885 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1da177e4
LT
1886 goto discard;
1887}
4bc2f18b 1888EXPORT_SYMBOL(tcp_v4_do_rcv);
1da177e4 1889
160eb5a6 1890void tcp_v4_early_demux(struct sk_buff *skb)
41063e9d 1891{
41063e9d
DM
1892 const struct iphdr *iph;
1893 const struct tcphdr *th;
1894 struct sock *sk;
41063e9d 1895
41063e9d 1896 if (skb->pkt_type != PACKET_HOST)
160eb5a6 1897 return;
41063e9d 1898
45f00f99 1899 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
160eb5a6 1900 return;
41063e9d
DM
1901
1902 iph = ip_hdr(skb);
45f00f99 1903 th = tcp_hdr(skb);
41063e9d
DM
1904
1905 if (th->doff < sizeof(struct tcphdr) / 4)
160eb5a6 1906 return;
41063e9d 1907
45f00f99 1908 sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
41063e9d 1909 iph->saddr, th->source,
7011d085 1910 iph->daddr, ntohs(th->dest),
9cb429d6 1911 skb->skb_iif);
41063e9d
DM
1912 if (sk) {
1913 skb->sk = sk;
1914 skb->destructor = sock_edemux;
1915 if (sk->sk_state != TCP_TIME_WAIT) {
ec0b3532 1916 struct dst_entry *dst = ACCESS_ONCE(sk->sk_rx_dst);
505fbcf0 1917
41063e9d
DM
1918 if (dst)
1919 dst = dst_check(dst, 0);
92101b3b 1920 if (dst &&
505fbcf0 1921 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
92101b3b 1922 skb_dst_set_noref(skb, dst);
41063e9d
DM
1923 }
1924 }
41063e9d
DM
1925}
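
/*
 * Sketch: the th->doff sanity check above, spelled out.  doff counts the
 * TCP header in 32-bit words, so the smallest legal value is
 * sizeof(struct tcphdr) / 4 == 20 / 4 == 5; being a 4-bit field, it also
 * caps the header (with options) at 15 * 4 == 60 bytes.
 */
static inline int tcp_doff_valid(unsigned int doff)
{
	return doff >= 5 && doff <= 15;		/* 20..60 header bytes */
}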
1926
b2fb4f54
ED
 1927/* A packet is added to the VJ-style prequeue for processing in process
 1928 * context if a reader task is waiting. Apparently, this exciting
 1929 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
 1930 * failed somewhere. Latency? Burstiness? Well, at least now we will
 1931 * see why it failed. 8)8) --ANK
1932 *
1933 */
1934bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1935{
1936 struct tcp_sock *tp = tcp_sk(sk);
1937
1938 if (sysctl_tcp_low_latency || !tp->ucopy.task)
1939 return false;
1940
1941 if (skb->len <= tcp_hdrlen(skb) &&
1942 skb_queue_len(&tp->ucopy.prequeue) == 0)
1943 return false;
1944
58717686 1945 skb_dst_force(skb);
b2fb4f54
ED
1946 __skb_queue_tail(&tp->ucopy.prequeue, skb);
1947 tp->ucopy.memory += skb->truesize;
1948 if (tp->ucopy.memory > sk->sk_rcvbuf) {
1949 struct sk_buff *skb1;
1950
1951 BUG_ON(sock_owned_by_user(sk));
1952
1953 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1954 sk_backlog_rcv(sk, skb1);
1955 NET_INC_STATS_BH(sock_net(sk),
1956 LINUX_MIB_TCPPREQUEUEDROPPED);
1957 }
1958
1959 tp->ucopy.memory = 0;
1960 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1961 wake_up_interruptible_sync_poll(sk_sleep(sk),
1962 POLLIN | POLLRDNORM | POLLRDBAND);
1963 if (!inet_csk_ack_scheduled(sk))
1964 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1965 (3 * tcp_rto_min(sk)) / 4,
1966 TCP_RTO_MAX);
1967 }
1968 return true;
1969}
1970EXPORT_SYMBOL(tcp_prequeue);
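
/*
 * Sketch (illustrative, userspace analogue): the accounting rule that
 * bounds the prequeue above.  Segments accumulate by truesize until the
 * receive-buffer budget is exceeded, at which point everything queued so
 * far is flushed through the normal receive path.
 */
struct prequeue_budget {
	unsigned int memory;		/* bytes queued so far (truesize) */
	unsigned int rcvbuf;		/* sk->sk_rcvbuf analogue */
};

static int prequeue_charge(struct prequeue_budget *q, unsigned int truesize)
{
	q->memory += truesize;
	return q->memory > q->rcvbuf;	/* nonzero => flush the queue now */
}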
1971
a605ac9d
ED
1972int tcp_filter(struct sock *sk, struct sk_buff *skb)
1973{
1974 struct tcphdr *th = (struct tcphdr *)skb->data;
1975 unsigned int eaten = skb->len;
1976 int err;
1977
1978 err = sk_filter_trim_cap(sk, skb, th->doff * 4);
1979 if (!err) {
1980 eaten -= skb->len;
1981 TCP_SKB_CB(skb)->end_seq -= eaten;
1982 }
1983 return err;
1984}
1985EXPORT_SYMBOL(tcp_filter);
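
/*
 * Worked example for the adjustment above (illustrative numbers):
 * sk_filter_trim_cap() may shrink the skb, but never below the TCP header
 * (th->doff * 4 bytes).  If a 1000-byte skb is trimmed down to its
 * 20-byte header, eaten == 1000 - 20 == 980 and end_seq is pulled back by
 * 980, so sequence accounting still matches the bytes actually kept.
 */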
1986
1da177e4
LT
1987/*
1988 * From tcp_input.c
1989 */
1990
1991int tcp_v4_rcv(struct sk_buff *skb)
1992{
eddc9ec5 1993 const struct iphdr *iph;
cf533ea5 1994 const struct tcphdr *th;
1da177e4
LT
1995 struct sock *sk;
1996 int ret;
a86b1e30 1997 struct net *net = dev_net(skb->dev);
1da177e4
LT
1998
1999 if (skb->pkt_type != PACKET_HOST)
2000 goto discard_it;
2001
2002 /* Count it even if it's bad */
63231bdd 2003 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1da177e4
LT
2004
2005 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
2006 goto discard_it;
2007
aa8223c7 2008 th = tcp_hdr(skb);
1da177e4
LT
2009
2010 if (th->doff < sizeof(struct tcphdr) / 4)
2011 goto bad_packet;
2012 if (!pskb_may_pull(skb, th->doff * 4))
2013 goto discard_it;
2014
2015 /* An explanation is required here, I think.
2016 * Packet length and doff are validated by header prediction,
caa20d9a 2017 * provided the th->doff == 0 case is eliminated.
1da177e4 2018 * So, we defer the checks. */
60476372 2019 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
6a5dc9e5 2020 goto csum_error;
1da177e4 2021
aa8223c7 2022 th = tcp_hdr(skb);
eddc9ec5 2023 iph = ip_hdr(skb);
1da177e4
LT
2024 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
2025 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
2026 skb->len - th->doff * 4);
2027 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
2028 TCP_SKB_CB(skb)->when = 0;
b82d1bb4 2029 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1da177e4
LT
2030 TCP_SKB_CB(skb)->sacked = 0;
2031
9a1f27c4 2032 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1da177e4
LT
2033 if (!sk)
2034 goto no_tcp_socket;
2035
bb134d5d
ED
2036process:
2037 if (sk->sk_state == TCP_TIME_WAIT)
2038 goto do_time_wait;
2039
6cce09f8
ED
2040 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
2041 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
d218d111 2042 goto discard_and_relse;
6cce09f8 2043 }
d218d111 2044
1da177e4
LT
2045 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
2046 goto discard_and_relse;
b59c2701 2047 nf_reset(skb);
1da177e4 2048
a605ac9d 2049 if (tcp_filter(sk, skb))
1da177e4 2050 goto discard_and_relse;
a605ac9d
ED
2051 th = (const struct tcphdr *)skb->data;
2052 iph = ip_hdr(skb);
1da177e4
LT
2053
2054 skb->dev = NULL;
2055
c6366184 2056 bh_lock_sock_nested(sk);
b1bba9ad 2057 tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
1da177e4
LT
2058 ret = 0;
2059 if (!sock_owned_by_user(sk)) {
1a2449a8
CL
2060#ifdef CONFIG_NET_DMA
2061 struct tcp_sock *tp = tcp_sk(sk);
2062 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
a2bd1140 2063 tp->ucopy.dma_chan = net_dma_find_channel();
1a2449a8 2064 if (tp->ucopy.dma_chan)
1da177e4 2065 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8
CL
2066 else
2067#endif
2068 {
2069 if (!tcp_prequeue(sk, skb))
ae8d7f88 2070 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8 2071 }
da882c1f
ED
2072 } else if (unlikely(sk_add_backlog(sk, skb,
2073 sk->sk_rcvbuf + sk->sk_sndbuf))) {
6b03a53a 2074 bh_unlock_sock(sk);
6cce09f8 2075 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
6b03a53a
ZY
2076 goto discard_and_relse;
2077 }
1da177e4
LT
2078 bh_unlock_sock(sk);
2079
2080 sock_put(sk);
2081
2082 return ret;
2083
2084no_tcp_socket:
2085 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
2086 goto discard_it;
2087
2088 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
6a5dc9e5
ED
2089csum_error:
2090 TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
1da177e4 2091bad_packet:
63231bdd 2092 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1da177e4 2093 } else {
cfb6eeb4 2094 tcp_v4_send_reset(NULL, skb);
1da177e4
LT
2095 }
2096
2097discard_it:
2098 /* Discard frame. */
2099 kfree_skb(skb);
e905a9ed 2100 return 0;
1da177e4
LT
2101
2102discard_and_relse:
2103 sock_put(sk);
2104 goto discard_it;
2105
2106do_time_wait:
2107 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
9469c7b4 2108 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
2109 goto discard_it;
2110 }
2111
6a5dc9e5 2112 if (skb->len < (th->doff << 2)) {
9469c7b4 2113 inet_twsk_put(inet_twsk(sk));
6a5dc9e5
ED
2114 goto bad_packet;
2115 }
2116 if (tcp_checksum_complete(skb)) {
2117 inet_twsk_put(inet_twsk(sk));
2118 goto csum_error;
1da177e4 2119 }
9469c7b4 2120 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1da177e4 2121 case TCP_TW_SYN: {
c346dca1 2122 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
c67499c0 2123 &tcp_hashinfo,
da5e3630 2124 iph->saddr, th->source,
eddc9ec5 2125 iph->daddr, th->dest,
463c84b9 2126 inet_iif(skb));
1da177e4 2127 if (sk2) {
9469c7b4
YH
2128 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
2129 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
2130 sk = sk2;
2131 goto process;
2132 }
2133 /* Fall through to ACK */
2134 }
2135 case TCP_TW_ACK:
2136 tcp_v4_timewait_ack(sk, skb);
2137 break;
2138 case TCP_TW_RST:
2139 goto no_tcp_socket;
2140 case TCP_TW_SUCCESS:;
2141 }
2142 goto discard_it;
2143}
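
/*
 * Sketch: the TCP_SKB_CB() sequence arithmetic set up in tcp_v4_rcv()
 * above.  SYN and FIN each consume one sequence number, so a data-less
 * SYN with seq == 1000 yields end_seq == 1001, while a segment carrying
 * 100 bytes plus a FIN yields end_seq == seq + 101.
 */
static inline unsigned int tcp_end_seq(unsigned int seq, int syn, int fin,
				       unsigned int payload_len)
{
	return seq + syn + fin + payload_len;
}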
2144
ccb7c410
DM
2145static struct timewait_sock_ops tcp_timewait_sock_ops = {
2146 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
2147 .twsk_unique = tcp_twsk_unique,
2148 .twsk_destructor= tcp_twsk_destructor,
ccb7c410 2149};
1da177e4 2150
63d02d15 2151void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
5d299f3d
ED
2152{
2153 struct dst_entry *dst = skb_dst(skb);
2154
2155 dst_hold(dst);
2156 sk->sk_rx_dst = dst;
2157 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
2158}
63d02d15 2159EXPORT_SYMBOL(inet_sk_rx_dst_set);
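
/*
 * Lifetime note: the cached sk->sk_rx_dst holds its own reference via
 * dst_hold() above; it is dropped in tcp_v4_do_rcv() when the route
 * fails revalidation, and finally when the socket itself is destroyed.
 */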
5d299f3d 2160
3b401a81 2161const struct inet_connection_sock_af_ops ipv4_specific = {
543d9cfe
ACM
2162 .queue_xmit = ip_queue_xmit,
2163 .send_check = tcp_v4_send_check,
2164 .rebuild_header = inet_sk_rebuild_header,
5d299f3d 2165 .sk_rx_dst_set = inet_sk_rx_dst_set,
543d9cfe
ACM
2166 .conn_request = tcp_v4_conn_request,
2167 .syn_recv_sock = tcp_v4_syn_recv_sock,
543d9cfe
ACM
2168 .net_header_len = sizeof(struct iphdr),
2169 .setsockopt = ip_setsockopt,
2170 .getsockopt = ip_getsockopt,
2171 .addr2sockaddr = inet_csk_addr2sockaddr,
2172 .sockaddr_len = sizeof(struct sockaddr_in),
ab1e0a13 2173 .bind_conflict = inet_csk_bind_conflict,
3fdadf7d 2174#ifdef CONFIG_COMPAT
543d9cfe
ACM
2175 .compat_setsockopt = compat_ip_setsockopt,
2176 .compat_getsockopt = compat_ip_getsockopt,
3fdadf7d 2177#endif
5f80f4d8 2178 .mtu_reduced = tcp_v4_mtu_reduced,
1da177e4 2179};
4bc2f18b 2180EXPORT_SYMBOL(ipv4_specific);
1da177e4 2181
cfb6eeb4 2182#ifdef CONFIG_TCP_MD5SIG
b2e4b3de 2183static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
cfb6eeb4 2184 .md5_lookup = tcp_v4_md5_lookup,
49a72dfb 2185 .calc_md5_hash = tcp_v4_md5_hash_skb,
cfb6eeb4 2186 .md5_parse = tcp_v4_parse_md5_keys,
cfb6eeb4 2187};
b6332e6c 2188#endif
cfb6eeb4 2189
1da177e4
LT
 2190/* NOTE: A lot of things are set to zero explicitly by the call to
 2191 * sk_alloc(), so they need not be done here.
2192 */
2193static int tcp_v4_init_sock(struct sock *sk)
2194{
6687e988 2195 struct inet_connection_sock *icsk = inet_csk(sk);
1da177e4 2196
900f65d3 2197 tcp_init_sock(sk);
1da177e4 2198
8292a17a 2199 icsk->icsk_af_ops = &ipv4_specific;
900f65d3 2200
cfb6eeb4 2201#ifdef CONFIG_TCP_MD5SIG
ac807fa8 2202 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
cfb6eeb4 2203#endif
1da177e4 2204
1da177e4
LT
2205 return 0;
2206}
2207
7d06b2e0 2208void tcp_v4_destroy_sock(struct sock *sk)
1da177e4
LT
2209{
2210 struct tcp_sock *tp = tcp_sk(sk);
2211
2212 tcp_clear_xmit_timers(sk);
2213
6687e988 2214 tcp_cleanup_congestion_control(sk);
317a76f9 2215
1da177e4 2216 /* Cleanup up the write buffer. */
fe067e8a 2217 tcp_write_queue_purge(sk);
1da177e4
LT
2218
2219 /* Cleans up our, hopefully empty, out_of_order_queue. */
e905a9ed 2220 __skb_queue_purge(&tp->out_of_order_queue);
1da177e4 2221
cfb6eeb4
YH
2222#ifdef CONFIG_TCP_MD5SIG
2223 /* Clean up the MD5 key list, if any */
2224 if (tp->md5sig_info) {
a915da9b 2225 tcp_clear_md5_list(sk);
a8afca03 2226 kfree_rcu(tp->md5sig_info, rcu);
cfb6eeb4
YH
2227 tp->md5sig_info = NULL;
2228 }
2229#endif
2230
1a2449a8
CL
2231#ifdef CONFIG_NET_DMA
2232 /* Cleans up our sk_async_wait_queue */
e905a9ed 2233 __skb_queue_purge(&sk->sk_async_wait_queue);
1a2449a8
CL
2234#endif
2235
1da177e4
LT
 2236 /* Clean the prequeue; it really must be empty by now. */
2237 __skb_queue_purge(&tp->ucopy.prequeue);
2238
2239 /* Clean up a referenced TCP bind bucket. */
463c84b9 2240 if (inet_csk(sk)->icsk_bind_hash)
ab1e0a13 2241 inet_put_port(sk);
1da177e4 2242
168a8f58 2243 BUG_ON(tp->fastopen_rsk != NULL);
435cf559 2244
cf60af03
YC
2245 /* If socket is aborted during connect operation */
2246 tcp_free_fastopen_req(tp);
2247
180d8cd9 2248 sk_sockets_allocated_dec(sk);
d1a4c0b3 2249 sock_release_memcg(sk);
1da177e4 2250}
1da177e4
LT
2251EXPORT_SYMBOL(tcp_v4_destroy_sock);
2252
2253#ifdef CONFIG_PROC_FS
2254/* Proc filesystem TCP sock list dumping. */
2255
3ab5aee7 2256static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1da177e4 2257{
3ab5aee7 2258 return hlist_nulls_empty(head) ? NULL :
8feaf0c0 2259 list_entry(head->first, struct inet_timewait_sock, tw_node);
1da177e4
LT
2260}
2261
8feaf0c0 2262static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1da177e4 2263{
3ab5aee7
ED
2264 return !is_a_nulls(tw->tw_node.next) ?
2265 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1da177e4
LT
2266}
2267
a8b690f9
TH
2268/*
 2269 * Get the next listener socket following cur. If cur is NULL, get the
 2270 * first socket starting from the bucket given in st->bucket; when
 2271 * st->bucket is zero the very first socket in the hash table is returned.
2272 */
1da177e4
LT
2273static void *listening_get_next(struct seq_file *seq, void *cur)
2274{
463c84b9 2275 struct inet_connection_sock *icsk;
c25eb3bf 2276 struct hlist_nulls_node *node;
1da177e4 2277 struct sock *sk = cur;
5caea4ea 2278 struct inet_listen_hashbucket *ilb;
5799de0b 2279 struct tcp_iter_state *st = seq->private;
a4146b1b 2280 struct net *net = seq_file_net(seq);
1da177e4
LT
2281
2282 if (!sk) {
a8b690f9 2283 ilb = &tcp_hashinfo.listening_hash[st->bucket];
5caea4ea 2284 spin_lock_bh(&ilb->lock);
c25eb3bf 2285 sk = sk_nulls_head(&ilb->head);
a8b690f9 2286 st->offset = 0;
1da177e4
LT
2287 goto get_sk;
2288 }
5caea4ea 2289 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1da177e4 2290 ++st->num;
a8b690f9 2291 ++st->offset;
1da177e4
LT
2292
2293 if (st->state == TCP_SEQ_STATE_OPENREQ) {
60236fdd 2294 struct request_sock *req = cur;
1da177e4 2295
72a3effa 2296 icsk = inet_csk(st->syn_wait_sk);
1da177e4
LT
2297 req = req->dl_next;
2298 while (1) {
2299 while (req) {
bdccc4ca 2300 if (req->rsk_ops->family == st->family) {
1da177e4
LT
2301 cur = req;
2302 goto out;
2303 }
2304 req = req->dl_next;
2305 }
72a3effa 2306 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1da177e4
LT
2307 break;
2308get_req:
463c84b9 2309 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1da177e4 2310 }
1bde5ac4 2311 sk = sk_nulls_next(st->syn_wait_sk);
1da177e4 2312 st->state = TCP_SEQ_STATE_LISTENING;
463c84b9 2313 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2314 } else {
e905a9ed 2315 icsk = inet_csk(sk);
463c84b9
ACM
2316 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2317 if (reqsk_queue_len(&icsk->icsk_accept_queue))
1da177e4 2318 goto start_req;
463c84b9 2319 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1bde5ac4 2320 sk = sk_nulls_next(sk);
1da177e4
LT
2321 }
2322get_sk:
c25eb3bf 2323 sk_nulls_for_each_from(sk, node) {
8475ef9f
PE
2324 if (!net_eq(sock_net(sk), net))
2325 continue;
2326 if (sk->sk_family == st->family) {
1da177e4
LT
2327 cur = sk;
2328 goto out;
2329 }
e905a9ed 2330 icsk = inet_csk(sk);
463c84b9
ACM
2331 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2332 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1da177e4
LT
2333start_req:
2334 st->uid = sock_i_uid(sk);
2335 st->syn_wait_sk = sk;
2336 st->state = TCP_SEQ_STATE_OPENREQ;
2337 st->sbucket = 0;
2338 goto get_req;
2339 }
463c84b9 2340 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2341 }
5caea4ea 2342 spin_unlock_bh(&ilb->lock);
a8b690f9 2343 st->offset = 0;
0f7ff927 2344 if (++st->bucket < INET_LHTABLE_SIZE) {
5caea4ea
ED
2345 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2346 spin_lock_bh(&ilb->lock);
c25eb3bf 2347 sk = sk_nulls_head(&ilb->head);
1da177e4
LT
2348 goto get_sk;
2349 }
2350 cur = NULL;
2351out:
2352 return cur;
2353}
2354
2355static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2356{
a8b690f9
TH
2357 struct tcp_iter_state *st = seq->private;
2358 void *rc;
2359
2360 st->bucket = 0;
2361 st->offset = 0;
2362 rc = listening_get_next(seq, NULL);
1da177e4
LT
2363
2364 while (rc && *pos) {
2365 rc = listening_get_next(seq, rc);
2366 --*pos;
2367 }
2368 return rc;
2369}
2370
a2a385d6 2371static inline bool empty_bucket(struct tcp_iter_state *st)
6eac5604 2372{
3ab5aee7
ED
2373 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2374 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
6eac5604
AK
2375}
2376
a8b690f9
TH
2377/*
 2378 * Get the first established socket, starting from the bucket given in st->bucket.
2379 * If st->bucket is zero, the very first socket in the hash is returned.
2380 */
1da177e4
LT
2381static void *established_get_first(struct seq_file *seq)
2382{
5799de0b 2383 struct tcp_iter_state *st = seq->private;
a4146b1b 2384 struct net *net = seq_file_net(seq);
1da177e4
LT
2385 void *rc = NULL;
2386
a8b690f9
TH
2387 st->offset = 0;
2388 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1da177e4 2389 struct sock *sk;
3ab5aee7 2390 struct hlist_nulls_node *node;
8feaf0c0 2391 struct inet_timewait_sock *tw;
9db66bdc 2392 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1da177e4 2393
6eac5604
AK
2394 /* Lockless fast path for the common case of empty buckets */
2395 if (empty_bucket(st))
2396 continue;
2397
9db66bdc 2398 spin_lock_bh(lock);
3ab5aee7 2399 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
f40c8174 2400 if (sk->sk_family != st->family ||
878628fb 2401 !net_eq(sock_net(sk), net)) {
1da177e4
LT
2402 continue;
2403 }
2404 rc = sk;
2405 goto out;
2406 }
2407 st->state = TCP_SEQ_STATE_TIME_WAIT;
8feaf0c0 2408 inet_twsk_for_each(tw, node,
dbca9b27 2409 &tcp_hashinfo.ehash[st->bucket].twchain) {
28518fc1 2410 if (tw->tw_family != st->family ||
878628fb 2411 !net_eq(twsk_net(tw), net)) {
1da177e4
LT
2412 continue;
2413 }
2414 rc = tw;
2415 goto out;
2416 }
9db66bdc 2417 spin_unlock_bh(lock);
1da177e4
LT
2418 st->state = TCP_SEQ_STATE_ESTABLISHED;
2419 }
2420out:
2421 return rc;
2422}
2423
2424static void *established_get_next(struct seq_file *seq, void *cur)
2425{
2426 struct sock *sk = cur;
8feaf0c0 2427 struct inet_timewait_sock *tw;
3ab5aee7 2428 struct hlist_nulls_node *node;
5799de0b 2429 struct tcp_iter_state *st = seq->private;
a4146b1b 2430 struct net *net = seq_file_net(seq);
1da177e4
LT
2431
2432 ++st->num;
a8b690f9 2433 ++st->offset;
1da177e4
LT
2434
2435 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2436 tw = cur;
2437 tw = tw_next(tw);
2438get_tw:
878628fb 2439 while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
1da177e4
LT
2440 tw = tw_next(tw);
2441 }
2442 if (tw) {
2443 cur = tw;
2444 goto out;
2445 }
9db66bdc 2446 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2447 st->state = TCP_SEQ_STATE_ESTABLISHED;
2448
6eac5604 2449 /* Look for the next non-empty bucket */
a8b690f9 2450 st->offset = 0;
f373b53b 2451 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
6eac5604
AK
2452 empty_bucket(st))
2453 ;
f373b53b 2454 if (st->bucket > tcp_hashinfo.ehash_mask)
6eac5604
AK
2455 return NULL;
2456
9db66bdc 2457 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
3ab5aee7 2458 sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
1da177e4 2459 } else
3ab5aee7 2460 sk = sk_nulls_next(sk);
1da177e4 2461
3ab5aee7 2462 sk_nulls_for_each_from(sk, node) {
878628fb 2463 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
1da177e4
LT
2464 goto found;
2465 }
2466
2467 st->state = TCP_SEQ_STATE_TIME_WAIT;
dbca9b27 2468 tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
1da177e4
LT
2469 goto get_tw;
2470found:
2471 cur = sk;
2472out:
2473 return cur;
2474}
2475
2476static void *established_get_idx(struct seq_file *seq, loff_t pos)
2477{
a8b690f9
TH
2478 struct tcp_iter_state *st = seq->private;
2479 void *rc;
2480
2481 st->bucket = 0;
2482 rc = established_get_first(seq);
1da177e4
LT
2483
2484 while (rc && pos) {
2485 rc = established_get_next(seq, rc);
2486 --pos;
7174259e 2487 }
1da177e4
LT
2488 return rc;
2489}
2490
2491static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2492{
2493 void *rc;
5799de0b 2494 struct tcp_iter_state *st = seq->private;
1da177e4 2495
1da177e4
LT
2496 st->state = TCP_SEQ_STATE_LISTENING;
2497 rc = listening_get_idx(seq, &pos);
2498
2499 if (!rc) {
1da177e4
LT
2500 st->state = TCP_SEQ_STATE_ESTABLISHED;
2501 rc = established_get_idx(seq, pos);
2502 }
2503
2504 return rc;
2505}
2506
a8b690f9
TH
2507static void *tcp_seek_last_pos(struct seq_file *seq)
2508{
2509 struct tcp_iter_state *st = seq->private;
2510 int offset = st->offset;
2511 int orig_num = st->num;
2512 void *rc = NULL;
2513
2514 switch (st->state) {
2515 case TCP_SEQ_STATE_OPENREQ:
2516 case TCP_SEQ_STATE_LISTENING:
2517 if (st->bucket >= INET_LHTABLE_SIZE)
2518 break;
2519 st->state = TCP_SEQ_STATE_LISTENING;
2520 rc = listening_get_next(seq, NULL);
2521 while (offset-- && rc)
2522 rc = listening_get_next(seq, rc);
2523 if (rc)
2524 break;
2525 st->bucket = 0;
2526 /* Fallthrough */
2527 case TCP_SEQ_STATE_ESTABLISHED:
2528 case TCP_SEQ_STATE_TIME_WAIT:
2529 st->state = TCP_SEQ_STATE_ESTABLISHED;
2530 if (st->bucket > tcp_hashinfo.ehash_mask)
2531 break;
2532 rc = established_get_first(seq);
2533 while (offset-- && rc)
2534 rc = established_get_next(seq, rc);
2535 }
2536
2537 st->num = orig_num;
2538
2539 return rc;
2540}
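
/*
 * Sketch (illustrative, userspace analogue): the (bucket, offset) resume
 * scheme implemented above.  Rather than replaying *pos entries from the
 * first hash bucket on every read(), the iterator remembers which bucket
 * it stopped in and how far into that bucket it got, then replays only
 * that remainder.
 */
struct seq_pos {
	int bucket;	/* hash bucket the last read() stopped in */
	int offset;	/* entries already consumed within that bucket */
};

static int seq_replay_len(const struct seq_pos *pos, int entries_in_bucket)
{
	/* entries to skip inside the saved bucket, or -1 if the bucket
	 * shrank underneath us and a full rescan is needed instead */
	return pos->offset <= entries_in_bucket ? pos->offset : -1;
}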
2541
1da177e4
LT
2542static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2543{
5799de0b 2544 struct tcp_iter_state *st = seq->private;
a8b690f9
TH
2545 void *rc;
2546
2547 if (*pos && *pos == st->last_pos) {
2548 rc = tcp_seek_last_pos(seq);
2549 if (rc)
2550 goto out;
2551 }
2552
1da177e4
LT
2553 st->state = TCP_SEQ_STATE_LISTENING;
2554 st->num = 0;
a8b690f9
TH
2555 st->bucket = 0;
2556 st->offset = 0;
2557 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2558
2559out:
2560 st->last_pos = *pos;
2561 return rc;
1da177e4
LT
2562}
2563
2564static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2565{
a8b690f9 2566 struct tcp_iter_state *st = seq->private;
1da177e4 2567 void *rc = NULL;
1da177e4
LT
2568
2569 if (v == SEQ_START_TOKEN) {
2570 rc = tcp_get_idx(seq, 0);
2571 goto out;
2572 }
1da177e4
LT
2573
2574 switch (st->state) {
2575 case TCP_SEQ_STATE_OPENREQ:
2576 case TCP_SEQ_STATE_LISTENING:
2577 rc = listening_get_next(seq, v);
2578 if (!rc) {
1da177e4 2579 st->state = TCP_SEQ_STATE_ESTABLISHED;
a8b690f9
TH
2580 st->bucket = 0;
2581 st->offset = 0;
1da177e4
LT
2582 rc = established_get_first(seq);
2583 }
2584 break;
2585 case TCP_SEQ_STATE_ESTABLISHED:
2586 case TCP_SEQ_STATE_TIME_WAIT:
2587 rc = established_get_next(seq, v);
2588 break;
2589 }
2590out:
2591 ++*pos;
a8b690f9 2592 st->last_pos = *pos;
1da177e4
LT
2593 return rc;
2594}
2595
2596static void tcp_seq_stop(struct seq_file *seq, void *v)
2597{
5799de0b 2598 struct tcp_iter_state *st = seq->private;
1da177e4
LT
2599
2600 switch (st->state) {
2601 case TCP_SEQ_STATE_OPENREQ:
2602 if (v) {
463c84b9
ACM
2603 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2604 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4
LT
2605 }
2606 case TCP_SEQ_STATE_LISTENING:
2607 if (v != SEQ_START_TOKEN)
5caea4ea 2608 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
1da177e4
LT
2609 break;
2610 case TCP_SEQ_STATE_TIME_WAIT:
2611 case TCP_SEQ_STATE_ESTABLISHED:
2612 if (v)
9db66bdc 2613 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2614 break;
2615 }
2616}
2617
73cb88ec 2618int tcp_seq_open(struct inode *inode, struct file *file)
1da177e4 2619{
d9dda78b 2620 struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
1da177e4 2621 struct tcp_iter_state *s;
52d6f3f1 2622 int err;
1da177e4 2623
52d6f3f1
DL
2624 err = seq_open_net(inode, file, &afinfo->seq_ops,
2625 sizeof(struct tcp_iter_state));
2626 if (err < 0)
2627 return err;
f40c8174 2628
52d6f3f1 2629 s = ((struct seq_file *)file->private_data)->private;
1da177e4 2630 s->family = afinfo->family;
a8b690f9 2631 s->last_pos = 0;
f40c8174
DL
2632 return 0;
2633}
73cb88ec 2634EXPORT_SYMBOL(tcp_seq_open);
f40c8174 2635
6f8b13bc 2636int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4
LT
2637{
2638 int rc = 0;
2639 struct proc_dir_entry *p;
2640
9427c4b3
DL
2641 afinfo->seq_ops.start = tcp_seq_start;
2642 afinfo->seq_ops.next = tcp_seq_next;
2643 afinfo->seq_ops.stop = tcp_seq_stop;
2644
84841c3c 2645 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
73cb88ec 2646 afinfo->seq_fops, afinfo);
84841c3c 2647 if (!p)
1da177e4
LT
2648 rc = -ENOMEM;
2649 return rc;
2650}
4bc2f18b 2651EXPORT_SYMBOL(tcp_proc_register);
1da177e4 2652
6f8b13bc 2653void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4 2654{
ece31ffd 2655 remove_proc_entry(afinfo->name, net->proc_net);
1da177e4 2656}
4bc2f18b 2657EXPORT_SYMBOL(tcp_proc_unregister);
1da177e4 2658
cf533ea5 2659static void get_openreq4(const struct sock *sk, const struct request_sock *req,
a7cb5a49 2660 struct seq_file *f, int i, kuid_t uid, int *len)
1da177e4 2661{
2e6599cb 2662 const struct inet_request_sock *ireq = inet_rsk(req);
a399a805 2663 long delta = req->expires - jiffies;
1da177e4 2664
5e659e4c 2665 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
71338aa7 2666 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
1da177e4 2667 i,
2e6599cb 2668 ireq->loc_addr,
c720c7e8 2669 ntohs(inet_sk(sk)->inet_sport),
2e6599cb
ACM
2670 ireq->rmt_addr,
2671 ntohs(ireq->rmt_port),
1da177e4
LT
2672 TCP_SYN_RECV,
2673 0, 0, /* could print option size, but that is af dependent. */
2674 1, /* timers active (only the expire timer) */
a399a805 2675 jiffies_delta_to_clock_t(delta),
e6c022a4 2676 req->num_timeout,
a7cb5a49 2677 from_kuid_munged(seq_user_ns(f), uid),
1da177e4
LT
2678 0, /* non standard timer */
2679 0, /* open_requests have no inode */
2680 atomic_read(&sk->sk_refcnt),
5e659e4c
PE
2681 req,
2682 len);
1da177e4
LT
2683}
2684
5e659e4c 2685static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
1da177e4
LT
2686{
2687 int timer_active;
2688 unsigned long timer_expires;
cf533ea5 2689 const struct tcp_sock *tp = tcp_sk(sk);
cf4c6bf8 2690 const struct inet_connection_sock *icsk = inet_csk(sk);
cf533ea5 2691 const struct inet_sock *inet = inet_sk(sk);
168a8f58 2692 struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
c720c7e8
ED
2693 __be32 dest = inet->inet_daddr;
2694 __be32 src = inet->inet_rcv_saddr;
2695 __u16 destp = ntohs(inet->inet_dport);
2696 __u16 srcp = ntohs(inet->inet_sport);
49d09007 2697 int rx_queue;
1da177e4 2698
6ba8a3b1
ND
2699 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2700 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2701 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1da177e4 2702 timer_active = 1;
463c84b9
ACM
2703 timer_expires = icsk->icsk_timeout;
2704 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1da177e4 2705 timer_active = 4;
463c84b9 2706 timer_expires = icsk->icsk_timeout;
cf4c6bf8 2707 } else if (timer_pending(&sk->sk_timer)) {
1da177e4 2708 timer_active = 2;
cf4c6bf8 2709 timer_expires = sk->sk_timer.expires;
1da177e4
LT
2710 } else {
2711 timer_active = 0;
2712 timer_expires = jiffies;
2713 }
2714
49d09007
ED
2715 if (sk->sk_state == TCP_LISTEN)
2716 rx_queue = sk->sk_ack_backlog;
2717 else
2718 /*
2719 * because we dont lock socket, we might find a transient negative value
2720 */
2721 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2722
5e659e4c 2723 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
71338aa7 2724 "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
cf4c6bf8 2725 i, src, srcp, dest, destp, sk->sk_state,
47da8ee6 2726 tp->write_seq - tp->snd_una,
49d09007 2727 rx_queue,
1da177e4 2728 timer_active,
a399a805 2729 jiffies_delta_to_clock_t(timer_expires - jiffies),
463c84b9 2730 icsk->icsk_retransmits,
a7cb5a49 2731 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
6687e988 2732 icsk->icsk_probes_out,
cf4c6bf8
IJ
2733 sock_i_ino(sk),
2734 atomic_read(&sk->sk_refcnt), sk,
7be87351
SH
2735 jiffies_to_clock_t(icsk->icsk_rto),
2736 jiffies_to_clock_t(icsk->icsk_ack.ato),
463c84b9 2737 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1da177e4 2738 tp->snd_cwnd,
168a8f58
JC
2739 sk->sk_state == TCP_LISTEN ?
2740 (fastopenq ? fastopenq->max_qlen : 0) :
2741 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh),
5e659e4c 2742 len);
1da177e4
LT
2743}
2744
cf533ea5 2745static void get_timewait4_sock(const struct inet_timewait_sock *tw,
5e659e4c 2746 struct seq_file *f, int i, int *len)
1da177e4 2747{
23f33c2d 2748 __be32 dest, src;
1da177e4 2749 __u16 destp, srcp;
a399a805 2750 long delta = tw->tw_ttd - jiffies;
1da177e4
LT
2751
2752 dest = tw->tw_daddr;
2753 src = tw->tw_rcv_saddr;
2754 destp = ntohs(tw->tw_dport);
2755 srcp = ntohs(tw->tw_sport);
2756
5e659e4c 2757 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
71338aa7 2758 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
1da177e4 2759 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
a399a805 2760 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
5e659e4c 2761 atomic_read(&tw->tw_refcnt), tw, len);
1da177e4
LT
2762}
2763
2764#define TMPSZ 150
2765
2766static int tcp4_seq_show(struct seq_file *seq, void *v)
2767{
5799de0b 2768 struct tcp_iter_state *st;
5e659e4c 2769 int len;
1da177e4
LT
2770
2771 if (v == SEQ_START_TOKEN) {
2772 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2773 " sl local_address rem_address st tx_queue "
2774 "rx_queue tr tm->when retrnsmt uid timeout "
2775 "inode");
2776 goto out;
2777 }
2778 st = seq->private;
2779
2780 switch (st->state) {
2781 case TCP_SEQ_STATE_LISTENING:
2782 case TCP_SEQ_STATE_ESTABLISHED:
5e659e4c 2783 get_tcp4_sock(v, seq, st->num, &len);
1da177e4
LT
2784 break;
2785 case TCP_SEQ_STATE_OPENREQ:
5e659e4c 2786 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
1da177e4
LT
2787 break;
2788 case TCP_SEQ_STATE_TIME_WAIT:
5e659e4c 2789 get_timewait4_sock(v, seq, st->num, &len);
1da177e4
LT
2790 break;
2791 }
5e659e4c 2792 seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
1da177e4
LT
2793out:
2794 return 0;
2795}
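
/*
 * Example of a resulting /proc/net/tcp line (illustrative values and
 * spacing):
 *
 *   sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
 *    0: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 12345 1 ffff88003d3af700 100 0 0 10 0
 *
 * Address fields are the raw __be32 printed as hex (so they appear
 * byte-swapped on little-endian machines) while ports go through ntohs(),
 * so 00000000:0016 is a listener on 0.0.0.0:22; state 0A is TCP_LISTEN.
 */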
2796
73cb88ec
AV
2797static const struct file_operations tcp_afinfo_seq_fops = {
2798 .owner = THIS_MODULE,
2799 .open = tcp_seq_open,
2800 .read = seq_read,
2801 .llseek = seq_lseek,
2802 .release = seq_release_net
2803};
2804
1da177e4 2805static struct tcp_seq_afinfo tcp4_seq_afinfo = {
1da177e4
LT
2806 .name = "tcp",
2807 .family = AF_INET,
73cb88ec 2808 .seq_fops = &tcp_afinfo_seq_fops,
9427c4b3
DL
2809 .seq_ops = {
2810 .show = tcp4_seq_show,
2811 },
1da177e4
LT
2812};
2813
2c8c1e72 2814static int __net_init tcp4_proc_init_net(struct net *net)
757764f6
PE
2815{
2816 return tcp_proc_register(net, &tcp4_seq_afinfo);
2817}
2818
2c8c1e72 2819static void __net_exit tcp4_proc_exit_net(struct net *net)
757764f6
PE
2820{
2821 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2822}
2823
2824static struct pernet_operations tcp4_net_ops = {
2825 .init = tcp4_proc_init_net,
2826 .exit = tcp4_proc_exit_net,
2827};
2828
1da177e4
LT
2829int __init tcp4_proc_init(void)
2830{
757764f6 2831 return register_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2832}
2833
2834void tcp4_proc_exit(void)
2835{
757764f6 2836 unregister_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2837}
2838#endif /* CONFIG_PROC_FS */
2839
bf296b12
HX
2840struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2841{
b71d1d42 2842 const struct iphdr *iph = skb_gro_network_header(skb);
861b6501
ED
2843 __wsum wsum;
2844 __sum16 sum;
bf296b12
HX
2845
2846 switch (skb->ip_summed) {
2847 case CHECKSUM_COMPLETE:
86911732 2848 if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
bf296b12
HX
2849 skb->csum)) {
2850 skb->ip_summed = CHECKSUM_UNNECESSARY;
2851 break;
2852 }
861b6501 2853flush:
bf296b12
HX
2854 NAPI_GRO_CB(skb)->flush = 1;
2855 return NULL;
861b6501
ED
2856
2857 case CHECKSUM_NONE:
2858 wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
2859 skb_gro_len(skb), IPPROTO_TCP, 0);
2860 sum = csum_fold(skb_checksum(skb,
2861 skb_gro_offset(skb),
2862 skb_gro_len(skb),
2863 wsum));
2864 if (sum)
2865 goto flush;
2866
2867 skb->ip_summed = CHECKSUM_UNNECESSARY;
2868 break;
bf296b12
HX
2869 }
2870
2871 return tcp_gro_receive(head, skb);
2872}
bf296b12
HX
2873
2874int tcp4_gro_complete(struct sk_buff *skb)
2875{
b71d1d42 2876 const struct iphdr *iph = ip_hdr(skb);
bf296b12
HX
2877 struct tcphdr *th = tcp_hdr(skb);
2878
2879 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2880 iph->saddr, iph->daddr, 0);
2881 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2882
2883 return tcp_gro_complete(skb);
2884}
bf296b12 2885
1da177e4
LT
2886struct proto tcp_prot = {
2887 .name = "TCP",
2888 .owner = THIS_MODULE,
2889 .close = tcp_close,
2890 .connect = tcp_v4_connect,
2891 .disconnect = tcp_disconnect,
463c84b9 2892 .accept = inet_csk_accept,
1da177e4
LT
2893 .ioctl = tcp_ioctl,
2894 .init = tcp_v4_init_sock,
2895 .destroy = tcp_v4_destroy_sock,
2896 .shutdown = tcp_shutdown,
2897 .setsockopt = tcp_setsockopt,
2898 .getsockopt = tcp_getsockopt,
1da177e4 2899 .recvmsg = tcp_recvmsg,
7ba42910
CG
2900 .sendmsg = tcp_sendmsg,
2901 .sendpage = tcp_sendpage,
1da177e4 2902 .backlog_rcv = tcp_v4_do_rcv,
46d3ceab 2903 .release_cb = tcp_release_cb,
ab1e0a13
ACM
2904 .hash = inet_hash,
2905 .unhash = inet_unhash,
2906 .get_port = inet_csk_get_port,
1da177e4
LT
2907 .enter_memory_pressure = tcp_enter_memory_pressure,
2908 .sockets_allocated = &tcp_sockets_allocated,
0a5578cf 2909 .orphan_count = &tcp_orphan_count,
1da177e4
LT
2910 .memory_allocated = &tcp_memory_allocated,
2911 .memory_pressure = &tcp_memory_pressure,
1da177e4
LT
2912 .sysctl_wmem = sysctl_tcp_wmem,
2913 .sysctl_rmem = sysctl_tcp_rmem,
2914 .max_header = MAX_TCP_HEADER,
2915 .obj_size = sizeof(struct tcp_sock),
3ab5aee7 2916 .slab_flags = SLAB_DESTROY_BY_RCU,
6d6ee43e 2917 .twsk_prot = &tcp_timewait_sock_ops,
60236fdd 2918 .rsk_prot = &tcp_request_sock_ops,
39d8cda7 2919 .h.hashinfo = &tcp_hashinfo,
7ba42910 2920 .no_autobind = true,
543d9cfe
ACM
2921#ifdef CONFIG_COMPAT
2922 .compat_setsockopt = compat_tcp_setsockopt,
2923 .compat_getsockopt = compat_tcp_getsockopt,
2924#endif
c255a458 2925#ifdef CONFIG_MEMCG_KMEM
d1a4c0b3
GC
2926 .init_cgroup = tcp_init_cgroup,
2927 .destroy_cgroup = tcp_destroy_cgroup,
2928 .proto_cgroup = tcp_proto_cgroup,
2929#endif
3c2a0909 2930 .diag_destroy = tcp_abort,
1da177e4 2931};
4bc2f18b 2932EXPORT_SYMBOL(tcp_prot);
1da177e4 2933
4ad4ec54
ED
2934static void __net_exit tcp_sk_exit(struct net *net)
2935{
2936 int cpu;
2937
2938 for_each_possible_cpu(cpu)
2939 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2940 free_percpu(net->ipv4.tcp_sk);
2941}
2942
046ee902
DL
2943static int __net_init tcp_sk_init(struct net *net)
2944{
4ad4ec54
ED
2945 int res, cpu;
2946
2947 net->ipv4.tcp_sk = alloc_percpu(struct sock *);
2948 if (!net->ipv4.tcp_sk)
2949 return -ENOMEM;
2950
2951 for_each_possible_cpu(cpu) {
2952 struct sock *sk;
2953
2954 res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
2955 IPPROTO_TCP, net);
2956 if (res)
2957 goto fail;
2958 *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
2959 }
5d134f1c 2960 net->ipv4.sysctl_tcp_ecn = 2;
be9f4a44 2961 return 0;
046ee902 2962
4ad4ec54
ED
2963fail:
2964 tcp_sk_exit(net);
2965
2966 return res;
b099ce26
EB
2967}
2968
2969static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2970{
2971 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
046ee902
DL
2972}
2973
2974static struct pernet_operations __net_initdata tcp_sk_ops = {
b099ce26
EB
2975 .init = tcp_sk_init,
2976 .exit = tcp_sk_exit,
2977 .exit_batch = tcp_sk_exit_batch,
046ee902
DL
2978};
2979
9b0f976f 2980void __init tcp_v4_init(void)
1da177e4 2981{
5caea4ea 2982 inet_hashinfo_init(&tcp_hashinfo);
6a1b3054 2983 if (register_pernet_subsys(&tcp_sk_ops))
1da177e4 2984 panic("Failed to create the TCP control socket.\n");
1da177e4 2985}