Merge tag 'v3.10.105' into update
net/ipv4/tcp_ipv4.c
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Implementation of the Transmission Control Protocol(TCP).
7 *
1da177e4
LT
8 * IPv4 specific functions
9 *
10 *
11 * code split from:
12 * linux/ipv4/tcp.c
13 * linux/ipv4/tcp_input.c
14 * linux/ipv4/tcp_output.c
15 *
16 * See tcp.c for author information
17 *
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation; either version
21 * 2 of the License, or (at your option) any later version.
22 */
23
24/*
25 * Changes:
26 * David S. Miller : New socket lookup architecture.
27 * This code is dedicated to John Dyson.
28 * David S. Miller : Change semantics of established hash,
29 * half is devoted to TIME_WAIT sockets
30 * and the rest go in the other half.
31 * Andi Kleen : Add support for syncookies and fixed
32 * some bugs: ip options weren't passed to
33 * the TCP layer, missed a check for an
34 * ACK bit.
35 * Andi Kleen : Implemented fast path mtu discovery.
36 * Fixed many serious bugs in the
60236fdd 37 * request_sock handling and moved
1da177e4
LT
38 * most of it into the af independent code.
39 * Added tail drop and some other bugfixes.
caa20d9a 40 * Added new listen semantics.
1da177e4
LT
41 * Mike McLagan : Routing by source
42 * Juan Jose Ciarlante: ip_dynaddr bits
43 * Andi Kleen: various fixes.
44 * Vitaly E. Lavrov : Transparent proxy revived after year
45 * coma.
46 * Andi Kleen : Fix new listen.
47 * Andi Kleen : Fix accept error reporting.
48 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
49 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
50 * a single port at the same time.
51 */
52
afd46503 53#define pr_fmt(fmt) "TCP: " fmt
1da177e4 54
eb4dea58 55#include <linux/bottom_half.h>
1da177e4
LT
56#include <linux/types.h>
57#include <linux/fcntl.h>
58#include <linux/module.h>
59#include <linux/random.h>
60#include <linux/cache.h>
61#include <linux/jhash.h>
62#include <linux/init.h>
63#include <linux/times.h>
5a0e3ad6 64#include <linux/slab.h>
1da177e4 65
457c4cbc 66#include <net/net_namespace.h>
1da177e4 67#include <net/icmp.h>
304a1618 68#include <net/inet_hashtables.h>
1da177e4 69#include <net/tcp.h>
20380731 70#include <net/transp_v6.h>
1da177e4
LT
71#include <net/ipv6.h>
72#include <net/inet_common.h>
6d6ee43e 73#include <net/timewait_sock.h>
1da177e4 74#include <net/xfrm.h>
1a2449a8 75#include <net/netdma.h>
6e5714ea 76#include <net/secure_seq.h>
d1a4c0b3 77#include <net/tcp_memcontrol.h>
1da177e4
LT
78
79#include <linux/inet.h>
80#include <linux/ipv6.h>
81#include <linux/stddef.h>
82#include <linux/proc_fs.h>
83#include <linux/seq_file.h>
84
cfb6eeb4
YH
85#include <linux/crypto.h>
86#include <linux/scatterlist.h>
87
ab32ea5d
BH
88int sysctl_tcp_tw_reuse __read_mostly;
89int sysctl_tcp_low_latency __read_mostly;
4bc2f18b 90EXPORT_SYMBOL(sysctl_tcp_low_latency);
1da177e4 91
1da177e4 92
cfb6eeb4 93#ifdef CONFIG_TCP_MD5SIG
a915da9b 94static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
318cf7aa 95 __be32 daddr, __be32 saddr, const struct tcphdr *th);
cfb6eeb4
YH
96#endif
97
5caea4ea 98struct inet_hashinfo tcp_hashinfo;
4bc2f18b 99EXPORT_SYMBOL(tcp_hashinfo);
1da177e4 100
cf533ea5 101static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
1da177e4 102{
eddc9ec5
ACM
103 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
104 ip_hdr(skb)->saddr,
aa8223c7
ACM
105 tcp_hdr(skb)->dest,
106 tcp_hdr(skb)->source);
1da177e4
LT
107}
108
6d6ee43e
ACM
109int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
110{
111 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
112 struct tcp_sock *tp = tcp_sk(sk);
113
114 /* With PAWS, it is safe from the viewpoint
115 of data integrity. Even without PAWS it is safe provided sequence
116 spaces do not overlap i.e. at data rates <= 80Mbit/sec.
117
118 Actually, the idea is close to VJ's, only the timestamp cache is
119 held not per host but per port pair, and the TW bucket is used as the
120 state holder.
121
122 If TW bucket has been already destroyed we fall back to VJ's scheme
123 and use initial timestamp retrieved from peer table.
124 */
125 if (tcptw->tw_ts_recent_stamp &&
126 (twp == NULL || (sysctl_tcp_tw_reuse &&
9d729f72 127 get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
6d6ee43e
ACM
128 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
129 if (tp->write_seq == 0)
130 tp->write_seq = 1;
131 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
132 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
133 sock_hold(sktw);
134 return 1;
135 }
136
137 return 0;
138}
6d6ee43e
ACM
139EXPORT_SYMBOL_GPL(tcp_twsk_unique);
140
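For context, tcp_twsk_unique() is what makes the tcp_tw_reuse sysctl take effect: an outgoing connect() may take over a port pair that is still in TIME-WAIT, provided the stored timestamp is more than one second old so PAWS keeps old segments out. A minimal user-space sketch for checking whether the knob is set (the procfs path is standard; the helper name is illustrative, not part of this file):

#include <stdio.h>

/* Read net.ipv4.tcp_tw_reuse; returns the sysctl value, or -1 on error. */
static int tcp_tw_reuse_enabled(void)
{
        FILE *f = fopen("/proc/sys/net/ipv4/tcp_tw_reuse", "r");
        int val = -1;

        if (!f)
                return -1;
        if (fscanf(f, "%d", &val) != 1)
                val = -1;
        fclose(f);
        return val;
}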
1da177e4
LT
141/* This will initiate an outgoing connection. */
142int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
143{
2d7192d6 144 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
1da177e4
LT
145 struct inet_sock *inet = inet_sk(sk);
146 struct tcp_sock *tp = tcp_sk(sk);
dca8b089 147 __be16 orig_sport, orig_dport;
bada8adc 148 __be32 daddr, nexthop;
da905bd1 149 struct flowi4 *fl4;
2d7192d6 150 struct rtable *rt;
1da177e4 151 int err;
f6d8bd05 152 struct ip_options_rcu *inet_opt;
1da177e4
LT
153
154 if (addr_len < sizeof(struct sockaddr_in))
155 return -EINVAL;
156
157 if (usin->sin_family != AF_INET)
158 return -EAFNOSUPPORT;
159
160 nexthop = daddr = usin->sin_addr.s_addr;
f6d8bd05
ED
161 inet_opt = rcu_dereference_protected(inet->inet_opt,
162 sock_owned_by_user(sk));
163 if (inet_opt && inet_opt->opt.srr) {
1da177e4
LT
164 if (!daddr)
165 return -EINVAL;
f6d8bd05 166 nexthop = inet_opt->opt.faddr;
1da177e4
LT
167 }
168
dca8b089
DM
169 orig_sport = inet->inet_sport;
170 orig_dport = usin->sin_port;
da905bd1
DM
171 fl4 = &inet->cork.fl.u.ip4;
172 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
b23dd4fe
DM
173 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
174 IPPROTO_TCP,
175 orig_sport, orig_dport, sk, true);
176 if (IS_ERR(rt)) {
177 err = PTR_ERR(rt);
178 if (err == -ENETUNREACH)
7be560d6 179 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
b23dd4fe 180 return err;
584bdf8c 181 }
1da177e4
LT
182
183 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
184 ip_rt_put(rt);
185 return -ENETUNREACH;
186 }
187
f6d8bd05 188 if (!inet_opt || !inet_opt->opt.srr)
da905bd1 189 daddr = fl4->daddr;
1da177e4 190
c720c7e8 191 if (!inet->inet_saddr)
da905bd1 192 inet->inet_saddr = fl4->saddr;
c720c7e8 193 inet->inet_rcv_saddr = inet->inet_saddr;
1da177e4 194
c720c7e8 195 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
1da177e4
LT
196 /* Reset inherited state */
197 tp->rx_opt.ts_recent = 0;
198 tp->rx_opt.ts_recent_stamp = 0;
ee995283
PE
199 if (likely(!tp->repair))
200 tp->write_seq = 0;
1da177e4
LT
201 }
202
295ff7ed 203 if (tcp_death_row.sysctl_tw_recycle &&
81166dd6
DM
204 !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
205 tcp_fetch_timewait_stamp(sk, &rt->dst);
1da177e4 206
c720c7e8
ED
207 inet->inet_dport = usin->sin_port;
208 inet->inet_daddr = daddr;
1da177e4 209
d83d8461 210 inet_csk(sk)->icsk_ext_hdr_len = 0;
f6d8bd05
ED
211 if (inet_opt)
212 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1da177e4 213
bee7ca9e 214 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
1da177e4
LT
215
216 /* Socket identity is still unknown (sport may be zero).
217 * However we set state to SYN-SENT and, without releasing the socket
218 * lock, select a source port, enter ourselves into the hash tables and
219 * complete initialization after this.
220 */
221 tcp_set_state(sk, TCP_SYN_SENT);
a7f5e7f1 222 err = inet_hash_connect(&tcp_death_row, sk);
1da177e4
LT
223 if (err)
224 goto failure;
225
da905bd1 226 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
b23dd4fe
DM
227 inet->inet_sport, inet->inet_dport, sk);
228 if (IS_ERR(rt)) {
229 err = PTR_ERR(rt);
230 rt = NULL;
1da177e4 231 goto failure;
b23dd4fe 232 }
1da177e4 233 /* OK, now commit destination to socket. */
bcd76111 234 sk->sk_gso_type = SKB_GSO_TCPV4;
d8d1f30b 235 sk_setup_caps(sk, &rt->dst);
6fa3eb70 236 printk(KERN_INFO "[socket_conn]IPV4 socket[%lu] sport:%u \n", SOCK_INODE(sk->sk_socket)->i_ino, ntohs(inet->inet_sport));
ee995283 237 if (!tp->write_seq && likely(!tp->repair))
c720c7e8
ED
238 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
239 inet->inet_daddr,
240 inet->inet_sport,
1da177e4
LT
241 usin->sin_port);
242
c720c7e8 243 inet->inet_id = tp->write_seq ^ jiffies;
1da177e4 244
2b916477 245 err = tcp_connect(sk);
ee995283 246
1da177e4
LT
247 rt = NULL;
248 if (err)
249 goto failure;
250
251 return 0;
252
253failure:
7174259e
ACM
254 /*
255 * This unhashes the socket and releases the local port,
256 * if necessary.
257 */
1da177e4
LT
258 tcp_set_state(sk, TCP_CLOSE);
259 ip_rt_put(rt);
260 sk->sk_route_caps = 0;
c720c7e8 261 inet->inet_dport = 0;
1da177e4
LT
262 return err;
263}
4bc2f18b 264EXPORT_SYMBOL(tcp_v4_connect);
1da177e4 265
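For reference, tcp_v4_connect() is the kernel side of an ordinary connect() call on an AF_INET stream socket. A minimal user-space sketch that exercises this path (the 192.0.2.1:80 destination is only a placeholder from the TEST-NET range):

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        struct sockaddr_in dst;
        int fd = socket(AF_INET, SOCK_STREAM, 0);

        if (fd < 0) {
                perror("socket");
                return 1;
        }

        memset(&dst, 0, sizeof(dst));
        dst.sin_family = AF_INET;
        dst.sin_port = htons(80);
        inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);

        /* This ends up in tcp_v4_connect(): route lookup, source port
         * selection via inet_hash_connect(), then tcp_connect(). */
        if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0)
                perror("connect");

        close(fd);
        return 0;
}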
1da177e4 266/*
563d34d0
ED
267 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
268 * It can be called through tcp_release_cb() if socket was owned by user
269 * at the time tcp_v4_err() was called to handle ICMP message.
1da177e4 270 */
5f80f4d8 271void tcp_v4_mtu_reduced(struct sock *sk)
1da177e4
LT
272{
273 struct dst_entry *dst;
274 struct inet_sock *inet = inet_sk(sk);
563d34d0 275 u32 mtu = tcp_sk(sk)->mtu_info;
1da177e4 276
80d0a69f
DM
277 dst = inet_csk_update_pmtu(sk, mtu);
278 if (!dst)
1da177e4
LT
279 return;
280
1da177e4
LT
281 /* Something is about to go wrong... Remember the soft error
282 * for the case that this connection will not be able to recover.
283 */
284 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
285 sk->sk_err_soft = EMSGSIZE;
286
287 mtu = dst_mtu(dst);
288
289 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
d83d8461 290 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
1da177e4
LT
291 tcp_sync_mss(sk, mtu);
292
293 /* Resend the TCP packet because it's
294 * clear that the old packet has been
295 * dropped. This is the new "fast" path mtu
296 * discovery.
297 */
298 tcp_simple_retransmit(sk);
299 } /* else let the usual retransmit timer handle it */
300}
5f80f4d8 301EXPORT_SYMBOL(tcp_v4_mtu_reduced);
1da177e4 302
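tcp_v4_mtu_reduced() only clamps the MSS when the socket has not opted out of path MTU discovery (the inet->pmtudisc check above). From user space that policy can be changed with IP_MTU_DISCOVER, and the current path MTU of a connected socket can be read back with IP_MTU. A hedged sketch, assuming fd is a connected TCP socket (the helper name is illustrative):

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>      /* IP_MTU_DISCOVER, IP_PMTUDISC_DONT, IP_MTU */

static void show_pmtu_policy(int fd)
{
        int val = IP_PMTUDISC_DONT;   /* never set DF; skip PMTU clamping */
        int mtu = 0;
        socklen_t len = sizeof(mtu);

        if (setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &val, sizeof(val)) < 0)
                perror("IP_MTU_DISCOVER");

        /* IP_MTU is only valid on a connected socket. */
        if (getsockopt(fd, IPPROTO_IP, IP_MTU, &mtu, &len) == 0)
                printf("current path MTU: %d\n", mtu);
}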
55be7a9c
DM
303static void do_redirect(struct sk_buff *skb, struct sock *sk)
304{
305 struct dst_entry *dst = __sk_dst_check(sk, 0);
306
1ed5c48f 307 if (dst)
6700c270 308 dst->ops->redirect(dst, sk, skb);
55be7a9c
DM
309}
310
1da177e4
LT
311/*
312 * This routine is called by the ICMP module when it gets some
313 * sort of error condition. If err < 0 then the socket should
314 * be closed and the error returned to the user. If err > 0
315 * it's just the icmp type << 8 | icmp code. After adjustment
316 * header points to the first 8 bytes of the tcp header. We need
317 * to find the appropriate port.
318 *
319 * The locking strategy used here is very "optimistic". When
320 * someone else accesses the socket the ICMP is just dropped
321 * and for some paths there is no check at all.
322 * A more general error queue to queue errors for later handling
323 * is probably better.
324 *
325 */
326
4d1a2d9e 327void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
1da177e4 328{
b71d1d42 329 const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
4d1a2d9e 330 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
f1ecd5d9 331 struct inet_connection_sock *icsk;
1da177e4
LT
332 struct tcp_sock *tp;
333 struct inet_sock *inet;
4d1a2d9e
DL
334 const int type = icmp_hdr(icmp_skb)->type;
335 const int code = icmp_hdr(icmp_skb)->code;
1da177e4 336 struct sock *sk;
f1ecd5d9 337 struct sk_buff *skb;
168a8f58 338 struct request_sock *req;
1da177e4 339 __u32 seq;
f1ecd5d9 340 __u32 remaining;
1da177e4 341 int err;
4d1a2d9e 342 struct net *net = dev_net(icmp_skb->dev);
1da177e4 343
4d1a2d9e 344 if (icmp_skb->len < (iph->ihl << 2) + 8) {
dcfc23ca 345 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
1da177e4
LT
346 return;
347 }
348
fd54d716 349 sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
4d1a2d9e 350 iph->saddr, th->source, inet_iif(icmp_skb));
1da177e4 351 if (!sk) {
dcfc23ca 352 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
1da177e4
LT
353 return;
354 }
355 if (sk->sk_state == TCP_TIME_WAIT) {
9469c7b4 356 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
357 return;
358 }
359
360 bh_lock_sock(sk);
361 /* If too many ICMPs get dropped on busy
362 * servers this needs to be solved differently.
563d34d0
ED
363 * We do take care of the PMTU discovery (RFC1191) special case:
364 * we can receive locally generated ICMP messages while socket is held.
1da177e4 365 */
b74aa930
ED
366 if (sock_owned_by_user(sk)) {
367 if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
368 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
369 }
1da177e4
LT
370 if (sk->sk_state == TCP_CLOSE)
371 goto out;
372
97e3ecd1 373 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
374 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
375 goto out;
376 }
377
f1ecd5d9 378 icsk = inet_csk(sk);
1da177e4 379 tp = tcp_sk(sk);
168a8f58 380 req = tp->fastopen_rsk;
1da177e4
LT
381 seq = ntohl(th->seq);
382 if (sk->sk_state != TCP_LISTEN &&
168a8f58
JC
383 !between(seq, tp->snd_una, tp->snd_nxt) &&
384 (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
385 /* For a Fast Open socket, allow seq to be snt_isn. */
de0744af 386 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
1da177e4
LT
387 goto out;
388 }
389
390 switch (type) {
55be7a9c
DM
391 case ICMP_REDIRECT:
392 do_redirect(icmp_skb, sk);
393 goto out;
1da177e4
LT
394 case ICMP_SOURCE_QUENCH:
395 /* Just silently ignore these. */
396 goto out;
397 case ICMP_PARAMETERPROB:
398 err = EPROTO;
399 break;
400 case ICMP_DEST_UNREACH:
401 if (code > NR_ICMP_UNREACH)
402 goto out;
403
404 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
0d4f0608
ED
405 /* We are not interested in TCP_LISTEN and open_requests
406 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
407 * they should go through unfragmented).
408 */
409 if (sk->sk_state == TCP_LISTEN)
410 goto out;
411
563d34d0 412 tp->mtu_info = info;
144d56e9 413 if (!sock_owned_by_user(sk)) {
563d34d0 414 tcp_v4_mtu_reduced(sk);
144d56e9
ED
415 } else {
416 if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
417 sock_hold(sk);
418 }
1da177e4
LT
419 goto out;
420 }
421
422 err = icmp_err_convert[code].errno;
f1ecd5d9
DL
423 /* check if icmp_skb allows revert of backoff
424 * (see draft-zimmermann-tcp-lcd) */
425 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
426 break;
427 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
428 !icsk->icsk_backoff)
429 break;
430
168a8f58
JC
431 /* XXX (TFO) - revisit the following logic for TFO */
432
8f49c270
DM
433 if (sock_owned_by_user(sk))
434 break;
435
f1ecd5d9 436 icsk->icsk_backoff--;
9ad7c049
JC
437 inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
438 TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
f1ecd5d9
DL
439 tcp_bound_rto(sk);
440
441 skb = tcp_write_queue_head(sk);
442 BUG_ON(!skb);
443
444 remaining = icsk->icsk_rto - min(icsk->icsk_rto,
445 tcp_time_stamp - TCP_SKB_CB(skb)->when);
446
447 if (remaining) {
448 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
6fa3eb70 449 remaining, sysctl_tcp_rto_max);
f1ecd5d9
DL
450 } else {
451 /* RTO revert clocked out retransmission.
452 * Will retransmit now */
453 tcp_retransmit_timer(sk);
454 }
455
1da177e4
LT
456 break;
457 case ICMP_TIME_EXCEEDED:
458 err = EHOSTUNREACH;
459 break;
460 default:
461 goto out;
462 }
463
168a8f58
JC
464 /* XXX (TFO) - if it's a TFO socket and has been accepted, rather
465 * than following the TCP_SYN_RECV case and closing the socket,
466 * we ignore the ICMP error and keep trying like a fully established
467 * socket. Is this the right thing to do?
468 */
469 if (req && req->sk == NULL)
470 goto out;
471
1da177e4 472 switch (sk->sk_state) {
60236fdd 473 struct request_sock *req, **prev;
1da177e4
LT
474 case TCP_LISTEN:
475 if (sock_owned_by_user(sk))
476 goto out;
477
463c84b9
ACM
478 req = inet_csk_search_req(sk, &prev, th->dest,
479 iph->daddr, iph->saddr);
1da177e4
LT
480 if (!req)
481 goto out;
482
483 /* ICMPs are not backlogged, hence we cannot get
484 an established socket here.
485 */
547b792c 486 WARN_ON(req->sk);
1da177e4 487
2e6599cb 488 if (seq != tcp_rsk(req)->snt_isn) {
de0744af 489 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
1da177e4
LT
490 goto out;
491 }
492
493 /*
494 * Still in SYN_RECV, just remove it silently.
495 * There is no good way to pass the error to the newly
496 * created socket, and POSIX does not want network
497 * errors returned from accept().
498 */
463c84b9 499 inet_csk_reqsk_queue_drop(sk, req, prev);
848bf15f 500 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1da177e4
LT
501 goto out;
502
503 case TCP_SYN_SENT:
504 case TCP_SYN_RECV: /* Cannot happen.
168a8f58
JC
505 It can f.e. if SYNs crossed,
506 or Fast Open.
1da177e4
LT
507 */
508 if (!sock_owned_by_user(sk)) {
1da177e4
LT
509 sk->sk_err = err;
510
511 sk->sk_error_report(sk);
512
513 tcp_done(sk);
514 } else {
515 sk->sk_err_soft = err;
516 }
517 goto out;
518 }
519
520 /* If we've already connected we will keep trying
521 * until we time out, or the user gives up.
522 *
523 * RFC 1122 4.2.3.9 allows us to consider as hard errors
524 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
525 * but it is obsoleted by pmtu discovery).
526 *
527 * Note that in the modern internet, where routing is unreliable
528 * and broken firewalls sit in each dark corner sending random
529 * errors ordered by their masters, even these two messages finally lose
530 * their original sense (even Linux sends invalid PORT_UNREACHs)
531 *
532 * Now we are in compliance with RFCs.
533 * --ANK (980905)
534 */
535
536 inet = inet_sk(sk);
537 if (!sock_owned_by_user(sk) && inet->recverr) {
538 sk->sk_err = err;
539 sk->sk_error_report(sk);
540 } else { /* Only an error on timeout */
541 sk->sk_err_soft = err;
542 }
543
544out:
545 bh_unlock_sock(sk);
546 sock_put(sk);
547}
548
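As the tail of tcp_v4_err() shows, an ICMP-derived error only becomes a hard error right away when inet->recverr is set; otherwise it is parked in sk_err_soft until a timeout. A user-space sketch of the corresponding knobs, assuming fd is a connected TCP socket (the helper name is illustrative): IP_RECVERR opts in to immediate reporting, and SO_ERROR fetches the pending error.

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>

static void report_pending_error(int fd)
{
        int on = 1, err = 0;
        socklen_t len = sizeof(err);

        /* With IP_RECVERR set, errors such as EHOSTUNREACH derived from ICMP
         * are reported to the application instead of only on timeout. */
        if (setsockopt(fd, IPPROTO_IP, IP_RECVERR, &on, sizeof(on)) < 0)
                perror("IP_RECVERR");

        /* SO_ERROR returns and clears the pending socket error (sk->sk_err). */
        if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len) == 0 && err)
                fprintf(stderr, "pending socket error: %s\n", strerror(err));
}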
419f9f89
HX
549static void __tcp_v4_send_check(struct sk_buff *skb,
550 __be32 saddr, __be32 daddr)
1da177e4 551{
aa8223c7 552 struct tcphdr *th = tcp_hdr(skb);
1da177e4 553
84fa7933 554 if (skb->ip_summed == CHECKSUM_PARTIAL) {
419f9f89 555 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
663ead3b 556 skb->csum_start = skb_transport_header(skb) - skb->head;
ff1dcadb 557 skb->csum_offset = offsetof(struct tcphdr, check);
1da177e4 558 } else {
419f9f89 559 th->check = tcp_v4_check(skb->len, saddr, daddr,
07f0757a 560 csum_partial(th,
1da177e4
LT
561 th->doff << 2,
562 skb->csum));
563 }
564}
565
419f9f89 566/* This routine computes an IPv4 TCP checksum. */
bb296246 567void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
419f9f89 568{
cf533ea5 569 const struct inet_sock *inet = inet_sk(sk);
419f9f89
HX
570
571 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
572}
4bc2f18b 573EXPORT_SYMBOL(tcp_v4_send_check);
419f9f89 574
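__tcp_v4_send_check() folds the IPv4 pseudo-header (source, destination, zero-padded protocol, TCP length) into th->check, leaving the rest to the NIC for CHECKSUM_PARTIAL or summing the whole segment in software otherwise. A stand-alone sketch of the same RFC 1071 one's-complement sum, assuming seg points at a TCP header plus payload with th->check zeroed and saddr/daddr in network byte order:

#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <netinet/in.h>      /* IPPROTO_TCP, htons */

/* Accumulate 16-bit big-endian words into a one's-complement sum. */
static uint32_t csum_add(uint32_t sum, const void *data, size_t len)
{
        const uint8_t *p = data;

        while (len > 1) {
                sum += ((uint32_t)p[0] << 8) | p[1];
                p += 2;
                len -= 2;
        }
        if (len)                        /* odd trailing byte, zero-padded */
                sum += (uint32_t)p[0] << 8;
        return sum;
}

/* TCP checksum over pseudo-header + segment; returns the value to store
 * in th->check (network byte order). */
static uint16_t tcp_v4_checksum(uint32_t saddr, uint32_t daddr,
                                const void *seg, size_t len)
{
        uint8_t ph[12];
        uint32_t sum;

        memcpy(ph, &saddr, 4);          /* already network byte order */
        memcpy(ph + 4, &daddr, 4);
        ph[8]  = 0;
        ph[9]  = IPPROTO_TCP;
        ph[10] = (uint8_t)(len >> 8);   /* TCP header + payload length */
        ph[11] = (uint8_t)len;

        sum = csum_add(0, ph, sizeof(ph));
        sum = csum_add(sum, seg, len);

        while (sum >> 16)               /* fold carries, then complement */
                sum = (sum & 0xffff) + (sum >> 16);
        return htons((uint16_t)~sum);
}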
a430a43d
HX
575int tcp_v4_gso_send_check(struct sk_buff *skb)
576{
eddc9ec5 577 const struct iphdr *iph;
a430a43d
HX
578 struct tcphdr *th;
579
580 if (!pskb_may_pull(skb, sizeof(*th)))
581 return -EINVAL;
582
eddc9ec5 583 iph = ip_hdr(skb);
aa8223c7 584 th = tcp_hdr(skb);
a430a43d
HX
585
586 th->check = 0;
84fa7933 587 skb->ip_summed = CHECKSUM_PARTIAL;
419f9f89 588 __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
a430a43d
HX
589 return 0;
590}
591
1da177e4
LT
592/*
593 * This routine will send an RST to the other tcp.
594 *
595 * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
596 * for reset.
597 * Answer: if a packet caused an RST, it is not for a socket
598 * existing in our system; if it is matched to a socket,
599 * it is just a duplicate segment or a bug in the other side's TCP.
600 * So we build the reply based only on the parameters
601 * that arrived with the segment.
602 * Exception: precedence violation. We do not implement it in any case.
603 */
604
cfb6eeb4 605static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
1da177e4 606{
cf533ea5 607 const struct tcphdr *th = tcp_hdr(skb);
cfb6eeb4
YH
608 struct {
609 struct tcphdr th;
610#ifdef CONFIG_TCP_MD5SIG
714e85be 611 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
cfb6eeb4
YH
612#endif
613 } rep;
1da177e4 614 struct ip_reply_arg arg;
cfb6eeb4
YH
615#ifdef CONFIG_TCP_MD5SIG
616 struct tcp_md5sig_key *key;
658ddaaf
SL
617 const __u8 *hash_location = NULL;
618 unsigned char newhash[16];
619 int genhash;
620 struct sock *sk1 = NULL;
cfb6eeb4 621#endif
a86b1e30 622 struct net *net;
1da177e4
LT
623
624 /* Never send a reset in response to a reset. */
625 if (th->rst)
626 return;
627
511c3f92 628 if (skb_rtable(skb)->rt_type != RTN_LOCAL)
1da177e4
LT
629 return;
630
631 /* Swap the send and the receive. */
cfb6eeb4
YH
632 memset(&rep, 0, sizeof(rep));
633 rep.th.dest = th->source;
634 rep.th.source = th->dest;
635 rep.th.doff = sizeof(struct tcphdr) / 4;
636 rep.th.rst = 1;
1da177e4
LT
637
638 if (th->ack) {
cfb6eeb4 639 rep.th.seq = th->ack_seq;
1da177e4 640 } else {
cfb6eeb4
YH
641 rep.th.ack = 1;
642 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
643 skb->len - (th->doff << 2));
1da177e4
LT
644 }
645
7174259e 646 memset(&arg, 0, sizeof(arg));
cfb6eeb4
YH
647 arg.iov[0].iov_base = (unsigned char *)&rep;
648 arg.iov[0].iov_len = sizeof(rep.th);
649
650#ifdef CONFIG_TCP_MD5SIG
658ddaaf
SL
651 hash_location = tcp_parse_md5sig_option(th);
652 if (!sk && hash_location) {
653 /*
654 * active side is lost. Try to find listening socket through
655 * source port, and then find md5 key through listening socket.
656 * We do not lose any security here:
657 * the incoming packet is checked against the md5 hash of the found key;
658 * no RST is generated if the md5 hash doesn't match.
659 */
660 sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
da5e3630
TH
661 &tcp_hashinfo, ip_hdr(skb)->saddr,
662 th->source, ip_hdr(skb)->daddr,
658ddaaf
SL
663 ntohs(th->source), inet_iif(skb));
664 /* don't send rst if it can't find key */
665 if (!sk1)
666 return;
667 rcu_read_lock();
668 key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
669 &ip_hdr(skb)->saddr, AF_INET);
670 if (!key)
671 goto release_sk1;
672
673 genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
674 if (genhash || memcmp(hash_location, newhash, 16) != 0)
675 goto release_sk1;
676 } else {
677 key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
678 &ip_hdr(skb)->saddr,
679 AF_INET) : NULL;
680 }
681
cfb6eeb4
YH
682 if (key) {
683 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
684 (TCPOPT_NOP << 16) |
685 (TCPOPT_MD5SIG << 8) |
686 TCPOLEN_MD5SIG);
687 /* Update length and the length the header thinks exists */
688 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
689 rep.th.doff = arg.iov[0].iov_len / 4;
690
49a72dfb 691 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
78e645cb
IJ
692 key, ip_hdr(skb)->saddr,
693 ip_hdr(skb)->daddr, &rep.th);
cfb6eeb4
YH
694 }
695#endif
eddc9ec5
ACM
696 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
697 ip_hdr(skb)->saddr, /* XXX */
52cd5750 698 arg.iov[0].iov_len, IPPROTO_TCP, 0);
1da177e4 699 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
88ef4a5a 700 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
e2446eaa 701 /* When socket is gone, all binding information is lost.
4c675258
AK
702 * routing might fail in this case. No choice here, if we choose to force
703 * input interface, we will misroute in case of asymmetric route.
e2446eaa 704 */
4c675258
AK
705 if (sk)
706 arg.bound_dev_if = sk->sk_bound_dev_if;
1da177e4 707
adf30907 708 net = dev_net(skb_dst(skb)->dev);
66b13d99 709 arg.tos = ip_hdr(skb)->tos;
6bed3166
ED
710 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
711 skb, ip_hdr(skb)->saddr,
70e73416 712 ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
1da177e4 713
63231bdd
PE
714 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
715 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
658ddaaf
SL
716
717#ifdef CONFIG_TCP_MD5SIG
718release_sk1:
719 if (sk1) {
720 rcu_read_unlock();
721 sock_put(sk1);
722 }
723#endif
1da177e4
LT
724}
725
726/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
727 outside socket context, is certainly ugly. What can I do?
728 */
729
9501f972 730static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
ee684b6f 731 u32 win, u32 tsval, u32 tsecr, int oif,
88ef4a5a 732 struct tcp_md5sig_key *key,
66b13d99 733 int reply_flags, u8 tos)
1da177e4 734{
cf533ea5 735 const struct tcphdr *th = tcp_hdr(skb);
1da177e4
LT
736 struct {
737 struct tcphdr th;
714e85be 738 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
cfb6eeb4 739#ifdef CONFIG_TCP_MD5SIG
714e85be 740 + (TCPOLEN_MD5SIG_ALIGNED >> 2)
cfb6eeb4
YH
741#endif
742 ];
1da177e4
LT
743 } rep;
744 struct ip_reply_arg arg;
adf30907 745 struct net *net = dev_net(skb_dst(skb)->dev);
1da177e4
LT
746
747 memset(&rep.th, 0, sizeof(struct tcphdr));
7174259e 748 memset(&arg, 0, sizeof(arg));
1da177e4
LT
749
750 arg.iov[0].iov_base = (unsigned char *)&rep;
751 arg.iov[0].iov_len = sizeof(rep.th);
ee684b6f 752 if (tsecr) {
cfb6eeb4
YH
753 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
754 (TCPOPT_TIMESTAMP << 8) |
755 TCPOLEN_TIMESTAMP);
ee684b6f
AV
756 rep.opt[1] = htonl(tsval);
757 rep.opt[2] = htonl(tsecr);
cb48cfe8 758 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
1da177e4
LT
759 }
760
761 /* Swap the send and the receive. */
762 rep.th.dest = th->source;
763 rep.th.source = th->dest;
764 rep.th.doff = arg.iov[0].iov_len / 4;
765 rep.th.seq = htonl(seq);
766 rep.th.ack_seq = htonl(ack);
767 rep.th.ack = 1;
768 rep.th.window = htons(win);
769
cfb6eeb4 770#ifdef CONFIG_TCP_MD5SIG
cfb6eeb4 771 if (key) {
ee684b6f 772 int offset = (tsecr) ? 3 : 0;
cfb6eeb4
YH
773
774 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
775 (TCPOPT_NOP << 16) |
776 (TCPOPT_MD5SIG << 8) |
777 TCPOLEN_MD5SIG);
778 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
779 rep.th.doff = arg.iov[0].iov_len/4;
780
49a72dfb 781 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
90b7e112
AL
782 key, ip_hdr(skb)->saddr,
783 ip_hdr(skb)->daddr, &rep.th);
cfb6eeb4
YH
784 }
785#endif
88ef4a5a 786 arg.flags = reply_flags;
eddc9ec5
ACM
787 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
788 ip_hdr(skb)->saddr, /* XXX */
1da177e4
LT
789 arg.iov[0].iov_len, IPPROTO_TCP, 0);
790 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
9501f972
YH
791 if (oif)
792 arg.bound_dev_if = oif;
66b13d99 793 arg.tos = tos;
6bed3166
ED
794 ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
795 skb, ip_hdr(skb)->saddr,
70e73416 796 ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
1da177e4 797
63231bdd 798 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
1da177e4
LT
799}
800
801static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
802{
8feaf0c0 803 struct inet_timewait_sock *tw = inet_twsk(sk);
cfb6eeb4 804 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1da177e4 805
9501f972 806 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
7174259e 807 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
ee684b6f 808 tcp_time_stamp + tcptw->tw_ts_offset,
9501f972
YH
809 tcptw->tw_ts_recent,
810 tw->tw_bound_dev_if,
88ef4a5a 811 tcp_twsk_md5_key(tcptw),
66b13d99
ED
812 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
813 tw->tw_tos
9501f972 814 );
1da177e4 815
8feaf0c0 816 inet_twsk_put(tw);
1da177e4
LT
817}
818
6edafaaf 819static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
7174259e 820 struct request_sock *req)
1da177e4 821{
168a8f58
JC
822 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
823 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
824 */
825 tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
826 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1c50d3ae
ED
827 tcp_rsk(req)->rcv_nxt,
828 req->rcv_wnd >> inet_rsk(req)->rcv_wscale,
ee684b6f 829 tcp_time_stamp,
9501f972
YH
830 req->ts_recent,
831 0,
a915da9b
ED
832 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
833 AF_INET),
66b13d99
ED
834 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
835 ip_hdr(skb)->tos);
1da177e4
LT
836}
837
1da177e4 838/*
9bf1d83e 839 * Send a SYN-ACK after having received a SYN.
60236fdd 840 * This still operates on a request_sock only, not on a big
1da177e4
LT
841 * socket.
842 */
72659ecc
OP
843static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
844 struct request_sock *req,
7586eceb
ED
845 u16 queue_mapping,
846 bool nocache)
1da177e4 847{
2e6599cb 848 const struct inet_request_sock *ireq = inet_rsk(req);
6bd023f3 849 struct flowi4 fl4;
1da177e4
LT
850 int err = -1;
851 struct sk_buff * skb;
852
853 /* First, grab a route. */
ba3f7f04 854 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
fd80eb94 855 return -1;
1da177e4 856
1a2c6181 857 skb = tcp_make_synack(sk, dst, req, NULL);
1da177e4
LT
858
859 if (skb) {
419f9f89 860 __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
1da177e4 861
fff32699 862 skb_set_queue_mapping(skb, queue_mapping);
2e6599cb
ACM
863 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
864 ireq->rmt_addr,
865 ireq->opt);
b9df3cb8 866 err = net_xmit_eval(err);
016818d0
NC
867 if (!tcp_rsk(req)->snt_synack && !err)
868 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1da177e4
LT
869 }
870
1da177e4
LT
871 return err;
872}
873
1a2c6181 874static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
fd80eb94 875{
1a2c6181 876 int res = tcp_v4_send_synack(sk, NULL, req, 0, false);
e6c022a4
ED
877
878 if (!res)
879 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
880 return res;
fd80eb94
DL
881}
882
1da177e4 883/*
60236fdd 884 * IPv4 request_sock destructor.
1da177e4 885 */
60236fdd 886static void tcp_v4_reqsk_destructor(struct request_sock *req)
1da177e4 887{
a51482bd 888 kfree(inet_rsk(req)->opt);
1da177e4
LT
889}
890
946cedcc 891/*
a2a385d6 892 * Return true if a syncookie should be sent
946cedcc 893 */
a2a385d6 894bool tcp_syn_flood_action(struct sock *sk,
946cedcc
ED
895 const struct sk_buff *skb,
896 const char *proto)
1da177e4 897{
946cedcc 898 const char *msg = "Dropping request";
a2a385d6 899 bool want_cookie = false;
946cedcc
ED
900 struct listen_sock *lopt;
901
902
1da177e4 903
2a1d4bd4 904#ifdef CONFIG_SYN_COOKIES
946cedcc 905 if (sysctl_tcp_syncookies) {
2a1d4bd4 906 msg = "Sending cookies";
a2a385d6 907 want_cookie = true;
946cedcc
ED
908 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
909 } else
80e40daa 910#endif
946cedcc
ED
911 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
912
913 lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
914 if (!lopt->synflood_warned) {
915 lopt->synflood_warned = 1;
afd46503 916 pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
946cedcc
ED
917 proto, ntohs(tcp_hdr(skb)->dest), msg);
918 }
919 return want_cookie;
2a1d4bd4 920}
946cedcc 921EXPORT_SYMBOL(tcp_syn_flood_action);
1da177e4
LT
922
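tcp_syn_flood_action() only switches to cookies when the net.ipv4.tcp_syncookies sysctl is non-zero (and CONFIG_SYN_COOKIES is built in); otherwise overflowing SYNs are simply dropped and counted. A small sketch for toggling that knob from a privileged process (standard procfs path; the helper name is illustrative):

#include <stdio.h>

/* Write 0 or 1 to net.ipv4.tcp_syncookies; requires root. */
static int set_tcp_syncookies(int enable)
{
        FILE *f = fopen("/proc/sys/net/ipv4/tcp_syncookies", "w");

        if (!f)
                return -1;
        fprintf(f, "%d\n", enable ? 1 : 0);
        return fclose(f);
}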
923/*
60236fdd 924 * Save and compile IPv4 options into the request_sock if needed.
1da177e4 925 */
5dff747b 926static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
1da177e4 927{
f6d8bd05
ED
928 const struct ip_options *opt = &(IPCB(skb)->opt);
929 struct ip_options_rcu *dopt = NULL;
1da177e4
LT
930
931 if (opt && opt->optlen) {
f6d8bd05
ED
932 int opt_size = sizeof(*dopt) + opt->optlen;
933
1da177e4
LT
934 dopt = kmalloc(opt_size, GFP_ATOMIC);
935 if (dopt) {
f6d8bd05 936 if (ip_options_echo(&dopt->opt, skb)) {
1da177e4
LT
937 kfree(dopt);
938 dopt = NULL;
939 }
940 }
941 }
942 return dopt;
943}
944
cfb6eeb4
YH
945#ifdef CONFIG_TCP_MD5SIG
946/*
947 * RFC2385 MD5 checksumming requires a mapping of
948 * IP address->MD5 Key.
949 * We need to maintain these in the sk structure.
950 */
951
952/* Find the Key structure for an address. */
a915da9b
ED
953struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
954 const union tcp_md5_addr *addr,
955 int family)
cfb6eeb4
YH
956{
957 struct tcp_sock *tp = tcp_sk(sk);
a915da9b 958 struct tcp_md5sig_key *key;
a915da9b 959 unsigned int size = sizeof(struct in_addr);
a8afca03 960 struct tcp_md5sig_info *md5sig;
cfb6eeb4 961
a8afca03
ED
962 /* caller either holds rcu_read_lock() or socket lock */
963 md5sig = rcu_dereference_check(tp->md5sig_info,
b4fb05ea
ED
964 sock_owned_by_user(sk) ||
965 lockdep_is_held(&sk->sk_lock.slock));
a8afca03 966 if (!md5sig)
cfb6eeb4 967 return NULL;
a915da9b
ED
968#if IS_ENABLED(CONFIG_IPV6)
969 if (family == AF_INET6)
970 size = sizeof(struct in6_addr);
971#endif
b67bfe0d 972 hlist_for_each_entry_rcu(key, &md5sig->head, node) {
a915da9b
ED
973 if (key->family != family)
974 continue;
975 if (!memcmp(&key->addr, addr, size))
976 return key;
cfb6eeb4
YH
977 }
978 return NULL;
979}
a915da9b 980EXPORT_SYMBOL(tcp_md5_do_lookup);
cfb6eeb4
YH
981
982struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
983 struct sock *addr_sk)
984{
a915da9b
ED
985 union tcp_md5_addr *addr;
986
987 addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
988 return tcp_md5_do_lookup(sk, addr, AF_INET);
cfb6eeb4 989}
cfb6eeb4
YH
990EXPORT_SYMBOL(tcp_v4_md5_lookup);
991
f5b99bcd
AB
992static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
993 struct request_sock *req)
cfb6eeb4 994{
a915da9b
ED
995 union tcp_md5_addr *addr;
996
997 addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
998 return tcp_md5_do_lookup(sk, addr, AF_INET);
cfb6eeb4
YH
999}
1000
1001/* This can be called on a newly created socket, from other files */
a915da9b
ED
1002int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
1003 int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
cfb6eeb4
YH
1004{
1005 /* Add Key to the list */
b0a713e9 1006 struct tcp_md5sig_key *key;
cfb6eeb4 1007 struct tcp_sock *tp = tcp_sk(sk);
a915da9b 1008 struct tcp_md5sig_info *md5sig;
cfb6eeb4 1009
c0353c7b 1010 key = tcp_md5_do_lookup(sk, addr, family);
cfb6eeb4
YH
1011 if (key) {
1012 /* Pre-existing entry - just update that one. */
a915da9b 1013 memcpy(key->key, newkey, newkeylen);
b0a713e9 1014 key->keylen = newkeylen;
a915da9b
ED
1015 return 0;
1016 }
260fcbeb 1017
a8afca03 1018 md5sig = rcu_dereference_protected(tp->md5sig_info,
98d2ffdc
ED
1019 sock_owned_by_user(sk) ||
1020 lockdep_is_held(&sk->sk_lock.slock));
a915da9b
ED
1021 if (!md5sig) {
1022 md5sig = kmalloc(sizeof(*md5sig), gfp);
1023 if (!md5sig)
cfb6eeb4 1024 return -ENOMEM;
cfb6eeb4 1025
a915da9b
ED
1026 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1027 INIT_HLIST_HEAD(&md5sig->head);
a8afca03 1028 rcu_assign_pointer(tp->md5sig_info, md5sig);
a915da9b 1029 }
cfb6eeb4 1030
5f3d9cb2 1031 key = sock_kmalloc(sk, sizeof(*key), gfp);
a915da9b
ED
1032 if (!key)
1033 return -ENOMEM;
1034 if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
5f3d9cb2 1035 sock_kfree_s(sk, key, sizeof(*key));
a915da9b 1036 return -ENOMEM;
cfb6eeb4 1037 }
a915da9b
ED
1038
1039 memcpy(key->key, newkey, newkeylen);
1040 key->keylen = newkeylen;
1041 key->family = family;
1042 memcpy(&key->addr, addr,
1043 (family == AF_INET6) ? sizeof(struct in6_addr) :
1044 sizeof(struct in_addr));
1045 hlist_add_head_rcu(&key->node, &md5sig->head);
cfb6eeb4
YH
1046 return 0;
1047}
a915da9b 1048EXPORT_SYMBOL(tcp_md5_do_add);
cfb6eeb4 1049
a915da9b 1050int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
cfb6eeb4
YH
1051{
1052 struct tcp_sock *tp = tcp_sk(sk);
a915da9b 1053 struct tcp_md5sig_key *key;
a8afca03 1054 struct tcp_md5sig_info *md5sig;
a915da9b 1055
c0353c7b 1056 key = tcp_md5_do_lookup(sk, addr, family);
a915da9b
ED
1057 if (!key)
1058 return -ENOENT;
1059 hlist_del_rcu(&key->node);
5f3d9cb2 1060 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
a915da9b 1061 kfree_rcu(key, rcu);
a8afca03
ED
1062 md5sig = rcu_dereference_protected(tp->md5sig_info,
1063 sock_owned_by_user(sk));
1064 if (hlist_empty(&md5sig->head))
a915da9b
ED
1065 tcp_free_md5sig_pool();
1066 return 0;
cfb6eeb4 1067}
a915da9b 1068EXPORT_SYMBOL(tcp_md5_do_del);
cfb6eeb4 1069
e0683e70 1070static void tcp_clear_md5_list(struct sock *sk)
cfb6eeb4
YH
1071{
1072 struct tcp_sock *tp = tcp_sk(sk);
a915da9b 1073 struct tcp_md5sig_key *key;
b67bfe0d 1074 struct hlist_node *n;
a8afca03 1075 struct tcp_md5sig_info *md5sig;
cfb6eeb4 1076
a8afca03
ED
1077 md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1078
1079 if (!hlist_empty(&md5sig->head))
cfb6eeb4 1080 tcp_free_md5sig_pool();
b67bfe0d 1081 hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
a915da9b 1082 hlist_del_rcu(&key->node);
5f3d9cb2 1083 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
a915da9b 1084 kfree_rcu(key, rcu);
cfb6eeb4
YH
1085 }
1086}
1087
7174259e
ACM
1088static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1089 int optlen)
cfb6eeb4
YH
1090{
1091 struct tcp_md5sig cmd;
1092 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
cfb6eeb4
YH
1093
1094 if (optlen < sizeof(cmd))
1095 return -EINVAL;
1096
7174259e 1097 if (copy_from_user(&cmd, optval, sizeof(cmd)))
cfb6eeb4
YH
1098 return -EFAULT;
1099
1100 if (sin->sin_family != AF_INET)
1101 return -EINVAL;
1102
a8afca03 1103 if (!cmd.tcpm_key || !cmd.tcpm_keylen)
a915da9b
ED
1104 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1105 AF_INET);
cfb6eeb4
YH
1106
1107 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1108 return -EINVAL;
1109
a915da9b
ED
1110 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1111 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1112 GFP_KERNEL);
cfb6eeb4
YH
1113}
1114
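tcp_v4_parse_md5_keys() is the kernel half of the TCP_MD5SIG socket option (RFC 2385, mostly used to protect BGP sessions). A user-space sketch of installing a key for one peer; the helper name and key are illustrative, and a zero key length removes the entry, matching the tcp_md5_do_del() branch above:

#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>              /* struct tcp_md5sig, TCP_MD5SIG */

/* Install (or, with keylen == 0, remove) an MD5 key for the given peer. */
static int set_tcp_md5_key(int fd, const struct sockaddr_in *peer,
                           const void *key, int keylen)
{
        struct tcp_md5sig md5;

        if (keylen > TCP_MD5SIG_MAXKEYLEN)
                return -1;

        memset(&md5, 0, sizeof(md5));
        memcpy(&md5.tcpm_addr, peer, sizeof(*peer));
        md5.tcpm_keylen = keylen;
        memcpy(md5.tcpm_key, key, keylen);

        return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
}

The same key has to be configured on the peer as well, otherwise its inbound MD5 check (tcp_v4_inbound_md5_hash() below) will drop every segment.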
49a72dfb
AL
1115static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1116 __be32 daddr, __be32 saddr, int nbytes)
cfb6eeb4 1117{
cfb6eeb4 1118 struct tcp4_pseudohdr *bp;
49a72dfb 1119 struct scatterlist sg;
cfb6eeb4
YH
1120
1121 bp = &hp->md5_blk.ip4;
cfb6eeb4
YH
1122
1123 /*
49a72dfb 1124 * 1. the TCP pseudo-header (in the order: source IP address,
cfb6eeb4
YH
1125 * destination IP address, zero-padded protocol number, and
1126 * segment length)
1127 */
1128 bp->saddr = saddr;
1129 bp->daddr = daddr;
1130 bp->pad = 0;
076fb722 1131 bp->protocol = IPPROTO_TCP;
49a72dfb 1132 bp->len = cpu_to_be16(nbytes);
c7da57a1 1133
49a72dfb
AL
1134 sg_init_one(&sg, bp, sizeof(*bp));
1135 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1136}
1137
a915da9b 1138static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
318cf7aa 1139 __be32 daddr, __be32 saddr, const struct tcphdr *th)
49a72dfb
AL
1140{
1141 struct tcp_md5sig_pool *hp;
1142 struct hash_desc *desc;
1143
1144 hp = tcp_get_md5sig_pool();
1145 if (!hp)
1146 goto clear_hash_noput;
1147 desc = &hp->md5_desc;
1148
1149 if (crypto_hash_init(desc))
1150 goto clear_hash;
1151 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1152 goto clear_hash;
1153 if (tcp_md5_hash_header(hp, th))
1154 goto clear_hash;
1155 if (tcp_md5_hash_key(hp, key))
1156 goto clear_hash;
1157 if (crypto_hash_final(desc, md5_hash))
cfb6eeb4
YH
1158 goto clear_hash;
1159
cfb6eeb4 1160 tcp_put_md5sig_pool();
cfb6eeb4 1161 return 0;
49a72dfb 1162
cfb6eeb4
YH
1163clear_hash:
1164 tcp_put_md5sig_pool();
1165clear_hash_noput:
1166 memset(md5_hash, 0, 16);
49a72dfb 1167 return 1;
cfb6eeb4
YH
1168}
1169
49a72dfb 1170int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
318cf7aa
ED
1171 const struct sock *sk, const struct request_sock *req,
1172 const struct sk_buff *skb)
cfb6eeb4 1173{
49a72dfb
AL
1174 struct tcp_md5sig_pool *hp;
1175 struct hash_desc *desc;
318cf7aa 1176 const struct tcphdr *th = tcp_hdr(skb);
cfb6eeb4
YH
1177 __be32 saddr, daddr;
1178
1179 if (sk) {
c720c7e8
ED
1180 saddr = inet_sk(sk)->inet_saddr;
1181 daddr = inet_sk(sk)->inet_daddr;
49a72dfb
AL
1182 } else if (req) {
1183 saddr = inet_rsk(req)->loc_addr;
1184 daddr = inet_rsk(req)->rmt_addr;
cfb6eeb4 1185 } else {
49a72dfb
AL
1186 const struct iphdr *iph = ip_hdr(skb);
1187 saddr = iph->saddr;
1188 daddr = iph->daddr;
cfb6eeb4 1189 }
49a72dfb
AL
1190
1191 hp = tcp_get_md5sig_pool();
1192 if (!hp)
1193 goto clear_hash_noput;
1194 desc = &hp->md5_desc;
1195
1196 if (crypto_hash_init(desc))
1197 goto clear_hash;
1198
1199 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1200 goto clear_hash;
1201 if (tcp_md5_hash_header(hp, th))
1202 goto clear_hash;
1203 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1204 goto clear_hash;
1205 if (tcp_md5_hash_key(hp, key))
1206 goto clear_hash;
1207 if (crypto_hash_final(desc, md5_hash))
1208 goto clear_hash;
1209
1210 tcp_put_md5sig_pool();
1211 return 0;
1212
1213clear_hash:
1214 tcp_put_md5sig_pool();
1215clear_hash_noput:
1216 memset(md5_hash, 0, 16);
1217 return 1;
cfb6eeb4 1218}
49a72dfb 1219EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
cfb6eeb4 1220
a2a385d6 1221static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
cfb6eeb4
YH
1222{
1223 /*
1224 * This gets called for each TCP segment that arrives
1225 * so we want to be efficient.
1226 * We have 3 drop cases:
1227 * o No MD5 hash and one expected.
1228 * o MD5 hash and we're not expecting one.
1229 * o MD5 hash and it's wrong.
1230 */
cf533ea5 1231 const __u8 *hash_location = NULL;
cfb6eeb4 1232 struct tcp_md5sig_key *hash_expected;
eddc9ec5 1233 const struct iphdr *iph = ip_hdr(skb);
cf533ea5 1234 const struct tcphdr *th = tcp_hdr(skb);
cfb6eeb4 1235 int genhash;
cfb6eeb4
YH
1236 unsigned char newhash[16];
1237
a915da9b
ED
1238 hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1239 AF_INET);
7d5d5525 1240 hash_location = tcp_parse_md5sig_option(th);
cfb6eeb4 1241
cfb6eeb4
YH
1242 /* We've parsed the options - do we have a hash? */
1243 if (!hash_expected && !hash_location)
a2a385d6 1244 return false;
cfb6eeb4
YH
1245
1246 if (hash_expected && !hash_location) {
785957d3 1247 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
a2a385d6 1248 return true;
cfb6eeb4
YH
1249 }
1250
1251 if (!hash_expected && hash_location) {
785957d3 1252 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
a2a385d6 1253 return true;
cfb6eeb4
YH
1254 }
1255
1256 /* Okay, so this is hash_expected and hash_location -
1257 * so we need to calculate the checksum.
1258 */
49a72dfb
AL
1259 genhash = tcp_v4_md5_hash_skb(newhash,
1260 hash_expected,
1261 NULL, NULL, skb);
cfb6eeb4
YH
1262
1263 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
e87cc472
JP
1264 net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1265 &iph->saddr, ntohs(th->source),
1266 &iph->daddr, ntohs(th->dest),
1267 genhash ? " tcp_v4_calc_md5_hash failed"
1268 : "");
a2a385d6 1269 return true;
cfb6eeb4 1270 }
a2a385d6 1271 return false;
cfb6eeb4
YH
1272}
1273
1274#endif
1275
72a3effa 1276struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1da177e4 1277 .family = PF_INET,
2e6599cb 1278 .obj_size = sizeof(struct tcp_request_sock),
72659ecc 1279 .rtx_syn_ack = tcp_v4_rtx_synack,
60236fdd
ACM
1280 .send_ack = tcp_v4_reqsk_send_ack,
1281 .destructor = tcp_v4_reqsk_destructor,
1da177e4 1282 .send_reset = tcp_v4_send_reset,
72659ecc 1283 .syn_ack_timeout = tcp_syn_ack_timeout,
1da177e4
LT
1284};
1285
cfb6eeb4 1286#ifdef CONFIG_TCP_MD5SIG
b2e4b3de 1287static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
cfb6eeb4 1288 .md5_lookup = tcp_v4_reqsk_md5_lookup,
e3afe7b7 1289 .calc_md5_hash = tcp_v4_md5_hash_skb,
cfb6eeb4 1290};
b6332e6c 1291#endif
cfb6eeb4 1292
168a8f58
JC
1293static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
1294 struct request_sock *req,
1295 struct tcp_fastopen_cookie *foc,
1296 struct tcp_fastopen_cookie *valid_foc)
1297{
1298 bool skip_cookie = false;
1299 struct fastopen_queue *fastopenq;
1300
1301 if (likely(!fastopen_cookie_present(foc))) {
1302 /* See include/net/tcp.h for the meaning of these knobs */
1303 if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
1304 ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
1305 (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
1306 skip_cookie = true; /* no cookie to validate */
1307 else
1308 return false;
1309 }
1310 fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
1311 /* A FO option is present; bump the counter. */
1312 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
1313
1314 /* Make sure the listener has enabled fastopen, and we don't
1315 * exceed the max # of pending TFO requests allowed before trying
1316 * to validate the cookie, in order to avoid burning CPU cycles
1317 * unnecessarily.
1318 *
1319 * XXX (TFO) - The implication of checking the max_qlen before
1320 * processing a cookie request is that clients can't differentiate
1321 * between qlen overflow causing Fast Open to be disabled
1322 * temporarily vs a server not supporting Fast Open at all.
1323 */
1324 if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
1325 fastopenq == NULL || fastopenq->max_qlen == 0)
1326 return false;
1327
1328 if (fastopenq->qlen >= fastopenq->max_qlen) {
1329 struct request_sock *req1;
1330 spin_lock(&fastopenq->lock);
1331 req1 = fastopenq->rskq_rst_head;
1332 if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
1333 spin_unlock(&fastopenq->lock);
1334 NET_INC_STATS_BH(sock_net(sk),
1335 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
1336 /* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
1337 foc->len = -1;
1338 return false;
1339 }
1340 fastopenq->rskq_rst_head = req1->dl_next;
1341 fastopenq->qlen--;
1342 spin_unlock(&fastopenq->lock);
1343 reqsk_free(req1);
1344 }
1345 if (skip_cookie) {
1346 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1347 return true;
1348 }
1349 if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
1350 if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
1351 tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
1352 if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
1353 memcmp(&foc->val[0], &valid_foc->val[0],
1354 TCP_FASTOPEN_COOKIE_SIZE) != 0)
1355 return false;
1356 valid_foc->len = -1;
1357 }
1358 /* Acknowledge the data received from the peer. */
1359 tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1360 return true;
1361 } else if (foc->len == 0) { /* Client requesting a cookie */
1362 tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
1363 NET_INC_STATS_BH(sock_net(sk),
1364 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
1365 } else {
1366 /* Client sent a cookie with wrong size. Treat it
1367 * the same as invalid and return a valid one.
1368 */
1369 tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
1370 }
1371 return false;
1372}
1373
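tcp_fastopen_check() is gated by the bits in the net.ipv4.tcp_fastopen sysctl (TFO_SERVER_ENABLE and friends) and by the per-listener queue length set with the TCP_FASTOPEN socket option, which becomes the fastopenq->max_qlen checked above. A hedged user-space sketch of both ends; option values are guarded in case the libc headers predate them, and the helper names are illustrative:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#ifndef TCP_FASTOPEN
#define TCP_FASTOPEN 23             /* absent from older libc headers */
#endif
#ifndef MSG_FASTOPEN
#define MSG_FASTOPEN 0x20000000
#endif

/* Server: allow up to 16 pending Fast Open requests on this listener
 * (this becomes fastopenq->max_qlen). */
static int enable_tfo_listener(int listen_fd)
{
        int qlen = 16;

        return setsockopt(listen_fd, IPPROTO_TCP, TCP_FASTOPEN,
                          &qlen, sizeof(qlen));
}

/* Client: send data in the SYN; the first call fetches a cookie, later
 * calls carry payload immediately. */
static ssize_t tfo_connect_send(int fd, const struct sockaddr_in *dst,
                                const void *buf, size_t len)
{
        return sendto(fd, buf, len, MSG_FASTOPEN,
                      (const struct sockaddr *)dst, sizeof(*dst));
}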
1374static int tcp_v4_conn_req_fastopen(struct sock *sk,
1375 struct sk_buff *skb,
1376 struct sk_buff *skb_synack,
1a2c6181 1377 struct request_sock *req)
168a8f58
JC
1378{
1379 struct tcp_sock *tp = tcp_sk(sk);
1380 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
1381 const struct inet_request_sock *ireq = inet_rsk(req);
1382 struct sock *child;
016818d0 1383 int err;
168a8f58 1384
e6c022a4
ED
1385 req->num_retrans = 0;
1386 req->num_timeout = 0;
168a8f58
JC
1387 req->sk = NULL;
1388
1389 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
1390 if (child == NULL) {
1391 NET_INC_STATS_BH(sock_net(sk),
1392 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
1393 kfree_skb(skb_synack);
1394 return -1;
1395 }
016818d0
NC
1396 err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
1397 ireq->rmt_addr, ireq->opt);
1398 err = net_xmit_eval(err);
1399 if (!err)
1400 tcp_rsk(req)->snt_synack = tcp_time_stamp;
168a8f58
JC
1401 /* XXX (TFO) - is it ok to ignore error and continue? */
1402
1403 spin_lock(&queue->fastopenq->lock);
1404 queue->fastopenq->qlen++;
1405 spin_unlock(&queue->fastopenq->lock);
1406
1407 /* Initialize the child socket. Have to fix some values to take
1408 * into account that the child is a Fast Open socket and is created
1409 * only out of the bits carried in the SYN packet.
1410 */
1411 tp = tcp_sk(child);
1412
1413 tp->fastopen_rsk = req;
1414 /* Do a hold on the listener sk so that if the listener is being
1415 * closed, the child that has been accepted can live on and still
1416 * access listen_lock.
1417 */
1418 sock_hold(sk);
1419 tcp_rsk(req)->listener = sk;
1420
1421 /* RFC1323: The window in SYN & SYN/ACK segments is never
1422 * scaled. So correct it appropriately.
1423 */
1424 tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
1425
1426 /* Activate the retrans timer so that SYNACK can be retransmitted.
1427 * The request socket is not added to the SYN table of the parent
1428 * because it's been added to the accept queue directly.
1429 */
1430 inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
6fa3eb70 1431 TCP_TIMEOUT_INIT, sysctl_tcp_rto_max);
168a8f58
JC
1432
1433 /* Add the child socket directly into the accept queue */
1434 inet_csk_reqsk_queue_add(sk, req, child);
1435
1436 /* Now finish processing the fastopen child socket. */
1437 inet_csk(child)->icsk_af_ops->rebuild_header(child);
1438 tcp_init_congestion_control(child);
1439 tcp_mtup_init(child);
1440 tcp_init_buffer_space(child);
1441 tcp_init_metrics(child);
1442
1443 /* Queue the data carried in the SYN packet. We need to first
1444 * bump skb's refcnt because the caller will attempt to free it.
1445 *
1446 * XXX (TFO) - we honor a zero-payload TFO request for now.
1447 * (Any reason not to?)
1448 */
1449 if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
1450 /* Don't queue the skb if there is no payload in SYN.
1451 * XXX (TFO) - How about SYN+FIN?
1452 */
1453 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1454 } else {
1455 skb = skb_get(skb);
1456 skb_dst_drop(skb);
1457 __skb_pull(skb, tcp_hdr(skb)->doff * 4);
1458 skb_set_owner_r(skb, child);
1459 __skb_queue_tail(&child->sk_receive_queue, skb);
1460 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
6f73601e 1461 tp->syn_data_acked = 1;
168a8f58
JC
1462 }
1463 sk->sk_data_ready(sk, 0);
1464 bh_unlock_sock(child);
1465 sock_put(child);
1466 WARN_ON(req->sk == NULL);
1467 return 0;
1468}
1469
1da177e4
LT
1470int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1471{
1472 struct tcp_options_received tmp_opt;
60236fdd 1473 struct request_sock *req;
e6b4d113 1474 struct inet_request_sock *ireq;
4957faad 1475 struct tcp_sock *tp = tcp_sk(sk);
e6b4d113 1476 struct dst_entry *dst = NULL;
eddc9ec5
ACM
1477 __be32 saddr = ip_hdr(skb)->saddr;
1478 __be32 daddr = ip_hdr(skb)->daddr;
1da177e4 1479 __u32 isn = TCP_SKB_CB(skb)->when;
a2a385d6 1480 bool want_cookie = false;
168a8f58
JC
1481 struct flowi4 fl4;
1482 struct tcp_fastopen_cookie foc = { .len = -1 };
1483 struct tcp_fastopen_cookie valid_foc = { .len = -1 };
1484 struct sk_buff *skb_synack;
1485 int do_fastopen;
1da177e4
LT
1486
1487 /* Never answer to SYNs send to broadcast or multicast */
511c3f92 1488 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1da177e4
LT
1489 goto drop;
1490
1491 /* TW buckets are converted to open requests without
1492 * limitations, they conserve resources and peer is
1493 * evidently real one.
1494 */
463c84b9 1495 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
946cedcc
ED
1496 want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1497 if (!want_cookie)
1498 goto drop;
1da177e4
LT
1499 }
1500
1501 /* Accept backlog is full. If we have already queued enough
1502 * warm entries in the syn queue, drop the request. It is better than
1503 * clogging syn queue with openreqs with exponentially increasing
1504 * timeout.
1505 */
2aeef18d
NS
1506 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
1507 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1da177e4 1508 goto drop;
2aeef18d 1509 }
1da177e4 1510
ce4a7d0d 1511 req = inet_reqsk_alloc(&tcp_request_sock_ops);
1da177e4
LT
1512 if (!req)
1513 goto drop;
1514
cfb6eeb4
YH
1515#ifdef CONFIG_TCP_MD5SIG
1516 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1517#endif
1518
1da177e4 1519 tcp_clear_options(&tmp_opt);
bee7ca9e 1520 tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
4957faad 1521 tmp_opt.user_mss = tp->rx_opt.user_mss;
1a2c6181 1522 tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
1da177e4 1523
4dfc2817 1524 if (want_cookie && !tmp_opt.saw_tstamp)
1da177e4 1525 tcp_clear_options(&tmp_opt);
1da177e4 1526
1da177e4 1527 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1da177e4
LT
1528 tcp_openreq_init(req, &tmp_opt, skb);
1529
bb5b7c11
DM
1530 ireq = inet_rsk(req);
1531 ireq->loc_addr = daddr;
1532 ireq->rmt_addr = saddr;
1533 ireq->no_srccheck = inet_sk(sk)->transparent;
5dff747b 1534 ireq->opt = tcp_v4_save_options(skb);
6fa3eb70 1535 ireq->ir_mark = inet_request_mark(sk, skb);
bb5b7c11 1536
284904aa 1537 if (security_inet_conn_request(sk, skb, req))
bb5b7c11 1538 goto drop_and_free;
284904aa 1539
172d69e6 1540 if (!want_cookie || tmp_opt.tstamp_ok)
5d134f1c 1541 TCP_ECN_create_request(req, skb, sock_net(sk));
1da177e4
LT
1542
1543 if (want_cookie) {
1da177e4 1544 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
172d69e6 1545 req->cookie_ts = tmp_opt.tstamp_ok;
1da177e4 1546 } else if (!isn) {
1da177e4
LT
1547 /* VJ's idea. We save last timestamp seen
1548 * from the destination in peer table, when entering
1549 * state TIME-WAIT, and check against it before
1550 * accepting new connection request.
1551 *
1552 * If "isn" is not zero, this request hit alive
1553 * timewait bucket, so that all the necessary checks
1554 * are made in the function processing timewait state.
1555 */
1556 if (tmp_opt.saw_tstamp &&
295ff7ed 1557 tcp_death_row.sysctl_tw_recycle &&
ba3f7f04 1558 (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
81166dd6
DM
1559 fl4.daddr == saddr) {
1560 if (!tcp_peer_is_proven(req, dst, true)) {
de0744af 1561 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
7cd04fa7 1562 goto drop_and_release;
1da177e4
LT
1563 }
1564 }
1565 /* Kill the following clause, if you dislike this way. */
1566 else if (!sysctl_tcp_syncookies &&
463c84b9 1567 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1da177e4 1568 (sysctl_max_syn_backlog >> 2)) &&
81166dd6 1569 !tcp_peer_is_proven(req, dst, false)) {
1da177e4
LT
1570 /* Without syncookies, the last quarter of
1571 * the backlog is filled with destinations
1572 * proven to be alive.
1573 * It means that we continue to communicate
1574 * with destinations already remembered
1575 * at the moment of the synflood.
1576 */
afd46503 1577 LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
673d57e7 1578 &saddr, ntohs(tcp_hdr(skb)->source));
7cd04fa7 1579 goto drop_and_release;
1da177e4
LT
1580 }
1581
a94f723d 1582 isn = tcp_v4_init_sequence(skb);
1da177e4 1583 }
2e6599cb 1584 tcp_rsk(req)->snt_isn = isn;
1da177e4 1585
168a8f58
JC
1586 if (dst == NULL) {
1587 dst = inet_csk_route_req(sk, &fl4, req);
1588 if (dst == NULL)
1589 goto drop_and_free;
1590 }
1591 do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
1592
1593 /* We don't call tcp_v4_send_synack() directly because we need
1594 * to make sure a child socket can be created successfully before
1595 * sending back synack!
1596 *
1597 * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
1598 * (or better yet, call tcp_send_synack() in the child context
1599 * directly, but will have to fix bunch of other code first)
1600 * after syn_recv_sock() except one will need to first fix the
1601 * latter to remove its dependency on the current implementation
1602 * of tcp_v4_send_synack()->tcp_select_initial_window().
1603 */
1604 skb_synack = tcp_make_synack(sk, dst, req,
168a8f58
JC
1605 fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
1606
1607 if (skb_synack) {
1608 __tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr);
1609 skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
1610 } else
1611 goto drop_and_free;
1612
1613 if (likely(!do_fastopen)) {
1614 int err;
1615 err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
1616 ireq->rmt_addr, ireq->opt);
1617 err = net_xmit_eval(err);
1618 if (err || want_cookie)
1619 goto drop_and_free;
1620
016818d0 1621 tcp_rsk(req)->snt_synack = tcp_time_stamp;
168a8f58
JC
1622 tcp_rsk(req)->listener = NULL;
1623 /* Add the request_sock to the SYN table */
1624 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1625 if (fastopen_cookie_present(&foc) && foc.len != 0)
1626 NET_INC_STATS_BH(sock_net(sk),
1627 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
1a2c6181 1628 } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))
1da177e4
LT
1629 goto drop_and_free;
1630
1da177e4
LT
1631 return 0;
1632
7cd04fa7
DL
1633drop_and_release:
1634 dst_release(dst);
1da177e4 1635drop_and_free:
60236fdd 1636 reqsk_free(req);
1da177e4 1637drop:
848bf15f 1638 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1da177e4
LT
1639 return 0;
1640}
4bc2f18b 1641EXPORT_SYMBOL(tcp_v4_conn_request);
1da177e4
LT
1642
1643
1644/*
1645 * The three way handshake has completed - we got a valid synack -
1646 * now create the new socket.
1647 */
1648struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
60236fdd 1649 struct request_sock *req,
1da177e4
LT
1650 struct dst_entry *dst)
1651{
2e6599cb 1652 struct inet_request_sock *ireq;
1da177e4
LT
1653 struct inet_sock *newinet;
1654 struct tcp_sock *newtp;
1655 struct sock *newsk;
cfb6eeb4
YH
1656#ifdef CONFIG_TCP_MD5SIG
1657 struct tcp_md5sig_key *key;
1658#endif
f6d8bd05 1659 struct ip_options_rcu *inet_opt;
1da177e4
LT
1660
1661 if (sk_acceptq_is_full(sk))
1662 goto exit_overflow;
1663
1da177e4
LT
1664 newsk = tcp_create_openreq_child(sk, req, skb);
1665 if (!newsk)
093d2823 1666 goto exit_nonewsk;
1da177e4 1667
bcd76111 1668 newsk->sk_gso_type = SKB_GSO_TCPV4;
fae6ef87 1669 inet_sk_rx_dst_set(newsk, skb);
1da177e4
LT
1670
1671 newtp = tcp_sk(newsk);
1672 newinet = inet_sk(newsk);
2e6599cb 1673 ireq = inet_rsk(req);
c720c7e8
ED
1674 newinet->inet_daddr = ireq->rmt_addr;
1675 newinet->inet_rcv_saddr = ireq->loc_addr;
1676 newinet->inet_saddr = ireq->loc_addr;
f6d8bd05
ED
1677 inet_opt = ireq->opt;
1678 rcu_assign_pointer(newinet->inet_opt, inet_opt);
2e6599cb 1679 ireq->opt = NULL;
463c84b9 1680 newinet->mc_index = inet_iif(skb);
eddc9ec5 1681 newinet->mc_ttl = ip_hdr(skb)->ttl;
4c507d28 1682 newinet->rcv_tos = ip_hdr(skb)->tos;
d83d8461 1683 inet_csk(newsk)->icsk_ext_hdr_len = 0;
f6d8bd05
ED
1684 if (inet_opt)
1685 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
c720c7e8 1686 newinet->inet_id = newtp->write_seq ^ jiffies;
1da177e4 1687
dfd25fff
ED
1688 if (!dst) {
1689 dst = inet_csk_route_child_sock(sk, newsk, req);
1690 if (!dst)
1691 goto put_and_exit;
1692 } else {
1693 /* syncookie case : see end of cookie_v4_check() */
1694 }
0e734419
DM
1695 sk_setup_caps(newsk, dst);
1696
5d424d5a 1697 tcp_mtup_init(newsk);
1da177e4 1698 tcp_sync_mss(newsk, dst_mtu(dst));
0dbaee3b 1699 newtp->advmss = dst_metric_advmss(dst);
f5fff5dc
TQ
1700 if (tcp_sk(sk)->rx_opt.user_mss &&
1701 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1702 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1703
1da177e4 1704 tcp_initialize_rcv_mss(newsk);
623df484 1705 tcp_synack_rtt_meas(newsk, req);
e6c022a4 1706 newtp->total_retrans = req->num_retrans;
1da177e4 1707
cfb6eeb4
YH
1708#ifdef CONFIG_TCP_MD5SIG
1709 /* Copy over the MD5 key from the original socket */
a915da9b
ED
1710 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1711 AF_INET);
c720c7e8 1712 if (key != NULL) {
cfb6eeb4
YH
1713 /*
1714 * We're using one, so create a matching key
1715 * on the newsk structure. If we fail to get
1716 * memory, then we end up not copying the key
1717 * across. Shucks.
1718 */
a915da9b
ED
1719 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1720 AF_INET, key->key, key->keylen, GFP_ATOMIC);
a465419b 1721 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
cfb6eeb4
YH
1722 }
1723#endif
1724
0e734419
DM
1725 if (__inet_inherit_port(sk, newsk) < 0)
1726 goto put_and_exit;
9327f705 1727 __inet_hash_nolisten(newsk, NULL);
1da177e4
LT
1728
1729 return newsk;
1730
1731exit_overflow:
de0744af 1732 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
093d2823
BS
1733exit_nonewsk:
1734 dst_release(dst);
1da177e4 1735exit:
de0744af 1736 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1da177e4 1737 return NULL;
0e734419 1738put_and_exit:
e337e24d
CP
1739 inet_csk_prepare_forced_close(newsk);
1740 tcp_done(newsk);
0e734419 1741 goto exit;
1da177e4 1742}
4bc2f18b 1743EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1da177e4
LT
1744
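/* Resolve which socket should handle a segment that arrived on a listening
 * socket: first check the SYN table for a pending request_sock, then look
 * for an already established child, and finally (for non-SYN segments) let
 * syncookie validation have a go. Falls back to the listener itself.
 */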
1745static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1746{
aa8223c7 1747 struct tcphdr *th = tcp_hdr(skb);
eddc9ec5 1748 const struct iphdr *iph = ip_hdr(skb);
1da177e4 1749 struct sock *nsk;
60236fdd 1750 struct request_sock **prev;
1da177e4 1751 /* Find possible connection requests. */
463c84b9
ACM
1752 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1753 iph->saddr, iph->daddr);
1da177e4 1754 if (req)
8336886f 1755 return tcp_check_req(sk, skb, req, prev, false);
1da177e4 1756
3b1e0a65 1757 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
c67499c0 1758 th->source, iph->daddr, th->dest, inet_iif(skb));
1da177e4
LT
1759
1760 if (nsk) {
1761 if (nsk->sk_state != TCP_TIME_WAIT) {
1762 bh_lock_sock(nsk);
1763 return nsk;
1764 }
9469c7b4 1765 inet_twsk_put(inet_twsk(nsk));
1da177e4
LT
1766 return NULL;
1767 }
1768
1769#ifdef CONFIG_SYN_COOKIES
af9b4738 1770 if (!th->syn)
1da177e4
LT
1771 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1772#endif
1773 return sk;
1774}
1775
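/* Verify or prepare the TCP checksum of an incoming segment: with
 * CHECKSUM_COMPLETE the hardware sum is checked right away; otherwise the
 * pseudo-header sum is seeded and short packets (<= 76 bytes) are verified
 * in full immediately, deferring the rest until the data is copied.
 */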
b51655b9 1776static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1da177e4 1777{
eddc9ec5
ACM
1778 const struct iphdr *iph = ip_hdr(skb);
1779
84fa7933 1780 if (skb->ip_summed == CHECKSUM_COMPLETE) {
eddc9ec5
ACM
1781 if (!tcp_v4_check(skb->len, iph->saddr,
1782 iph->daddr, skb->csum)) {
fb286bb2 1783 skb->ip_summed = CHECKSUM_UNNECESSARY;
1da177e4 1784 return 0;
fb286bb2 1785 }
1da177e4 1786 }
fb286bb2 1787
eddc9ec5 1788 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
fb286bb2
HX
1789 skb->len, IPPROTO_TCP, 0);
1790
1da177e4 1791 if (skb->len <= 76) {
fb286bb2 1792 return __skb_checksum_complete(skb);
1da177e4
LT
1793 }
1794 return 0;
1795}
1796
1797
1798	/* The socket must have its spinlock held when we get
1799	 * here.
1800 *
1801 * We have a potential double-lock case here, so even when
1802 * doing backlog processing we use the BH locking scheme.
1803 * This is because we cannot sleep with the original spinlock
1804 * held.
1805 */
1806int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1807{
cfb6eeb4
YH
1808 struct sock *rsk;
1809#ifdef CONFIG_TCP_MD5SIG
1810 /*
1811 * We really want to reject the packet as early as possible
1812 * if:
1813	 * o We're expecting an MD5'd packet and there is no MD5 TCP option
1814 * o There is an MD5 option and we're not expecting one
1815 */
7174259e 1816 if (tcp_v4_inbound_md5_hash(sk, skb))
cfb6eeb4
YH
1817 goto discard;
1818#endif
1819
1da177e4 1820 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
404e0a8b
ED
1821 struct dst_entry *dst = sk->sk_rx_dst;
1822
bdeab991 1823 sock_rps_save_rxhash(sk, skb);
404e0a8b 1824 if (dst) {
505fbcf0
ED
1825 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1826 dst->ops->check(dst, 0) == NULL) {
92101b3b
DM
1827 dst_release(dst);
1828 sk->sk_rx_dst = NULL;
1829 }
1830 }
aa8223c7 1831 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
cfb6eeb4 1832 rsk = sk;
1da177e4 1833 goto reset;
cfb6eeb4 1834 }
1da177e4
LT
1835 return 0;
1836 }
1837
ab6a5bb6 1838 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1da177e4
LT
1839 goto csum_err;
1840
1841 if (sk->sk_state == TCP_LISTEN) {
1842 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1843 if (!nsk)
1844 goto discard;
1845
1846 if (nsk != sk) {
bdeab991 1847 sock_rps_save_rxhash(nsk, skb);
cfb6eeb4
YH
1848 if (tcp_child_process(sk, nsk, skb)) {
1849 rsk = nsk;
1da177e4 1850 goto reset;
cfb6eeb4 1851 }
1da177e4
LT
1852 return 0;
1853 }
ca55158c 1854 } else
bdeab991 1855 sock_rps_save_rxhash(sk, skb);
ca55158c 1856
aa8223c7 1857 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
cfb6eeb4 1858 rsk = sk;
1da177e4 1859 goto reset;
cfb6eeb4 1860 }
1da177e4
LT
1861 return 0;
1862
1863reset:
cfb6eeb4 1864 tcp_v4_send_reset(rsk, skb);
1da177e4
LT
1865discard:
1866 kfree_skb(skb);
1867 /* Be careful here. If this function gets more complicated and
1868 * gcc suffers from register pressure on the x86, sk (in %ebx)
1869 * might be destroyed here. This current version compiles correctly,
1870 * but you have been warned.
1871 */
1872 return 0;
1873
1874csum_err:
6a5dc9e5 1875 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
63231bdd 1876 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1da177e4
LT
1877 goto discard;
1878}
4bc2f18b 1879EXPORT_SYMBOL(tcp_v4_do_rcv);
1da177e4 1880
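/* Early demux, called before routing: look up an established socket by the
 * 4-tuple and attach it to the skb, so a still-valid cached rx dst for this
 * input interface can be reused instead of doing a full route lookup.
 */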
160eb5a6 1881void tcp_v4_early_demux(struct sk_buff *skb)
41063e9d 1882{
41063e9d
DM
1883 const struct iphdr *iph;
1884 const struct tcphdr *th;
1885 struct sock *sk;
41063e9d 1886
41063e9d 1887 if (skb->pkt_type != PACKET_HOST)
160eb5a6 1888 return;
41063e9d 1889
45f00f99 1890 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
160eb5a6 1891 return;
41063e9d
DM
1892
1893 iph = ip_hdr(skb);
45f00f99 1894 th = tcp_hdr(skb);
41063e9d
DM
1895
1896 if (th->doff < sizeof(struct tcphdr) / 4)
160eb5a6 1897 return;
41063e9d 1898
45f00f99 1899 sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
41063e9d 1900 iph->saddr, th->source,
7011d085 1901 iph->daddr, ntohs(th->dest),
9cb429d6 1902 skb->skb_iif);
41063e9d
DM
1903 if (sk) {
1904 skb->sk = sk;
1905 skb->destructor = sock_edemux;
1906 if (sk->sk_state != TCP_TIME_WAIT) {
1b946e38 1907 struct dst_entry *dst = ACCESS_ONCE(sk->sk_rx_dst);
505fbcf0 1908
41063e9d
DM
1909 if (dst)
1910 dst = dst_check(dst, 0);
92101b3b 1911 if (dst &&
505fbcf0 1912 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
92101b3b 1913 skb_dst_set_noref(skb, dst);
41063e9d
DM
1914 }
1915 }
41063e9d
DM
1916}
1917
b2fb4f54
ED
1918/* Packet is added to VJ-style prequeue for processing in process
1919 * context, if a reader task is waiting. Apparently, this exciting
1920 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1921 * failed somewhere. Latency? Burstiness? Well, at least now we will
1922	 * see why it failed. 8)8) --ANK
1923 *
1924 */
1925bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1926{
1927 struct tcp_sock *tp = tcp_sk(sk);
1928
1929 if (sysctl_tcp_low_latency || !tp->ucopy.task)
1930 return false;
1931
1932 if (skb->len <= tcp_hdrlen(skb) &&
1933 skb_queue_len(&tp->ucopy.prequeue) == 0)
1934 return false;
1935
58717686 1936 skb_dst_force(skb);
b2fb4f54
ED
1937 __skb_queue_tail(&tp->ucopy.prequeue, skb);
1938 tp->ucopy.memory += skb->truesize;
1939 if (tp->ucopy.memory > sk->sk_rcvbuf) {
1940 struct sk_buff *skb1;
1941
1942 BUG_ON(sock_owned_by_user(sk));
1943
1944 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1945 sk_backlog_rcv(sk, skb1);
1946 NET_INC_STATS_BH(sock_net(sk),
1947 LINUX_MIB_TCPPREQUEUEDROPPED);
1948 }
1949
1950 tp->ucopy.memory = 0;
1951 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1952 wake_up_interruptible_sync_poll(sk_sleep(sk),
1953 POLLIN | POLLRDNORM | POLLRDBAND);
1954 if (!inet_csk_ack_scheduled(sk))
1955 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1956 (3 * tcp_rto_min(sk)) / 4,
6fa3eb70 1957 sysctl_tcp_rto_max);
b2fb4f54
ED
1958 }
1959 return true;
1960}
1961EXPORT_SYMBOL(tcp_prequeue);
1962
56325d9f
ED
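/* Run the attached socket filter on the segment. The filter may trim the
 * skb, but never below the TCP header; end_seq is adjusted to account for
 * any payload the filter removed.
 */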
1963int tcp_filter(struct sock *sk, struct sk_buff *skb)
1964{
1965 struct tcphdr *th = (struct tcphdr *)skb->data;
1966 unsigned int eaten = skb->len;
1967 int err;
1968
1969 err = sk_filter_trim_cap(sk, skb, th->doff * 4);
1970 if (!err) {
1971 eaten -= skb->len;
1972 TCP_SKB_CB(skb)->end_seq -= eaten;
1973 }
1974 return err;
1975}
1976EXPORT_SYMBOL(tcp_filter);
1977
1da177e4
LT
1978/*
1979 * From tcp_input.c
1980 */
1981
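/* Main IPv4 receive entry point: validate the header and checksum, fill in
 * the TCP control block, look up the owning socket and apply min-TTL, xfrm
 * policy and the socket filter, then process the segment directly, via the
 * prequeue or via the backlog depending on who owns the socket. TIME_WAIT
 * sockets are diverted to tcp_timewait_state_process().
 */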
1982int tcp_v4_rcv(struct sk_buff *skb)
1983{
eddc9ec5 1984 const struct iphdr *iph;
cf533ea5 1985 const struct tcphdr *th;
1da177e4
LT
1986 struct sock *sk;
1987 int ret;
a86b1e30 1988 struct net *net = dev_net(skb->dev);
1da177e4
LT
1989
1990 if (skb->pkt_type != PACKET_HOST)
1991 goto discard_it;
1992
1993 /* Count it even if it's bad */
63231bdd 1994 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1da177e4
LT
1995
1996 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1997 goto discard_it;
1998
aa8223c7 1999 th = tcp_hdr(skb);
1da177e4
LT
2000
2001 if (th->doff < sizeof(struct tcphdr) / 4)
2002 goto bad_packet;
2003 if (!pskb_may_pull(skb, th->doff * 4))
2004 goto discard_it;
2005
2006 /* An explanation is required here, I think.
2007 * Packet length and doff are validated by header prediction,
caa20d9a	 2008	 * provided the case of th->doff==0 is eliminated.
1da177e4 2009 * So, we defer the checks. */
60476372 2010 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
6a5dc9e5 2011 goto csum_error;
1da177e4 2012
aa8223c7 2013 th = tcp_hdr(skb);
eddc9ec5 2014 iph = ip_hdr(skb);
1da177e4
LT
2015 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
2016 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
2017 skb->len - th->doff * 4);
2018 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
2019 TCP_SKB_CB(skb)->when = 0;
b82d1bb4 2020 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1da177e4
LT
2021 TCP_SKB_CB(skb)->sacked = 0;
2022
9a1f27c4 2023 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1da177e4
LT
2024 if (!sk)
2025 goto no_tcp_socket;
2026
bb134d5d
ED
2027process:
2028 if (sk->sk_state == TCP_TIME_WAIT)
2029 goto do_time_wait;
2030
6cce09f8
ED
2031 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
2032 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
d218d111 2033 goto discard_and_relse;
6cce09f8 2034 }
d218d111 2035
1da177e4
LT
2036 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
2037 goto discard_and_relse;
b59c2701 2038 nf_reset(skb);
1da177e4 2039
56325d9f 2040 if (tcp_filter(sk, skb))
1da177e4 2041 goto discard_and_relse;
56325d9f
ED
2042 th = (const struct tcphdr *)skb->data;
2043 iph = ip_hdr(skb);
1da177e4
LT
2044
2045 skb->dev = NULL;
2046
c6366184 2047 bh_lock_sock_nested(sk);
1da177e4
LT
2048 ret = 0;
2049 if (!sock_owned_by_user(sk)) {
1a2449a8
CL
2050#ifdef CONFIG_NET_DMA
2051 struct tcp_sock *tp = tcp_sk(sk);
2052 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
a2bd1140 2053 tp->ucopy.dma_chan = net_dma_find_channel();
1a2449a8 2054 if (tp->ucopy.dma_chan)
1da177e4 2055 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8
CL
2056 else
2057#endif
2058 {
2059 if (!tcp_prequeue(sk, skb))
ae8d7f88 2060 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8 2061 }
da882c1f
ED
2062 } else if (unlikely(sk_add_backlog(sk, skb,
2063 sk->sk_rcvbuf + sk->sk_sndbuf))) {
6b03a53a 2064 bh_unlock_sock(sk);
6cce09f8 2065 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
6b03a53a
ZY
2066 goto discard_and_relse;
2067 }
1da177e4
LT
2068 bh_unlock_sock(sk);
2069
2070 sock_put(sk);
2071
2072 return ret;
2073
2074no_tcp_socket:
2075 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
2076 goto discard_it;
2077
2078 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
6a5dc9e5
ED
2079csum_error:
2080 TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
1da177e4 2081bad_packet:
63231bdd 2082 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1da177e4 2083 } else {
cfb6eeb4 2084 tcp_v4_send_reset(NULL, skb);
1da177e4
LT
2085 }
2086
2087discard_it:
2088 /* Discard frame. */
2089 kfree_skb(skb);
e905a9ed 2090 return 0;
1da177e4
LT
2091
2092discard_and_relse:
2093 sock_put(sk);
2094 goto discard_it;
2095
2096do_time_wait:
2097 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
9469c7b4 2098 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
2099 goto discard_it;
2100 }
2101
6a5dc9e5 2102 if (skb->len < (th->doff << 2)) {
9469c7b4 2103 inet_twsk_put(inet_twsk(sk));
6a5dc9e5
ED
2104 goto bad_packet;
2105 }
2106 if (tcp_checksum_complete(skb)) {
2107 inet_twsk_put(inet_twsk(sk));
2108 goto csum_error;
1da177e4 2109 }
9469c7b4 2110 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1da177e4 2111 case TCP_TW_SYN: {
c346dca1 2112 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
c67499c0 2113 &tcp_hashinfo,
da5e3630 2114 iph->saddr, th->source,
eddc9ec5 2115 iph->daddr, th->dest,
463c84b9 2116 inet_iif(skb));
1da177e4 2117 if (sk2) {
9469c7b4
YH
2118 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
2119 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
2120 sk = sk2;
2121 goto process;
2122 }
2123 /* Fall through to ACK */
2124 }
2125 case TCP_TW_ACK:
2126 tcp_v4_timewait_ack(sk, skb);
2127 break;
2128 case TCP_TW_RST:
2129 goto no_tcp_socket;
2130 case TCP_TW_SUCCESS:;
2131 }
2132 goto discard_it;
2133}
2134
ccb7c410
DM
2135static struct timewait_sock_ops tcp_timewait_sock_ops = {
2136 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
2137 .twsk_unique = tcp_twsk_unique,
2138 .twsk_destructor= tcp_twsk_destructor,
ccb7c410 2139};
1da177e4 2140
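/* Cache the incoming route and input interface on the socket; the receive
 * fast path and early demux use this to skip route lookups for later
 * segments on the same connection.
 */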
63d02d15 2141void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
5d299f3d
ED
2142{
2143 struct dst_entry *dst = skb_dst(skb);
2144
2145 dst_hold(dst);
2146 sk->sk_rx_dst = dst;
2147 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
2148}
63d02d15 2149EXPORT_SYMBOL(inet_sk_rx_dst_set);
5d299f3d 2150
3b401a81 2151const struct inet_connection_sock_af_ops ipv4_specific = {
543d9cfe
ACM
2152 .queue_xmit = ip_queue_xmit,
2153 .send_check = tcp_v4_send_check,
2154 .rebuild_header = inet_sk_rebuild_header,
5d299f3d 2155 .sk_rx_dst_set = inet_sk_rx_dst_set,
543d9cfe
ACM
2156 .conn_request = tcp_v4_conn_request,
2157 .syn_recv_sock = tcp_v4_syn_recv_sock,
543d9cfe
ACM
2158 .net_header_len = sizeof(struct iphdr),
2159 .setsockopt = ip_setsockopt,
2160 .getsockopt = ip_getsockopt,
2161 .addr2sockaddr = inet_csk_addr2sockaddr,
2162 .sockaddr_len = sizeof(struct sockaddr_in),
ab1e0a13 2163 .bind_conflict = inet_csk_bind_conflict,
3fdadf7d 2164#ifdef CONFIG_COMPAT
543d9cfe
ACM
2165 .compat_setsockopt = compat_ip_setsockopt,
2166 .compat_getsockopt = compat_ip_getsockopt,
3fdadf7d 2167#endif
5f80f4d8 2168 .mtu_reduced = tcp_v4_mtu_reduced,
1da177e4 2169};
4bc2f18b 2170EXPORT_SYMBOL(ipv4_specific);
1da177e4 2171
cfb6eeb4 2172#ifdef CONFIG_TCP_MD5SIG
b2e4b3de 2173static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
cfb6eeb4 2174 .md5_lookup = tcp_v4_md5_lookup,
49a72dfb 2175 .calc_md5_hash = tcp_v4_md5_hash_skb,
cfb6eeb4 2176 .md5_parse = tcp_v4_parse_md5_keys,
cfb6eeb4 2177};
b6332e6c 2178#endif
cfb6eeb4 2179
1da177e4
LT
2180	/* NOTE: A lot of things are set to zero explicitly by the call to
2181	 * sk_alloc(), so they need not be done here.
2182	 */
2183static int tcp_v4_init_sock(struct sock *sk)
2184{
6687e988 2185 struct inet_connection_sock *icsk = inet_csk(sk);
1da177e4 2186
900f65d3 2187 tcp_init_sock(sk);
6fa3eb70 2188 icsk->icsk_MMSRB = 0;
1da177e4 2189
8292a17a 2190 icsk->icsk_af_ops = &ipv4_specific;
900f65d3 2191
cfb6eeb4 2192#ifdef CONFIG_TCP_MD5SIG
ac807fa8 2193 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
cfb6eeb4 2194#endif
1da177e4 2195
1da177e4
LT
2196 return 0;
2197}
2198
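/* Release all per-socket TCP state: stop the timers, purge the write,
 * out-of-order and prequeue queues, drop any MD5 keys, free a pending
 * fastopen request and give back the bind bucket and memory accounting
 * references.
 */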
7d06b2e0 2199void tcp_v4_destroy_sock(struct sock *sk)
1da177e4
LT
2200{
2201 struct tcp_sock *tp = tcp_sk(sk);
2202
2203 tcp_clear_xmit_timers(sk);
2204
6687e988 2205 tcp_cleanup_congestion_control(sk);
317a76f9 2206
1da177e4 2207 /* Cleanup up the write buffer. */
fe067e8a 2208 tcp_write_queue_purge(sk);
1da177e4
LT
2209
2210 /* Cleans up our, hopefully empty, out_of_order_queue. */
e905a9ed 2211 __skb_queue_purge(&tp->out_of_order_queue);
1da177e4 2212
cfb6eeb4
YH
2213#ifdef CONFIG_TCP_MD5SIG
2214 /* Clean up the MD5 key list, if any */
2215 if (tp->md5sig_info) {
a915da9b 2216 tcp_clear_md5_list(sk);
a8afca03 2217 kfree_rcu(tp->md5sig_info, rcu);
cfb6eeb4
YH
2218 tp->md5sig_info = NULL;
2219 }
2220#endif
2221
1a2449a8
CL
2222#ifdef CONFIG_NET_DMA
2223 /* Cleans up our sk_async_wait_queue */
e905a9ed 2224 __skb_queue_purge(&sk->sk_async_wait_queue);
1a2449a8
CL
2225#endif
2226
1da177e4
LT
2227 /* Clean prequeue, it must be empty really */
2228 __skb_queue_purge(&tp->ucopy.prequeue);
2229
2230 /* Clean up a referenced TCP bind bucket. */
463c84b9 2231 if (inet_csk(sk)->icsk_bind_hash)
ab1e0a13 2232 inet_put_port(sk);
1da177e4 2233
168a8f58 2234 BUG_ON(tp->fastopen_rsk != NULL);
435cf559 2235
cf60af03
YC
2236	 /* If the socket is aborted during the connect operation */
2237 tcp_free_fastopen_req(tp);
2238
180d8cd9 2239 sk_sockets_allocated_dec(sk);
d1a4c0b3 2240 sock_release_memcg(sk);
1da177e4 2241}
1da177e4
LT
2242EXPORT_SYMBOL(tcp_v4_destroy_sock);
2243
6fa3eb70
S
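/* Vendor-specific helper (not in mainline): walk the established hash and,
 * for every socket owned by the given uid, rearm the retransmit timer to
 * fire almost immediately with an enlarged RTO and set icsk_MMSRB. The
 * [mmspb] log tag suggests this serves the platform's MMS resend path.
 */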
2244void tcp_v4_handle_retrans_time_by_uid(struct uid_err uid_e)
2245{
2246 unsigned int bucket;
2247 uid_t skuid = (uid_t)(uid_e.appuid);
2248	 struct inet_connection_sock *icsk = NULL; /* inet_csk(sk) is assigned per socket below */
2249
2250
2251 for (bucket = 0; bucket < tcp_hashinfo.ehash_mask; bucket++) {
2252 struct hlist_nulls_node *node;
2253 struct sock *sk;
2254 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, bucket);
2255
2256 spin_lock_bh(lock);
2257 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[bucket].chain) {
2258
2259 if (sysctl_ip_dynaddr && sk->sk_state == TCP_SYN_SENT)
2260 continue;
2261 if (sock_flag(sk, SOCK_DEAD))
2262 continue;
2263
2264	 if (sk->sk_socket) {
2265	 if (SOCK_INODE(sk->sk_socket)->i_uid != skuid)
2266	 continue;
2267	 else
2268	 printk("[mmspb] tcp_v4_handle_retrans_time_by_uid socket uid(%d) match!\n",
2269	 SOCK_INODE(sk->sk_socket)->i_uid);
2270	 } else {
2271	 continue;
2272	 }
2273
2274 sock_hold(sk);
2275 spin_unlock_bh(lock);
2276
2277 local_bh_disable();
2278 bh_lock_sock(sk);
2279
2280	 /* update the socket's retransmit timer and RTO */
2281 icsk = inet_csk(sk);
2282 printk("[mmspb] tcp_v4_handle_retrans_time_by_uid update timer\n");
2283
2284 sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + 2);
2285 icsk->icsk_rto = sysctl_tcp_rto_min * 30;
2286 icsk->icsk_MMSRB = 1;
2287
2288 bh_unlock_sock(sk);
2289 local_bh_enable();
2290 spin_lock_bh(lock);
2291 sock_put(sk);
2292
2293 }
2294 spin_unlock_bh(lock);
2295 }
2296
2297}
2298
2299
2300	/*
2301	 * tcp_v4_reset_connections_by_uid - destroy all sockets owned by the specified uid
2302	 */
2303void tcp_v4_reset_connections_by_uid(struct uid_err uid_e)
2304{
2305 unsigned int bucket;
2306 uid_t skuid = (uid_t)(uid_e.appuid);
2307
2308 for (bucket = 0; bucket < tcp_hashinfo.ehash_mask; bucket++) {
2309 struct hlist_nulls_node *node;
2310 struct sock *sk;
2311 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, bucket);
2312
2313restart:
2314 spin_lock_bh(lock);
2315 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[bucket].chain) {
2316
2317 if (sysctl_ip_dynaddr && sk->sk_state == TCP_SYN_SENT)
2318 continue;
2319 if (sock_flag(sk, SOCK_DEAD))
2320 continue;
2321
2322	 if (sk->sk_socket) {
2323	 if (SOCK_INODE(sk->sk_socket)->i_uid != skuid)
2324	 continue;
2325	 else
2326	 printk(KERN_INFO "SIOCKILLSOCK socket uid(%d) match!\n",
2327	 SOCK_INODE(sk->sk_socket)->i_uid);
2328	 } else {
2329	 continue;
2330	 }
2331
2332 sock_hold(sk);
2333 spin_unlock_bh(lock);
2334
2335 local_bh_disable();
2336 bh_lock_sock(sk);
2337 sk->sk_err = uid_e.errNum;
2338	 printk(KERN_INFO "SIOCKILLSOCK set sk->sk_err to %d\n", sk->sk_err);
2339 sk->sk_error_report(sk);
2340
2341 tcp_done(sk);
2342 bh_unlock_sock(sk);
2343 local_bh_enable();
2344 sock_put(sk);
2345
2346 goto restart;
2347 }
2348 spin_unlock_bh(lock);
2349 }
2350}
2351
2352
1da177e4
LT
2353#ifdef CONFIG_PROC_FS
2354/* Proc filesystem TCP sock list dumping. */
2355
3ab5aee7 2356static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1da177e4 2357{
3ab5aee7 2358 return hlist_nulls_empty(head) ? NULL :
8feaf0c0 2359 list_entry(head->first, struct inet_timewait_sock, tw_node);
1da177e4
LT
2360}
2361
8feaf0c0 2362static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1da177e4 2363{
3ab5aee7
ED
2364 return !is_a_nulls(tw->tw_node.next) ?
2365 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1da177e4
LT
2366}
2367
a8b690f9
TH
2368/*
2369	 * Get the next listener socket following cur. If cur is NULL, get the first socket
2370 * starting from bucket given in st->bucket; when st->bucket is zero the
2371 * very first socket in the hash table is returned.
2372 */
1da177e4
LT
2373static void *listening_get_next(struct seq_file *seq, void *cur)
2374{
463c84b9 2375 struct inet_connection_sock *icsk;
c25eb3bf 2376 struct hlist_nulls_node *node;
1da177e4 2377 struct sock *sk = cur;
5caea4ea 2378 struct inet_listen_hashbucket *ilb;
5799de0b 2379 struct tcp_iter_state *st = seq->private;
a4146b1b 2380 struct net *net = seq_file_net(seq);
1da177e4
LT
2381
2382 if (!sk) {
a8b690f9 2383 ilb = &tcp_hashinfo.listening_hash[st->bucket];
5caea4ea 2384 spin_lock_bh(&ilb->lock);
c25eb3bf 2385 sk = sk_nulls_head(&ilb->head);
a8b690f9 2386 st->offset = 0;
1da177e4
LT
2387 goto get_sk;
2388 }
5caea4ea 2389 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1da177e4 2390 ++st->num;
a8b690f9 2391 ++st->offset;
1da177e4
LT
2392
2393 if (st->state == TCP_SEQ_STATE_OPENREQ) {
60236fdd 2394 struct request_sock *req = cur;
1da177e4 2395
72a3effa 2396 icsk = inet_csk(st->syn_wait_sk);
1da177e4
LT
2397 req = req->dl_next;
2398 while (1) {
2399 while (req) {
bdccc4ca 2400 if (req->rsk_ops->family == st->family) {
1da177e4
LT
2401 cur = req;
2402 goto out;
2403 }
2404 req = req->dl_next;
2405 }
72a3effa 2406 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1da177e4
LT
2407 break;
2408get_req:
463c84b9 2409 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1da177e4 2410 }
1bde5ac4 2411 sk = sk_nulls_next(st->syn_wait_sk);
1da177e4 2412 st->state = TCP_SEQ_STATE_LISTENING;
463c84b9 2413 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2414 } else {
e905a9ed 2415 icsk = inet_csk(sk);
463c84b9
ACM
2416 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2417 if (reqsk_queue_len(&icsk->icsk_accept_queue))
1da177e4 2418 goto start_req;
463c84b9 2419 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1bde5ac4 2420 sk = sk_nulls_next(sk);
1da177e4
LT
2421 }
2422get_sk:
c25eb3bf 2423 sk_nulls_for_each_from(sk, node) {
8475ef9f
PE
2424 if (!net_eq(sock_net(sk), net))
2425 continue;
2426 if (sk->sk_family == st->family) {
1da177e4
LT
2427 cur = sk;
2428 goto out;
2429 }
e905a9ed 2430 icsk = inet_csk(sk);
463c84b9
ACM
2431 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2432 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1da177e4
LT
2433start_req:
2434 st->uid = sock_i_uid(sk);
2435 st->syn_wait_sk = sk;
2436 st->state = TCP_SEQ_STATE_OPENREQ;
2437 st->sbucket = 0;
2438 goto get_req;
2439 }
463c84b9 2440 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2441 }
5caea4ea 2442 spin_unlock_bh(&ilb->lock);
a8b690f9 2443 st->offset = 0;
0f7ff927 2444 if (++st->bucket < INET_LHTABLE_SIZE) {
5caea4ea
ED
2445 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2446 spin_lock_bh(&ilb->lock);
c25eb3bf 2447 sk = sk_nulls_head(&ilb->head);
1da177e4
LT
2448 goto get_sk;
2449 }
2450 cur = NULL;
2451out:
2452 return cur;
2453}
2454
2455static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2456{
a8b690f9
TH
2457 struct tcp_iter_state *st = seq->private;
2458 void *rc;
2459
2460 st->bucket = 0;
2461 st->offset = 0;
2462 rc = listening_get_next(seq, NULL);
1da177e4
LT
2463
2464 while (rc && *pos) {
2465 rc = listening_get_next(seq, rc);
2466 --*pos;
2467 }
2468 return rc;
2469}
2470
a2a385d6 2471static inline bool empty_bucket(struct tcp_iter_state *st)
6eac5604 2472{
3ab5aee7
ED
2473 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2474 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
6eac5604
AK
2475}
2476
a8b690f9
TH
2477/*
2478 * Get first established socket starting from bucket given in st->bucket.
2479 * If st->bucket is zero, the very first socket in the hash is returned.
2480 */
1da177e4
LT
2481static void *established_get_first(struct seq_file *seq)
2482{
5799de0b 2483 struct tcp_iter_state *st = seq->private;
a4146b1b 2484 struct net *net = seq_file_net(seq);
1da177e4
LT
2485 void *rc = NULL;
2486
a8b690f9
TH
2487 st->offset = 0;
2488 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1da177e4 2489 struct sock *sk;
3ab5aee7 2490 struct hlist_nulls_node *node;
8feaf0c0 2491 struct inet_timewait_sock *tw;
9db66bdc 2492 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1da177e4 2493
6eac5604
AK
2494 /* Lockless fast path for the common case of empty buckets */
2495 if (empty_bucket(st))
2496 continue;
2497
9db66bdc 2498 spin_lock_bh(lock);
3ab5aee7 2499 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
f40c8174 2500 if (sk->sk_family != st->family ||
878628fb 2501 !net_eq(sock_net(sk), net)) {
1da177e4
LT
2502 continue;
2503 }
2504 rc = sk;
2505 goto out;
2506 }
2507 st->state = TCP_SEQ_STATE_TIME_WAIT;
8feaf0c0 2508 inet_twsk_for_each(tw, node,
dbca9b27 2509 &tcp_hashinfo.ehash[st->bucket].twchain) {
28518fc1 2510 if (tw->tw_family != st->family ||
878628fb 2511 !net_eq(twsk_net(tw), net)) {
1da177e4
LT
2512 continue;
2513 }
2514 rc = tw;
2515 goto out;
2516 }
9db66bdc 2517 spin_unlock_bh(lock);
1da177e4
LT
2518 st->state = TCP_SEQ_STATE_ESTABLISHED;
2519 }
2520out:
2521 return rc;
2522}
2523
2524static void *established_get_next(struct seq_file *seq, void *cur)
2525{
2526 struct sock *sk = cur;
8feaf0c0 2527 struct inet_timewait_sock *tw;
3ab5aee7 2528 struct hlist_nulls_node *node;
5799de0b 2529 struct tcp_iter_state *st = seq->private;
a4146b1b 2530 struct net *net = seq_file_net(seq);
1da177e4
LT
2531
2532 ++st->num;
a8b690f9 2533 ++st->offset;
1da177e4
LT
2534
2535 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2536 tw = cur;
2537 tw = tw_next(tw);
2538get_tw:
878628fb 2539 while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
1da177e4
LT
2540 tw = tw_next(tw);
2541 }
2542 if (tw) {
2543 cur = tw;
2544 goto out;
2545 }
9db66bdc 2546 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2547 st->state = TCP_SEQ_STATE_ESTABLISHED;
2548
6eac5604 2549 /* Look for next non empty bucket */
a8b690f9 2550 st->offset = 0;
f373b53b 2551 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
6eac5604
AK
2552 empty_bucket(st))
2553 ;
f373b53b 2554 if (st->bucket > tcp_hashinfo.ehash_mask)
6eac5604
AK
2555 return NULL;
2556
9db66bdc 2557 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
3ab5aee7 2558 sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
1da177e4 2559 } else
3ab5aee7 2560 sk = sk_nulls_next(sk);
1da177e4 2561
3ab5aee7 2562 sk_nulls_for_each_from(sk, node) {
878628fb 2563 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
1da177e4
LT
2564 goto found;
2565 }
2566
2567 st->state = TCP_SEQ_STATE_TIME_WAIT;
dbca9b27 2568 tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
1da177e4
LT
2569 goto get_tw;
2570found:
2571 cur = sk;
2572out:
2573 return cur;
2574}
2575
2576static void *established_get_idx(struct seq_file *seq, loff_t pos)
2577{
a8b690f9
TH
2578 struct tcp_iter_state *st = seq->private;
2579 void *rc;
2580
2581 st->bucket = 0;
2582 rc = established_get_first(seq);
1da177e4
LT
2583
2584 while (rc && pos) {
2585 rc = established_get_next(seq, rc);
2586 --pos;
7174259e 2587 }
1da177e4
LT
2588 return rc;
2589}
2590
2591static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2592{
2593 void *rc;
5799de0b 2594 struct tcp_iter_state *st = seq->private;
1da177e4 2595
1da177e4
LT
2596 st->state = TCP_SEQ_STATE_LISTENING;
2597 rc = listening_get_idx(seq, &pos);
2598
2599 if (!rc) {
1da177e4
LT
2600 st->state = TCP_SEQ_STATE_ESTABLISHED;
2601 rc = established_get_idx(seq, pos);
2602 }
2603
2604 return rc;
2605}
2606
a8b690f9
TH
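/* Resume the seq_file walk at the bucket and in-bucket offset remembered
 * from the previous read, so large hash tables are not rescanned from the
 * beginning on every read() of /proc/net/tcp.
 */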
2607static void *tcp_seek_last_pos(struct seq_file *seq)
2608{
2609 struct tcp_iter_state *st = seq->private;
2610 int offset = st->offset;
2611 int orig_num = st->num;
2612 void *rc = NULL;
2613
2614 switch (st->state) {
2615 case TCP_SEQ_STATE_OPENREQ:
2616 case TCP_SEQ_STATE_LISTENING:
2617 if (st->bucket >= INET_LHTABLE_SIZE)
2618 break;
2619 st->state = TCP_SEQ_STATE_LISTENING;
2620 rc = listening_get_next(seq, NULL);
2621 while (offset-- && rc)
2622 rc = listening_get_next(seq, rc);
2623 if (rc)
2624 break;
2625 st->bucket = 0;
2626 /* Fallthrough */
2627 case TCP_SEQ_STATE_ESTABLISHED:
2628 case TCP_SEQ_STATE_TIME_WAIT:
2629 st->state = TCP_SEQ_STATE_ESTABLISHED;
2630 if (st->bucket > tcp_hashinfo.ehash_mask)
2631 break;
2632 rc = established_get_first(seq);
2633 while (offset-- && rc)
2634 rc = established_get_next(seq, rc);
2635 }
2636
2637 st->num = orig_num;
2638
2639 return rc;
2640}
2641
1da177e4
LT
2642static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2643{
5799de0b 2644 struct tcp_iter_state *st = seq->private;
a8b690f9
TH
2645 void *rc;
2646
2647 if (*pos && *pos == st->last_pos) {
2648 rc = tcp_seek_last_pos(seq);
2649 if (rc)
2650 goto out;
2651 }
2652
1da177e4
LT
2653 st->state = TCP_SEQ_STATE_LISTENING;
2654 st->num = 0;
a8b690f9
TH
2655 st->bucket = 0;
2656 st->offset = 0;
2657 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2658
2659out:
2660 st->last_pos = *pos;
2661 return rc;
1da177e4
LT
2662}
2663
2664static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2665{
a8b690f9 2666 struct tcp_iter_state *st = seq->private;
1da177e4 2667 void *rc = NULL;
1da177e4
LT
2668
2669 if (v == SEQ_START_TOKEN) {
2670 rc = tcp_get_idx(seq, 0);
2671 goto out;
2672 }
1da177e4
LT
2673
2674 switch (st->state) {
2675 case TCP_SEQ_STATE_OPENREQ:
2676 case TCP_SEQ_STATE_LISTENING:
2677 rc = listening_get_next(seq, v);
2678 if (!rc) {
1da177e4 2679 st->state = TCP_SEQ_STATE_ESTABLISHED;
a8b690f9
TH
2680 st->bucket = 0;
2681 st->offset = 0;
1da177e4
LT
2682 rc = established_get_first(seq);
2683 }
2684 break;
2685 case TCP_SEQ_STATE_ESTABLISHED:
2686 case TCP_SEQ_STATE_TIME_WAIT:
2687 rc = established_get_next(seq, v);
2688 break;
2689 }
2690out:
2691 ++*pos;
a8b690f9 2692 st->last_pos = *pos;
1da177e4
LT
2693 return rc;
2694}
2695
2696static void tcp_seq_stop(struct seq_file *seq, void *v)
2697{
5799de0b 2698 struct tcp_iter_state *st = seq->private;
1da177e4
LT
2699
2700 switch (st->state) {
2701 case TCP_SEQ_STATE_OPENREQ:
2702 if (v) {
463c84b9
ACM
2703 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2704 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4
LT
2705 }
2706 case TCP_SEQ_STATE_LISTENING:
2707 if (v != SEQ_START_TOKEN)
5caea4ea 2708 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
1da177e4
LT
2709 break;
2710 case TCP_SEQ_STATE_TIME_WAIT:
2711 case TCP_SEQ_STATE_ESTABLISHED:
2712 if (v)
9db66bdc 2713 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2714 break;
2715 }
2716}
2717
73cb88ec 2718int tcp_seq_open(struct inode *inode, struct file *file)
1da177e4 2719{
d9dda78b 2720 struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
1da177e4 2721 struct tcp_iter_state *s;
52d6f3f1 2722 int err;
1da177e4 2723
52d6f3f1
DL
2724 err = seq_open_net(inode, file, &afinfo->seq_ops,
2725 sizeof(struct tcp_iter_state));
2726 if (err < 0)
2727 return err;
f40c8174 2728
52d6f3f1 2729 s = ((struct seq_file *)file->private_data)->private;
1da177e4 2730 s->family = afinfo->family;
a8b690f9 2731 s->last_pos = 0;
f40c8174
DL
2732 return 0;
2733}
73cb88ec 2734EXPORT_SYMBOL(tcp_seq_open);
f40c8174 2735
6f8b13bc 2736int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4
LT
2737{
2738 int rc = 0;
2739 struct proc_dir_entry *p;
2740
9427c4b3
DL
2741 afinfo->seq_ops.start = tcp_seq_start;
2742 afinfo->seq_ops.next = tcp_seq_next;
2743 afinfo->seq_ops.stop = tcp_seq_stop;
2744
84841c3c 2745 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
73cb88ec 2746 afinfo->seq_fops, afinfo);
84841c3c 2747 if (!p)
1da177e4
LT
2748 rc = -ENOMEM;
2749 return rc;
2750}
4bc2f18b 2751EXPORT_SYMBOL(tcp_proc_register);
1da177e4 2752
6f8b13bc 2753void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4 2754{
ece31ffd 2755 remove_proc_entry(afinfo->name, net->proc_net);
1da177e4 2756}
4bc2f18b 2757EXPORT_SYMBOL(tcp_proc_unregister);
1da177e4 2758
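/* Format one /proc/net/tcp line for a pending (SYN_RECV) open request;
 * fields that only make sense for full sockets are printed as zero.
 */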
cf533ea5 2759static void get_openreq4(const struct sock *sk, const struct request_sock *req,
a7cb5a49 2760 struct seq_file *f, int i, kuid_t uid, int *len)
1da177e4 2761{
2e6599cb 2762 const struct inet_request_sock *ireq = inet_rsk(req);
a399a805 2763 long delta = req->expires - jiffies;
1da177e4 2764
5e659e4c 2765 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
71338aa7 2766 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
1da177e4 2767 i,
2e6599cb 2768 ireq->loc_addr,
c720c7e8 2769 ntohs(inet_sk(sk)->inet_sport),
2e6599cb
ACM
2770 ireq->rmt_addr,
2771 ntohs(ireq->rmt_port),
1da177e4
LT
2772 TCP_SYN_RECV,
2773 0, 0, /* could print option size, but that is af dependent. */
2774 1, /* timers active (only the expire timer) */
a399a805 2775 jiffies_delta_to_clock_t(delta),
e6c022a4 2776 req->num_timeout,
a7cb5a49 2777 from_kuid_munged(seq_user_ns(f), uid),
1da177e4
LT
2778 0, /* non standard timer */
2779 0, /* open_requests have no inode */
2780 atomic_read(&sk->sk_refcnt),
5e659e4c
PE
2781 req,
2782 len);
1da177e4
LT
2783}
2784
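/* Format one /proc/net/tcp line for a full socket: addresses and ports,
 * send/receive queue sizes, the pending timer and its expiry, retransmit
 * and probe counters, owner, and cwnd/ssthresh (or the fastopen queue
 * limit for listeners).
 */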
5e659e4c 2785static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
1da177e4
LT
2786{
2787 int timer_active;
2788 unsigned long timer_expires;
cf533ea5 2789 const struct tcp_sock *tp = tcp_sk(sk);
cf4c6bf8 2790 const struct inet_connection_sock *icsk = inet_csk(sk);
cf533ea5 2791 const struct inet_sock *inet = inet_sk(sk);
168a8f58 2792 struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
c720c7e8
ED
2793 __be32 dest = inet->inet_daddr;
2794 __be32 src = inet->inet_rcv_saddr;
2795 __u16 destp = ntohs(inet->inet_dport);
2796 __u16 srcp = ntohs(inet->inet_sport);
49d09007 2797 int rx_queue;
1da177e4 2798
6ba8a3b1
ND
2799 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2800 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2801 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1da177e4 2802 timer_active = 1;
463c84b9
ACM
2803 timer_expires = icsk->icsk_timeout;
2804 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1da177e4 2805 timer_active = 4;
463c84b9 2806 timer_expires = icsk->icsk_timeout;
cf4c6bf8 2807 } else if (timer_pending(&sk->sk_timer)) {
1da177e4 2808 timer_active = 2;
cf4c6bf8 2809 timer_expires = sk->sk_timer.expires;
1da177e4
LT
2810 } else {
2811 timer_active = 0;
2812 timer_expires = jiffies;
2813 }
2814
49d09007
ED
2815 if (sk->sk_state == TCP_LISTEN)
2816 rx_queue = sk->sk_ack_backlog;
2817 else
2818 /*
2819	 * because we don't lock the socket, we might find a transient negative value
2820 */
2821 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2822
5e659e4c 2823 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
71338aa7 2824 "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
cf4c6bf8 2825 i, src, srcp, dest, destp, sk->sk_state,
47da8ee6 2826 tp->write_seq - tp->snd_una,
49d09007 2827 rx_queue,
1da177e4 2828 timer_active,
a399a805 2829 jiffies_delta_to_clock_t(timer_expires - jiffies),
463c84b9 2830 icsk->icsk_retransmits,
a7cb5a49 2831 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
6687e988 2832 icsk->icsk_probes_out,
cf4c6bf8
IJ
2833 sock_i_ino(sk),
2834 atomic_read(&sk->sk_refcnt), sk,
7be87351
SH
2835 jiffies_to_clock_t(icsk->icsk_rto),
2836 jiffies_to_clock_t(icsk->icsk_ack.ato),
463c84b9 2837 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1da177e4 2838 tp->snd_cwnd,
168a8f58
JC
2839 sk->sk_state == TCP_LISTEN ?
2840 (fastopenq ? fastopenq->max_qlen : 0) :
2841 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh),
5e659e4c 2842 len);
1da177e4
LT
2843}
2844
cf533ea5 2845static void get_timewait4_sock(const struct inet_timewait_sock *tw,
5e659e4c 2846 struct seq_file *f, int i, int *len)
1da177e4 2847{
23f33c2d 2848 __be32 dest, src;
1da177e4 2849 __u16 destp, srcp;
a399a805 2850 long delta = tw->tw_ttd - jiffies;
1da177e4
LT
2851
2852 dest = tw->tw_daddr;
2853 src = tw->tw_rcv_saddr;
2854 destp = ntohs(tw->tw_dport);
2855 srcp = ntohs(tw->tw_sport);
2856
5e659e4c 2857 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
71338aa7 2858 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
1da177e4 2859 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
a399a805 2860 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
5e659e4c 2861 atomic_read(&tw->tw_refcnt), tw, len);
1da177e4
LT
2862}
2863
2864#define TMPSZ 150
2865
2866static int tcp4_seq_show(struct seq_file *seq, void *v)
2867{
5799de0b 2868 struct tcp_iter_state *st;
5e659e4c 2869 int len;
1da177e4
LT
2870
2871 if (v == SEQ_START_TOKEN) {
2872 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2873 " sl local_address rem_address st tx_queue "
2874 "rx_queue tr tm->when retrnsmt uid timeout "
2875 "inode");
2876 goto out;
2877 }
2878 st = seq->private;
2879
2880 switch (st->state) {
2881 case TCP_SEQ_STATE_LISTENING:
2882 case TCP_SEQ_STATE_ESTABLISHED:
5e659e4c 2883 get_tcp4_sock(v, seq, st->num, &len);
1da177e4
LT
2884 break;
2885 case TCP_SEQ_STATE_OPENREQ:
5e659e4c 2886 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
1da177e4
LT
2887 break;
2888 case TCP_SEQ_STATE_TIME_WAIT:
5e659e4c 2889 get_timewait4_sock(v, seq, st->num, &len);
1da177e4
LT
2890 break;
2891 }
5e659e4c 2892 seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
1da177e4
LT
2893out:
2894 return 0;
2895}
2896
73cb88ec
AV
2897static const struct file_operations tcp_afinfo_seq_fops = {
2898 .owner = THIS_MODULE,
2899 .open = tcp_seq_open,
2900 .read = seq_read,
2901 .llseek = seq_lseek,
2902 .release = seq_release_net
2903};
2904
1da177e4 2905static struct tcp_seq_afinfo tcp4_seq_afinfo = {
1da177e4
LT
2906 .name = "tcp",
2907 .family = AF_INET,
73cb88ec 2908 .seq_fops = &tcp_afinfo_seq_fops,
9427c4b3
DL
2909 .seq_ops = {
2910 .show = tcp4_seq_show,
2911 },
1da177e4
LT
2912};
2913
2c8c1e72 2914static int __net_init tcp4_proc_init_net(struct net *net)
757764f6
PE
2915{
2916 return tcp_proc_register(net, &tcp4_seq_afinfo);
2917}
2918
2c8c1e72 2919static void __net_exit tcp4_proc_exit_net(struct net *net)
757764f6
PE
2920{
2921 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2922}
2923
2924static struct pernet_operations tcp4_net_ops = {
2925 .init = tcp4_proc_init_net,
2926 .exit = tcp4_proc_exit_net,
2927};
2928
1da177e4
LT
2929int __init tcp4_proc_init(void)
2930{
757764f6 2931 return register_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2932}
2933
2934void tcp4_proc_exit(void)
2935{
757764f6 2936 unregister_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2937}
2938#endif /* CONFIG_PROC_FS */
2939
bf296b12
HX
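/* GRO receive hook: verify the TCP checksum, using the hardware sum when
 * CHECKSUM_COMPLETE or computing it in full for CHECKSUM_NONE, before
 * handing the packet to tcp_gro_receive(); a bad sum flushes it out of GRO.
 */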
2940struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2941{
b71d1d42 2942 const struct iphdr *iph = skb_gro_network_header(skb);
861b6501
ED
2943 __wsum wsum;
2944 __sum16 sum;
bf296b12
HX
2945
2946 switch (skb->ip_summed) {
2947 case CHECKSUM_COMPLETE:
86911732 2948 if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
bf296b12
HX
2949 skb->csum)) {
2950 skb->ip_summed = CHECKSUM_UNNECESSARY;
2951 break;
2952 }
861b6501 2953flush:
bf296b12
HX
2954 NAPI_GRO_CB(skb)->flush = 1;
2955 return NULL;
861b6501
ED
2956
2957 case CHECKSUM_NONE:
2958 wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
2959 skb_gro_len(skb), IPPROTO_TCP, 0);
2960 sum = csum_fold(skb_checksum(skb,
2961 skb_gro_offset(skb),
2962 skb_gro_len(skb),
2963 wsum));
2964 if (sum)
2965 goto flush;
2966
2967 skb->ip_summed = CHECKSUM_UNNECESSARY;
2968 break;
bf296b12
HX
2969 }
2970
2971 return tcp_gro_receive(head, skb);
2972}
bf296b12
HX
2973
2974int tcp4_gro_complete(struct sk_buff *skb)
2975{
b71d1d42 2976 const struct iphdr *iph = ip_hdr(skb);
bf296b12
HX
2977 struct tcphdr *th = tcp_hdr(skb);
2978
2979 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2980 iph->saddr, iph->daddr, 0);
2981 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2982
2983 return tcp_gro_complete(skb);
2984}
bf296b12 2985
1da177e4
LT
2986struct proto tcp_prot = {
2987 .name = "TCP",
2988 .owner = THIS_MODULE,
2989 .close = tcp_close,
2990 .connect = tcp_v4_connect,
2991 .disconnect = tcp_disconnect,
463c84b9 2992 .accept = inet_csk_accept,
1da177e4
LT
2993 .ioctl = tcp_ioctl,
2994 .init = tcp_v4_init_sock,
2995 .destroy = tcp_v4_destroy_sock,
2996 .shutdown = tcp_shutdown,
2997 .setsockopt = tcp_setsockopt,
2998 .getsockopt = tcp_getsockopt,
1da177e4 2999 .recvmsg = tcp_recvmsg,
7ba42910
CG
3000 .sendmsg = tcp_sendmsg,
3001 .sendpage = tcp_sendpage,
1da177e4 3002 .backlog_rcv = tcp_v4_do_rcv,
46d3ceab 3003 .release_cb = tcp_release_cb,
ab1e0a13
ACM
3004 .hash = inet_hash,
3005 .unhash = inet_unhash,
3006 .get_port = inet_csk_get_port,
1da177e4
LT
3007 .enter_memory_pressure = tcp_enter_memory_pressure,
3008 .sockets_allocated = &tcp_sockets_allocated,
0a5578cf 3009 .orphan_count = &tcp_orphan_count,
1da177e4
LT
3010 .memory_allocated = &tcp_memory_allocated,
3011 .memory_pressure = &tcp_memory_pressure,
1da177e4
LT
3012 .sysctl_wmem = sysctl_tcp_wmem,
3013 .sysctl_rmem = sysctl_tcp_rmem,
3014 .max_header = MAX_TCP_HEADER,
3015 .obj_size = sizeof(struct tcp_sock),
3ab5aee7 3016 .slab_flags = SLAB_DESTROY_BY_RCU,
6d6ee43e 3017 .twsk_prot = &tcp_timewait_sock_ops,
60236fdd 3018 .rsk_prot = &tcp_request_sock_ops,
39d8cda7 3019 .h.hashinfo = &tcp_hashinfo,
7ba42910 3020 .no_autobind = true,
543d9cfe
ACM
3021#ifdef CONFIG_COMPAT
3022 .compat_setsockopt = compat_tcp_setsockopt,
3023 .compat_getsockopt = compat_tcp_getsockopt,
3024#endif
c255a458 3025#ifdef CONFIG_MEMCG_KMEM
d1a4c0b3
GC
3026 .init_cgroup = tcp_init_cgroup,
3027 .destroy_cgroup = tcp_destroy_cgroup,
3028 .proto_cgroup = tcp_proto_cgroup,
3029#endif
1da177e4 3030};
4bc2f18b 3031EXPORT_SYMBOL(tcp_prot);
1da177e4 3032
6bed3166
ED
3033static void __net_exit tcp_sk_exit(struct net *net)
3034{
3035 int cpu;
3036
3037 for_each_possible_cpu(cpu)
3038 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
3039 free_percpu(net->ipv4.tcp_sk);
3040}
3041
046ee902
DL
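/* Per-namespace init: allocate one control socket per possible CPU
 * (apparently used for transmitting stack-generated segments such as
 * resets that have no owning socket) and set the default ECN sysctl.
 */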
3042static int __net_init tcp_sk_init(struct net *net)
3043{
6bed3166
ED
3044 int res, cpu;
3045
3046 net->ipv4.tcp_sk = alloc_percpu(struct sock *);
3047 if (!net->ipv4.tcp_sk)
3048 return -ENOMEM;
3049
3050 for_each_possible_cpu(cpu) {
3051 struct sock *sk;
3052
3053 res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
3054 IPPROTO_TCP, net);
3055 if (res)
3056 goto fail;
3057 *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
3058 }
5d134f1c 3059 net->ipv4.sysctl_tcp_ecn = 2;
be9f4a44 3060 return 0;
046ee902 3061
6bed3166
ED
3062fail:
3063 tcp_sk_exit(net);
3064
3065 return res;
b099ce26
EB
3066}
3067
3068static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
3069{
3070 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
046ee902
DL
3071}
3072
3073static struct pernet_operations __net_initdata tcp_sk_ops = {
b099ce26
EB
3074 .init = tcp_sk_init,
3075 .exit = tcp_sk_exit,
3076 .exit_batch = tcp_sk_exit_batch,
046ee902
DL
3077};
3078
9b0f976f 3079void __init tcp_v4_init(void)
1da177e4 3080{
5caea4ea 3081 inet_hashinfo_init(&tcp_hashinfo);
6a1b3054 3082 if (register_pernet_subsys(&tcp_sk_ops))
1da177e4 3083 panic("Failed to create the TCP control socket.\n");
1da177e4 3084}