3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp_input.c
11 * linux/net/ipv4/tcp_output.c
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
16 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
17 * a single port at the same time.
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
41 #include <linux/slab.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
60 #include <net/dsfield.h>
61 #include <net/timewait_sock.h>
62 #include <net/netdma.h>
63 #include <net/inet_common.h>
64 #include <net/secure_seq.h>
65 #include <net/tcp_memcontrol.h>
67 #include <asm/uaccess.h>
69 #include <linux/proc_fs.h>
70 #include <linux/seq_file.h>
72 #include <linux/crypto.h>
73 #include <linux/scatterlist.h>
75 static void tcp_v6_send_reset(struct sock
*sk
, struct sk_buff
*skb
);
76 static void tcp_v6_reqsk_send_ack(struct sock
*sk
, struct sk_buff
*skb
,
77 struct request_sock
*req
);
79 static int tcp_v6_do_rcv(struct sock
*sk
, struct sk_buff
*skb
);
81 static const struct inet_connection_sock_af_ops ipv6_mapped
;
82 static const struct inet_connection_sock_af_ops ipv6_specific
;
83 #ifdef CONFIG_TCP_MD5SIG
84 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific
;
85 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific
;
87 static struct tcp_md5sig_key
*tcp_v6_md5_do_lookup(struct sock
*sk
,
88 const struct in6_addr
*addr
)
94 static void inet6_sk_rx_dst_set(struct sock
*sk
, const struct sk_buff
*skb
)
96 struct dst_entry
*dst
= skb_dst(skb
);
97 const struct rt6_info
*rt
= (const struct rt6_info
*)dst
;
101 inet_sk(sk
)->rx_dst_ifindex
= skb
->skb_iif
;
103 inet6_sk(sk
)->rx_dst_cookie
= rt
->rt6i_node
->fn_sernum
;
106 static void tcp_v6_hash(struct sock
*sk
)
108 if (sk
->sk_state
!= TCP_CLOSE
) {
109 if (inet_csk(sk
)->icsk_af_ops
== &ipv6_mapped
) {
114 __inet6_hash(sk
, NULL
);
119 static __u32
tcp_v6_init_sequence(const struct sk_buff
*skb
)
121 return secure_tcpv6_sequence_number(ipv6_hdr(skb
)->daddr
.s6_addr32
,
122 ipv6_hdr(skb
)->saddr
.s6_addr32
,
124 tcp_hdr(skb
)->source
);
127 static int tcp_v6_connect(struct sock
*sk
, struct sockaddr
*uaddr
,
130 struct sockaddr_in6
*usin
= (struct sockaddr_in6
*) uaddr
;
131 struct inet_sock
*inet
= inet_sk(sk
);
132 struct inet_connection_sock
*icsk
= inet_csk(sk
);
133 struct ipv6_pinfo
*np
= inet6_sk(sk
);
134 struct tcp_sock
*tp
= tcp_sk(sk
);
135 struct in6_addr
*saddr
= NULL
, *final_p
, final
;
138 struct dst_entry
*dst
;
142 if (addr_len
< SIN6_LEN_RFC2133
)
145 if (usin
->sin6_family
!= AF_INET6
)
146 return -EAFNOSUPPORT
;
148 memset(&fl6
, 0, sizeof(fl6
));
151 fl6
.flowlabel
= usin
->sin6_flowinfo
&IPV6_FLOWINFO_MASK
;
152 IP6_ECN_flow_init(fl6
.flowlabel
);
153 if (fl6
.flowlabel
&IPV6_FLOWLABEL_MASK
) {
154 struct ip6_flowlabel
*flowlabel
;
155 flowlabel
= fl6_sock_lookup(sk
, fl6
.flowlabel
);
156 if (flowlabel
== NULL
)
158 usin
->sin6_addr
= flowlabel
->dst
;
159 fl6_sock_release(flowlabel
);
164 * connect() to INADDR_ANY means loopback (BSD'ism).
167 if(ipv6_addr_any(&usin
->sin6_addr
))
168 usin
->sin6_addr
.s6_addr
[15] = 0x1;
170 addr_type
= ipv6_addr_type(&usin
->sin6_addr
);
172 if(addr_type
& IPV6_ADDR_MULTICAST
)
175 if (addr_type
&IPV6_ADDR_LINKLOCAL
) {
176 if (addr_len
>= sizeof(struct sockaddr_in6
) &&
177 usin
->sin6_scope_id
) {
178 /* If interface is set while binding, indices
181 if (sk
->sk_bound_dev_if
&&
182 sk
->sk_bound_dev_if
!= usin
->sin6_scope_id
)
185 sk
->sk_bound_dev_if
= usin
->sin6_scope_id
;
188 /* Connect to link-local address requires an interface */
189 if (!sk
->sk_bound_dev_if
)
193 if (tp
->rx_opt
.ts_recent_stamp
&&
194 !ipv6_addr_equal(&np
->daddr
, &usin
->sin6_addr
)) {
195 tp
->rx_opt
.ts_recent
= 0;
196 tp
->rx_opt
.ts_recent_stamp
= 0;
200 np
->daddr
= usin
->sin6_addr
;
201 np
->flow_label
= fl6
.flowlabel
;
207 if (addr_type
== IPV6_ADDR_MAPPED
) {
208 u32 exthdrlen
= icsk
->icsk_ext_hdr_len
;
209 struct sockaddr_in sin
;
211 SOCK_DEBUG(sk
, "connect: ipv4 mapped\n");
213 if (__ipv6_only_sock(sk
))
216 sin
.sin_family
= AF_INET
;
217 sin
.sin_port
= usin
->sin6_port
;
218 sin
.sin_addr
.s_addr
= usin
->sin6_addr
.s6_addr32
[3];
220 icsk
->icsk_af_ops
= &ipv6_mapped
;
221 sk
->sk_backlog_rcv
= tcp_v4_do_rcv
;
222 #ifdef CONFIG_TCP_MD5SIG
223 tp
->af_specific
= &tcp_sock_ipv6_mapped_specific
;
226 err
= tcp_v4_connect(sk
, (struct sockaddr
*)&sin
, sizeof(sin
));
229 icsk
->icsk_ext_hdr_len
= exthdrlen
;
230 icsk
->icsk_af_ops
= &ipv6_specific
;
231 sk
->sk_backlog_rcv
= tcp_v6_do_rcv
;
232 #ifdef CONFIG_TCP_MD5SIG
233 tp
->af_specific
= &tcp_sock_ipv6_specific
;
237 ipv6_addr_set_v4mapped(inet
->inet_saddr
, &np
->saddr
);
238 ipv6_addr_set_v4mapped(inet
->inet_rcv_saddr
,
245 if (!ipv6_addr_any(&np
->rcv_saddr
))
246 saddr
= &np
->rcv_saddr
;
248 fl6
.flowi6_proto
= IPPROTO_TCP
;
249 fl6
.daddr
= np
->daddr
;
250 fl6
.saddr
= saddr
? *saddr
: np
->saddr
;
251 fl6
.flowi6_oif
= sk
->sk_bound_dev_if
;
252 fl6
.flowi6_mark
= sk
->sk_mark
;
253 fl6
.fl6_dport
= usin
->sin6_port
;
254 fl6
.fl6_sport
= inet
->inet_sport
;
255 fl6
.flowi6_uid
= sock_i_uid(sk
);
257 final_p
= fl6_update_dst(&fl6
, np
->opt
, &final
);
259 security_sk_classify_flow(sk
, flowi6_to_flowi(&fl6
));
261 dst
= ip6_dst_lookup_flow(sk
, &fl6
, final_p
, true);
269 np
->rcv_saddr
= *saddr
;
272 /* set the source address */
274 inet
->inet_rcv_saddr
= LOOPBACK4_IPV6
;
276 sk
->sk_gso_type
= SKB_GSO_TCPV6
;
277 __ip6_dst_store(sk
, dst
, NULL
, NULL
);
279 rt
= (struct rt6_info
*) dst
;
280 if (tcp_death_row
.sysctl_tw_recycle
&&
281 !tp
->rx_opt
.ts_recent_stamp
&&
282 ipv6_addr_equal(&rt
->rt6i_dst
.addr
, &np
->daddr
))
283 tcp_fetch_timewait_stamp(sk
, dst
);
285 icsk
->icsk_ext_hdr_len
= 0;
287 icsk
->icsk_ext_hdr_len
= (np
->opt
->opt_flen
+
290 tp
->rx_opt
.mss_clamp
= IPV6_MIN_MTU
- sizeof(struct tcphdr
) - sizeof(struct ipv6hdr
);
292 inet
->inet_dport
= usin
->sin6_port
;
294 tcp_set_state(sk
, TCP_SYN_SENT
);
295 err
= inet6_hash_connect(&tcp_death_row
, sk
);
299 printk(KERN_INFO
"net_sock, IPV6 socket[%lu] sport:%u \n", SOCK_INODE(sk
->sk_socket
)->i_ino
, ntohs(inet
->inet_sport
));
300 if (!tp
->write_seq
&& likely(!tp
->repair
))
301 tp
->write_seq
= secure_tcpv6_sequence_number(np
->saddr
.s6_addr32
,
306 err
= tcp_connect(sk
);
313 tcp_set_state(sk
, TCP_CLOSE
);
316 inet
->inet_dport
= 0;
317 sk
->sk_route_caps
= 0;
321 static void tcp_v6_mtu_reduced(struct sock
*sk
)
323 struct dst_entry
*dst
;
325 if ((1 << sk
->sk_state
) & (TCPF_LISTEN
| TCPF_CLOSE
))
328 dst
= inet6_csk_update_pmtu(sk
, tcp_sk(sk
)->mtu_info
);
332 if (inet_csk(sk
)->icsk_pmtu_cookie
> dst_mtu(dst
)) {
333 tcp_sync_mss(sk
, dst_mtu(dst
));
334 tcp_simple_retransmit(sk
);
338 static void tcp_v6_err(struct sk_buff
*skb
, struct inet6_skb_parm
*opt
,
339 u8 type
, u8 code
, int offset
, __be32 info
)
341 const struct ipv6hdr
*hdr
= (const struct ipv6hdr
*)skb
->data
;
342 const struct tcphdr
*th
= (struct tcphdr
*)(skb
->data
+offset
);
343 struct ipv6_pinfo
*np
;
348 struct net
*net
= dev_net(skb
->dev
);
350 sk
= inet6_lookup(net
, &tcp_hashinfo
, &hdr
->daddr
,
351 th
->dest
, &hdr
->saddr
, th
->source
, skb
->dev
->ifindex
);
354 ICMP6_INC_STATS_BH(net
, __in6_dev_get(skb
->dev
),
359 if (sk
->sk_state
== TCP_TIME_WAIT
) {
360 inet_twsk_put(inet_twsk(sk
));
365 if (sock_owned_by_user(sk
) && type
!= ICMPV6_PKT_TOOBIG
)
366 NET_INC_STATS_BH(net
, LINUX_MIB_LOCKDROPPEDICMPS
);
368 if (sk
->sk_state
== TCP_CLOSE
)
371 if (ipv6_hdr(skb
)->hop_limit
< inet6_sk(sk
)->min_hopcount
) {
372 NET_INC_STATS_BH(net
, LINUX_MIB_TCPMINTTLDROP
);
377 seq
= ntohl(th
->seq
);
378 if (sk
->sk_state
!= TCP_LISTEN
&&
379 !between(seq
, tp
->snd_una
, tp
->snd_nxt
)) {
380 NET_INC_STATS_BH(net
, LINUX_MIB_OUTOFWINDOWICMPS
);
386 if (type
== NDISC_REDIRECT
) {
387 struct dst_entry
*dst
= __sk_dst_check(sk
, np
->dst_cookie
);
390 dst
->ops
->redirect(dst
, sk
, skb
);
394 if (type
== ICMPV6_PKT_TOOBIG
) {
395 /* We are not interested in TCP_LISTEN and open_requests
396 * (SYN-ACKs send out by Linux are always <576bytes so
397 * they should go through unfragmented).
399 if (sk
->sk_state
== TCP_LISTEN
)
402 tp
->mtu_info
= ntohl(info
);
403 if (!sock_owned_by_user(sk
))
404 tcp_v6_mtu_reduced(sk
);
405 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED
,
411 icmpv6_err_convert(type
, code
, &err
);
413 /* Might be for an request_sock */
414 switch (sk
->sk_state
) {
415 struct request_sock
*req
, **prev
;
417 if (sock_owned_by_user(sk
))
420 req
= inet6_csk_search_req(sk
, &prev
, th
->dest
, &hdr
->daddr
,
421 &hdr
->saddr
, inet6_iif(skb
));
425 /* ICMPs are not backlogged, hence we cannot get
426 * an established socket here.
428 WARN_ON(req
->sk
!= NULL
);
430 if (seq
!= tcp_rsk(req
)->snt_isn
) {
431 NET_INC_STATS_BH(net
, LINUX_MIB_OUTOFWINDOWICMPS
);
435 inet_csk_reqsk_queue_drop(sk
, req
, prev
);
436 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_LISTENDROPS
);
440 case TCP_SYN_RECV
: /* Cannot happen.
441 It can, it SYNs are crossed. --ANK */
442 if (!sock_owned_by_user(sk
)) {
444 sk
->sk_error_report(sk
); /* Wake people up to see the error (see connect in sock.c) */
448 sk
->sk_err_soft
= err
;
452 if (!sock_owned_by_user(sk
) && np
->recverr
) {
454 sk
->sk_error_report(sk
);
456 sk
->sk_err_soft
= err
;
464 static int tcp_v6_send_synack(struct sock
*sk
, struct dst_entry
*dst
,
466 struct request_sock
*req
,
469 struct inet6_request_sock
*treq
= inet6_rsk(req
);
470 struct ipv6_pinfo
*np
= inet6_sk(sk
);
471 struct sk_buff
* skb
;
474 /* First, grab a route. */
475 if (!dst
&& (dst
= inet6_csk_route_req(sk
, fl6
, req
)) == NULL
)
478 skb
= tcp_make_synack(sk
, dst
, req
, NULL
);
481 __tcp_v6_send_check(skb
, &treq
->loc_addr
, &treq
->rmt_addr
);
483 fl6
->daddr
= treq
->rmt_addr
;
484 skb_set_queue_mapping(skb
, queue_mapping
);
485 err
= ip6_xmit(sk
, skb
, fl6
, np
->opt
, np
->tclass
);
486 err
= net_xmit_eval(err
);
493 static int tcp_v6_rtx_synack(struct sock
*sk
, struct request_sock
*req
)
498 res
= tcp_v6_send_synack(sk
, NULL
, &fl6
, req
, 0);
500 TCP_INC_STATS_BH(sock_net(sk
), TCP_MIB_RETRANSSEGS
);
504 static void tcp_v6_reqsk_destructor(struct request_sock
*req
)
506 kfree_skb(inet6_rsk(req
)->pktopts
);
509 #ifdef CONFIG_TCP_MD5SIG
510 static struct tcp_md5sig_key
*tcp_v6_md5_do_lookup(struct sock
*sk
,
511 const struct in6_addr
*addr
)
513 return tcp_md5_do_lookup(sk
, (union tcp_md5_addr
*)addr
, AF_INET6
);
516 static struct tcp_md5sig_key
*tcp_v6_md5_lookup(struct sock
*sk
,
517 struct sock
*addr_sk
)
519 return tcp_v6_md5_do_lookup(sk
, &inet6_sk(addr_sk
)->daddr
);
522 static struct tcp_md5sig_key
*tcp_v6_reqsk_md5_lookup(struct sock
*sk
,
523 struct request_sock
*req
)
525 return tcp_v6_md5_do_lookup(sk
, &inet6_rsk(req
)->rmt_addr
);
528 static int tcp_v6_parse_md5_keys (struct sock
*sk
, char __user
*optval
,
531 struct tcp_md5sig cmd
;
532 struct sockaddr_in6
*sin6
= (struct sockaddr_in6
*)&cmd
.tcpm_addr
;
534 if (optlen
< sizeof(cmd
))
537 if (copy_from_user(&cmd
, optval
, sizeof(cmd
)))
540 if (sin6
->sin6_family
!= AF_INET6
)
543 if (!cmd
.tcpm_keylen
) {
544 if (ipv6_addr_v4mapped(&sin6
->sin6_addr
))
545 return tcp_md5_do_del(sk
, (union tcp_md5_addr
*)&sin6
->sin6_addr
.s6_addr32
[3],
547 return tcp_md5_do_del(sk
, (union tcp_md5_addr
*)&sin6
->sin6_addr
,
551 if (cmd
.tcpm_keylen
> TCP_MD5SIG_MAXKEYLEN
)
554 if (ipv6_addr_v4mapped(&sin6
->sin6_addr
))
555 return tcp_md5_do_add(sk
, (union tcp_md5_addr
*)&sin6
->sin6_addr
.s6_addr32
[3],
556 AF_INET
, cmd
.tcpm_key
, cmd
.tcpm_keylen
, GFP_KERNEL
);
558 return tcp_md5_do_add(sk
, (union tcp_md5_addr
*)&sin6
->sin6_addr
,
559 AF_INET6
, cmd
.tcpm_key
, cmd
.tcpm_keylen
, GFP_KERNEL
);
562 static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool
*hp
,
563 const struct in6_addr
*daddr
,
564 const struct in6_addr
*saddr
, int nbytes
)
566 struct tcp6_pseudohdr
*bp
;
567 struct scatterlist sg
;
569 bp
= &hp
->md5_blk
.ip6
;
570 /* 1. TCP pseudo-header (RFC2460) */
573 bp
->protocol
= cpu_to_be32(IPPROTO_TCP
);
574 bp
->len
= cpu_to_be32(nbytes
);
576 sg_init_one(&sg
, bp
, sizeof(*bp
));
577 return crypto_hash_update(&hp
->md5_desc
, &sg
, sizeof(*bp
));
580 static int tcp_v6_md5_hash_hdr(char *md5_hash
, struct tcp_md5sig_key
*key
,
581 const struct in6_addr
*daddr
, struct in6_addr
*saddr
,
582 const struct tcphdr
*th
)
584 struct tcp_md5sig_pool
*hp
;
585 struct hash_desc
*desc
;
587 hp
= tcp_get_md5sig_pool();
589 goto clear_hash_noput
;
590 desc
= &hp
->md5_desc
;
592 if (crypto_hash_init(desc
))
594 if (tcp_v6_md5_hash_pseudoheader(hp
, daddr
, saddr
, th
->doff
<< 2))
596 if (tcp_md5_hash_header(hp
, th
))
598 if (tcp_md5_hash_key(hp
, key
))
600 if (crypto_hash_final(desc
, md5_hash
))
603 tcp_put_md5sig_pool();
607 tcp_put_md5sig_pool();
609 memset(md5_hash
, 0, 16);
613 static int tcp_v6_md5_hash_skb(char *md5_hash
, struct tcp_md5sig_key
*key
,
614 const struct sock
*sk
,
615 const struct request_sock
*req
,
616 const struct sk_buff
*skb
)
618 const struct in6_addr
*saddr
, *daddr
;
619 struct tcp_md5sig_pool
*hp
;
620 struct hash_desc
*desc
;
621 const struct tcphdr
*th
= tcp_hdr(skb
);
624 saddr
= &inet6_sk(sk
)->saddr
;
625 daddr
= &inet6_sk(sk
)->daddr
;
627 saddr
= &inet6_rsk(req
)->loc_addr
;
628 daddr
= &inet6_rsk(req
)->rmt_addr
;
630 const struct ipv6hdr
*ip6h
= ipv6_hdr(skb
);
631 saddr
= &ip6h
->saddr
;
632 daddr
= &ip6h
->daddr
;
635 hp
= tcp_get_md5sig_pool();
637 goto clear_hash_noput
;
638 desc
= &hp
->md5_desc
;
640 if (crypto_hash_init(desc
))
643 if (tcp_v6_md5_hash_pseudoheader(hp
, daddr
, saddr
, skb
->len
))
645 if (tcp_md5_hash_header(hp
, th
))
647 if (tcp_md5_hash_skb_data(hp
, skb
, th
->doff
<< 2))
649 if (tcp_md5_hash_key(hp
, key
))
651 if (crypto_hash_final(desc
, md5_hash
))
654 tcp_put_md5sig_pool();
658 tcp_put_md5sig_pool();
660 memset(md5_hash
, 0, 16);
664 static int tcp_v6_inbound_md5_hash(struct sock
*sk
, const struct sk_buff
*skb
)
666 const __u8
*hash_location
= NULL
;
667 struct tcp_md5sig_key
*hash_expected
;
668 const struct ipv6hdr
*ip6h
= ipv6_hdr(skb
);
669 const struct tcphdr
*th
= tcp_hdr(skb
);
673 hash_expected
= tcp_v6_md5_do_lookup(sk
, &ip6h
->saddr
);
674 hash_location
= tcp_parse_md5sig_option(th
);
676 /* We've parsed the options - do we have a hash? */
677 if (!hash_expected
&& !hash_location
)
680 if (hash_expected
&& !hash_location
) {
681 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_TCPMD5NOTFOUND
);
685 if (!hash_expected
&& hash_location
) {
686 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_TCPMD5UNEXPECTED
);
690 /* check the signature */
691 genhash
= tcp_v6_md5_hash_skb(newhash
,
695 if (genhash
|| memcmp(hash_location
, newhash
, 16) != 0) {
696 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
697 genhash
? "failed" : "mismatch",
698 &ip6h
->saddr
, ntohs(th
->source
),
699 &ip6h
->daddr
, ntohs(th
->dest
));
706 struct request_sock_ops tcp6_request_sock_ops __read_mostly
= {
708 .obj_size
= sizeof(struct tcp6_request_sock
),
709 .rtx_syn_ack
= tcp_v6_rtx_synack
,
710 .send_ack
= tcp_v6_reqsk_send_ack
,
711 .destructor
= tcp_v6_reqsk_destructor
,
712 .send_reset
= tcp_v6_send_reset
,
713 .syn_ack_timeout
= tcp_syn_ack_timeout
,
716 #ifdef CONFIG_TCP_MD5SIG
717 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops
= {
718 .md5_lookup
= tcp_v6_reqsk_md5_lookup
,
719 .calc_md5_hash
= tcp_v6_md5_hash_skb
,
723 static void tcp_v6_send_response(struct sk_buff
*skb
, u32 seq
, u32 ack
, u32 win
,
724 u32 tsval
, u32 tsecr
,
725 struct tcp_md5sig_key
*key
, int rst
, u8 tclass
)
727 const struct tcphdr
*th
= tcp_hdr(skb
);
729 struct sk_buff
*buff
;
731 struct net
*net
= dev_net(skb_dst(skb
)->dev
);
732 struct sock
*ctl_sk
= net
->ipv6
.tcp_sk
;
733 unsigned int tot_len
= sizeof(struct tcphdr
);
734 struct dst_entry
*dst
;
738 tot_len
+= TCPOLEN_TSTAMP_ALIGNED
;
739 #ifdef CONFIG_TCP_MD5SIG
741 tot_len
+= TCPOLEN_MD5SIG_ALIGNED
;
744 buff
= alloc_skb(MAX_HEADER
+ sizeof(struct ipv6hdr
) + tot_len
,
749 skb_reserve(buff
, MAX_HEADER
+ sizeof(struct ipv6hdr
) + tot_len
);
751 t1
= (struct tcphdr
*) skb_push(buff
, tot_len
);
752 skb_reset_transport_header(buff
);
754 /* Swap the send and the receive. */
755 memset(t1
, 0, sizeof(*t1
));
756 t1
->dest
= th
->source
;
757 t1
->source
= th
->dest
;
758 t1
->doff
= tot_len
/ 4;
759 t1
->seq
= htonl(seq
);
760 t1
->ack_seq
= htonl(ack
);
761 t1
->ack
= !rst
|| !th
->ack
;
763 t1
->window
= htons(win
);
765 topt
= (__be32
*)(t1
+ 1);
768 *topt
++ = htonl((TCPOPT_NOP
<< 24) | (TCPOPT_NOP
<< 16) |
769 (TCPOPT_TIMESTAMP
<< 8) | TCPOLEN_TIMESTAMP
);
770 *topt
++ = htonl(tsval
);
771 *topt
++ = htonl(tsecr
);
774 #ifdef CONFIG_TCP_MD5SIG
776 *topt
++ = htonl((TCPOPT_NOP
<< 24) | (TCPOPT_NOP
<< 16) |
777 (TCPOPT_MD5SIG
<< 8) | TCPOLEN_MD5SIG
);
778 tcp_v6_md5_hash_hdr((__u8
*)topt
, key
,
779 &ipv6_hdr(skb
)->saddr
,
780 &ipv6_hdr(skb
)->daddr
, t1
);
784 memset(&fl6
, 0, sizeof(fl6
));
785 fl6
.daddr
= ipv6_hdr(skb
)->saddr
;
786 fl6
.saddr
= ipv6_hdr(skb
)->daddr
;
788 buff
->ip_summed
= CHECKSUM_PARTIAL
;
791 __tcp_v6_send_check(buff
, &fl6
.saddr
, &fl6
.daddr
);
793 fl6
.flowi6_proto
= IPPROTO_TCP
;
794 if (ipv6_addr_type(&fl6
.daddr
) & IPV6_ADDR_LINKLOCAL
)
795 fl6
.flowi6_oif
= inet6_iif(skb
);
796 fl6
.flowi6_mark
= IP6_REPLY_MARK(net
, skb
->mark
);
797 fl6
.fl6_dport
= t1
->dest
;
798 fl6
.fl6_sport
= t1
->source
;
799 security_skb_classify_flow(skb
, flowi6_to_flowi(&fl6
));
801 /* Pass a socket to ip6_dst_lookup either it is for RST
802 * Underlying function will use this to retrieve the network
805 dst
= ip6_dst_lookup_flow(ctl_sk
, &fl6
, NULL
, false);
807 skb_dst_set(buff
, dst
);
808 ip6_xmit(ctl_sk
, buff
, &fl6
, NULL
, tclass
);
809 TCP_INC_STATS_BH(net
, TCP_MIB_OUTSEGS
);
811 TCP_INC_STATS_BH(net
, TCP_MIB_OUTRSTS
);
818 static void tcp_v6_send_reset(struct sock
*sk
, struct sk_buff
*skb
)
820 const struct tcphdr
*th
= tcp_hdr(skb
);
821 u32 seq
= 0, ack_seq
= 0;
822 struct tcp_md5sig_key
*key
= NULL
;
823 #ifdef CONFIG_TCP_MD5SIG
824 const __u8
*hash_location
= NULL
;
825 struct ipv6hdr
*ipv6h
= ipv6_hdr(skb
);
826 unsigned char newhash
[16];
828 struct sock
*sk1
= NULL
;
834 if (!ipv6_unicast_destination(skb
))
837 #ifdef CONFIG_TCP_MD5SIG
838 hash_location
= tcp_parse_md5sig_option(th
);
839 if (!sk
&& hash_location
) {
841 * active side is lost. Try to find listening socket through
842 * source port, and then find md5 key through listening socket.
843 * we are not loose security here:
844 * Incoming packet is checked with md5 hash with finding key,
845 * no RST generated if md5 hash doesn't match.
847 sk1
= inet6_lookup_listener(dev_net(skb_dst(skb
)->dev
),
848 &tcp_hashinfo
, &ipv6h
->saddr
,
849 th
->source
, &ipv6h
->daddr
,
850 ntohs(th
->source
), inet6_iif(skb
));
855 key
= tcp_v6_md5_do_lookup(sk1
, &ipv6h
->saddr
);
859 genhash
= tcp_v6_md5_hash_skb(newhash
, key
, NULL
, NULL
, skb
);
860 if (genhash
|| memcmp(hash_location
, newhash
, 16) != 0)
863 key
= sk
? tcp_v6_md5_do_lookup(sk
, &ipv6h
->saddr
) : NULL
;
868 seq
= ntohl(th
->ack_seq
);
870 ack_seq
= ntohl(th
->seq
) + th
->syn
+ th
->fin
+ skb
->len
-
873 tcp_v6_send_response(skb
, seq
, ack_seq
, 0, 0, 0, key
, 1, 0);
875 #ifdef CONFIG_TCP_MD5SIG
884 static void tcp_v6_send_ack(struct sk_buff
*skb
, u32 seq
, u32 ack
,
885 u32 win
, u32 tsval
, u32 tsecr
,
886 struct tcp_md5sig_key
*key
, u8 tclass
)
888 tcp_v6_send_response(skb
, seq
, ack
, win
, tsval
, tsecr
, key
, 0, tclass
);
891 static void tcp_v6_timewait_ack(struct sock
*sk
, struct sk_buff
*skb
)
893 struct inet_timewait_sock
*tw
= inet_twsk(sk
);
894 struct tcp_timewait_sock
*tcptw
= tcp_twsk(sk
);
896 tcp_v6_send_ack(skb
, tcptw
->tw_snd_nxt
, tcptw
->tw_rcv_nxt
,
897 tcptw
->tw_rcv_wnd
>> tw
->tw_rcv_wscale
,
898 tcp_time_stamp
+ tcptw
->tw_ts_offset
,
899 tcptw
->tw_ts_recent
, tcp_twsk_md5_key(tcptw
),
905 static void tcp_v6_reqsk_send_ack(struct sock
*sk
, struct sk_buff
*skb
,
906 struct request_sock
*req
)
908 tcp_v6_send_ack(skb
, tcp_rsk(req
)->snt_isn
+ 1, tcp_rsk(req
)->rcv_isn
+ 1,
909 req
->rcv_wnd
, tcp_time_stamp
, req
->ts_recent
,
910 tcp_v6_md5_do_lookup(sk
, &ipv6_hdr(skb
)->daddr
), 0);
914 static struct sock
*tcp_v6_hnd_req(struct sock
*sk
,struct sk_buff
*skb
)
916 struct request_sock
*req
, **prev
;
917 const struct tcphdr
*th
= tcp_hdr(skb
);
920 /* Find possible connection requests. */
921 req
= inet6_csk_search_req(sk
, &prev
, th
->source
,
922 &ipv6_hdr(skb
)->saddr
,
923 &ipv6_hdr(skb
)->daddr
, inet6_iif(skb
));
925 return tcp_check_req(sk
, skb
, req
, prev
, false);
927 nsk
= __inet6_lookup_established(sock_net(sk
), &tcp_hashinfo
,
928 &ipv6_hdr(skb
)->saddr
, th
->source
,
929 &ipv6_hdr(skb
)->daddr
, ntohs(th
->dest
), inet6_iif(skb
));
932 if (nsk
->sk_state
!= TCP_TIME_WAIT
) {
936 inet_twsk_put(inet_twsk(nsk
));
940 #ifdef CONFIG_SYN_COOKIES
942 sk
= cookie_v6_check(sk
, skb
);
947 /* FIXME: this is substantially similar to the ipv4 code.
948 * Can some kind of merge be done? -- erics
950 static int tcp_v6_conn_request(struct sock
*sk
, struct sk_buff
*skb
)
952 struct tcp_options_received tmp_opt
;
953 struct request_sock
*req
;
954 struct inet6_request_sock
*treq
;
955 struct ipv6_pinfo
*np
= inet6_sk(sk
);
956 struct tcp_sock
*tp
= tcp_sk(sk
);
957 __u32 isn
= TCP_SKB_CB(skb
)->when
;
958 struct dst_entry
*dst
= NULL
;
960 bool want_cookie
= false;
962 if (skb
->protocol
== htons(ETH_P_IP
))
963 return tcp_v4_conn_request(sk
, skb
);
965 if (!ipv6_unicast_destination(skb
))
968 if (inet_csk_reqsk_queue_is_full(sk
) && !isn
) {
969 want_cookie
= tcp_syn_flood_action(sk
, skb
, "TCPv6");
974 if (sk_acceptq_is_full(sk
) && inet_csk_reqsk_queue_young(sk
) > 1) {
975 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_LISTENOVERFLOWS
);
979 req
= inet6_reqsk_alloc(&tcp6_request_sock_ops
);
983 #ifdef CONFIG_TCP_MD5SIG
984 tcp_rsk(req
)->af_specific
= &tcp_request_sock_ipv6_ops
;
987 tcp_clear_options(&tmp_opt
);
988 tmp_opt
.mss_clamp
= IPV6_MIN_MTU
- sizeof(struct tcphdr
) - sizeof(struct ipv6hdr
);
989 tmp_opt
.user_mss
= tp
->rx_opt
.user_mss
;
990 tcp_parse_options(skb
, &tmp_opt
, 0, NULL
);
992 if (want_cookie
&& !tmp_opt
.saw_tstamp
)
993 tcp_clear_options(&tmp_opt
);
995 tmp_opt
.tstamp_ok
= tmp_opt
.saw_tstamp
;
996 tcp_openreq_init(req
, &tmp_opt
, skb
);
998 treq
= inet6_rsk(req
);
999 treq
->rmt_addr
= ipv6_hdr(skb
)->saddr
;
1000 treq
->loc_addr
= ipv6_hdr(skb
)->daddr
;
1001 if (!want_cookie
|| tmp_opt
.tstamp_ok
)
1002 TCP_ECN_create_request(req
, skb
, sock_net(sk
));
1004 treq
->iif
= sk
->sk_bound_dev_if
;
1005 inet_rsk(req
)->ir_mark
= inet_request_mark(sk
, skb
);
1007 /* So that link locals have meaning */
1008 if (!sk
->sk_bound_dev_if
&&
1009 ipv6_addr_type(&treq
->rmt_addr
) & IPV6_ADDR_LINKLOCAL
)
1010 treq
->iif
= inet6_iif(skb
);
1013 if (ipv6_opt_accepted(sk
, skb
) ||
1014 np
->rxopt
.bits
.rxinfo
|| np
->rxopt
.bits
.rxoinfo
||
1015 np
->rxopt
.bits
.rxhlim
|| np
->rxopt
.bits
.rxohlim
) {
1016 atomic_inc(&skb
->users
);
1017 treq
->pktopts
= skb
;
1021 isn
= cookie_v6_init_sequence(sk
, skb
, &req
->mss
);
1022 req
->cookie_ts
= tmp_opt
.tstamp_ok
;
1026 /* VJ's idea. We save last timestamp seen
1027 * from the destination in peer table, when entering
1028 * state TIME-WAIT, and check against it before
1029 * accepting new connection request.
1031 * If "isn" is not zero, this request hit alive
1032 * timewait bucket, so that all the necessary checks
1033 * are made in the function processing timewait state.
1035 if (tmp_opt
.saw_tstamp
&&
1036 tcp_death_row
.sysctl_tw_recycle
&&
1037 (dst
= inet6_csk_route_req(sk
, &fl6
, req
)) != NULL
) {
1038 if (!tcp_peer_is_proven(req
, dst
, true)) {
1039 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_PAWSPASSIVEREJECTED
);
1040 goto drop_and_release
;
1043 /* Kill the following clause, if you dislike this way. */
1044 else if (!sysctl_tcp_syncookies
&&
1045 (sysctl_max_syn_backlog
- inet_csk_reqsk_queue_len(sk
) <
1046 (sysctl_max_syn_backlog
>> 2)) &&
1047 !tcp_peer_is_proven(req
, dst
, false)) {
1048 /* Without syncookies last quarter of
1049 * backlog is filled with destinations,
1050 * proven to be alive.
1051 * It means that we continue to communicate
1052 * to destinations, already remembered
1053 * to the moment of synflood.
1055 LIMIT_NETDEBUG(KERN_DEBUG
"TCP: drop open request from %pI6/%u\n",
1056 &treq
->rmt_addr
, ntohs(tcp_hdr(skb
)->source
));
1057 goto drop_and_release
;
1060 isn
= tcp_v6_init_sequence(skb
);
1063 tcp_rsk(req
)->snt_isn
= isn
;
1065 if (security_inet_conn_request(sk
, skb
, req
))
1066 goto drop_and_release
;
1068 if (tcp_v6_send_synack(sk
, dst
, &fl6
, req
,
1069 skb_get_queue_mapping(skb
)) ||
1073 tcp_rsk(req
)->snt_synack
= tcp_time_stamp
;
1074 tcp_rsk(req
)->listener
= NULL
;
1075 inet6_csk_reqsk_queue_hash_add(sk
, req
, TCP_TIMEOUT_INIT
);
1083 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_LISTENDROPS
);
1084 return 0; /* don't send reset */
1087 static struct sock
* tcp_v6_syn_recv_sock(struct sock
*sk
, struct sk_buff
*skb
,
1088 struct request_sock
*req
,
1089 struct dst_entry
*dst
)
1091 struct inet6_request_sock
*treq
;
1092 struct ipv6_pinfo
*newnp
, *np
= inet6_sk(sk
);
1093 struct tcp6_sock
*newtcp6sk
;
1094 struct inet_sock
*newinet
;
1095 struct tcp_sock
*newtp
;
1097 #ifdef CONFIG_TCP_MD5SIG
1098 struct tcp_md5sig_key
*key
;
1102 if (skb
->protocol
== htons(ETH_P_IP
)) {
1107 newsk
= tcp_v4_syn_recv_sock(sk
, skb
, req
, dst
);
1112 newtcp6sk
= (struct tcp6_sock
*)newsk
;
1113 inet_sk(newsk
)->pinet6
= &newtcp6sk
->inet6
;
1115 newinet
= inet_sk(newsk
);
1116 newnp
= inet6_sk(newsk
);
1117 newtp
= tcp_sk(newsk
);
1119 memcpy(newnp
, np
, sizeof(struct ipv6_pinfo
));
1121 ipv6_addr_set_v4mapped(newinet
->inet_daddr
, &newnp
->daddr
);
1123 ipv6_addr_set_v4mapped(newinet
->inet_saddr
, &newnp
->saddr
);
1125 newnp
->rcv_saddr
= newnp
->saddr
;
1127 inet_csk(newsk
)->icsk_af_ops
= &ipv6_mapped
;
1128 newsk
->sk_backlog_rcv
= tcp_v4_do_rcv
;
1129 #ifdef CONFIG_TCP_MD5SIG
1130 newtp
->af_specific
= &tcp_sock_ipv6_mapped_specific
;
1133 newnp
->ipv6_ac_list
= NULL
;
1134 newnp
->ipv6_fl_list
= NULL
;
1135 newnp
->pktoptions
= NULL
;
1137 newnp
->mcast_oif
= inet6_iif(skb
);
1138 newnp
->mcast_hops
= ipv6_hdr(skb
)->hop_limit
;
1139 newnp
->rcv_tclass
= ipv6_get_dsfield(ipv6_hdr(skb
));
1142 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1143 * here, tcp_create_openreq_child now does this for us, see the comment in
1144 * that function for the gory details. -acme
1147 /* It is tricky place. Until this moment IPv4 tcp
1148 worked with IPv6 icsk.icsk_af_ops.
1151 tcp_sync_mss(newsk
, inet_csk(newsk
)->icsk_pmtu_cookie
);
1156 treq
= inet6_rsk(req
);
1158 if (sk_acceptq_is_full(sk
))
1162 dst
= inet6_csk_route_req(sk
, &fl6
, req
);
1167 newsk
= tcp_create_openreq_child(sk
, req
, skb
);
1172 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1173 * count here, tcp_create_openreq_child now does this for us, see the
1174 * comment in that function for the gory details. -acme
1177 newsk
->sk_gso_type
= SKB_GSO_TCPV6
;
1178 __ip6_dst_store(newsk
, dst
, NULL
, NULL
);
1179 inet6_sk_rx_dst_set(newsk
, skb
);
1181 newtcp6sk
= (struct tcp6_sock
*)newsk
;
1182 inet_sk(newsk
)->pinet6
= &newtcp6sk
->inet6
;
1184 newtp
= tcp_sk(newsk
);
1185 newinet
= inet_sk(newsk
);
1186 newnp
= inet6_sk(newsk
);
1188 memcpy(newnp
, np
, sizeof(struct ipv6_pinfo
));
1190 newnp
->daddr
= treq
->rmt_addr
;
1191 newnp
->saddr
= treq
->loc_addr
;
1192 newnp
->rcv_saddr
= treq
->loc_addr
;
1193 newsk
->sk_bound_dev_if
= treq
->iif
;
1195 /* Now IPv6 options...
1197 First: no IPv4 options.
1199 newinet
->inet_opt
= NULL
;
1200 newnp
->ipv6_ac_list
= NULL
;
1201 newnp
->ipv6_fl_list
= NULL
;
1204 newnp
->rxopt
.all
= np
->rxopt
.all
;
1206 /* Clone pktoptions received with SYN */
1207 newnp
->pktoptions
= NULL
;
1208 if (treq
->pktopts
!= NULL
) {
1209 newnp
->pktoptions
= skb_clone(treq
->pktopts
,
1210 sk_gfp_atomic(sk
, GFP_ATOMIC
));
1211 consume_skb(treq
->pktopts
);
1212 treq
->pktopts
= NULL
;
1213 if (newnp
->pktoptions
)
1214 skb_set_owner_r(newnp
->pktoptions
, newsk
);
1217 newnp
->mcast_oif
= inet6_iif(skb
);
1218 newnp
->mcast_hops
= ipv6_hdr(skb
)->hop_limit
;
1219 newnp
->rcv_tclass
= ipv6_get_dsfield(ipv6_hdr(skb
));
1221 /* Clone native IPv6 options from listening socket (if any)
1223 Yes, keeping reference count would be much more clever,
1224 but we make one more one thing there: reattach optmem
1228 newnp
->opt
= ipv6_dup_options(newsk
, np
->opt
);
1230 inet_csk(newsk
)->icsk_ext_hdr_len
= 0;
1232 inet_csk(newsk
)->icsk_ext_hdr_len
= (newnp
->opt
->opt_nflen
+
1233 newnp
->opt
->opt_flen
);
1235 tcp_mtup_init(newsk
);
1236 tcp_sync_mss(newsk
, dst_mtu(dst
));
1237 newtp
->advmss
= dst_metric_advmss(dst
);
1238 if (tcp_sk(sk
)->rx_opt
.user_mss
&&
1239 tcp_sk(sk
)->rx_opt
.user_mss
< newtp
->advmss
)
1240 newtp
->advmss
= tcp_sk(sk
)->rx_opt
.user_mss
;
1242 tcp_initialize_rcv_mss(newsk
);
1243 tcp_synack_rtt_meas(newsk
, req
);
1244 newtp
->total_retrans
= req
->num_retrans
;
1246 newinet
->inet_daddr
= newinet
->inet_saddr
= LOOPBACK4_IPV6
;
1247 newinet
->inet_rcv_saddr
= LOOPBACK4_IPV6
;
1249 #ifdef CONFIG_TCP_MD5SIG
1250 /* Copy over the MD5 key from the original socket */
1251 if ((key
= tcp_v6_md5_do_lookup(sk
, &newnp
->daddr
)) != NULL
) {
1252 /* We're using one, so create a matching key
1253 * on the newsk structure. If we fail to get
1254 * memory, then we end up not copying the key
1257 tcp_md5_do_add(newsk
, (union tcp_md5_addr
*)&newnp
->daddr
,
1258 AF_INET6
, key
->key
, key
->keylen
,
1259 sk_gfp_atomic(sk
, GFP_ATOMIC
));
1263 if (__inet_inherit_port(sk
, newsk
) < 0) {
1264 inet_csk_prepare_forced_close(newsk
);
1268 __inet6_hash(newsk
, NULL
);
1273 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_LISTENOVERFLOWS
);
1277 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_LISTENDROPS
);
1281 static __sum16
tcp_v6_checksum_init(struct sk_buff
*skb
)
1283 if (skb
->ip_summed
== CHECKSUM_COMPLETE
) {
1284 if (!tcp_v6_check(skb
->len
, &ipv6_hdr(skb
)->saddr
,
1285 &ipv6_hdr(skb
)->daddr
, skb
->csum
)) {
1286 skb
->ip_summed
= CHECKSUM_UNNECESSARY
;
1291 skb
->csum
= ~csum_unfold(tcp_v6_check(skb
->len
,
1292 &ipv6_hdr(skb
)->saddr
,
1293 &ipv6_hdr(skb
)->daddr
, 0));
1295 if (skb
->len
<= 76) {
1296 return __skb_checksum_complete(skb
);
1301 /* The socket must have it's spinlock held when we get
1304 * We have a potential double-lock case here, so even when
1305 * doing backlog processing we use the BH locking scheme.
1306 * This is because we cannot sleep with the original spinlock
1309 static int tcp_v6_do_rcv(struct sock
*sk
, struct sk_buff
*skb
)
1311 struct ipv6_pinfo
*np
= inet6_sk(sk
);
1312 struct tcp_sock
*tp
;
1313 struct sk_buff
*opt_skb
= NULL
;
1315 /* Imagine: socket is IPv6. IPv4 packet arrives,
1316 goes to IPv4 receive handler and backlogged.
1317 From backlog it always goes here. Kerboom...
1318 Fortunately, tcp_rcv_established and rcv_established
1319 handle them correctly, but that is not the case with
1320 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1323 if (skb
->protocol
== htons(ETH_P_IP
))
1324 return tcp_v4_do_rcv(sk
, skb
);
1326 #ifdef CONFIG_TCP_MD5SIG
1327 if (tcp_v6_inbound_md5_hash (sk
, skb
))
1331 if (sk_filter(sk
, skb
))
1335 * socket locking is here for SMP purposes as backlog rcv
1336 * is currently called with bh processing disabled.
1339 /* Do Stevens' IPV6_PKTOPTIONS.
1341 Yes, guys, this is the only place in our code where we
1342 can do it without affecting IPv4.
1343 The rest of the code is protocol independent,
1344 and I do not like the idea of uglifying IPv4.
1346 Actually, the whole idea behind IPV6_PKTOPTIONS
1347 does not look very well thought out. For now we latch
1348 the options received in the last packet enqueued
1349 by tcp. Feel free to propose a better solution.
1353 opt_skb
= skb_clone(skb
, sk_gfp_atomic(sk
, GFP_ATOMIC
));
1355 if (sk
->sk_state
== TCP_ESTABLISHED
) { /* Fast path */
1356 struct dst_entry
*dst
= sk
->sk_rx_dst
;
1358 sock_rps_save_rxhash(sk
, skb
);
1360 if (inet_sk(sk
)->rx_dst_ifindex
!= skb
->skb_iif
||
1361 dst
->ops
->check(dst
, np
->rx_dst_cookie
) == NULL
) {
1363 sk
->sk_rx_dst
= NULL
;
1367 if (tcp_rcv_established(sk
, skb
, tcp_hdr(skb
), skb
->len
))
1370 goto ipv6_pktoptions
;
1374 if (skb
->len
< tcp_hdrlen(skb
) || tcp_checksum_complete(skb
))
1377 if (sk
->sk_state
== TCP_LISTEN
) {
1378 struct sock
*nsk
= tcp_v6_hnd_req(sk
, skb
);
1383 * Queue it on the new socket if the new socket is active,
1384 * otherwise we just shortcircuit this and continue with
1388 sock_rps_save_rxhash(nsk
, skb
);
1389 if (tcp_child_process(sk
, nsk
, skb
))
1392 __kfree_skb(opt_skb
);
1396 sock_rps_save_rxhash(sk
, skb
);
1398 if (tcp_rcv_state_process(sk
, skb
, tcp_hdr(skb
), skb
->len
))
1401 goto ipv6_pktoptions
;
1405 tcp_v6_send_reset(sk
, skb
);
1408 __kfree_skb(opt_skb
);
1412 TCP_INC_STATS_BH(sock_net(sk
), TCP_MIB_CSUMERRORS
);
1413 TCP_INC_STATS_BH(sock_net(sk
), TCP_MIB_INERRS
);
1418 /* Do you ask, what is it?
1420 1. skb was enqueued by tcp.
1421 2. skb is added to tail of read queue, rather than out of order.
1422 3. socket is not in passive state.
1423 4. Finally, it really contains options that the user wants to receive.
1426 if (TCP_SKB_CB(opt_skb
)->end_seq
== tp
->rcv_nxt
&&
1427 !((1 << sk
->sk_state
) & (TCPF_CLOSE
| TCPF_LISTEN
))) {
1428 if (np
->rxopt
.bits
.rxinfo
|| np
->rxopt
.bits
.rxoinfo
)
1429 np
->mcast_oif
= inet6_iif(opt_skb
);
1430 if (np
->rxopt
.bits
.rxhlim
|| np
->rxopt
.bits
.rxohlim
)
1431 np
->mcast_hops
= ipv6_hdr(opt_skb
)->hop_limit
;
1432 if (np
->rxopt
.bits
.rxtclass
)
1433 np
->rcv_tclass
= ipv6_get_dsfield(ipv6_hdr(opt_skb
));
1434 if (ipv6_opt_accepted(sk
, opt_skb
)) {
1435 skb_set_owner_r(opt_skb
, sk
);
1436 opt_skb
= xchg(&np
->pktoptions
, opt_skb
);
1438 __kfree_skb(opt_skb
);
1439 opt_skb
= xchg(&np
->pktoptions
, NULL
);
1447 static int tcp_v6_rcv(struct sk_buff
*skb
)
1449 const struct tcphdr
*th
;
1450 const struct ipv6hdr
*hdr
;
1453 struct net
*net
= dev_net(skb
->dev
);
1455 if (skb
->pkt_type
!= PACKET_HOST
)
1459 * Count it even if it's bad.
1461 TCP_INC_STATS_BH(net
, TCP_MIB_INSEGS
);
1463 if (!pskb_may_pull(skb
, sizeof(struct tcphdr
)))
1468 if (th
->doff
< sizeof(struct tcphdr
)/4)
1470 if (!pskb_may_pull(skb
, th
->doff
*4))
1473 if (!skb_csum_unnecessary(skb
) && tcp_v6_checksum_init(skb
))
1477 hdr
= ipv6_hdr(skb
);
1478 TCP_SKB_CB(skb
)->seq
= ntohl(th
->seq
);
1479 TCP_SKB_CB(skb
)->end_seq
= (TCP_SKB_CB(skb
)->seq
+ th
->syn
+ th
->fin
+
1480 skb
->len
- th
->doff
*4);
1481 TCP_SKB_CB(skb
)->ack_seq
= ntohl(th
->ack_seq
);
1482 TCP_SKB_CB(skb
)->when
= 0;
1483 TCP_SKB_CB(skb
)->ip_dsfield
= ipv6_get_dsfield(hdr
);
1484 TCP_SKB_CB(skb
)->sacked
= 0;
1486 sk
= __inet6_lookup_skb(&tcp_hashinfo
, skb
, th
->source
, th
->dest
);
1491 if (sk
->sk_state
== TCP_TIME_WAIT
)
1494 if (hdr
->hop_limit
< inet6_sk(sk
)->min_hopcount
) {
1495 NET_INC_STATS_BH(net
, LINUX_MIB_TCPMINTTLDROP
);
1496 goto discard_and_relse
;
1499 if (!xfrm6_policy_check(sk
, XFRM_POLICY_IN
, skb
))
1500 goto discard_and_relse
;
1502 if (sk_filter(sk
, skb
))
1503 goto discard_and_relse
;
1507 bh_lock_sock_nested(sk
);
1509 if (!sock_owned_by_user(sk
)) {
1510 #ifdef CONFIG_NET_DMA
1511 struct tcp_sock
*tp
= tcp_sk(sk
);
1512 if (!tp
->ucopy
.dma_chan
&& tp
->ucopy
.pinned_list
)
1513 tp
->ucopy
.dma_chan
= net_dma_find_channel();
1514 if (tp
->ucopy
.dma_chan
)
1515 ret
= tcp_v6_do_rcv(sk
, skb
);
1519 if (!tcp_prequeue(sk
, skb
))
1520 ret
= tcp_v6_do_rcv(sk
, skb
);
1522 } else if (unlikely(sk_add_backlog(sk
, skb
,
1523 sk
->sk_rcvbuf
+ sk
->sk_sndbuf
))) {
1525 NET_INC_STATS_BH(net
, LINUX_MIB_TCPBACKLOGDROP
);
1526 goto discard_and_relse
;
1531 return ret
? -1 : 0;
1534 if (!xfrm6_policy_check(NULL
, XFRM_POLICY_IN
, skb
))
1537 if (skb
->len
< (th
->doff
<<2) || tcp_checksum_complete(skb
)) {
1539 TCP_INC_STATS_BH(net
, TCP_MIB_CSUMERRORS
);
1541 TCP_INC_STATS_BH(net
, TCP_MIB_INERRS
);
1543 tcp_v6_send_reset(NULL
, skb
);
1555 if (!xfrm6_policy_check(NULL
, XFRM_POLICY_IN
, skb
)) {
1556 inet_twsk_put(inet_twsk(sk
));
1560 if (skb
->len
< (th
->doff
<<2)) {
1561 inet_twsk_put(inet_twsk(sk
));
1564 if (tcp_checksum_complete(skb
)) {
1565 inet_twsk_put(inet_twsk(sk
));
1569 switch (tcp_timewait_state_process(inet_twsk(sk
), skb
, th
)) {
1574 sk2
= inet6_lookup_listener(dev_net(skb
->dev
), &tcp_hashinfo
,
1575 &ipv6_hdr(skb
)->saddr
, th
->source
,
1576 &ipv6_hdr(skb
)->daddr
,
1577 ntohs(th
->dest
), inet6_iif(skb
));
1579 struct inet_timewait_sock
*tw
= inet_twsk(sk
);
1580 inet_twsk_deschedule(tw
, &tcp_death_row
);
1585 /* Fall through to ACK */
1588 tcp_v6_timewait_ack(sk
, skb
);
1592 case TCP_TW_SUCCESS
:;
1597 static void tcp_v6_early_demux(struct sk_buff
*skb
)
1599 const struct ipv6hdr
*hdr
;
1600 const struct tcphdr
*th
;
1603 if (skb
->pkt_type
!= PACKET_HOST
)
1606 if (!pskb_may_pull(skb
, skb_transport_offset(skb
) + sizeof(struct tcphdr
)))
1609 hdr
= ipv6_hdr(skb
);
1612 if (th
->doff
< sizeof(struct tcphdr
) / 4)
1615 sk
= __inet6_lookup_established(dev_net(skb
->dev
), &tcp_hashinfo
,
1616 &hdr
->saddr
, th
->source
,
1617 &hdr
->daddr
, ntohs(th
->dest
),
1621 skb
->destructor
= sock_edemux
;
1622 if (sk
->sk_state
!= TCP_TIME_WAIT
) {
1623 struct dst_entry
*dst
= ACCESS_ONCE(sk
->sk_rx_dst
);
1626 dst
= dst_check(dst
, inet6_sk(sk
)->rx_dst_cookie
);
1628 inet_sk(sk
)->rx_dst_ifindex
== skb
->skb_iif
)
1629 skb_dst_set_noref(skb
, dst
);
1634 static struct timewait_sock_ops tcp6_timewait_sock_ops
= {
1635 .twsk_obj_size
= sizeof(struct tcp6_timewait_sock
),
1636 .twsk_unique
= tcp_twsk_unique
,
1637 .twsk_destructor
= tcp_twsk_destructor
,
1640 static const struct inet_connection_sock_af_ops ipv6_specific
= {
1641 .queue_xmit
= inet6_csk_xmit
,
1642 .send_check
= tcp_v6_send_check
,
1643 .rebuild_header
= inet6_sk_rebuild_header
,
1644 .sk_rx_dst_set
= inet6_sk_rx_dst_set
,
1645 .conn_request
= tcp_v6_conn_request
,
1646 .syn_recv_sock
= tcp_v6_syn_recv_sock
,
1647 .net_header_len
= sizeof(struct ipv6hdr
),
1648 .net_frag_header_len
= sizeof(struct frag_hdr
),
1649 .setsockopt
= ipv6_setsockopt
,
1650 .getsockopt
= ipv6_getsockopt
,
1651 .addr2sockaddr
= inet6_csk_addr2sockaddr
,
1652 .sockaddr_len
= sizeof(struct sockaddr_in6
),
1653 .bind_conflict
= inet6_csk_bind_conflict
,
1654 #ifdef CONFIG_COMPAT
1655 .compat_setsockopt
= compat_ipv6_setsockopt
,
1656 .compat_getsockopt
= compat_ipv6_getsockopt
,
1658 .mtu_reduced
= tcp_v6_mtu_reduced
,
1661 #ifdef CONFIG_TCP_MD5SIG
1662 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific
= {
1663 .md5_lookup
= tcp_v6_md5_lookup
,
1664 .calc_md5_hash
= tcp_v6_md5_hash_skb
,
1665 .md5_parse
= tcp_v6_parse_md5_keys
,
1670 * TCP over IPv4 via INET6 API
1673 static const struct inet_connection_sock_af_ops ipv6_mapped
= {
1674 .queue_xmit
= ip_queue_xmit
,
1675 .send_check
= tcp_v4_send_check
,
1676 .rebuild_header
= inet_sk_rebuild_header
,
1677 .sk_rx_dst_set
= inet_sk_rx_dst_set
,
1678 .conn_request
= tcp_v6_conn_request
,
1679 .syn_recv_sock
= tcp_v6_syn_recv_sock
,
1680 .net_header_len
= sizeof(struct iphdr
),
1681 .setsockopt
= ipv6_setsockopt
,
1682 .getsockopt
= ipv6_getsockopt
,
1683 .addr2sockaddr
= inet6_csk_addr2sockaddr
,
1684 .sockaddr_len
= sizeof(struct sockaddr_in6
),
1685 .bind_conflict
= inet6_csk_bind_conflict
,
1686 #ifdef CONFIG_COMPAT
1687 .compat_setsockopt
= compat_ipv6_setsockopt
,
1688 .compat_getsockopt
= compat_ipv6_getsockopt
,
1690 .mtu_reduced
= tcp_v4_mtu_reduced
,
1693 #ifdef CONFIG_TCP_MD5SIG
1694 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific
= {
1695 .md5_lookup
= tcp_v4_md5_lookup
,
1696 .calc_md5_hash
= tcp_v4_md5_hash_skb
,
1697 .md5_parse
= tcp_v6_parse_md5_keys
,
1701 /* NOTE: A lot of things are set to zero explicitly by the call to
1702 * sk_alloc(), so they need not be done here.
1704 static int tcp_v6_init_sock(struct sock
*sk
)
1706 struct inet_connection_sock
*icsk
= inet_csk(sk
);
1710 icsk
->icsk_af_ops
= &ipv6_specific
;
1712 #ifdef CONFIG_TCP_MD5SIG
1713 tcp_sk(sk
)->af_specific
= &tcp_sock_ipv6_specific
;
1719 static void tcp_v6_destroy_sock(struct sock
*sk
)
1721 tcp_v4_destroy_sock(sk
);
1722 inet6_destroy_sock(sk
);
1725 #ifdef CONFIG_PROC_FS
1726 /* Proc filesystem TCPv6 sock list dumping. */
1727 static void get_openreq6(struct seq_file
*seq
,
1728 const struct sock
*sk
, struct request_sock
*req
, int i
, kuid_t uid
)
1730 int ttd
= req
->expires
- jiffies
;
1731 const struct in6_addr
*src
= &inet6_rsk(req
)->loc_addr
;
1732 const struct in6_addr
*dest
= &inet6_rsk(req
)->rmt_addr
;
1738 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1739 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1741 src
->s6_addr32
[0], src
->s6_addr32
[1],
1742 src
->s6_addr32
[2], src
->s6_addr32
[3],
1743 ntohs(inet_rsk(req
)->loc_port
),
1744 dest
->s6_addr32
[0], dest
->s6_addr32
[1],
1745 dest
->s6_addr32
[2], dest
->s6_addr32
[3],
1746 ntohs(inet_rsk(req
)->rmt_port
),
1748 0,0, /* could print option size, but that is af dependent. */
1749 1, /* timers active (only the expire timer) */
1750 jiffies_to_clock_t(ttd
),
1752 from_kuid_munged(seq_user_ns(seq
), uid
),
1753 0, /* non standard timer */
1754 0, /* open_requests have no inode */
1758 static void get_tcp6_sock(struct seq_file
*seq
, struct sock
*sp
, int i
)
1760 const struct in6_addr
*dest
, *src
;
1763 unsigned long timer_expires
;
1764 const struct inet_sock
*inet
= inet_sk(sp
);
1765 const struct tcp_sock
*tp
= tcp_sk(sp
);
1766 const struct inet_connection_sock
*icsk
= inet_csk(sp
);
1767 const struct ipv6_pinfo
*np
= inet6_sk(sp
);
1770 src
= &np
->rcv_saddr
;
1771 destp
= ntohs(inet
->inet_dport
);
1772 srcp
= ntohs(inet
->inet_sport
);
1774 if (icsk
->icsk_pending
== ICSK_TIME_RETRANS
) {
1776 timer_expires
= icsk
->icsk_timeout
;
1777 } else if (icsk
->icsk_pending
== ICSK_TIME_PROBE0
) {
1779 timer_expires
= icsk
->icsk_timeout
;
1780 } else if (timer_pending(&sp
->sk_timer
)) {
1782 timer_expires
= sp
->sk_timer
.expires
;
1785 timer_expires
= jiffies
;
1789 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1790 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %lu %lu %u %u %d\n",
1792 src
->s6_addr32
[0], src
->s6_addr32
[1],
1793 src
->s6_addr32
[2], src
->s6_addr32
[3], srcp
,
1794 dest
->s6_addr32
[0], dest
->s6_addr32
[1],
1795 dest
->s6_addr32
[2], dest
->s6_addr32
[3], destp
,
1797 tp
->write_seq
-tp
->snd_una
,
1798 (sp
->sk_state
== TCP_LISTEN
) ? sp
->sk_ack_backlog
: (tp
->rcv_nxt
- tp
->copied_seq
),
1800 jiffies_delta_to_clock_t(timer_expires
- jiffies
),
1801 icsk
->icsk_retransmits
,
1802 from_kuid_munged(seq_user_ns(seq
), sock_i_uid(sp
)),
1803 icsk
->icsk_probes_out
,
1805 atomic_read(&sp
->sk_refcnt
), sp
,
1806 jiffies_to_clock_t(icsk
->icsk_rto
),
1807 jiffies_to_clock_t(icsk
->icsk_ack
.ato
),
1808 (icsk
->icsk_ack
.quick
<< 1 ) | icsk
->icsk_ack
.pingpong
,
1810 tcp_in_initial_slowstart(tp
) ? -1 : tp
->snd_ssthresh
1814 static void get_timewait6_sock(struct seq_file
*seq
,
1815 struct inet_timewait_sock
*tw
, int i
)
1817 const struct in6_addr
*dest
, *src
;
1819 const struct inet6_timewait_sock
*tw6
= inet6_twsk((struct sock
*)tw
);
1820 long delta
= tw
->tw_ttd
- jiffies
;
1822 dest
= &tw6
->tw_v6_daddr
;
1823 src
= &tw6
->tw_v6_rcv_saddr
;
1824 destp
= ntohs(tw
->tw_dport
);
1825 srcp
= ntohs(tw
->tw_sport
);
1828 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1829 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1831 src
->s6_addr32
[0], src
->s6_addr32
[1],
1832 src
->s6_addr32
[2], src
->s6_addr32
[3], srcp
,
1833 dest
->s6_addr32
[0], dest
->s6_addr32
[1],
1834 dest
->s6_addr32
[2], dest
->s6_addr32
[3], destp
,
1835 tw
->tw_substate
, 0, 0,
1836 3, jiffies_delta_to_clock_t(delta
), 0, 0, 0, 0,
1837 atomic_read(&tw
->tw_refcnt
), tw
);
1840 static int tcp6_seq_show(struct seq_file
*seq
, void *v
)
1842 struct tcp_iter_state
*st
;
1844 if (v
== SEQ_START_TOKEN
) {
1849 "st tx_queue rx_queue tr tm->when retrnsmt"
1850 " uid timeout inode\n");
1855 switch (st
->state
) {
1856 case TCP_SEQ_STATE_LISTENING
:
1857 case TCP_SEQ_STATE_ESTABLISHED
:
1858 get_tcp6_sock(seq
, v
, st
->num
);
1860 case TCP_SEQ_STATE_OPENREQ
:
1861 get_openreq6(seq
, st
->syn_wait_sk
, v
, st
->num
, st
->uid
);
1863 case TCP_SEQ_STATE_TIME_WAIT
:
1864 get_timewait6_sock(seq
, v
, st
->num
);
1871 static const struct file_operations tcp6_afinfo_seq_fops
= {
1872 .owner
= THIS_MODULE
,
1873 .open
= tcp_seq_open
,
1875 .llseek
= seq_lseek
,
1876 .release
= seq_release_net
1879 static struct tcp_seq_afinfo tcp6_seq_afinfo
= {
1882 .seq_fops
= &tcp6_afinfo_seq_fops
,
1884 .show
= tcp6_seq_show
,
1888 int __net_init
tcp6_proc_init(struct net
*net
)
1890 return tcp_proc_register(net
, &tcp6_seq_afinfo
);
1893 void tcp6_proc_exit(struct net
*net
)
1895 tcp_proc_unregister(net
, &tcp6_seq_afinfo
);
1899 static void tcp_v6_clear_sk(struct sock
*sk
, int size
)
1901 struct inet_sock
*inet
= inet_sk(sk
);
1903 /* we do not want to clear pinet6 field, because of RCU lookups */
1904 sk_prot_clear_nulls(sk
, offsetof(struct inet_sock
, pinet6
));
1906 size
-= offsetof(struct inet_sock
, pinet6
) + sizeof(inet
->pinet6
);
1907 memset(&inet
->pinet6
+ 1, 0, size
);
1910 struct proto tcpv6_prot
= {
1912 .owner
= THIS_MODULE
,
1914 .connect
= tcp_v6_connect
,
1915 .disconnect
= tcp_disconnect
,
1916 .accept
= inet_csk_accept
,
1918 .init
= tcp_v6_init_sock
,
1919 .destroy
= tcp_v6_destroy_sock
,
1920 .shutdown
= tcp_shutdown
,
1921 .setsockopt
= tcp_setsockopt
,
1922 .getsockopt
= tcp_getsockopt
,
1923 .recvmsg
= tcp_recvmsg
,
1924 .sendmsg
= tcp_sendmsg
,
1925 .sendpage
= tcp_sendpage
,
1926 .backlog_rcv
= tcp_v6_do_rcv
,
1927 .release_cb
= tcp_release_cb
,
1928 .hash
= tcp_v6_hash
,
1929 .unhash
= inet_unhash
,
1930 .get_port
= inet_csk_get_port
,
1931 .enter_memory_pressure
= tcp_enter_memory_pressure
,
1932 .sockets_allocated
= &tcp_sockets_allocated
,
1933 .memory_allocated
= &tcp_memory_allocated
,
1934 .memory_pressure
= &tcp_memory_pressure
,
1935 .orphan_count
= &tcp_orphan_count
,
1936 .sysctl_wmem
= sysctl_tcp_wmem
,
1937 .sysctl_rmem
= sysctl_tcp_rmem
,
1938 .max_header
= MAX_TCP_HEADER
,
1939 .obj_size
= sizeof(struct tcp6_sock
),
1940 .slab_flags
= SLAB_DESTROY_BY_RCU
,
1941 .twsk_prot
= &tcp6_timewait_sock_ops
,
1942 .rsk_prot
= &tcp6_request_sock_ops
,
1943 .h
.hashinfo
= &tcp_hashinfo
,
1944 .no_autobind
= true,
1945 #ifdef CONFIG_COMPAT
1946 .compat_setsockopt
= compat_tcp_setsockopt
,
1947 .compat_getsockopt
= compat_tcp_getsockopt
,
1949 #ifdef CONFIG_MEMCG_KMEM
1950 .proto_cgroup
= tcp_proto_cgroup
,
1952 .clear_sk
= tcp_v6_clear_sk
,
1955 static const struct inet6_protocol tcpv6_protocol
= {
1956 .early_demux
= tcp_v6_early_demux
,
1957 .handler
= tcp_v6_rcv
,
1958 .err_handler
= tcp_v6_err
,
1959 .flags
= INET6_PROTO_NOPOLICY
|INET6_PROTO_FINAL
,
1962 static struct inet_protosw tcpv6_protosw
= {
1963 .type
= SOCK_STREAM
,
1964 .protocol
= IPPROTO_TCP
,
1965 .prot
= &tcpv6_prot
,
1966 .ops
= &inet6_stream_ops
,
1968 .flags
= INET_PROTOSW_PERMANENT
|
1972 static int __net_init
tcpv6_net_init(struct net
*net
)
1974 return inet_ctl_sock_create(&net
->ipv6
.tcp_sk
, PF_INET6
,
1975 SOCK_RAW
, IPPROTO_TCP
, net
);
1978 static void __net_exit
tcpv6_net_exit(struct net
*net
)
1980 inet_ctl_sock_destroy(net
->ipv6
.tcp_sk
);
1983 static void __net_exit
tcpv6_net_exit_batch(struct list_head
*net_exit_list
)
1985 inet_twsk_purge(&tcp_hashinfo
, &tcp_death_row
, AF_INET6
);
1988 static struct pernet_operations tcpv6_net_ops
= {
1989 .init
= tcpv6_net_init
,
1990 .exit
= tcpv6_net_exit
,
1991 .exit_batch
= tcpv6_net_exit_batch
,
1994 int __init
tcpv6_init(void)
1998 ret
= inet6_add_protocol(&tcpv6_protocol
, IPPROTO_TCP
);
2002 /* register inet6 protocol */
2003 ret
= inet6_register_protosw(&tcpv6_protosw
);
2005 goto out_tcpv6_protocol
;
2007 ret
= register_pernet_subsys(&tcpv6_net_ops
);
2009 goto out_tcpv6_protosw
;
2014 inet6_unregister_protosw(&tcpv6_protosw
);
2016 inet6_del_protocol(&tcpv6_protocol
, IPPROTO_TCP
);
2020 void tcpv6_exit(void)
2022 unregister_pernet_subsys(&tcpv6_net_ops
);
2023 inet6_unregister_protosw(&tcpv6_protosw
);
2024 inet6_del_protocol(&tcpv6_protocol
, IPPROTO_TCP
);