3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp_input.c
11 * linux/net/ipv4/tcp_output.c
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
16 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
17 * a single port at the same time.
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
41 #include <linux/slab.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
60 #include <net/dsfield.h>
61 #include <net/timewait_sock.h>
62 #include <net/netdma.h>
63 #include <net/inet_common.h>
64 #include <net/secure_seq.h>
65 #include <net/tcp_memcontrol.h>
67 #include <asm/uaccess.h>
69 #include <linux/proc_fs.h>
70 #include <linux/seq_file.h>
72 #include <linux/crypto.h>
73 #include <linux/scatterlist.h>
75 static void tcp_v6_send_reset(struct sock
*sk
, struct sk_buff
*skb
);
76 static void tcp_v6_reqsk_send_ack(struct sock
*sk
, struct sk_buff
*skb
,
77 struct request_sock
*req
);
79 static int tcp_v6_do_rcv(struct sock
*sk
, struct sk_buff
*skb
);
81 static const struct inet_connection_sock_af_ops ipv6_mapped
;
82 static const struct inet_connection_sock_af_ops ipv6_specific
;
83 #ifdef CONFIG_TCP_MD5SIG
84 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific
;
85 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific
;
87 static struct tcp_md5sig_key
*tcp_v6_md5_do_lookup(struct sock
*sk
,
88 const struct in6_addr
*addr
)
94 static void inet6_sk_rx_dst_set(struct sock
*sk
, const struct sk_buff
*skb
)
96 struct dst_entry
*dst
= skb_dst(skb
);
97 const struct rt6_info
*rt
= (const struct rt6_info
*)dst
;
101 inet_sk(sk
)->rx_dst_ifindex
= skb
->skb_iif
;
103 inet6_sk(sk
)->rx_dst_cookie
= rt
->rt6i_node
->fn_sernum
;
106 static void tcp_v6_hash(struct sock
*sk
)
108 if (sk
->sk_state
!= TCP_CLOSE
) {
109 if (inet_csk(sk
)->icsk_af_ops
== &ipv6_mapped
) {
114 __inet6_hash(sk
, NULL
);
119 static __u32
tcp_v6_init_sequence(const struct sk_buff
*skb
)
121 return secure_tcpv6_sequence_number(ipv6_hdr(skb
)->daddr
.s6_addr32
,
122 ipv6_hdr(skb
)->saddr
.s6_addr32
,
124 tcp_hdr(skb
)->source
);
127 static int tcp_v6_connect(struct sock
*sk
, struct sockaddr
*uaddr
,
130 struct sockaddr_in6
*usin
= (struct sockaddr_in6
*) uaddr
;
131 struct inet_sock
*inet
= inet_sk(sk
);
132 struct inet_connection_sock
*icsk
= inet_csk(sk
);
133 struct ipv6_pinfo
*np
= inet6_sk(sk
);
134 struct tcp_sock
*tp
= tcp_sk(sk
);
135 struct in6_addr
*saddr
= NULL
, *final_p
, final
;
138 struct dst_entry
*dst
;
142 if (addr_len
< SIN6_LEN_RFC2133
)
145 if (usin
->sin6_family
!= AF_INET6
)
146 return -EAFNOSUPPORT
;
148 memset(&fl6
, 0, sizeof(fl6
));
151 fl6
.flowlabel
= usin
->sin6_flowinfo
&IPV6_FLOWINFO_MASK
;
152 IP6_ECN_flow_init(fl6
.flowlabel
);
153 if (fl6
.flowlabel
&IPV6_FLOWLABEL_MASK
) {
154 struct ip6_flowlabel
*flowlabel
;
155 flowlabel
= fl6_sock_lookup(sk
, fl6
.flowlabel
);
156 if (flowlabel
== NULL
)
158 usin
->sin6_addr
= flowlabel
->dst
;
159 fl6_sock_release(flowlabel
);
164 * connect() to INADDR_ANY means loopback (BSD'ism).
167 if(ipv6_addr_any(&usin
->sin6_addr
))
168 usin
->sin6_addr
.s6_addr
[15] = 0x1;
170 addr_type
= ipv6_addr_type(&usin
->sin6_addr
);
172 if(addr_type
& IPV6_ADDR_MULTICAST
)
175 if (addr_type
&IPV6_ADDR_LINKLOCAL
) {
176 if (addr_len
>= sizeof(struct sockaddr_in6
) &&
177 usin
->sin6_scope_id
) {
178 /* If interface is set while binding, indices
181 if (sk
->sk_bound_dev_if
&&
182 sk
->sk_bound_dev_if
!= usin
->sin6_scope_id
)
185 sk
->sk_bound_dev_if
= usin
->sin6_scope_id
;
188 /* Connect to link-local address requires an interface */
189 if (!sk
->sk_bound_dev_if
)
193 if (tp
->rx_opt
.ts_recent_stamp
&&
194 !ipv6_addr_equal(&np
->daddr
, &usin
->sin6_addr
)) {
195 tp
->rx_opt
.ts_recent
= 0;
196 tp
->rx_opt
.ts_recent_stamp
= 0;
200 np
->daddr
= usin
->sin6_addr
;
201 np
->flow_label
= fl6
.flowlabel
;
207 if (addr_type
== IPV6_ADDR_MAPPED
) {
208 u32 exthdrlen
= icsk
->icsk_ext_hdr_len
;
209 struct sockaddr_in sin
;
211 SOCK_DEBUG(sk
, "connect: ipv4 mapped\n");
213 if (__ipv6_only_sock(sk
))
216 sin
.sin_family
= AF_INET
;
217 sin
.sin_port
= usin
->sin6_port
;
218 sin
.sin_addr
.s_addr
= usin
->sin6_addr
.s6_addr32
[3];
220 icsk
->icsk_af_ops
= &ipv6_mapped
;
221 sk
->sk_backlog_rcv
= tcp_v4_do_rcv
;
222 #ifdef CONFIG_TCP_MD5SIG
223 tp
->af_specific
= &tcp_sock_ipv6_mapped_specific
;
226 err
= tcp_v4_connect(sk
, (struct sockaddr
*)&sin
, sizeof(sin
));
229 icsk
->icsk_ext_hdr_len
= exthdrlen
;
230 icsk
->icsk_af_ops
= &ipv6_specific
;
231 sk
->sk_backlog_rcv
= tcp_v6_do_rcv
;
232 #ifdef CONFIG_TCP_MD5SIG
233 tp
->af_specific
= &tcp_sock_ipv6_specific
;
237 ipv6_addr_set_v4mapped(inet
->inet_saddr
, &np
->saddr
);
238 ipv6_addr_set_v4mapped(inet
->inet_rcv_saddr
,
245 if (!ipv6_addr_any(&np
->rcv_saddr
))
246 saddr
= &np
->rcv_saddr
;
248 fl6
.flowi6_proto
= IPPROTO_TCP
;
249 fl6
.daddr
= np
->daddr
;
250 fl6
.saddr
= saddr
? *saddr
: np
->saddr
;
251 fl6
.flowi6_oif
= sk
->sk_bound_dev_if
;
252 fl6
.flowi6_mark
= sk
->sk_mark
;
253 fl6
.fl6_dport
= usin
->sin6_port
;
254 fl6
.fl6_sport
= inet
->inet_sport
;
255 fl6
.flowi6_uid
= sock_i_uid(sk
);
257 final_p
= fl6_update_dst(&fl6
, np
->opt
, &final
);
259 security_sk_classify_flow(sk
, flowi6_to_flowi(&fl6
));
261 dst
= ip6_dst_lookup_flow(sk
, &fl6
, final_p
, true);
269 np
->rcv_saddr
= *saddr
;
272 /* set the source address */
274 inet
->inet_rcv_saddr
= LOOPBACK4_IPV6
;
276 sk
->sk_gso_type
= SKB_GSO_TCPV6
;
277 __ip6_dst_store(sk
, dst
, NULL
, NULL
);
279 rt
= (struct rt6_info
*) dst
;
280 if (tcp_death_row
.sysctl_tw_recycle
&&
281 !tp
->rx_opt
.ts_recent_stamp
&&
282 ipv6_addr_equal(&rt
->rt6i_dst
.addr
, &np
->daddr
))
283 tcp_fetch_timewait_stamp(sk
, dst
);
285 icsk
->icsk_ext_hdr_len
= 0;
287 icsk
->icsk_ext_hdr_len
= (np
->opt
->opt_flen
+
290 tp
->rx_opt
.mss_clamp
= IPV6_MIN_MTU
- sizeof(struct tcphdr
) - sizeof(struct ipv6hdr
);
292 inet
->inet_dport
= usin
->sin6_port
;
294 tcp_set_state(sk
, TCP_SYN_SENT
);
295 err
= inet6_hash_connect(&tcp_death_row
, sk
);
299 printk(KERN_INFO
"net_sock, IPV6 socket[%lu] sport:%u \n", SOCK_INODE(sk
->sk_socket
)->i_ino
, ntohs(inet
->inet_sport
));
300 if (!tp
->write_seq
&& likely(!tp
->repair
))
301 tp
->write_seq
= secure_tcpv6_sequence_number(np
->saddr
.s6_addr32
,
306 err
= tcp_connect(sk
);
313 tcp_set_state(sk
, TCP_CLOSE
);
316 inet
->inet_dport
= 0;
317 sk
->sk_route_caps
= 0;
321 static void tcp_v6_mtu_reduced(struct sock
*sk
)
323 struct dst_entry
*dst
;
325 if ((1 << sk
->sk_state
) & (TCPF_LISTEN
| TCPF_CLOSE
))
328 dst
= inet6_csk_update_pmtu(sk
, tcp_sk(sk
)->mtu_info
);
332 if (inet_csk(sk
)->icsk_pmtu_cookie
> dst_mtu(dst
)) {
333 tcp_sync_mss(sk
, dst_mtu(dst
));
334 tcp_simple_retransmit(sk
);
338 static void tcp_v6_err(struct sk_buff
*skb
, struct inet6_skb_parm
*opt
,
339 u8 type
, u8 code
, int offset
, __be32 info
)
341 const struct ipv6hdr
*hdr
= (const struct ipv6hdr
*)skb
->data
;
342 const struct tcphdr
*th
= (struct tcphdr
*)(skb
->data
+offset
);
343 struct ipv6_pinfo
*np
;
348 struct net
*net
= dev_net(skb
->dev
);
350 sk
= inet6_lookup(net
, &tcp_hashinfo
, &hdr
->daddr
,
351 th
->dest
, &hdr
->saddr
, th
->source
, skb
->dev
->ifindex
);
354 ICMP6_INC_STATS_BH(net
, __in6_dev_get(skb
->dev
),
359 if (sk
->sk_state
== TCP_TIME_WAIT
) {
360 inet_twsk_put(inet_twsk(sk
));
365 if (sock_owned_by_user(sk
) && type
!= ICMPV6_PKT_TOOBIG
)
366 NET_INC_STATS_BH(net
, LINUX_MIB_LOCKDROPPEDICMPS
);
368 if (sk
->sk_state
== TCP_CLOSE
)
371 if (ipv6_hdr(skb
)->hop_limit
< inet6_sk(sk
)->min_hopcount
) {
372 NET_INC_STATS_BH(net
, LINUX_MIB_TCPMINTTLDROP
);
377 seq
= ntohl(th
->seq
);
378 if (sk
->sk_state
!= TCP_LISTEN
&&
379 !between(seq
, tp
->snd_una
, tp
->snd_nxt
)) {
380 NET_INC_STATS_BH(net
, LINUX_MIB_OUTOFWINDOWICMPS
);
386 if (type
== NDISC_REDIRECT
) {
387 if (!sock_owned_by_user(sk
)) {
388 struct dst_entry
*dst
= __sk_dst_check(sk
, np
->dst_cookie
);
391 dst
->ops
->redirect(dst
, sk
, skb
);
396 if (type
== ICMPV6_PKT_TOOBIG
) {
397 /* We are not interested in TCP_LISTEN and open_requests
398 * (SYN-ACKs send out by Linux are always <576bytes so
399 * they should go through unfragmented).
401 if (sk
->sk_state
== TCP_LISTEN
)
404 tp
->mtu_info
= ntohl(info
);
405 if (!sock_owned_by_user(sk
))
406 tcp_v6_mtu_reduced(sk
);
407 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED
,
413 icmpv6_err_convert(type
, code
, &err
);
415 /* Might be for an request_sock */
416 switch (sk
->sk_state
) {
417 struct request_sock
*req
, **prev
;
419 if (sock_owned_by_user(sk
))
422 req
= inet6_csk_search_req(sk
, &prev
, th
->dest
, &hdr
->daddr
,
423 &hdr
->saddr
, inet6_iif(skb
));
427 /* ICMPs are not backlogged, hence we cannot get
428 * an established socket here.
430 WARN_ON(req
->sk
!= NULL
);
432 if (seq
!= tcp_rsk(req
)->snt_isn
) {
433 NET_INC_STATS_BH(net
, LINUX_MIB_OUTOFWINDOWICMPS
);
437 inet_csk_reqsk_queue_drop(sk
, req
, prev
);
438 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_LISTENDROPS
);
442 case TCP_SYN_RECV
: /* Cannot happen.
443 It can, it SYNs are crossed. --ANK */
444 if (!sock_owned_by_user(sk
)) {
446 sk
->sk_error_report(sk
); /* Wake people up to see the error (see connect in sock.c) */
450 sk
->sk_err_soft
= err
;
454 if (!sock_owned_by_user(sk
) && np
->recverr
) {
456 sk
->sk_error_report(sk
);
458 sk
->sk_err_soft
= err
;
466 static int tcp_v6_send_synack(struct sock
*sk
, struct dst_entry
*dst
,
468 struct request_sock
*req
,
471 struct inet6_request_sock
*treq
= inet6_rsk(req
);
472 struct ipv6_pinfo
*np
= inet6_sk(sk
);
473 struct sk_buff
* skb
;
476 /* First, grab a route. */
477 if (!dst
&& (dst
= inet6_csk_route_req(sk
, fl6
, req
)) == NULL
)
480 skb
= tcp_make_synack(sk
, dst
, req
, NULL
);
483 __tcp_v6_send_check(skb
, &treq
->loc_addr
, &treq
->rmt_addr
);
485 fl6
->daddr
= treq
->rmt_addr
;
486 skb_set_queue_mapping(skb
, queue_mapping
);
487 err
= ip6_xmit(sk
, skb
, fl6
, np
->opt
, np
->tclass
);
488 err
= net_xmit_eval(err
);
495 static int tcp_v6_rtx_synack(struct sock
*sk
, struct request_sock
*req
)
500 res
= tcp_v6_send_synack(sk
, NULL
, &fl6
, req
, 0);
502 TCP_INC_STATS_BH(sock_net(sk
), TCP_MIB_RETRANSSEGS
);
506 static void tcp_v6_reqsk_destructor(struct request_sock
*req
)
508 kfree_skb(inet6_rsk(req
)->pktopts
);
511 #ifdef CONFIG_TCP_MD5SIG
512 static struct tcp_md5sig_key
*tcp_v6_md5_do_lookup(struct sock
*sk
,
513 const struct in6_addr
*addr
)
515 return tcp_md5_do_lookup(sk
, (union tcp_md5_addr
*)addr
, AF_INET6
);
518 static struct tcp_md5sig_key
*tcp_v6_md5_lookup(struct sock
*sk
,
519 struct sock
*addr_sk
)
521 return tcp_v6_md5_do_lookup(sk
, &inet6_sk(addr_sk
)->daddr
);
524 static struct tcp_md5sig_key
*tcp_v6_reqsk_md5_lookup(struct sock
*sk
,
525 struct request_sock
*req
)
527 return tcp_v6_md5_do_lookup(sk
, &inet6_rsk(req
)->rmt_addr
);
530 static int tcp_v6_parse_md5_keys (struct sock
*sk
, char __user
*optval
,
533 struct tcp_md5sig cmd
;
534 struct sockaddr_in6
*sin6
= (struct sockaddr_in6
*)&cmd
.tcpm_addr
;
536 if (optlen
< sizeof(cmd
))
539 if (copy_from_user(&cmd
, optval
, sizeof(cmd
)))
542 if (sin6
->sin6_family
!= AF_INET6
)
545 if (!cmd
.tcpm_keylen
) {
546 if (ipv6_addr_v4mapped(&sin6
->sin6_addr
))
547 return tcp_md5_do_del(sk
, (union tcp_md5_addr
*)&sin6
->sin6_addr
.s6_addr32
[3],
549 return tcp_md5_do_del(sk
, (union tcp_md5_addr
*)&sin6
->sin6_addr
,
553 if (cmd
.tcpm_keylen
> TCP_MD5SIG_MAXKEYLEN
)
556 if (ipv6_addr_v4mapped(&sin6
->sin6_addr
))
557 return tcp_md5_do_add(sk
, (union tcp_md5_addr
*)&sin6
->sin6_addr
.s6_addr32
[3],
558 AF_INET
, cmd
.tcpm_key
, cmd
.tcpm_keylen
, GFP_KERNEL
);
560 return tcp_md5_do_add(sk
, (union tcp_md5_addr
*)&sin6
->sin6_addr
,
561 AF_INET6
, cmd
.tcpm_key
, cmd
.tcpm_keylen
, GFP_KERNEL
);
564 static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool
*hp
,
565 const struct in6_addr
*daddr
,
566 const struct in6_addr
*saddr
, int nbytes
)
568 struct tcp6_pseudohdr
*bp
;
569 struct scatterlist sg
;
571 bp
= &hp
->md5_blk
.ip6
;
572 /* 1. TCP pseudo-header (RFC2460) */
575 bp
->protocol
= cpu_to_be32(IPPROTO_TCP
);
576 bp
->len
= cpu_to_be32(nbytes
);
578 sg_init_one(&sg
, bp
, sizeof(*bp
));
579 return crypto_hash_update(&hp
->md5_desc
, &sg
, sizeof(*bp
));
582 static int tcp_v6_md5_hash_hdr(char *md5_hash
, struct tcp_md5sig_key
*key
,
583 const struct in6_addr
*daddr
, struct in6_addr
*saddr
,
584 const struct tcphdr
*th
)
586 struct tcp_md5sig_pool
*hp
;
587 struct hash_desc
*desc
;
589 hp
= tcp_get_md5sig_pool();
591 goto clear_hash_noput
;
592 desc
= &hp
->md5_desc
;
594 if (crypto_hash_init(desc
))
596 if (tcp_v6_md5_hash_pseudoheader(hp
, daddr
, saddr
, th
->doff
<< 2))
598 if (tcp_md5_hash_header(hp
, th
))
600 if (tcp_md5_hash_key(hp
, key
))
602 if (crypto_hash_final(desc
, md5_hash
))
605 tcp_put_md5sig_pool();
609 tcp_put_md5sig_pool();
611 memset(md5_hash
, 0, 16);
615 static int tcp_v6_md5_hash_skb(char *md5_hash
, struct tcp_md5sig_key
*key
,
616 const struct sock
*sk
,
617 const struct request_sock
*req
,
618 const struct sk_buff
*skb
)
620 const struct in6_addr
*saddr
, *daddr
;
621 struct tcp_md5sig_pool
*hp
;
622 struct hash_desc
*desc
;
623 const struct tcphdr
*th
= tcp_hdr(skb
);
626 saddr
= &inet6_sk(sk
)->saddr
;
627 daddr
= &inet6_sk(sk
)->daddr
;
629 saddr
= &inet6_rsk(req
)->loc_addr
;
630 daddr
= &inet6_rsk(req
)->rmt_addr
;
632 const struct ipv6hdr
*ip6h
= ipv6_hdr(skb
);
633 saddr
= &ip6h
->saddr
;
634 daddr
= &ip6h
->daddr
;
637 hp
= tcp_get_md5sig_pool();
639 goto clear_hash_noput
;
640 desc
= &hp
->md5_desc
;
642 if (crypto_hash_init(desc
))
645 if (tcp_v6_md5_hash_pseudoheader(hp
, daddr
, saddr
, skb
->len
))
647 if (tcp_md5_hash_header(hp
, th
))
649 if (tcp_md5_hash_skb_data(hp
, skb
, th
->doff
<< 2))
651 if (tcp_md5_hash_key(hp
, key
))
653 if (crypto_hash_final(desc
, md5_hash
))
656 tcp_put_md5sig_pool();
660 tcp_put_md5sig_pool();
662 memset(md5_hash
, 0, 16);
666 static int tcp_v6_inbound_md5_hash(struct sock
*sk
, const struct sk_buff
*skb
)
668 const __u8
*hash_location
= NULL
;
669 struct tcp_md5sig_key
*hash_expected
;
670 const struct ipv6hdr
*ip6h
= ipv6_hdr(skb
);
671 const struct tcphdr
*th
= tcp_hdr(skb
);
675 hash_expected
= tcp_v6_md5_do_lookup(sk
, &ip6h
->saddr
);
676 hash_location
= tcp_parse_md5sig_option(th
);
678 /* We've parsed the options - do we have a hash? */
679 if (!hash_expected
&& !hash_location
)
682 if (hash_expected
&& !hash_location
) {
683 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_TCPMD5NOTFOUND
);
687 if (!hash_expected
&& hash_location
) {
688 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_TCPMD5UNEXPECTED
);
692 /* check the signature */
693 genhash
= tcp_v6_md5_hash_skb(newhash
,
697 if (genhash
|| memcmp(hash_location
, newhash
, 16) != 0) {
698 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
699 genhash
? "failed" : "mismatch",
700 &ip6h
->saddr
, ntohs(th
->source
),
701 &ip6h
->daddr
, ntohs(th
->dest
));
708 struct request_sock_ops tcp6_request_sock_ops __read_mostly
= {
710 .obj_size
= sizeof(struct tcp6_request_sock
),
711 .rtx_syn_ack
= tcp_v6_rtx_synack
,
712 .send_ack
= tcp_v6_reqsk_send_ack
,
713 .destructor
= tcp_v6_reqsk_destructor
,
714 .send_reset
= tcp_v6_send_reset
,
715 .syn_ack_timeout
= tcp_syn_ack_timeout
,
718 #ifdef CONFIG_TCP_MD5SIG
719 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops
= {
720 .md5_lookup
= tcp_v6_reqsk_md5_lookup
,
721 .calc_md5_hash
= tcp_v6_md5_hash_skb
,
725 static void tcp_v6_send_response(struct sk_buff
*skb
, u32 seq
, u32 ack
, u32 win
,
726 u32 tsval
, u32 tsecr
,
727 struct tcp_md5sig_key
*key
, int rst
, u8 tclass
)
729 const struct tcphdr
*th
= tcp_hdr(skb
);
731 struct sk_buff
*buff
;
733 struct net
*net
= dev_net(skb_dst(skb
)->dev
);
734 struct sock
*ctl_sk
= net
->ipv6
.tcp_sk
;
735 unsigned int tot_len
= sizeof(struct tcphdr
);
736 struct dst_entry
*dst
;
740 tot_len
+= TCPOLEN_TSTAMP_ALIGNED
;
741 #ifdef CONFIG_TCP_MD5SIG
743 tot_len
+= TCPOLEN_MD5SIG_ALIGNED
;
746 buff
= alloc_skb(MAX_HEADER
+ sizeof(struct ipv6hdr
) + tot_len
,
751 skb_reserve(buff
, MAX_HEADER
+ sizeof(struct ipv6hdr
) + tot_len
);
753 t1
= (struct tcphdr
*) skb_push(buff
, tot_len
);
754 skb_reset_transport_header(buff
);
756 /* Swap the send and the receive. */
757 memset(t1
, 0, sizeof(*t1
));
758 t1
->dest
= th
->source
;
759 t1
->source
= th
->dest
;
760 t1
->doff
= tot_len
/ 4;
761 t1
->seq
= htonl(seq
);
762 t1
->ack_seq
= htonl(ack
);
763 t1
->ack
= !rst
|| !th
->ack
;
765 t1
->window
= htons(win
);
767 topt
= (__be32
*)(t1
+ 1);
770 *topt
++ = htonl((TCPOPT_NOP
<< 24) | (TCPOPT_NOP
<< 16) |
771 (TCPOPT_TIMESTAMP
<< 8) | TCPOLEN_TIMESTAMP
);
772 *topt
++ = htonl(tsval
);
773 *topt
++ = htonl(tsecr
);
776 #ifdef CONFIG_TCP_MD5SIG
778 *topt
++ = htonl((TCPOPT_NOP
<< 24) | (TCPOPT_NOP
<< 16) |
779 (TCPOPT_MD5SIG
<< 8) | TCPOLEN_MD5SIG
);
780 tcp_v6_md5_hash_hdr((__u8
*)topt
, key
,
781 &ipv6_hdr(skb
)->saddr
,
782 &ipv6_hdr(skb
)->daddr
, t1
);
786 memset(&fl6
, 0, sizeof(fl6
));
787 fl6
.daddr
= ipv6_hdr(skb
)->saddr
;
788 fl6
.saddr
= ipv6_hdr(skb
)->daddr
;
790 buff
->ip_summed
= CHECKSUM_PARTIAL
;
793 __tcp_v6_send_check(buff
, &fl6
.saddr
, &fl6
.daddr
);
795 fl6
.flowi6_proto
= IPPROTO_TCP
;
796 if (ipv6_addr_type(&fl6
.daddr
) & IPV6_ADDR_LINKLOCAL
)
797 fl6
.flowi6_oif
= inet6_iif(skb
);
798 fl6
.flowi6_mark
= IP6_REPLY_MARK(net
, skb
->mark
);
799 fl6
.fl6_dport
= t1
->dest
;
800 fl6
.fl6_sport
= t1
->source
;
801 security_skb_classify_flow(skb
, flowi6_to_flowi(&fl6
));
803 /* Pass a socket to ip6_dst_lookup either it is for RST
804 * Underlying function will use this to retrieve the network
807 dst
= ip6_dst_lookup_flow(ctl_sk
, &fl6
, NULL
, false);
809 skb_dst_set(buff
, dst
);
810 ip6_xmit(ctl_sk
, buff
, &fl6
, NULL
, tclass
);
811 TCP_INC_STATS_BH(net
, TCP_MIB_OUTSEGS
);
813 TCP_INC_STATS_BH(net
, TCP_MIB_OUTRSTS
);
820 static void tcp_v6_send_reset(struct sock
*sk
, struct sk_buff
*skb
)
822 const struct tcphdr
*th
= tcp_hdr(skb
);
823 u32 seq
= 0, ack_seq
= 0;
824 struct tcp_md5sig_key
*key
= NULL
;
825 #ifdef CONFIG_TCP_MD5SIG
826 const __u8
*hash_location
= NULL
;
827 struct ipv6hdr
*ipv6h
= ipv6_hdr(skb
);
828 unsigned char newhash
[16];
830 struct sock
*sk1
= NULL
;
836 if (!ipv6_unicast_destination(skb
))
839 #ifdef CONFIG_TCP_MD5SIG
840 hash_location
= tcp_parse_md5sig_option(th
);
841 if (!sk
&& hash_location
) {
843 * active side is lost. Try to find listening socket through
844 * source port, and then find md5 key through listening socket.
845 * we are not loose security here:
846 * Incoming packet is checked with md5 hash with finding key,
847 * no RST generated if md5 hash doesn't match.
849 sk1
= inet6_lookup_listener(dev_net(skb_dst(skb
)->dev
),
850 &tcp_hashinfo
, &ipv6h
->saddr
,
851 th
->source
, &ipv6h
->daddr
,
852 ntohs(th
->source
), inet6_iif(skb
));
857 key
= tcp_v6_md5_do_lookup(sk1
, &ipv6h
->saddr
);
861 genhash
= tcp_v6_md5_hash_skb(newhash
, key
, NULL
, NULL
, skb
);
862 if (genhash
|| memcmp(hash_location
, newhash
, 16) != 0)
865 key
= sk
? tcp_v6_md5_do_lookup(sk
, &ipv6h
->saddr
) : NULL
;
870 seq
= ntohl(th
->ack_seq
);
872 ack_seq
= ntohl(th
->seq
) + th
->syn
+ th
->fin
+ skb
->len
-
875 tcp_v6_send_response(skb
, seq
, ack_seq
, 0, 0, 0, key
, 1, 0);
877 #ifdef CONFIG_TCP_MD5SIG
886 static void tcp_v6_send_ack(struct sk_buff
*skb
, u32 seq
, u32 ack
,
887 u32 win
, u32 tsval
, u32 tsecr
,
888 struct tcp_md5sig_key
*key
, u8 tclass
)
890 tcp_v6_send_response(skb
, seq
, ack
, win
, tsval
, tsecr
, key
, 0, tclass
);
893 static void tcp_v6_timewait_ack(struct sock
*sk
, struct sk_buff
*skb
)
895 struct inet_timewait_sock
*tw
= inet_twsk(sk
);
896 struct tcp_timewait_sock
*tcptw
= tcp_twsk(sk
);
898 tcp_v6_send_ack(skb
, tcptw
->tw_snd_nxt
, tcptw
->tw_rcv_nxt
,
899 tcptw
->tw_rcv_wnd
>> tw
->tw_rcv_wscale
,
900 tcp_time_stamp
+ tcptw
->tw_ts_offset
,
901 tcptw
->tw_ts_recent
, tcp_twsk_md5_key(tcptw
),
907 static void tcp_v6_reqsk_send_ack(struct sock
*sk
, struct sk_buff
*skb
,
908 struct request_sock
*req
)
911 * The window field (SEG.WND) of every outgoing segment, with the
912 * exception of <SYN> segments, MUST be right-shifted by
913 * Rcv.Wind.Shift bits:
915 tcp_v6_send_ack(skb
, tcp_rsk(req
)->snt_isn
+ 1, tcp_rsk(req
)->rcv_isn
+ 1,
916 req
->rcv_wnd
>> inet_rsk(req
)->rcv_wscale
,
917 tcp_time_stamp
, req
->ts_recent
,
918 tcp_v6_md5_do_lookup(sk
, &ipv6_hdr(skb
)->daddr
), 0);
922 static struct sock
*tcp_v6_hnd_req(struct sock
*sk
,struct sk_buff
*skb
)
924 struct request_sock
*req
, **prev
;
925 const struct tcphdr
*th
= tcp_hdr(skb
);
928 /* Find possible connection requests. */
929 req
= inet6_csk_search_req(sk
, &prev
, th
->source
,
930 &ipv6_hdr(skb
)->saddr
,
931 &ipv6_hdr(skb
)->daddr
, inet6_iif(skb
));
933 return tcp_check_req(sk
, skb
, req
, prev
, false);
935 nsk
= __inet6_lookup_established(sock_net(sk
), &tcp_hashinfo
,
936 &ipv6_hdr(skb
)->saddr
, th
->source
,
937 &ipv6_hdr(skb
)->daddr
, ntohs(th
->dest
), inet6_iif(skb
));
940 if (nsk
->sk_state
!= TCP_TIME_WAIT
) {
944 inet_twsk_put(inet_twsk(nsk
));
948 #ifdef CONFIG_SYN_COOKIES
950 sk
= cookie_v6_check(sk
, skb
);
955 /* FIXME: this is substantially similar to the ipv4 code.
956 * Can some kind of merge be done? -- erics
958 static int tcp_v6_conn_request(struct sock
*sk
, struct sk_buff
*skb
)
960 struct tcp_options_received tmp_opt
;
961 struct request_sock
*req
;
962 struct inet6_request_sock
*treq
;
963 struct ipv6_pinfo
*np
= inet6_sk(sk
);
964 struct tcp_sock
*tp
= tcp_sk(sk
);
965 __u32 isn
= TCP_SKB_CB(skb
)->when
;
966 struct dst_entry
*dst
= NULL
;
968 bool want_cookie
= false;
970 if (skb
->protocol
== htons(ETH_P_IP
))
971 return tcp_v4_conn_request(sk
, skb
);
973 if (!ipv6_unicast_destination(skb
))
976 if (inet_csk_reqsk_queue_is_full(sk
) && !isn
) {
977 want_cookie
= tcp_syn_flood_action(sk
, skb
, "TCPv6");
982 if (sk_acceptq_is_full(sk
) && inet_csk_reqsk_queue_young(sk
) > 1) {
983 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_LISTENOVERFLOWS
);
987 req
= inet6_reqsk_alloc(&tcp6_request_sock_ops
);
991 #ifdef CONFIG_TCP_MD5SIG
992 tcp_rsk(req
)->af_specific
= &tcp_request_sock_ipv6_ops
;
995 tcp_clear_options(&tmp_opt
);
996 tmp_opt
.mss_clamp
= IPV6_MIN_MTU
- sizeof(struct tcphdr
) - sizeof(struct ipv6hdr
);
997 tmp_opt
.user_mss
= tp
->rx_opt
.user_mss
;
998 tcp_parse_options(skb
, &tmp_opt
, 0, NULL
);
1000 if (want_cookie
&& !tmp_opt
.saw_tstamp
)
1001 tcp_clear_options(&tmp_opt
);
1003 tmp_opt
.tstamp_ok
= tmp_opt
.saw_tstamp
;
1004 tcp_openreq_init(req
, &tmp_opt
, skb
);
1006 treq
= inet6_rsk(req
);
1007 treq
->rmt_addr
= ipv6_hdr(skb
)->saddr
;
1008 treq
->loc_addr
= ipv6_hdr(skb
)->daddr
;
1009 if (!want_cookie
|| tmp_opt
.tstamp_ok
)
1010 TCP_ECN_create_request(req
, skb
, sock_net(sk
));
1012 treq
->iif
= sk
->sk_bound_dev_if
;
1013 inet_rsk(req
)->ir_mark
= inet_request_mark(sk
, skb
);
1015 /* So that link locals have meaning */
1016 if (!sk
->sk_bound_dev_if
&&
1017 ipv6_addr_type(&treq
->rmt_addr
) & IPV6_ADDR_LINKLOCAL
)
1018 treq
->iif
= inet6_iif(skb
);
1021 if (ipv6_opt_accepted(sk
, skb
) ||
1022 np
->rxopt
.bits
.rxinfo
|| np
->rxopt
.bits
.rxoinfo
||
1023 np
->rxopt
.bits
.rxhlim
|| np
->rxopt
.bits
.rxohlim
) {
1024 atomic_inc(&skb
->users
);
1025 treq
->pktopts
= skb
;
1029 isn
= cookie_v6_init_sequence(sk
, skb
, &req
->mss
);
1030 req
->cookie_ts
= tmp_opt
.tstamp_ok
;
1034 /* VJ's idea. We save last timestamp seen
1035 * from the destination in peer table, when entering
1036 * state TIME-WAIT, and check against it before
1037 * accepting new connection request.
1039 * If "isn" is not zero, this request hit alive
1040 * timewait bucket, so that all the necessary checks
1041 * are made in the function processing timewait state.
1043 if (tmp_opt
.saw_tstamp
&&
1044 tcp_death_row
.sysctl_tw_recycle
&&
1045 (dst
= inet6_csk_route_req(sk
, &fl6
, req
)) != NULL
) {
1046 if (!tcp_peer_is_proven(req
, dst
, true)) {
1047 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_PAWSPASSIVEREJECTED
);
1048 goto drop_and_release
;
1051 /* Kill the following clause, if you dislike this way. */
1052 else if (!sysctl_tcp_syncookies
&&
1053 (sysctl_max_syn_backlog
- inet_csk_reqsk_queue_len(sk
) <
1054 (sysctl_max_syn_backlog
>> 2)) &&
1055 !tcp_peer_is_proven(req
, dst
, false)) {
1056 /* Without syncookies last quarter of
1057 * backlog is filled with destinations,
1058 * proven to be alive.
1059 * It means that we continue to communicate
1060 * to destinations, already remembered
1061 * to the moment of synflood.
1063 LIMIT_NETDEBUG(KERN_DEBUG
"TCP: drop open request from %pI6/%u\n",
1064 &treq
->rmt_addr
, ntohs(tcp_hdr(skb
)->source
));
1065 goto drop_and_release
;
1068 isn
= tcp_v6_init_sequence(skb
);
1071 tcp_rsk(req
)->snt_isn
= isn
;
1073 if (security_inet_conn_request(sk
, skb
, req
))
1074 goto drop_and_release
;
1076 if (tcp_v6_send_synack(sk
, dst
, &fl6
, req
,
1077 skb_get_queue_mapping(skb
)) ||
1081 tcp_rsk(req
)->snt_synack
= tcp_time_stamp
;
1082 tcp_rsk(req
)->listener
= NULL
;
1083 inet6_csk_reqsk_queue_hash_add(sk
, req
, TCP_TIMEOUT_INIT
);
1091 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_LISTENDROPS
);
1092 return 0; /* don't send reset */
1095 static struct sock
* tcp_v6_syn_recv_sock(struct sock
*sk
, struct sk_buff
*skb
,
1096 struct request_sock
*req
,
1097 struct dst_entry
*dst
)
1099 struct inet6_request_sock
*treq
;
1100 struct ipv6_pinfo
*newnp
, *np
= inet6_sk(sk
);
1101 struct tcp6_sock
*newtcp6sk
;
1102 struct inet_sock
*newinet
;
1103 struct tcp_sock
*newtp
;
1105 #ifdef CONFIG_TCP_MD5SIG
1106 struct tcp_md5sig_key
*key
;
1110 if (skb
->protocol
== htons(ETH_P_IP
)) {
1115 newsk
= tcp_v4_syn_recv_sock(sk
, skb
, req
, dst
);
1120 newtcp6sk
= (struct tcp6_sock
*)newsk
;
1121 inet_sk(newsk
)->pinet6
= &newtcp6sk
->inet6
;
1123 newinet
= inet_sk(newsk
);
1124 newnp
= inet6_sk(newsk
);
1125 newtp
= tcp_sk(newsk
);
1127 memcpy(newnp
, np
, sizeof(struct ipv6_pinfo
));
1129 ipv6_addr_set_v4mapped(newinet
->inet_daddr
, &newnp
->daddr
);
1131 ipv6_addr_set_v4mapped(newinet
->inet_saddr
, &newnp
->saddr
);
1133 newnp
->rcv_saddr
= newnp
->saddr
;
1135 inet_csk(newsk
)->icsk_af_ops
= &ipv6_mapped
;
1136 newsk
->sk_backlog_rcv
= tcp_v4_do_rcv
;
1137 #ifdef CONFIG_TCP_MD5SIG
1138 newtp
->af_specific
= &tcp_sock_ipv6_mapped_specific
;
1141 newnp
->ipv6_ac_list
= NULL
;
1142 newnp
->ipv6_fl_list
= NULL
;
1143 newnp
->pktoptions
= NULL
;
1145 newnp
->mcast_oif
= inet6_iif(skb
);
1146 newnp
->mcast_hops
= ipv6_hdr(skb
)->hop_limit
;
1147 newnp
->rcv_tclass
= ipv6_get_dsfield(ipv6_hdr(skb
));
1150 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1151 * here, tcp_create_openreq_child now does this for us, see the comment in
1152 * that function for the gory details. -acme
1155 /* It is tricky place. Until this moment IPv4 tcp
1156 worked with IPv6 icsk.icsk_af_ops.
1159 tcp_sync_mss(newsk
, inet_csk(newsk
)->icsk_pmtu_cookie
);
1164 treq
= inet6_rsk(req
);
1166 if (sk_acceptq_is_full(sk
))
1170 dst
= inet6_csk_route_req(sk
, &fl6
, req
);
1175 newsk
= tcp_create_openreq_child(sk
, req
, skb
);
1180 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1181 * count here, tcp_create_openreq_child now does this for us, see the
1182 * comment in that function for the gory details. -acme
1185 newsk
->sk_gso_type
= SKB_GSO_TCPV6
;
1186 __ip6_dst_store(newsk
, dst
, NULL
, NULL
);
1187 inet6_sk_rx_dst_set(newsk
, skb
);
1189 newtcp6sk
= (struct tcp6_sock
*)newsk
;
1190 inet_sk(newsk
)->pinet6
= &newtcp6sk
->inet6
;
1192 newtp
= tcp_sk(newsk
);
1193 newinet
= inet_sk(newsk
);
1194 newnp
= inet6_sk(newsk
);
1196 memcpy(newnp
, np
, sizeof(struct ipv6_pinfo
));
1198 newnp
->daddr
= treq
->rmt_addr
;
1199 newnp
->saddr
= treq
->loc_addr
;
1200 newnp
->rcv_saddr
= treq
->loc_addr
;
1201 newsk
->sk_bound_dev_if
= treq
->iif
;
1203 /* Now IPv6 options...
1205 First: no IPv4 options.
1207 newinet
->inet_opt
= NULL
;
1208 newnp
->ipv6_ac_list
= NULL
;
1209 newnp
->ipv6_fl_list
= NULL
;
1212 newnp
->rxopt
.all
= np
->rxopt
.all
;
1214 /* Clone pktoptions received with SYN */
1215 newnp
->pktoptions
= NULL
;
1216 if (treq
->pktopts
!= NULL
) {
1217 newnp
->pktoptions
= skb_clone(treq
->pktopts
,
1218 sk_gfp_atomic(sk
, GFP_ATOMIC
));
1219 consume_skb(treq
->pktopts
);
1220 treq
->pktopts
= NULL
;
1221 if (newnp
->pktoptions
)
1222 skb_set_owner_r(newnp
->pktoptions
, newsk
);
1225 newnp
->mcast_oif
= inet6_iif(skb
);
1226 newnp
->mcast_hops
= ipv6_hdr(skb
)->hop_limit
;
1227 newnp
->rcv_tclass
= ipv6_get_dsfield(ipv6_hdr(skb
));
1229 /* Clone native IPv6 options from listening socket (if any)
1231 Yes, keeping reference count would be much more clever,
1232 but we make one more one thing there: reattach optmem
1236 newnp
->opt
= ipv6_dup_options(newsk
, np
->opt
);
1238 inet_csk(newsk
)->icsk_ext_hdr_len
= 0;
1240 inet_csk(newsk
)->icsk_ext_hdr_len
= (newnp
->opt
->opt_nflen
+
1241 newnp
->opt
->opt_flen
);
1243 tcp_mtup_init(newsk
);
1244 tcp_sync_mss(newsk
, dst_mtu(dst
));
1245 newtp
->advmss
= dst_metric_advmss(dst
);
1246 if (tcp_sk(sk
)->rx_opt
.user_mss
&&
1247 tcp_sk(sk
)->rx_opt
.user_mss
< newtp
->advmss
)
1248 newtp
->advmss
= tcp_sk(sk
)->rx_opt
.user_mss
;
1250 tcp_initialize_rcv_mss(newsk
);
1251 tcp_synack_rtt_meas(newsk
, req
);
1252 newtp
->total_retrans
= req
->num_retrans
;
1254 newinet
->inet_daddr
= newinet
->inet_saddr
= LOOPBACK4_IPV6
;
1255 newinet
->inet_rcv_saddr
= LOOPBACK4_IPV6
;
1257 #ifdef CONFIG_TCP_MD5SIG
1258 /* Copy over the MD5 key from the original socket */
1259 if ((key
= tcp_v6_md5_do_lookup(sk
, &newnp
->daddr
)) != NULL
) {
1260 /* We're using one, so create a matching key
1261 * on the newsk structure. If we fail to get
1262 * memory, then we end up not copying the key
1265 tcp_md5_do_add(newsk
, (union tcp_md5_addr
*)&newnp
->daddr
,
1266 AF_INET6
, key
->key
, key
->keylen
,
1267 sk_gfp_atomic(sk
, GFP_ATOMIC
));
1271 if (__inet_inherit_port(sk
, newsk
) < 0) {
1272 inet_csk_prepare_forced_close(newsk
);
1276 __inet6_hash(newsk
, NULL
);
1281 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_LISTENOVERFLOWS
);
1285 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_LISTENDROPS
);
1289 static __sum16
tcp_v6_checksum_init(struct sk_buff
*skb
)
1291 if (skb
->ip_summed
== CHECKSUM_COMPLETE
) {
1292 if (!tcp_v6_check(skb
->len
, &ipv6_hdr(skb
)->saddr
,
1293 &ipv6_hdr(skb
)->daddr
, skb
->csum
)) {
1294 skb
->ip_summed
= CHECKSUM_UNNECESSARY
;
1299 skb
->csum
= ~csum_unfold(tcp_v6_check(skb
->len
,
1300 &ipv6_hdr(skb
)->saddr
,
1301 &ipv6_hdr(skb
)->daddr
, 0));
1303 if (skb
->len
<= 76) {
1304 return __skb_checksum_complete(skb
);
1309 /* The socket must have it's spinlock held when we get
1312 * We have a potential double-lock case here, so even when
1313 * doing backlog processing we use the BH locking scheme.
1314 * This is because we cannot sleep with the original spinlock
1317 static int tcp_v6_do_rcv(struct sock
*sk
, struct sk_buff
*skb
)
1319 struct ipv6_pinfo
*np
= inet6_sk(sk
);
1320 struct tcp_sock
*tp
;
1321 struct sk_buff
*opt_skb
= NULL
;
1323 /* Imagine: socket is IPv6. IPv4 packet arrives,
1324 goes to IPv4 receive handler and backlogged.
1325 From backlog it always goes here. Kerboom...
1326 Fortunately, tcp_rcv_established and rcv_established
1327 handle them correctly, but it is not case with
1328 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1331 if (skb
->protocol
== htons(ETH_P_IP
))
1332 return tcp_v4_do_rcv(sk
, skb
);
1334 #ifdef CONFIG_TCP_MD5SIG
1335 if (tcp_v6_inbound_md5_hash (sk
, skb
))
1339 if (tcp_filter(sk
, skb
))
1343 * socket locking is here for SMP purposes as backlog rcv
1344 * is currently called with bh processing disabled.
1347 /* Do Stevens' IPV6_PKTOPTIONS.
1349 Yes, guys, it is the only place in our code, where we
1350 may make it not affecting IPv4.
1351 The rest of code is protocol independent,
1352 and I do not like idea to uglify IPv4.
1354 Actually, all the idea behind IPV6_PKTOPTIONS
1355 looks not very well thought. For now we latch
1356 options, received in the last packet, enqueued
1357 by tcp. Feel free to propose better solution.
1361 opt_skb
= skb_clone(skb
, sk_gfp_atomic(sk
, GFP_ATOMIC
));
1363 if (sk
->sk_state
== TCP_ESTABLISHED
) { /* Fast path */
1364 struct dst_entry
*dst
= sk
->sk_rx_dst
;
1366 sock_rps_save_rxhash(sk
, skb
);
1368 if (inet_sk(sk
)->rx_dst_ifindex
!= skb
->skb_iif
||
1369 dst
->ops
->check(dst
, np
->rx_dst_cookie
) == NULL
) {
1371 sk
->sk_rx_dst
= NULL
;
1375 if (tcp_rcv_established(sk
, skb
, tcp_hdr(skb
), skb
->len
))
1378 goto ipv6_pktoptions
;
1382 if (skb
->len
< tcp_hdrlen(skb
) || tcp_checksum_complete(skb
))
1385 if (sk
->sk_state
== TCP_LISTEN
) {
1386 struct sock
*nsk
= tcp_v6_hnd_req(sk
, skb
);
1391 * Queue it on the new socket if the new socket is active,
1392 * otherwise we just shortcircuit this and continue with
1396 sock_rps_save_rxhash(nsk
, skb
);
1397 if (tcp_child_process(sk
, nsk
, skb
))
1400 __kfree_skb(opt_skb
);
1404 sock_rps_save_rxhash(sk
, skb
);
1406 if (tcp_rcv_state_process(sk
, skb
, tcp_hdr(skb
), skb
->len
))
1409 goto ipv6_pktoptions
;
1413 tcp_v6_send_reset(sk
, skb
);
1416 __kfree_skb(opt_skb
);
1420 TCP_INC_STATS_BH(sock_net(sk
), TCP_MIB_CSUMERRORS
);
1421 TCP_INC_STATS_BH(sock_net(sk
), TCP_MIB_INERRS
);
1426 /* Do you ask, what is it?
1428 1. skb was enqueued by tcp.
1429 2. skb is added to tail of read queue, rather than out of order.
1430 3. socket is not in passive state.
1431 4. Finally, it really contains options, which user wants to receive.
1434 if (TCP_SKB_CB(opt_skb
)->end_seq
== tp
->rcv_nxt
&&
1435 !((1 << sk
->sk_state
) & (TCPF_CLOSE
| TCPF_LISTEN
))) {
1436 if (np
->rxopt
.bits
.rxinfo
|| np
->rxopt
.bits
.rxoinfo
)
1437 np
->mcast_oif
= inet6_iif(opt_skb
);
1438 if (np
->rxopt
.bits
.rxhlim
|| np
->rxopt
.bits
.rxohlim
)
1439 np
->mcast_hops
= ipv6_hdr(opt_skb
)->hop_limit
;
1440 if (np
->rxopt
.bits
.rxtclass
)
1441 np
->rcv_tclass
= ipv6_get_dsfield(ipv6_hdr(opt_skb
));
1442 if (ipv6_opt_accepted(sk
, opt_skb
)) {
1443 skb_set_owner_r(opt_skb
, sk
);
1444 opt_skb
= xchg(&np
->pktoptions
, opt_skb
);
1446 __kfree_skb(opt_skb
);
1447 opt_skb
= xchg(&np
->pktoptions
, NULL
);
1455 static int tcp_v6_rcv(struct sk_buff
*skb
)
1457 const struct tcphdr
*th
;
1458 const struct ipv6hdr
*hdr
;
1461 struct net
*net
= dev_net(skb
->dev
);
1463 if (skb
->pkt_type
!= PACKET_HOST
)
1467 * Count it even if it's bad.
1469 TCP_INC_STATS_BH(net
, TCP_MIB_INSEGS
);
1471 if (!pskb_may_pull(skb
, sizeof(struct tcphdr
)))
1476 if (th
->doff
< sizeof(struct tcphdr
)/4)
1478 if (!pskb_may_pull(skb
, th
->doff
*4))
1481 if (!skb_csum_unnecessary(skb
) && tcp_v6_checksum_init(skb
))
1485 hdr
= ipv6_hdr(skb
);
1486 TCP_SKB_CB(skb
)->seq
= ntohl(th
->seq
);
1487 TCP_SKB_CB(skb
)->end_seq
= (TCP_SKB_CB(skb
)->seq
+ th
->syn
+ th
->fin
+
1488 skb
->len
- th
->doff
*4);
1489 TCP_SKB_CB(skb
)->ack_seq
= ntohl(th
->ack_seq
);
1490 TCP_SKB_CB(skb
)->when
= 0;
1491 TCP_SKB_CB(skb
)->ip_dsfield
= ipv6_get_dsfield(hdr
);
1492 TCP_SKB_CB(skb
)->sacked
= 0;
1494 sk
= __inet6_lookup_skb(&tcp_hashinfo
, skb
, th
->source
, th
->dest
);
1499 if (sk
->sk_state
== TCP_TIME_WAIT
)
1502 if (hdr
->hop_limit
< inet6_sk(sk
)->min_hopcount
) {
1503 NET_INC_STATS_BH(net
, LINUX_MIB_TCPMINTTLDROP
);
1504 goto discard_and_relse
;
1507 if (!xfrm6_policy_check(sk
, XFRM_POLICY_IN
, skb
))
1508 goto discard_and_relse
;
1510 if (tcp_filter(sk
, skb
))
1511 goto discard_and_relse
;
1512 th
= (const struct tcphdr
*)skb
->data
;
1513 hdr
= ipv6_hdr(skb
);
1517 bh_lock_sock_nested(sk
);
1519 if (!sock_owned_by_user(sk
)) {
1520 #ifdef CONFIG_NET_DMA
1521 struct tcp_sock
*tp
= tcp_sk(sk
);
1522 if (!tp
->ucopy
.dma_chan
&& tp
->ucopy
.pinned_list
)
1523 tp
->ucopy
.dma_chan
= net_dma_find_channel();
1524 if (tp
->ucopy
.dma_chan
)
1525 ret
= tcp_v6_do_rcv(sk
, skb
);
1529 if (!tcp_prequeue(sk
, skb
))
1530 ret
= tcp_v6_do_rcv(sk
, skb
);
1532 } else if (unlikely(sk_add_backlog(sk
, skb
,
1533 sk
->sk_rcvbuf
+ sk
->sk_sndbuf
))) {
1535 NET_INC_STATS_BH(net
, LINUX_MIB_TCPBACKLOGDROP
);
1536 goto discard_and_relse
;
1541 return ret
? -1 : 0;
1544 if (!xfrm6_policy_check(NULL
, XFRM_POLICY_IN
, skb
))
1547 if (skb
->len
< (th
->doff
<<2) || tcp_checksum_complete(skb
)) {
1549 TCP_INC_STATS_BH(net
, TCP_MIB_CSUMERRORS
);
1551 TCP_INC_STATS_BH(net
, TCP_MIB_INERRS
);
1553 tcp_v6_send_reset(NULL
, skb
);
1565 if (!xfrm6_policy_check(NULL
, XFRM_POLICY_IN
, skb
)) {
1566 inet_twsk_put(inet_twsk(sk
));
1570 if (skb
->len
< (th
->doff
<<2)) {
1571 inet_twsk_put(inet_twsk(sk
));
1574 if (tcp_checksum_complete(skb
)) {
1575 inet_twsk_put(inet_twsk(sk
));
1579 switch (tcp_timewait_state_process(inet_twsk(sk
), skb
, th
)) {
1584 sk2
= inet6_lookup_listener(dev_net(skb
->dev
), &tcp_hashinfo
,
1585 &ipv6_hdr(skb
)->saddr
, th
->source
,
1586 &ipv6_hdr(skb
)->daddr
,
1587 ntohs(th
->dest
), inet6_iif(skb
));
1589 struct inet_timewait_sock
*tw
= inet_twsk(sk
);
1590 inet_twsk_deschedule(tw
, &tcp_death_row
);
1595 /* Fall through to ACK */
1598 tcp_v6_timewait_ack(sk
, skb
);
1602 case TCP_TW_SUCCESS
:;
1607 static void tcp_v6_early_demux(struct sk_buff
*skb
)
1609 const struct ipv6hdr
*hdr
;
1610 const struct tcphdr
*th
;
1613 if (skb
->pkt_type
!= PACKET_HOST
)
1616 if (!pskb_may_pull(skb
, skb_transport_offset(skb
) + sizeof(struct tcphdr
)))
1619 hdr
= ipv6_hdr(skb
);
1622 if (th
->doff
< sizeof(struct tcphdr
) / 4)
1625 sk
= __inet6_lookup_established(dev_net(skb
->dev
), &tcp_hashinfo
,
1626 &hdr
->saddr
, th
->source
,
1627 &hdr
->daddr
, ntohs(th
->dest
),
1631 skb
->destructor
= sock_edemux
;
1632 if (sk
->sk_state
!= TCP_TIME_WAIT
) {
1633 struct dst_entry
*dst
= ACCESS_ONCE(sk
->sk_rx_dst
);
1636 dst
= dst_check(dst
, inet6_sk(sk
)->rx_dst_cookie
);
1638 inet_sk(sk
)->rx_dst_ifindex
== skb
->skb_iif
)
1639 skb_dst_set_noref(skb
, dst
);
1644 static struct timewait_sock_ops tcp6_timewait_sock_ops
= {
1645 .twsk_obj_size
= sizeof(struct tcp6_timewait_sock
),
1646 .twsk_unique
= tcp_twsk_unique
,
1647 .twsk_destructor
= tcp_twsk_destructor
,
1650 static const struct inet_connection_sock_af_ops ipv6_specific
= {
1651 .queue_xmit
= inet6_csk_xmit
,
1652 .send_check
= tcp_v6_send_check
,
1653 .rebuild_header
= inet6_sk_rebuild_header
,
1654 .sk_rx_dst_set
= inet6_sk_rx_dst_set
,
1655 .conn_request
= tcp_v6_conn_request
,
1656 .syn_recv_sock
= tcp_v6_syn_recv_sock
,
1657 .net_header_len
= sizeof(struct ipv6hdr
),
1658 .net_frag_header_len
= sizeof(struct frag_hdr
),
1659 .setsockopt
= ipv6_setsockopt
,
1660 .getsockopt
= ipv6_getsockopt
,
1661 .addr2sockaddr
= inet6_csk_addr2sockaddr
,
1662 .sockaddr_len
= sizeof(struct sockaddr_in6
),
1663 .bind_conflict
= inet6_csk_bind_conflict
,
1664 #ifdef CONFIG_COMPAT
1665 .compat_setsockopt
= compat_ipv6_setsockopt
,
1666 .compat_getsockopt
= compat_ipv6_getsockopt
,
1668 .mtu_reduced
= tcp_v6_mtu_reduced
,
#ifdef CONFIG_TCP_MD5SIG
/* MD5 (RFC 2385) helpers for native IPv6 sockets. */
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif
1680 * TCP over IPv4 via INET6 API
1683 static const struct inet_connection_sock_af_ops ipv6_mapped
= {
1684 .queue_xmit
= ip_queue_xmit
,
1685 .send_check
= tcp_v4_send_check
,
1686 .rebuild_header
= inet_sk_rebuild_header
,
1687 .sk_rx_dst_set
= inet_sk_rx_dst_set
,
1688 .conn_request
= tcp_v6_conn_request
,
1689 .syn_recv_sock
= tcp_v6_syn_recv_sock
,
1690 .net_header_len
= sizeof(struct iphdr
),
1691 .setsockopt
= ipv6_setsockopt
,
1692 .getsockopt
= ipv6_getsockopt
,
1693 .addr2sockaddr
= inet6_csk_addr2sockaddr
,
1694 .sockaddr_len
= sizeof(struct sockaddr_in6
),
1695 .bind_conflict
= inet6_csk_bind_conflict
,
1696 #ifdef CONFIG_COMPAT
1697 .compat_setsockopt
= compat_ipv6_setsockopt
,
1698 .compat_getsockopt
= compat_ipv6_getsockopt
,
1700 .mtu_reduced
= tcp_v4_mtu_reduced
,
#ifdef CONFIG_TCP_MD5SIG
/* MD5 (RFC 2385) helpers for v4-mapped IPv6 sockets: hash as IPv4,
 * but parse setsockopt keys through the IPv6 entry point.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif
1711 /* NOTE: A lot of things set to zero explicitly by call to
1712 * sk_alloc() so need not be done here.
1714 static int tcp_v6_init_sock(struct sock
*sk
)
1716 struct inet_connection_sock
*icsk
= inet_csk(sk
);
1720 icsk
->icsk_af_ops
= &ipv6_specific
;
1722 #ifdef CONFIG_TCP_MD5SIG
1723 tcp_sk(sk
)->af_specific
= &tcp_sock_ipv6_specific
;
/* Destroy an IPv6 TCP socket: run the common TCP teardown, then release
 * IPv6-specific state.
 */
static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}
1735 #ifdef CONFIG_PROC_FS
1736 /* Proc filesystem TCPv6 sock list dumping. */
1737 static void get_openreq6(struct seq_file
*seq
,
1738 const struct sock
*sk
, struct request_sock
*req
, int i
, kuid_t uid
)
1740 int ttd
= req
->expires
- jiffies
;
1741 const struct in6_addr
*src
= &inet6_rsk(req
)->loc_addr
;
1742 const struct in6_addr
*dest
= &inet6_rsk(req
)->rmt_addr
;
1748 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1749 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1751 src
->s6_addr32
[0], src
->s6_addr32
[1],
1752 src
->s6_addr32
[2], src
->s6_addr32
[3],
1753 ntohs(inet_rsk(req
)->loc_port
),
1754 dest
->s6_addr32
[0], dest
->s6_addr32
[1],
1755 dest
->s6_addr32
[2], dest
->s6_addr32
[3],
1756 ntohs(inet_rsk(req
)->rmt_port
),
1758 0,0, /* could print option size, but that is af dependent. */
1759 1, /* timers active (only the expire timer) */
1760 jiffies_to_clock_t(ttd
),
1762 from_kuid_munged(seq_user_ns(seq
), uid
),
1763 0, /* non standard timer */
1764 0, /* open_requests have no inode */
1768 static void get_tcp6_sock(struct seq_file
*seq
, struct sock
*sp
, int i
)
1770 const struct in6_addr
*dest
, *src
;
1773 unsigned long timer_expires
;
1774 const struct inet_sock
*inet
= inet_sk(sp
);
1775 const struct tcp_sock
*tp
= tcp_sk(sp
);
1776 const struct inet_connection_sock
*icsk
= inet_csk(sp
);
1777 const struct ipv6_pinfo
*np
= inet6_sk(sp
);
1780 src
= &np
->rcv_saddr
;
1781 destp
= ntohs(inet
->inet_dport
);
1782 srcp
= ntohs(inet
->inet_sport
);
1784 if (icsk
->icsk_pending
== ICSK_TIME_RETRANS
||
1785 icsk
->icsk_pending
== ICSK_TIME_EARLY_RETRANS
||
1786 icsk
->icsk_pending
== ICSK_TIME_LOSS_PROBE
) {
1788 timer_expires
= icsk
->icsk_timeout
;
1789 } else if (icsk
->icsk_pending
== ICSK_TIME_PROBE0
) {
1791 timer_expires
= icsk
->icsk_timeout
;
1792 } else if (timer_pending(&sp
->sk_timer
)) {
1794 timer_expires
= sp
->sk_timer
.expires
;
1797 timer_expires
= jiffies
;
1801 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1802 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %lu %lu %u %u %d\n",
1804 src
->s6_addr32
[0], src
->s6_addr32
[1],
1805 src
->s6_addr32
[2], src
->s6_addr32
[3], srcp
,
1806 dest
->s6_addr32
[0], dest
->s6_addr32
[1],
1807 dest
->s6_addr32
[2], dest
->s6_addr32
[3], destp
,
1809 tp
->write_seq
-tp
->snd_una
,
1810 (sp
->sk_state
== TCP_LISTEN
) ? sp
->sk_ack_backlog
: (tp
->rcv_nxt
- tp
->copied_seq
),
1812 jiffies_delta_to_clock_t(timer_expires
- jiffies
),
1813 icsk
->icsk_retransmits
,
1814 from_kuid_munged(seq_user_ns(seq
), sock_i_uid(sp
)),
1815 icsk
->icsk_probes_out
,
1817 atomic_read(&sp
->sk_refcnt
), sp
,
1818 jiffies_to_clock_t(icsk
->icsk_rto
),
1819 jiffies_to_clock_t(icsk
->icsk_ack
.ato
),
1820 (icsk
->icsk_ack
.quick
<< 1 ) | icsk
->icsk_ack
.pingpong
,
1822 tcp_in_initial_slowstart(tp
) ? -1 : tp
->snd_ssthresh
1826 static void get_timewait6_sock(struct seq_file
*seq
,
1827 struct inet_timewait_sock
*tw
, int i
)
1829 const struct in6_addr
*dest
, *src
;
1831 const struct inet6_timewait_sock
*tw6
= inet6_twsk((struct sock
*)tw
);
1832 long delta
= tw
->tw_ttd
- jiffies
;
1834 dest
= &tw6
->tw_v6_daddr
;
1835 src
= &tw6
->tw_v6_rcv_saddr
;
1836 destp
= ntohs(tw
->tw_dport
);
1837 srcp
= ntohs(tw
->tw_sport
);
1840 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1841 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1843 src
->s6_addr32
[0], src
->s6_addr32
[1],
1844 src
->s6_addr32
[2], src
->s6_addr32
[3], srcp
,
1845 dest
->s6_addr32
[0], dest
->s6_addr32
[1],
1846 dest
->s6_addr32
[2], dest
->s6_addr32
[3], destp
,
1847 tw
->tw_substate
, 0, 0,
1848 3, jiffies_delta_to_clock_t(delta
), 0, 0, 0, 0,
1849 atomic_read(&tw
->tw_refcnt
), tw
);
1852 static int tcp6_seq_show(struct seq_file
*seq
, void *v
)
1854 struct tcp_iter_state
*st
;
1856 if (v
== SEQ_START_TOKEN
) {
1861 "st tx_queue rx_queue tr tm->when retrnsmt"
1862 " uid timeout inode\n");
1867 switch (st
->state
) {
1868 case TCP_SEQ_STATE_LISTENING
:
1869 case TCP_SEQ_STATE_ESTABLISHED
:
1870 get_tcp6_sock(seq
, v
, st
->num
);
1872 case TCP_SEQ_STATE_OPENREQ
:
1873 get_openreq6(seq
, st
->syn_wait_sk
, v
, st
->num
, st
->uid
);
1875 case TCP_SEQ_STATE_TIME_WAIT
:
1876 get_timewait6_sock(seq
, v
, st
->num
);
1883 static const struct file_operations tcp6_afinfo_seq_fops
= {
1884 .owner
= THIS_MODULE
,
1885 .open
= tcp_seq_open
,
1887 .llseek
= seq_lseek
,
1888 .release
= seq_release_net
1891 static struct tcp_seq_afinfo tcp6_seq_afinfo
= {
1894 .seq_fops
= &tcp6_afinfo_seq_fops
,
1896 .show
= tcp6_seq_show
,
1900 int __net_init
tcp6_proc_init(struct net
*net
)
1902 return tcp_proc_register(net
, &tcp6_seq_afinfo
);
1905 void tcp6_proc_exit(struct net
*net
)
1907 tcp_proc_unregister(net
, &tcp6_seq_afinfo
);
1911 static void tcp_v6_clear_sk(struct sock
*sk
, int size
)
1913 struct inet_sock
*inet
= inet_sk(sk
);
1915 /* we do not want to clear pinet6 field, because of RCU lookups */
1916 sk_prot_clear_nulls(sk
, offsetof(struct inet_sock
, pinet6
));
1918 size
-= offsetof(struct inet_sock
, pinet6
) + sizeof(inet
->pinet6
);
1919 memset(&inet
->pinet6
+ 1, 0, size
);
1922 struct proto tcpv6_prot
= {
1924 .owner
= THIS_MODULE
,
1926 .connect
= tcp_v6_connect
,
1927 .disconnect
= tcp_disconnect
,
1928 .accept
= inet_csk_accept
,
1930 .init
= tcp_v6_init_sock
,
1931 .destroy
= tcp_v6_destroy_sock
,
1932 .shutdown
= tcp_shutdown
,
1933 .setsockopt
= tcp_setsockopt
,
1934 .getsockopt
= tcp_getsockopt
,
1935 .recvmsg
= tcp_recvmsg
,
1936 .sendmsg
= tcp_sendmsg
,
1937 .sendpage
= tcp_sendpage
,
1938 .backlog_rcv
= tcp_v6_do_rcv
,
1939 .release_cb
= tcp_release_cb
,
1940 .hash
= tcp_v6_hash
,
1941 .unhash
= inet_unhash
,
1942 .get_port
= inet_csk_get_port
,
1943 .enter_memory_pressure
= tcp_enter_memory_pressure
,
1944 .sockets_allocated
= &tcp_sockets_allocated
,
1945 .memory_allocated
= &tcp_memory_allocated
,
1946 .memory_pressure
= &tcp_memory_pressure
,
1947 .orphan_count
= &tcp_orphan_count
,
1948 .sysctl_wmem
= sysctl_tcp_wmem
,
1949 .sysctl_rmem
= sysctl_tcp_rmem
,
1950 .max_header
= MAX_TCP_HEADER
,
1951 .obj_size
= sizeof(struct tcp6_sock
),
1952 .slab_flags
= SLAB_DESTROY_BY_RCU
,
1953 .twsk_prot
= &tcp6_timewait_sock_ops
,
1954 .rsk_prot
= &tcp6_request_sock_ops
,
1955 .h
.hashinfo
= &tcp_hashinfo
,
1956 .no_autobind
= true,
1957 #ifdef CONFIG_COMPAT
1958 .compat_setsockopt
= compat_tcp_setsockopt
,
1959 .compat_getsockopt
= compat_tcp_getsockopt
,
1961 #ifdef CONFIG_MEMCG_KMEM
1962 .proto_cgroup
= tcp_proto_cgroup
,
1964 .clear_sk
= tcp_v6_clear_sk
,
1967 static const struct inet6_protocol tcpv6_protocol
= {
1968 .early_demux
= tcp_v6_early_demux
,
1969 .handler
= tcp_v6_rcv
,
1970 .err_handler
= tcp_v6_err
,
1971 .flags
= INET6_PROTO_NOPOLICY
|INET6_PROTO_FINAL
,
1974 static struct inet_protosw tcpv6_protosw
= {
1975 .type
= SOCK_STREAM
,
1976 .protocol
= IPPROTO_TCP
,
1977 .prot
= &tcpv6_prot
,
1978 .ops
= &inet6_stream_ops
,
1980 .flags
= INET_PROTOSW_PERMANENT
|
1984 static int __net_init
tcpv6_net_init(struct net
*net
)
1986 return inet_ctl_sock_create(&net
->ipv6
.tcp_sk
, PF_INET6
,
1987 SOCK_RAW
, IPPROTO_TCP
, net
);
1990 static void __net_exit
tcpv6_net_exit(struct net
*net
)
1992 inet_ctl_sock_destroy(net
->ipv6
.tcp_sk
);
1995 static void __net_exit
tcpv6_net_exit_batch(struct list_head
*net_exit_list
)
1997 inet_twsk_purge(&tcp_hashinfo
, &tcp_death_row
, AF_INET6
);
2000 static struct pernet_operations tcpv6_net_ops
= {
2001 .init
= tcpv6_net_init
,
2002 .exit
= tcpv6_net_exit
,
2003 .exit_batch
= tcpv6_net_exit_batch
,
2006 int __init
tcpv6_init(void)
2010 ret
= inet6_add_protocol(&tcpv6_protocol
, IPPROTO_TCP
);
2014 /* register inet6 protocol */
2015 ret
= inet6_register_protosw(&tcpv6_protosw
);
2017 goto out_tcpv6_protocol
;
2019 ret
= register_pernet_subsys(&tcpv6_net_ops
);
2021 goto out_tcpv6_protosw
;
2026 inet6_unregister_protosw(&tcpv6_protosw
);
2028 inet6_del_protocol(&tcpv6_protocol
, IPPROTO_TCP
);
2032 void tcpv6_exit(void)
2034 unregister_pernet_subsys(&tcpv6_net_ops
);
2035 inet6_unregister_protosw(&tcpv6_protosw
);
2036 inet6_del_protocol(&tcpv6_protocol
, IPPROTO_TCP
);