[SOCK]: Introduce sk_clone
net/ipv6/tcp_ipv6.c

/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/addrconf.h>
#include <net/snmp.h>
#include <net/dsfield.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static void	tcp_v6_send_reset(struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
static void	tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
				  struct sk_buff *skb);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static int	tcp_v6_xmit(struct sk_buff *skb, int ipfragok);

static struct tcp_func ipv6_mapped;
static struct tcp_func ipv6_specific;

/* I have no idea if this is a good hash for v6 or not. -DaveM */
static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
				    struct in6_addr *faddr, u16 fport)
{
	int hashent = (lport ^ fport);

	hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
	hashent ^= hashent >> 16;
	hashent ^= hashent >> 8;
	return (hashent & (tcp_hashinfo.ehash_size - 1));
}

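/*
 * Hash an established socket by its own 4-tuple, reusing tcp_v6_hashfn()
 * on the stored local/remote addresses and ports.
 */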
static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *laddr = &np->rcv_saddr;
	struct in6_addr *faddr = &np->daddr;
	__u16 lport = inet->num;
	__u16 fport = inet->dport;
	return tcp_v6_hashfn(laddr, lport, faddr, fport);
}

static inline int tcp_v6_bind_conflict(const struct sock *sk,
				       const struct inet_bind_bucket *tb)
{
	const struct sock *sk2;
	const struct hlist_node *node;

	/* We must walk the whole port owner list in this case. -DaveM */
	sk_for_each_bound(sk2, node, &tb->owners) {
		if (sk != sk2 &&
		    (!sk->sk_bound_dev_if ||
		     !sk2->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
		    (!sk->sk_reuse || !sk2->sk_reuse ||
		     sk2->sk_state == TCP_LISTEN) &&
		    ipv6_rcv_saddr_equal(sk, sk2))
			break;
	}

	return node != NULL;
}

/* Grrr, addr_type already calculated by caller, but I don't want
 * to add some silly "cookie" argument to this method just for that.
 * But it doesn't matter, the recalculation is in the rarest path
 * this function ever takes.
 */
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	struct hlist_node *node;
	int ret;

	local_bh_disable();
	if (snum == 0) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int remaining = (high - low) + 1;
		int rover;

		spin_lock(&tcp_hashinfo.portalloc_lock);
		if (tcp_hashinfo.port_rover < low)
			rover = low;
		else
			rover = tcp_hashinfo.port_rover;
		do {	rover++;
			if (rover > high)
				rover = low;
			head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
			spin_lock(&head->lock);
			inet_bind_bucket_for_each(tb, node, &head->chain)
				if (tb->port == rover)
					goto next;
			break;
		next:
			spin_unlock(&head->lock);
		} while (--remaining > 0);
		tcp_hashinfo.port_rover = rover;
		spin_unlock(&tcp_hashinfo.portalloc_lock);

		/* Exhausted local port range during search?  It is not
		 * possible for us to be holding one of the bind hash
		 * locks if this test triggers, because if 'remaining'
		 * drops to zero, we broke out of the do/while loop at
		 * the top level, not from the 'break;' statement.
		 */
		ret = 1;
		if (unlikely(remaining <= 0))
			goto fail;

		/* OK, here is the one we will use. */
		snum = rover;
	} else {
		head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
		spin_lock(&head->lock);
		inet_bind_bucket_for_each(tb, node, &head->chain)
			if (tb->port == snum)
				goto tb_found;
	}
	tb = NULL;
	goto tb_not_found;
tb_found:
	if (tb && !hlist_empty(&tb->owners)) {
		if (tb->fastreuse > 0 && sk->sk_reuse &&
		    sk->sk_state != TCP_LISTEN) {
			goto success;
		} else {
			ret = 1;
			if (tcp_v6_bind_conflict(sk, tb))
				goto fail_unlock;
		}
	}
tb_not_found:
	ret = 1;
	if (tb == NULL) {
		tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
		if (tb == NULL)
			goto fail_unlock;
	}
	if (hlist_empty(&tb->owners)) {
		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
			tb->fastreuse = 1;
		else
			tb->fastreuse = 0;
	} else if (tb->fastreuse &&
		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
		tb->fastreuse = 0;

success:
	if (!inet_sk(sk)->bind_hash)
		inet_bind_hash(sk, tb, snum);
	BUG_TRAP(inet_sk(sk)->bind_hash == tb);
	ret = 0;

fail_unlock:
	spin_unlock(&head->lock);
fail:
	local_bh_enable();
	return ret;
}

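/*
 * Insert a socket into the right hash table: listeners go into
 * listening_hash under lhash_lock, established sockets into their
 * ehash chain under that bucket's lock.
 */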
static __inline__ void __tcp_v6_hash(struct sock *sk)
{
	struct hlist_head *list;
	rwlock_t *lock;

	BUG_TRAP(sk_unhashed(sk));

	if (sk->sk_state == TCP_LISTEN) {
		list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
		lock = &tcp_hashinfo.lhash_lock;
		inet_listen_wlock(&tcp_hashinfo);
	} else {
		sk->sk_hashent = tcp_v6_sk_hashfn(sk);
		list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
		lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
		write_lock(lock);
	}

	__sk_add_node(sk, list);
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(lock);
}


static void tcp_v6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (tp->af_specific == &ipv6_mapped) {
			tcp_prot.hash(sk);
			return;
		}
		local_bh_disable();
		__tcp_v6_hash(sk);
		local_bh_enable();
	}
}

static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
{
	struct sock *sk;
	struct hlist_node *node;
	struct sock *result = NULL;
	int score, hiscore;

	hiscore = 0;
	read_lock(&tcp_hashinfo.lhash_lock);
	sk_for_each(sk, node, &tcp_hashinfo.listening_hash[inet_lhashfn(hnum)]) {
		if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			score = 1;
			if (!ipv6_addr_any(&np->rcv_saddr)) {
				if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
					continue;
				score++;
			}
			if (sk->sk_bound_dev_if) {
				if (sk->sk_bound_dev_if != dif)
					continue;
				score++;
			}
			if (score == 3) {
				result = sk;
				break;
			}
			if (score > hiscore) {
				hiscore = score;
				result = sk;
			}
		}
	}
	if (result)
		sock_hold(result);
	read_unlock(&tcp_hashinfo.lhash_lock);
	return result;
}

/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
 *
 * The sockhash lock must be held as a reader here.
 */

static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
						       struct in6_addr *daddr, u16 hnum,
						       int dif)
{
	struct sock *sk;
	const struct hlist_node *node;
	const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyways.
	 */
	const int hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
	struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];

	read_lock(&head->lock);
	sk_for_each(sk, node, &head->chain) {
		/* For IPV6 do the cheaper port and family tests first. */
		if (INET6_MATCH(sk, saddr, daddr, ports, dif))
			goto hit; /* You sunk my battleship! */
	}
	/* Must check for a TIME_WAIT'er before going to listener hash. */
	sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) {
		const struct inet_timewait_sock *tw = inet_twsk(sk);

		if (*((__u32 *)&(tw->tw_dport)) == ports &&
		    sk->sk_family == PF_INET6) {
			const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);

			if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
			    ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
			    (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
				goto hit;
		}
	}
	read_unlock(&head->lock);
	return NULL;

hit:
	sock_hold(sk);
	read_unlock(&head->lock);
	return sk;
}


static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
					   struct in6_addr *daddr, u16 hnum,
					   int dif)
{
	struct sock *sk;

	sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);

	if (sk)
		return sk;

	return tcp_v6_lookup_listener(daddr, hnum, dif);
}

inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
				  struct in6_addr *daddr, u16 dport,
				  int dif)
{
	struct sock *sk;

	local_bh_disable();
	sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
	local_bh_enable();

	return sk;
}

EXPORT_SYMBOL_GPL(tcp_v6_lookup);


/*
 * Open request hash tables.
 */

static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
{
	u32 a, b, c;

	a = raddr->s6_addr32[0];
	b = raddr->s6_addr32[1];
	c = raddr->s6_addr32[2];

	a += JHASH_GOLDEN_RATIO;
	b += JHASH_GOLDEN_RATIO;
	c += rnd;
	__jhash_mix(a, b, c);

	a += raddr->s6_addr32[3];
	b += (u32) rport;
	__jhash_mix(a, b, c);

	return c & (TCP_SYNQ_HSIZE - 1);
}

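/*
 * Walk the listener's SYN table chain for a pending request matching
 * the 4-tuple (and the inbound interface, when one is recorded);
 * *prevp returns the predecessor link so the caller can unlink it.
 */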
static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp,
					      struct request_sock ***prevp,
					      __u16 rport,
					      struct in6_addr *raddr,
					      struct in6_addr *laddr,
					      int iif)
{
	struct listen_sock *lopt = tp->accept_queue.listen_opt;
	struct request_sock *req, **prev;

	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
	     (req = *prev) != NULL;
	     prev = &req->dl_next) {
		const struct tcp6_request_sock *treq = tcp6_rsk(req);

		if (inet_rsk(req)->rmt_port == rport &&
		    req->rsk_ops->family == AF_INET6 &&
		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
		    (!treq->iif || treq->iif == iif)) {
			BUG_TRAP(req->sk == NULL);
			*prevp = prev;
			return req;
		}
	}

	return NULL;
}

static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
				   struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   unsigned long base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}

static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IPV6)) {
		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
						    skb->nh.ipv6h->saddr.s6_addr32,
						    skb->h.th->dest,
						    skb->h.th->source);
	} else {
		return secure_tcp_sequence_number(skb->nh.iph->daddr,
						  skb->nh.iph->saddr,
						  skb->h.th->dest,
						  skb->h.th->source);
	}
}

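/*
 * Check that binding local port lport keeps this socket's 4-tuple
 * unique, recycling a matching TIME-WAIT slot when timestamps permit;
 * on success the socket is inserted into its ehash bucket.
 */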
static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
				      struct inet_timewait_sock **twp)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *daddr = &np->rcv_saddr;
	struct in6_addr *saddr = &np->daddr;
	int dif = sk->sk_bound_dev_if;
	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
	const int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
	struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
	struct sock *sk2;
	const struct hlist_node *node;
	struct inet_timewait_sock *tw;

	write_lock(&head->lock);

	/* Check TIME-WAIT sockets first. */
	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
		const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);

		tw = inet_twsk(sk2);

		if (*((__u32 *)&(tw->tw_dport)) == ports &&
		    sk2->sk_family == PF_INET6 &&
		    ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
		    ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
		    sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
			const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
			struct tcp_sock *tp = tcp_sk(sk);

			if (tcptw->tw_ts_recent_stamp &&
			    (!twp ||
			     (sysctl_tcp_tw_reuse &&
			      xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
				/* See comment in tcp_ipv4.c */
				tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
				if (!tp->write_seq)
					tp->write_seq = 1;
				tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
				tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
				sock_hold(sk2);
				goto unique;
			} else
				goto not_unique;
		}
	}
	tw = NULL;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if (INET6_MATCH(sk2, saddr, daddr, ports, dif))
			goto not_unique;
	}

unique:
	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sk->sk_hashent = hash;
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(&head->lock);

	if (twp) {
		*twp = tw;
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		tcp_tw_deschedule(tw);
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

		inet_twsk_put(tw);
	}
	return 0;

not_unique:
	write_unlock(&head->lock);
	return -EADDRNOTAVAIL;
}

static inline u32 tcpv6_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);

	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
					   np->daddr.s6_addr32,
					   inet->dport);
}

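/*
 * Allocate a local port for an outgoing connection and hash the socket.
 * The ephemeral search starts at a per-destination secure offset and
 * skips buckets with fastreuse >= 0, which belong to explicitly bound
 * sockets rather than connect()-allocated ones.
 */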
static int tcp_v6_hash_connect(struct sock *sk)
{
	unsigned short snum = inet_sk(sk)->num;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret;

	if (!snum) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int range = high - low;
		int i;
		int port;
		static u32 hint;
		u32 offset = hint + tcpv6_port_offset(sk);
		struct hlist_node *node;
		struct inet_timewait_sock *tw = NULL;

		local_bh_disable();
		for (i = 1; i <= range; i++) {
			port = low + (i + offset) % range;
			head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			inet_bind_bucket_for_each(tb, node, &head->chain) {
				if (tb->port == port) {
					BUG_TRAP(!hlist_empty(&tb->owners));
					if (tb->fastreuse >= 0)
						goto next_port;
					if (!__tcp_v6_check_established(sk,
									port,
									&tw))
						goto ok;
					goto next_port;
				}
			}

			tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				break;
			}
			tb->fastreuse = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		}
		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		hint += i;

		/* Head lock still held and bh's disabled */
		inet_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->sport = htons(port);
			__tcp_v6_hash(sk);
		}
		spin_unlock(&head->lock);

		if (tw) {
			tcp_tw_deschedule(tw);
			inet_twsk_put(tw);
		}

		ret = 0;
		goto out;
	}

	head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
	tb = inet_sk(sk)->bind_hash;
	spin_lock_bh(&head->lock);

	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		__tcp_v6_hash(sk);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = __tcp_v6_check_established(sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}

static __inline__ int tcp_v6_iif(struct sk_buff *skb)
{
	return IP6CB(skb)->iif;
}

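/*
 * Active open.  A v4-mapped destination is handed to tcp_v4_connect()
 * with the af_specific ops switched to ipv6_mapped; native IPv6
 * destinations go through flow lookup, port hashing and tcp_connect().
 */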
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p = NULL, final;
	struct flowi fl;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl, 0, sizeof(fl));

	if (np->sndflow) {
		fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl.fl6_flowlabel);
		if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = tp->ext_header_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		tp->af_specific = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			tp->ext_header_len = exthdrlen;
			tp->af_specific = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
			goto failure;
		} else {
			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
				      inet->saddr);
			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
				      inet->rcv_saddr);
		}

		return err;
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src,
		       (saddr ? saddr : &np->saddr));
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_dport = usin->sin6_port;
	fl.fl_ip_sport = inet->sport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto failure;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);

	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
		dst_release(dst);
		goto failure;
	}

	if (saddr == NULL) {
		saddr = &fl.fl6_src;
		ipv6_addr_copy(&np->rcv_saddr, saddr);
	}

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->rcv_saddr = LOOPBACK4_IPV6;

	ip6_dst_store(sk, dst, NULL);
	sk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	tp->ext_header_len = 0;
	if (np->opt)
		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = tcp_v6_hash_connect(sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->sport,
							     inet->dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	__sk_dst_reset(sk);
failure:
	inet->dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

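/*
 * ICMPv6 error handler.  PKT_TOOBIG resynchronizes the cached path MTU
 * and triggers a simple retransmit; other errors are converted and
 * reported to the socket, or matched against a pending request when
 * the socket is still listening.
 */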
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       int type, int code, int offset, __u32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data;
	struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;

	sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}

		} else
			dst_hold(dst);

		if (tp->pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for a request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
					&hdr->saddr, tcp_v6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		tcp_synq_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

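/*
 * Send a SYN-ACK for a queued connection request, honouring any source
 * route carried in the request's saved packet options.
 */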
static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
			      struct dst_entry *dst)
{
	struct tcp6_request_sock *treq = tcp6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr *final_p = NULL, final;
	struct flowi fl;
	int err = -1;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
	fl.fl6_flowlabel = 0;
	fl.oif = treq->iif;
	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
	fl.fl_ip_sport = inet_sk(sk)->sport;

	if (dst == NULL) {
		opt = np->opt;
		if (opt == NULL &&
		    np->rxopt.bits.srcrt == 2 &&
		    treq->pktopts) {
			struct sk_buff *pktopts = treq->pktopts;
			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
			if (rxopt->srcrt)
				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(pktopts->nh.raw + rxopt->srcrt));
		}

		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err)
			goto done;
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);
		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto done;
	}

	skb = tcp_make_synack(sk, dst, req);
	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v6_check(th, skb->len,
					 &treq->loc_addr, &treq->rmt_addr,
					 csum_partial((char *)th, skb->len, skb->csum));

		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		if (err == NET_XMIT_CN)
			err = 0;
	}

done:
	dst_release(dst);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	if (tcp6_rsk(req)->pktopts)
		kfree_skb(tcp6_rsk(req)->pktopts);
}

static struct request_sock_ops tcp6_request_sock_ops = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_v6_send_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset
};

static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet6_skb_parm *opt = IP6CB(skb);

	if (np->rxopt.all) {
		if ((opt->hop && np->rxopt.bits.hopopts) ||
		    ((IPV6_FLOWINFO_MASK & *(u32 *)skb->nh.raw) &&
		     np->rxopt.bits.rxflow) ||
		    (opt->srcrt && np->rxopt.bits.srcrt) ||
		    ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
			return 1;
	}
	return 0;
}

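/*
 * Fill in the TCP checksum over the IPv6 pseudo-header.  With hardware
 * offload (CHECKSUM_HW) only the pseudo-header complement is computed
 * and the device completes the sum at the offset stored in skb->csum.
 */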
static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
			      struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
					    csum_partial((char *)th, th->doff << 2,
							 skb->csum));
	}
}


static void tcp_v6_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;

	if (th->rst)
		return;

	if (!ipv6_unicast_destination(skb))
		return;

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

	t1 = (struct tcphdr *) skb_push(buff, sizeof(struct tcphdr));

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = sizeof(*t1)/4;
	t1->rst = 1;

	if (th->ack) {
		t1->seq = th->ack_seq;
	} else {
		t1->ack = 1;
		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
				    + skb->len - (th->doff << 2));
	}

	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    sizeof(*t1), IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = tcp_v6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	/* sk = NULL, but it is safe for now. RST socket required. */
	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {

		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
			dst_release(buff->dst);
			return;
		}

		ip6_xmit(NULL, buff, &fl, NULL, 0);
		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
		TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;
	int tot_len = sizeof(struct tcphdr);

	if (ts)
		tot_len += 3*4;

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff, tot_len);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len/4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = 1;
	t1->window = htons(win);

	if (ts) {
		u32 *ptr = (u32 *)(t1 + 1);
		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*ptr++ = htonl(tcp_time_stamp);
		*ptr = htonl(ts);
	}

	buff->csum = csum_partial((char *)t1, tot_len, 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    tot_len, IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = tcp_v6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
			dst_release(buff->dst);
			return;
		}
		ip6_xmit(NULL, buff, &fl, NULL, 0);
		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
{
	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
}


static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct request_sock *req, **prev;
	struct tcphdr *th = skb->h.th;
	struct tcp_sock *tp = tcp_sk(sk);
	struct sock *nsk;

	/* Find possible connection requests. */
	req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
				&skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
					  th->source,
					  &skb->nh.ipv6h->daddr,
					  ntohs(th->dest),
					  tcp_v6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put((struct inet_timewait_sock *)nsk);
		return NULL;
	}

#if 0 /*def CONFIG_SYN_COOKIES*/
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct listen_sock *lopt = tp->accept_queue.listen_opt;
	u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);

	reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
	tcp_synq_added(sk);
}


/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock *req = NULL;
	__u32 isn = TCP_SKB_CB(skb)->when;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	/*
	 *	There are no SYN attacks on IPv6, yet...
	 */
	if (tcp_synq_is_full(sk) && !isn) {
		if (net_ratelimit())
			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
		goto drop;
	}

	if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
		goto drop;

	req = reqsk_alloc(&tcp6_request_sock_ops);
	if (req == NULL)
		goto drop;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = tcp6_rsk(req);
	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
	TCP_ECN_create_request(req, skb->h.th);
	treq->pktopts = NULL;
	if (ipv6_opt_accepted(sk, skb) ||
	    np->rxopt.bits.rxinfo ||
	    np->rxopt.bits.rxhlim) {
		atomic_inc(&skb->users);
		treq->pktopts = skb;
	}
	treq->iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		treq->iif = tcp_v6_iif(skb);

	if (isn == 0)
		isn = tcp_v6_init_sequence(sk, skb);

	tcp_rsk(req)->snt_isn = isn;

	if (tcp_v6_send_synack(sk, req, NULL))
		goto drop;

	tcp_v6_synq_add(sk, req);

	return 0;

drop:
	if (req)
		reqsk_free(req);

	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	return 0; /* don't send reset */
}

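/*
 * Create the child socket once the handshake completes.  A v6-mapped
 * (IPv4) request is delegated to tcp_v4_syn_recv_sock() and then given
 * mapped addresses and the ipv6_mapped ops; native IPv6 requests get a
 * route, a cloned option set and an ehash entry of their own.
 */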
static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst)
{
	struct tcp6_request_sock *treq = tcp6_rsk(req);
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
	struct ipv6_txoptions *opt;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		if (newsk == NULL)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
			      newinet->daddr);

		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
			      newinet->saddr);

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		newtp->af_specific = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = tcp_v6_iif(skb);
		newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is a tricky place. Until this moment IPv4 tcp
		   worked with IPv6 af_tcp.af_specific.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, newtp->pmtu_cookie);

		return newsk;
	}

	opt = np->opt;

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (np->rxopt.bits.srcrt == 2 &&
	    opt == NULL && treq->pktopts) {
		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
		if (rxopt->srcrt)
			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
	}

	if (dst == NULL) {
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}
		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
		fl.fl_ip_sport = inet_sk(sk)->sport;

		if (ip6_dst_lookup(sk, &dst, &fl))
			goto out;

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	ip6_dst_store(newsk, dst, NULL);
	newsk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
	newsk->sk_bound_dev_if = treq->iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->opt = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
		kfree_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	if (opt) {
		newnp->opt = ipv6_dup_options(newsk, opt);
		if (opt != np->opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	newtp->ext_header_len = 0;
	if (newnp->opt)
		newtp->ext_header_len = newnp->opt->opt_nflen +
					newnp->opt->opt_flen;

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

	__tcp_v6_hash(newsk);
	inet_inherit_port(&tcp_hashinfo, sk, newsk);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return NULL;
}

static int tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_HW) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		if (!tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr, skb->csum))
			return 0;
		LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
	}
	if (skb->len <= 76) {
		if (tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				 &skb->nh.ipv6h->daddr, skb_checksum(skb, 0, skb->len, 0)))
			return -1;
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else {
		skb->csum = ~tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
					  &skb->nh.ipv6h->daddr, 0);
	}
	return 0;
}

/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	if (sk_filter(sk, skb, 0))
		goto discard;

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
	                                       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
			goto reset;
		TCP_CHECK_TIMER(sk);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket..
		 */
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
		goto reset;
	TCP_CHECK_TIMER(sk);
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim)
			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
		if (ipv6_opt_accepted(sk, opt_skb)) {
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	if (opt_skb)
		kfree_skb(opt_skb);
	return 0;
}

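/*
 * Main IPv6 receive path: validate and checksum the segment, look up
 * the owning socket (established, TIME-WAIT or listener) and feed the
 * skb to tcp_v6_do_rcv(), the prequeue or the socket backlog.
 */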
static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
{
	struct sk_buff *skb = *pskb;
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	if (th->doff < sizeof(struct tcphdr)/4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v6_checksum_init(skb) < 0))
		goto bad_packet;

	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
			     &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb, 0))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v6_do_rcv(sk, skb);
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(skb);
	}

discard_it:

	/*
	 *	Discard frame
	 */

	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		inet_twsk_put((struct inet_timewait_sock *)sk);
		goto discard_it;
	}

	switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
					   skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
		if (sk2 != NULL) {
			tcp_tw_deschedule((struct inet_timewait_sock *)sk);
			inet_twsk_put((struct inet_timewait_sock *)sk);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

static int tcp_v6_rebuild_header(struct sock *sk)
{
	int err;
	struct dst_entry *dst;
	struct ipv6_pinfo *np = inet6_sk(sk);

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		struct inet_sock *inet = inet_sk(sk);
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
		fl.fl6_flowlabel = np->flow_label;
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet->dport;
		fl.fl_ip_sport = inet->sport;

		if (np->opt && np->opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err) {
			sk->sk_route_caps = 0;
			return err;
		}
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_err_soft = -err;
			dst_release(dst);
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	return 0;
}

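/*
 * Transmit path: revalidate (or rebuild) the cached route, attach it to
 * the skb and pass the segment to ip6_xmit() together with any IPv6
 * transmit options set on the socket.
 */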
1891static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1892{
1893 struct sock *sk = skb->sk;
1894 struct inet_sock *inet = inet_sk(sk);
1895 struct ipv6_pinfo *np = inet6_sk(sk);
1896 struct flowi fl;
1897 struct dst_entry *dst;
1898 struct in6_addr *final_p = NULL, final;
1899
1900 memset(&fl, 0, sizeof(fl));
1901 fl.proto = IPPROTO_TCP;
1902 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1903 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1904 fl.fl6_flowlabel = np->flow_label;
1905 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1906 fl.oif = sk->sk_bound_dev_if;
1907 fl.fl_ip_sport = inet->sport;
1908 fl.fl_ip_dport = inet->dport;
1909
1910 if (np->opt && np->opt->srcrt) {
1911 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1912 ipv6_addr_copy(&final, &fl.fl6_dst);
1913 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1914 final_p = &final;
1915 }
1916
1917 dst = __sk_dst_check(sk, np->dst_cookie);
1918
1919 if (dst == NULL) {
1920 int err = ip6_dst_lookup(sk, &dst, &fl);
1921
1922 if (err) {
1923 sk->sk_err_soft = -err;
1924 return err;
1925 }
1926
1927 if (final_p)
1928 ipv6_addr_copy(&fl.fl6_dst, final_p);
1929
1930 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1931 sk->sk_route_caps = 0;
1932 dst_release(dst);
1933 return err;
1934 }
1935
1936 ip6_dst_store(sk, dst, NULL);
1937 sk->sk_route_caps = dst->dev->features &
1938 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1939 }
1940
1941 skb->dst = dst_clone(dst);
1942
1943 /* Restore final destination back after routing done */
1944 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1945
1946 return ip6_xmit(sk, skb, &fl, np->opt, 0);
1947}
1948
1949static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1950{
1951 struct ipv6_pinfo *np = inet6_sk(sk);
1952 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1953
1954 sin6->sin6_family = AF_INET6;
1955 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1956 sin6->sin6_port = inet_sk(sk)->dport;
1957 /* We do not store received flowlabel for TCP */
1958 sin6->sin6_flowinfo = 0;
1959 sin6->sin6_scope_id = 0;
1960 if (sk->sk_bound_dev_if &&
1961 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1962 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1963}
1964
1965static int tcp_v6_remember_stamp(struct sock *sk)
1966{
1967 /* Alas, not yet... */
1968 return 0;
1969}
1970
1971static struct tcp_func ipv6_specific = {
1972 .queue_xmit = tcp_v6_xmit,
1973 .send_check = tcp_v6_send_check,
1974 .rebuild_header = tcp_v6_rebuild_header,
1975 .conn_request = tcp_v6_conn_request,
1976 .syn_recv_sock = tcp_v6_syn_recv_sock,
1977 .remember_stamp = tcp_v6_remember_stamp,
1978 .net_header_len = sizeof(struct ipv6hdr),
1979
1980 .setsockopt = ipv6_setsockopt,
1981 .getsockopt = ipv6_getsockopt,
1982 .addr2sockaddr = v6_addr2sockaddr,
1983 .sockaddr_len = sizeof(struct sockaddr_in6)
1984};
1985
1986/*
1987 * TCP over IPv4 via INET6 API
1988 */
1989
1990static struct tcp_func ipv6_mapped = {
1991 .queue_xmit = ip_queue_xmit,
1992 .send_check = tcp_v4_send_check,
1993	.rebuild_header	= inet_sk_rebuild_header,
1994 .conn_request = tcp_v6_conn_request,
1995 .syn_recv_sock = tcp_v6_syn_recv_sock,
1996 .remember_stamp = tcp_v4_remember_stamp,
1997 .net_header_len = sizeof(struct iphdr),
1998
1999 .setsockopt = ipv6_setsockopt,
2000 .getsockopt = ipv6_getsockopt,
2001 .addr2sockaddr = v6_addr2sockaddr,
2002 .sockaddr_len = sizeof(struct sockaddr_in6)
2003};
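/*
 * An AF_INET6 socket that ends up talking to a v4-mapped address
 * (::ffff:a.b.c.d) has its tp->af_specific switched from
 * ipv6_specific to ipv6_mapped, so everything below the TCP layer
 * runs through the IPv4 code (ip_queue_xmit, tcp_v4_send_check, ...)
 * while the socket keeps presenting the IPv6 API, including
 * sockaddr_in6 addresses, to userspace.
 */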
2004
2005
2006
2007/* NOTE: Many fields have already been zeroed by the call to
2008 * sk_alloc(), so they need not be initialized again here.
2009 */
2010static int tcp_v6_init_sock(struct sock *sk)
2011{
2012 struct tcp_sock *tp = tcp_sk(sk);
2013
2014 skb_queue_head_init(&tp->out_of_order_queue);
2015 tcp_init_xmit_timers(sk);
2016 tcp_prequeue_init(tp);
2017
2018 tp->rto = TCP_TIMEOUT_INIT;
2019 tp->mdev = TCP_TIMEOUT_INIT;
2020
2021 /* So many TCP implementations out there (incorrectly) count the
2022 * initial SYN frame in their delayed-ACK and congestion control
2023 * algorithms that we must have the following bandaid to talk
2024 * efficiently to them. -DaveM
2025 */
2026 tp->snd_cwnd = 2;
2027
2028 /* See draft-stevens-tcpca-spec-01 for discussion of the
2029 * initialization of these values.
2030 */
2031 tp->snd_ssthresh = 0x7fffffff;
2032 tp->snd_cwnd_clamp = ~0;
2033	tp->mss_cache = 536;
2034
2035 tp->reordering = sysctl_tcp_reordering;
2036
2037 sk->sk_state = TCP_CLOSE;
2038
2039 tp->af_specific = &ipv6_specific;
2040	tp->ca_ops = &tcp_init_congestion_ops;
2041 sk->sk_write_space = sk_stream_write_space;
2042 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2043
2044 sk->sk_sndbuf = sysctl_tcp_wmem[1];
2045 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
2046
2047 atomic_inc(&tcp_sockets_allocated);
2048
2049 return 0;
2050}
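/*
 * Apart from tp->af_specific, which selects the IPv6 function table,
 * this initialization tracks the IPv4 version: a 2-segment initial
 * congestion window, an effectively unbounded ssthresh and cwnd clamp,
 * and the conservative 536-byte default MSS that is used until the
 * real MSS is learned from the peer.
 */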
2051
2052static int tcp_v6_destroy_sock(struct sock *sk)
2053{
2054 extern int tcp_v4_destroy_sock(struct sock *sk);
2055
2056 tcp_v4_destroy_sock(sk);
2057 return inet6_destroy_sock(sk);
2058}
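/*
 * Teardown is shared with IPv4: tcp_v4_destroy_sock() releases the
 * TCP-level state (queues, timers, congestion state), after which
 * inet6_destroy_sock() drops the IPv6-specific bits such as cached
 * packet options.
 */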
2059
2060/* Proc filesystem TCPv6 sock list dumping. */
2061static void get_openreq6(struct seq_file *seq,
2062			 struct sock *sk, struct request_sock *req, int i, int uid)
2063{
2064 struct in6_addr *dest, *src;
2065 int ttd = req->expires - jiffies;
2066
2067 if (ttd < 0)
2068 ttd = 0;
2069
2070	src = &tcp6_rsk(req)->loc_addr;
2071	dest = &tcp6_rsk(req)->rmt_addr;
2072 seq_printf(seq,
2073 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2074 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2075 i,
2076 src->s6_addr32[0], src->s6_addr32[1],
2077 src->s6_addr32[2], src->s6_addr32[3],
2078 ntohs(inet_sk(sk)->sport),
2079 dest->s6_addr32[0], dest->s6_addr32[1],
2080 dest->s6_addr32[2], dest->s6_addr32[3],
2081		   ntohs(inet_rsk(req)->rmt_port),
2082 TCP_SYN_RECV,
2083 0,0, /* could print option size, but that is af dependent. */
2084 1, /* timers active (only the expire timer) */
2085 jiffies_to_clock_t(ttd),
2086 req->retrans,
2087 uid,
2088 0, /* non standard timer */
2089 0, /* open_requests have no inode */
2090 0, req);
2091}
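/*
 * The output format mirrors the IPv4 get_openreq(): addresses are
 * printed as four 32-bit hex words plus a hex port, the state is
 * hardwired to TCP_SYN_RECV, and the only live timer is the SYN-ACK
 * retransmit/expire timer, whose remaining time (ttd) is clamped at
 * zero so an expired request never prints a negative value.
 */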
2092
2093static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2094{
2095 struct in6_addr *dest, *src;
2096 __u16 destp, srcp;
2097 int timer_active;
2098 unsigned long timer_expires;
2099 struct inet_sock *inet = inet_sk(sp);
2100 struct tcp_sock *tp = tcp_sk(sp);
2101 struct ipv6_pinfo *np = inet6_sk(sp);
2102
2103 dest = &np->daddr;
2104 src = &np->rcv_saddr;
2105 destp = ntohs(inet->dport);
2106 srcp = ntohs(inet->sport);
2107 if (tp->pending == TCP_TIME_RETRANS) {
2108 timer_active = 1;
2109 timer_expires = tp->timeout;
2110 } else if (tp->pending == TCP_TIME_PROBE0) {
2111 timer_active = 4;
2112 timer_expires = tp->timeout;
2113 } else if (timer_pending(&sp->sk_timer)) {
2114 timer_active = 2;
2115 timer_expires = sp->sk_timer.expires;
2116 } else {
2117 timer_active = 0;
2118 timer_expires = jiffies;
2119 }
2120
2121 seq_printf(seq,
2122 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2123 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2124 i,
2125 src->s6_addr32[0], src->s6_addr32[1],
2126 src->s6_addr32[2], src->s6_addr32[3], srcp,
2127 dest->s6_addr32[0], dest->s6_addr32[1],
2128 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2129 sp->sk_state,
2130 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2131 timer_active,
2132 jiffies_to_clock_t(timer_expires - jiffies),
2133 tp->retransmits,
2134 sock_i_uid(sp),
2135 tp->probes_out,
2136 sock_i_ino(sp),
2137 atomic_read(&sp->sk_refcnt), sp,
2138 tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
2139 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
2140 );
2141}
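/*
 * timer_active encodes which timer is running: 1 for retransmit,
 * 4 for zero-window probe, 2 for the keepalive sk_timer and 0 for
 * none.  A slow-start threshold at or above 0xFFFF is reported as -1,
 * i.e. still at its initial, effectively infinite value.
 */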
2142
2143static void get_timewait6_sock(struct seq_file *seq,
2144			       struct inet_timewait_sock *tw, int i)
2145{
2146 struct in6_addr *dest, *src;
2147 __u16 destp, srcp;
2148	struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
2149 int ttd = tw->tw_ttd - jiffies;
2150
2151 if (ttd < 0)
2152 ttd = 0;
2153
2154	dest = &tcp6tw->tw_v6_daddr;
2155	src = &tcp6tw->tw_v6_rcv_saddr;
2156 destp = ntohs(tw->tw_dport);
2157 srcp = ntohs(tw->tw_sport);
2158
2159 seq_printf(seq,
2160 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2161 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2162 i,
2163 src->s6_addr32[0], src->s6_addr32[1],
2164 src->s6_addr32[2], src->s6_addr32[3], srcp,
2165 dest->s6_addr32[0], dest->s6_addr32[1],
2166 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2167 tw->tw_substate, 0, 0,
2168 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2169 atomic_read(&tw->tw_refcnt), tw);
2170}
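/*
 * Time-wait buckets carry no tcp_sock, so most columns print as zero;
 * the constant 3 marks the time-wait timer, and ttd is clamped so a
 * just-expired entry reports 0 rather than a negative time.
 */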
2171
2172#ifdef CONFIG_PROC_FS
2173static int tcp6_seq_show(struct seq_file *seq, void *v)
2174{
2175 struct tcp_iter_state *st;
2176
2177 if (v == SEQ_START_TOKEN) {
2178 seq_puts(seq,
2179 " sl "
2180 "local_address "
2181 "remote_address "
2182 "st tx_queue rx_queue tr tm->when retrnsmt"
2183 " uid timeout inode\n");
2184 goto out;
2185 }
2186 st = seq->private;
2187
2188 switch (st->state) {
2189 case TCP_SEQ_STATE_LISTENING:
2190 case TCP_SEQ_STATE_ESTABLISHED:
2191 get_tcp6_sock(seq, v, st->num);
2192 break;
2193 case TCP_SEQ_STATE_OPENREQ:
2194 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2195 break;
2196 case TCP_SEQ_STATE_TIME_WAIT:
2197 get_timewait6_sock(seq, v, st->num);
2198 break;
2199 }
2200out:
2201 return 0;
2202}
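/*
 * tcp6_seq_show() is the per-entry callback of the generic TCP
 * seq_file iterator: the iterator state says whether the current
 * entry is a full socket, a pending open request or a time-wait
 * bucket, and the entry is formatted by the matching helper above.
 */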
2203
2204static struct file_operations tcp6_seq_fops;
2205static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2206 .owner = THIS_MODULE,
2207 .name = "tcp6",
2208 .family = AF_INET6,
2209 .seq_show = tcp6_seq_show,
2210 .seq_fops = &tcp6_seq_fops,
2211};
2212
2213int __init tcp6_proc_init(void)
2214{
2215 return tcp_proc_register(&tcp6_seq_afinfo);
2216}
2217
2218void tcp6_proc_exit(void)
2219{
2220 tcp_proc_unregister(&tcp6_seq_afinfo);
2221}
2222#endif
2223
2224struct proto tcpv6_prot = {
2225 .name = "TCPv6",
2226 .owner = THIS_MODULE,
2227 .close = tcp_close,
2228 .connect = tcp_v6_connect,
2229 .disconnect = tcp_disconnect,
2230 .accept = tcp_accept,
2231 .ioctl = tcp_ioctl,
2232 .init = tcp_v6_init_sock,
2233 .destroy = tcp_v6_destroy_sock,
2234 .shutdown = tcp_shutdown,
2235 .setsockopt = tcp_setsockopt,
2236 .getsockopt = tcp_getsockopt,
2237 .sendmsg = tcp_sendmsg,
2238 .recvmsg = tcp_recvmsg,
2239 .backlog_rcv = tcp_v6_do_rcv,
2240 .hash = tcp_v6_hash,
2241 .unhash = tcp_unhash,
2242 .get_port = tcp_v6_get_port,
2243 .enter_memory_pressure = tcp_enter_memory_pressure,
2244 .sockets_allocated = &tcp_sockets_allocated,
2245 .memory_allocated = &tcp_memory_allocated,
2246 .memory_pressure = &tcp_memory_pressure,
2247 .sysctl_mem = sysctl_tcp_mem,
2248 .sysctl_wmem = sysctl_tcp_wmem,
2249 .sysctl_rmem = sysctl_tcp_rmem,
2250 .max_header = MAX_TCP_HEADER,
2251 .obj_size = sizeof(struct tcp6_sock),
2252	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
2253	.rsk_prot	= &tcp6_request_sock_ops,
2254};
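/*
 * tcpv6_prot is mostly the shared TCP implementation; only hashing,
 * port allocation and socket init/teardown are IPv6-flavoured.  The
 * obj_size, twsk_obj_size and rsk_prot fields tell the generic socket
 * code how large full, time-wait and request sockets are for this
 * protocol, so it can allocate them without knowing their types.
 */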
2255
2256static struct inet6_protocol tcpv6_protocol = {
2257 .handler = tcp_v6_rcv,
2258 .err_handler = tcp_v6_err,
2259 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2260};
2261
2262extern struct proto_ops inet6_stream_ops;
2263
2264static struct inet_protosw tcpv6_protosw = {
2265 .type = SOCK_STREAM,
2266 .protocol = IPPROTO_TCP,
2267 .prot = &tcpv6_prot,
2268 .ops = &inet6_stream_ops,
2269 .capability = -1,
2270 .no_check = 0,
2271 .flags = INET_PROTOSW_PERMANENT,
2272};
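/*
 * The protosw entry maps socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP)
 * onto tcpv6_prot.  A capability of -1 means no special privilege is
 * required to create such a socket, and INET_PROTOSW_PERMANENT keeps
 * the entry from ever being unregistered.
 */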
2273
2274void __init tcpv6_init(void)
2275{
2276 /* register inet6 protocol */
2277 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2278 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2279 inet6_register_protosw(&tcpv6_protosw);
2280}
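/*
 * Called once from the inet6 initialization at boot; a registration
 * failure is only logged, as there is no sensible way to back out
 * here.  Once both registrations are done, an ordinary userspace call
 * such as
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *
 * resolves through tcpv6_protosw to tcpv6_prot above.
 */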