[NETFILTER]: Fix gcc-3.4.x warning about implicit operator precedence
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / tcp_ipv6.c
CommitLineData
1da177e4
LT
1/*
2 * TCP over IPv6
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
9 *
10 * Based on:
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
14 *
15 * Fixes:
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 */
27
28#include <linux/module.h>
29#include <linux/config.h>
30#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/jiffies.h>
36#include <linux/in.h>
37#include <linux/in6.h>
38#include <linux/netdevice.h>
39#include <linux/init.h>
40#include <linux/jhash.h>
41#include <linux/ipsec.h>
42#include <linux/times.h>
43
44#include <linux/ipv6.h>
45#include <linux/icmpv6.h>
46#include <linux/random.h>
47
48#include <net/tcp.h>
49#include <net/ndisc.h>
50#include <net/ipv6.h>
51#include <net/transp_v6.h>
52#include <net/addrconf.h>
53#include <net/ip6_route.h>
54#include <net/ip6_checksum.h>
55#include <net/inet_ecn.h>
56#include <net/protocol.h>
57#include <net/xfrm.h>
58#include <net/addrconf.h>
59#include <net/snmp.h>
60#include <net/dsfield.h>
61
62#include <asm/uaccess.h>
63
64#include <linux/proc_fs.h>
65#include <linux/seq_file.h>
66
67static void tcp_v6_send_reset(struct sk_buff *skb);
60236fdd 68static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
1da177e4
LT
69static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
70 struct sk_buff *skb);
71
72static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
73static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
74
75static struct tcp_func ipv6_mapped;
76static struct tcp_func ipv6_specific;
77
78/* I have no idea if this is a good hash for v6 or not. -DaveM */
79static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
80 struct in6_addr *faddr, u16 fport)
81{
82 int hashent = (lport ^ fport);
83
84 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
85 hashent ^= hashent>>16;
86 hashent ^= hashent>>8;
6e04e021 87 return (hashent & (tcp_hashinfo.ehash_size - 1));
1da177e4
LT
88}
89
90static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
91{
92 struct inet_sock *inet = inet_sk(sk);
93 struct ipv6_pinfo *np = inet6_sk(sk);
94 struct in6_addr *laddr = &np->rcv_saddr;
95 struct in6_addr *faddr = &np->daddr;
96 __u16 lport = inet->num;
97 __u16 fport = inet->dport;
98 return tcp_v6_hashfn(laddr, lport, faddr, fport);
99}
100
0f7ff927
ACM
101static inline int tcp_v6_bind_conflict(const struct sock *sk,
102 const struct inet_bind_bucket *tb)
1da177e4 103{
0f7ff927
ACM
104 const struct sock *sk2;
105 const struct hlist_node *node;
1da177e4
LT
106
107 /* We must walk the whole port owner list in this case. -DaveM */
108 sk_for_each_bound(sk2, node, &tb->owners) {
109 if (sk != sk2 &&
110 (!sk->sk_bound_dev_if ||
111 !sk2->sk_bound_dev_if ||
112 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
113 (!sk->sk_reuse || !sk2->sk_reuse ||
114 sk2->sk_state == TCP_LISTEN) &&
115 ipv6_rcv_saddr_equal(sk, sk2))
116 break;
117 }
118
119 return node != NULL;
120}
121
122/* Grrr, addr_type already calculated by caller, but I don't want
123 * to add some silly "cookie" argument to this method just for that.
124 * But it doesn't matter, the recalculation is in the rarest path
125 * this function ever takes.
126 */
127static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
128{
0f7ff927
ACM
129 struct inet_bind_hashbucket *head;
130 struct inet_bind_bucket *tb;
1da177e4
LT
131 struct hlist_node *node;
132 int ret;
133
134 local_bh_disable();
135 if (snum == 0) {
136 int low = sysctl_local_port_range[0];
137 int high = sysctl_local_port_range[1];
138 int remaining = (high - low) + 1;
139 int rover;
140
6e04e021
ACM
141 spin_lock(&tcp_hashinfo.portalloc_lock);
142 if (tcp_hashinfo.port_rover < low)
c3924c70
FH
143 rover = low;
144 else
6e04e021 145 rover = tcp_hashinfo.port_rover;
1da177e4 146 do { rover++;
c3924c70 147 if (rover > high)
1da177e4 148 rover = low;
6e04e021 149 head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
1da177e4 150 spin_lock(&head->lock);
0f7ff927 151 inet_bind_bucket_for_each(tb, node, &head->chain)
1da177e4
LT
152 if (tb->port == rover)
153 goto next;
154 break;
155 next:
156 spin_unlock(&head->lock);
157 } while (--remaining > 0);
6e04e021
ACM
158 tcp_hashinfo.port_rover = rover;
159 spin_unlock(&tcp_hashinfo.portalloc_lock);
1da177e4 160
d5d28375
DM
161 /* Exhausted local port range during search? It is not
162 * possible for us to be holding one of the bind hash
163 * locks if this test triggers, because if 'remaining'
164 * drops to zero, we broke out of the do/while loop at
165 * the top level, not from the 'break;' statement.
166 */
1da177e4 167 ret = 1;
d5d28375 168 if (unlikely(remaining <= 0))
1da177e4
LT
169 goto fail;
170
171 /* OK, here is the one we will use. */
172 snum = rover;
173 } else {
6e04e021 174 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
1da177e4 175 spin_lock(&head->lock);
0f7ff927 176 inet_bind_bucket_for_each(tb, node, &head->chain)
1da177e4
LT
177 if (tb->port == snum)
178 goto tb_found;
179 }
180 tb = NULL;
181 goto tb_not_found;
182tb_found:
183 if (tb && !hlist_empty(&tb->owners)) {
184 if (tb->fastreuse > 0 && sk->sk_reuse &&
185 sk->sk_state != TCP_LISTEN) {
186 goto success;
187 } else {
188 ret = 1;
189 if (tcp_v6_bind_conflict(sk, tb))
190 goto fail_unlock;
191 }
192 }
193tb_not_found:
194 ret = 1;
6e04e021
ACM
195 if (tb == NULL) {
196 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
197 if (tb == NULL)
198 goto fail_unlock;
199 }
1da177e4
LT
200 if (hlist_empty(&tb->owners)) {
201 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
202 tb->fastreuse = 1;
203 else
204 tb->fastreuse = 0;
205 } else if (tb->fastreuse &&
206 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
207 tb->fastreuse = 0;
208
209success:
463c84b9 210 if (!inet_csk(sk)->icsk_bind_hash)
2d8c4ce5 211 inet_bind_hash(sk, tb, snum);
463c84b9 212 BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
1da177e4
LT
213 ret = 0;
214
215fail_unlock:
216 spin_unlock(&head->lock);
217fail:
218 local_bh_enable();
219 return ret;
220}
221
222static __inline__ void __tcp_v6_hash(struct sock *sk)
223{
224 struct hlist_head *list;
225 rwlock_t *lock;
226
227 BUG_TRAP(sk_unhashed(sk));
228
229 if (sk->sk_state == TCP_LISTEN) {
6e04e021
ACM
230 list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
231 lock = &tcp_hashinfo.lhash_lock;
f3f05f70 232 inet_listen_wlock(&tcp_hashinfo);
1da177e4
LT
233 } else {
234 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
6e04e021
ACM
235 list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
236 lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
1da177e4
LT
237 write_lock(lock);
238 }
239
240 __sk_add_node(sk, list);
241 sock_prot_inc_use(sk->sk_prot);
242 write_unlock(lock);
243}
244
245
246static void tcp_v6_hash(struct sock *sk)
247{
248 if (sk->sk_state != TCP_CLOSE) {
249 struct tcp_sock *tp = tcp_sk(sk);
250
251 if (tp->af_specific == &ipv6_mapped) {
252 tcp_prot.hash(sk);
253 return;
254 }
255 local_bh_disable();
256 __tcp_v6_hash(sk);
257 local_bh_enable();
258 }
259}
260
261static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
262{
263 struct sock *sk;
264 struct hlist_node *node;
265 struct sock *result = NULL;
266 int score, hiscore;
267
268 hiscore=0;
6e04e021
ACM
269 read_lock(&tcp_hashinfo.lhash_lock);
270 sk_for_each(sk, node, &tcp_hashinfo.listening_hash[inet_lhashfn(hnum)]) {
1da177e4
LT
271 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
272 struct ipv6_pinfo *np = inet6_sk(sk);
273
274 score = 1;
275 if (!ipv6_addr_any(&np->rcv_saddr)) {
276 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
277 continue;
278 score++;
279 }
280 if (sk->sk_bound_dev_if) {
281 if (sk->sk_bound_dev_if != dif)
282 continue;
283 score++;
284 }
285 if (score == 3) {
286 result = sk;
287 break;
288 }
289 if (score > hiscore) {
290 hiscore = score;
291 result = sk;
292 }
293 }
294 }
295 if (result)
296 sock_hold(result);
6e04e021 297 read_unlock(&tcp_hashinfo.lhash_lock);
1da177e4
LT
298 return result;
299}
300
301/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
302 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
303 *
304 * The sockhash lock must be held as a reader here.
305 */
306
307static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
308 struct in6_addr *daddr, u16 hnum,
309 int dif)
310{
1da177e4 311 struct sock *sk;
8feaf0c0
ACM
312 const struct hlist_node *node;
313 const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
1da177e4
LT
314 /* Optimize here for direct hit, only listening connections can
315 * have wildcards anyways.
316 */
8feaf0c0
ACM
317 const int hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
318 struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
319
1da177e4
LT
320 read_lock(&head->lock);
321 sk_for_each(sk, node, &head->chain) {
322 /* For IPV6 do the cheaper port and family tests first. */
8feaf0c0 323 if (INET6_MATCH(sk, saddr, daddr, ports, dif))
1da177e4
LT
324 goto hit; /* You sunk my battleship! */
325 }
326 /* Must check for a TIME_WAIT'er before going to listener hash. */
6e04e021 327 sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) {
8feaf0c0 328 const struct inet_timewait_sock *tw = inet_twsk(sk);
1da177e4
LT
329
330 if(*((__u32 *)&(tw->tw_dport)) == ports &&
331 sk->sk_family == PF_INET6) {
8feaf0c0
ACM
332 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);
333
334 if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
335 ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
336 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
1da177e4
LT
337 goto hit;
338 }
339 }
340 read_unlock(&head->lock);
341 return NULL;
342
343hit:
344 sock_hold(sk);
345 read_unlock(&head->lock);
346 return sk;
347}
348
349
350static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
351 struct in6_addr *daddr, u16 hnum,
352 int dif)
353{
354 struct sock *sk;
355
356 sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
357
358 if (sk)
359 return sk;
360
361 return tcp_v6_lookup_listener(daddr, hnum, dif);
362}
363
364inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
365 struct in6_addr *daddr, u16 dport,
366 int dif)
367{
368 struct sock *sk;
369
370 local_bh_disable();
371 sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
372 local_bh_enable();
373
374 return sk;
375}
376
377EXPORT_SYMBOL_GPL(tcp_v6_lookup);
378
379
380/*
381 * Open request hash tables.
382 */
383
463c84b9 384static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
1da177e4
LT
385{
386 u32 a, b, c;
387
388 a = raddr->s6_addr32[0];
389 b = raddr->s6_addr32[1];
390 c = raddr->s6_addr32[2];
391
392 a += JHASH_GOLDEN_RATIO;
393 b += JHASH_GOLDEN_RATIO;
394 c += rnd;
395 __jhash_mix(a, b, c);
396
397 a += raddr->s6_addr32[3];
398 b += (u32) rport;
399 __jhash_mix(a, b, c);
400
401 return c & (TCP_SYNQ_HSIZE - 1);
402}
403
463c84b9 404static struct request_sock *tcp_v6_search_req(const struct sock *sk,
60236fdd 405 struct request_sock ***prevp,
1da177e4
LT
406 __u16 rport,
407 struct in6_addr *raddr,
408 struct in6_addr *laddr,
409 int iif)
410{
463c84b9
ACM
411 const struct inet_connection_sock *icsk = inet_csk(sk);
412 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
60236fdd 413 struct request_sock *req, **prev;
1da177e4
LT
414
415 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
416 (req = *prev) != NULL;
417 prev = &req->dl_next) {
2e6599cb
ACM
418 const struct tcp6_request_sock *treq = tcp6_rsk(req);
419
420 if (inet_rsk(req)->rmt_port == rport &&
60236fdd 421 req->rsk_ops->family == AF_INET6 &&
2e6599cb
ACM
422 ipv6_addr_equal(&treq->rmt_addr, raddr) &&
423 ipv6_addr_equal(&treq->loc_addr, laddr) &&
424 (!treq->iif || treq->iif == iif)) {
1da177e4
LT
425 BUG_TRAP(req->sk == NULL);
426 *prevp = prev;
427 return req;
428 }
429 }
430
431 return NULL;
432}
433
434static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
435 struct in6_addr *saddr,
436 struct in6_addr *daddr,
437 unsigned long base)
438{
439 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
440}
441
442static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
443{
444 if (skb->protocol == htons(ETH_P_IPV6)) {
445 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
446 skb->nh.ipv6h->saddr.s6_addr32,
447 skb->h.th->dest,
448 skb->h.th->source);
449 } else {
450 return secure_tcp_sequence_number(skb->nh.iph->daddr,
451 skb->nh.iph->saddr,
452 skb->h.th->dest,
453 skb->h.th->source);
454 }
455}
456
457static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
8feaf0c0 458 struct inet_timewait_sock **twp)
1da177e4
LT
459{
460 struct inet_sock *inet = inet_sk(sk);
461 struct ipv6_pinfo *np = inet6_sk(sk);
462 struct in6_addr *daddr = &np->rcv_saddr;
463 struct in6_addr *saddr = &np->daddr;
464 int dif = sk->sk_bound_dev_if;
8feaf0c0
ACM
465 const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
466 const int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
6e04e021 467 struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
1da177e4 468 struct sock *sk2;
8feaf0c0
ACM
469 const struct hlist_node *node;
470 struct inet_timewait_sock *tw;
1da177e4
LT
471
472 write_lock(&head->lock);
473
474 /* Check TIME-WAIT sockets first. */
6e04e021 475 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
8feaf0c0
ACM
476 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
477
478 tw = inet_twsk(sk2);
1da177e4
LT
479
480 if(*((__u32 *)&(tw->tw_dport)) == ports &&
481 sk2->sk_family == PF_INET6 &&
8feaf0c0
ACM
482 ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
483 ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
1da177e4 484 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
8feaf0c0 485 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
1da177e4
LT
486 struct tcp_sock *tp = tcp_sk(sk);
487
8feaf0c0
ACM
488 if (tcptw->tw_ts_recent_stamp &&
489 (!twp ||
490 (sysctl_tcp_tw_reuse &&
491 xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
1da177e4 492 /* See comment in tcp_ipv4.c */
8feaf0c0 493 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
1da177e4
LT
494 if (!tp->write_seq)
495 tp->write_seq = 1;
8feaf0c0
ACM
496 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
497 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
1da177e4
LT
498 sock_hold(sk2);
499 goto unique;
500 } else
501 goto not_unique;
502 }
503 }
504 tw = NULL;
505
506 /* And established part... */
507 sk_for_each(sk2, node, &head->chain) {
8feaf0c0 508 if (INET6_MATCH(sk2, saddr, daddr, ports, dif))
1da177e4
LT
509 goto not_unique;
510 }
511
512unique:
513 BUG_TRAP(sk_unhashed(sk));
514 __sk_add_node(sk, &head->chain);
515 sk->sk_hashent = hash;
516 sock_prot_inc_use(sk->sk_prot);
517 write_unlock(&head->lock);
518
519 if (twp) {
520 *twp = tw;
521 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
522 } else if (tw) {
523 /* Silly. Should hash-dance instead... */
295ff7ed 524 inet_twsk_deschedule(tw, &tcp_death_row);
1da177e4
LT
525 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
526
8feaf0c0 527 inet_twsk_put(tw);
1da177e4
LT
528 }
529 return 0;
530
531not_unique:
532 write_unlock(&head->lock);
533 return -EADDRNOTAVAIL;
534}
535
536static inline u32 tcpv6_port_offset(const struct sock *sk)
537{
538 const struct inet_sock *inet = inet_sk(sk);
539 const struct ipv6_pinfo *np = inet6_sk(sk);
540
541 return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
542 np->daddr.s6_addr32,
543 inet->dport);
544}
545
546static int tcp_v6_hash_connect(struct sock *sk)
547{
548 unsigned short snum = inet_sk(sk)->num;
0f7ff927
ACM
549 struct inet_bind_hashbucket *head;
550 struct inet_bind_bucket *tb;
1da177e4
LT
551 int ret;
552
553 if (!snum) {
554 int low = sysctl_local_port_range[0];
555 int high = sysctl_local_port_range[1];
556 int range = high - low;
557 int i;
558 int port;
559 static u32 hint;
560 u32 offset = hint + tcpv6_port_offset(sk);
561 struct hlist_node *node;
8feaf0c0 562 struct inet_timewait_sock *tw = NULL;
1da177e4
LT
563
564 local_bh_disable();
565 for (i = 1; i <= range; i++) {
566 port = low + (i + offset) % range;
6e04e021 567 head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
1da177e4
LT
568 spin_lock(&head->lock);
569
570 /* Does not bother with rcv_saddr checks,
571 * because the established check is already
572 * unique enough.
573 */
0f7ff927 574 inet_bind_bucket_for_each(tb, node, &head->chain) {
1da177e4
LT
575 if (tb->port == port) {
576 BUG_TRAP(!hlist_empty(&tb->owners));
577 if (tb->fastreuse >= 0)
578 goto next_port;
579 if (!__tcp_v6_check_established(sk,
580 port,
581 &tw))
582 goto ok;
583 goto next_port;
584 }
585 }
586
6e04e021 587 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
1da177e4
LT
588 if (!tb) {
589 spin_unlock(&head->lock);
590 break;
591 }
592 tb->fastreuse = -1;
593 goto ok;
594
595 next_port:
596 spin_unlock(&head->lock);
597 }
598 local_bh_enable();
599
600 return -EADDRNOTAVAIL;
601
602ok:
603 hint += i;
604
605 /* Head lock still held and bh's disabled */
2d8c4ce5 606 inet_bind_hash(sk, tb, port);
1da177e4
LT
607 if (sk_unhashed(sk)) {
608 inet_sk(sk)->sport = htons(port);
609 __tcp_v6_hash(sk);
610 }
611 spin_unlock(&head->lock);
612
613 if (tw) {
295ff7ed 614 inet_twsk_deschedule(tw, &tcp_death_row);
8feaf0c0 615 inet_twsk_put(tw);
1da177e4
LT
616 }
617
618 ret = 0;
619 goto out;
620 }
621
6e04e021 622 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
463c84b9 623 tb = inet_csk(sk)->icsk_bind_hash;
1da177e4
LT
624 spin_lock_bh(&head->lock);
625
626 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
627 __tcp_v6_hash(sk);
628 spin_unlock_bh(&head->lock);
629 return 0;
630 } else {
631 spin_unlock(&head->lock);
632 /* No definite answer... Walk to established hash table */
633 ret = __tcp_v6_check_established(sk, snum, NULL);
634out:
635 local_bh_enable();
636 return ret;
637 }
638}
639
640static __inline__ int tcp_v6_iif(struct sk_buff *skb)
641{
642 return IP6CB(skb)->iif;
643}
644
645static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
646 int addr_len)
647{
648 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
649 struct inet_sock *inet = inet_sk(sk);
650 struct ipv6_pinfo *np = inet6_sk(sk);
651 struct tcp_sock *tp = tcp_sk(sk);
652 struct in6_addr *saddr = NULL, *final_p = NULL, final;
653 struct flowi fl;
654 struct dst_entry *dst;
655 int addr_type;
656 int err;
657
658 if (addr_len < SIN6_LEN_RFC2133)
659 return -EINVAL;
660
661 if (usin->sin6_family != AF_INET6)
662 return(-EAFNOSUPPORT);
663
664 memset(&fl, 0, sizeof(fl));
665
666 if (np->sndflow) {
667 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
668 IP6_ECN_flow_init(fl.fl6_flowlabel);
669 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
670 struct ip6_flowlabel *flowlabel;
671 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
672 if (flowlabel == NULL)
673 return -EINVAL;
674 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
675 fl6_sock_release(flowlabel);
676 }
677 }
678
679 /*
680 * connect() to INADDR_ANY means loopback (BSD'ism).
681 */
682
683 if(ipv6_addr_any(&usin->sin6_addr))
684 usin->sin6_addr.s6_addr[15] = 0x1;
685
686 addr_type = ipv6_addr_type(&usin->sin6_addr);
687
688 if(addr_type & IPV6_ADDR_MULTICAST)
689 return -ENETUNREACH;
690
691 if (addr_type&IPV6_ADDR_LINKLOCAL) {
692 if (addr_len >= sizeof(struct sockaddr_in6) &&
693 usin->sin6_scope_id) {
694 /* If interface is set while binding, indices
695 * must coincide.
696 */
697 if (sk->sk_bound_dev_if &&
698 sk->sk_bound_dev_if != usin->sin6_scope_id)
699 return -EINVAL;
700
701 sk->sk_bound_dev_if = usin->sin6_scope_id;
702 }
703
704 /* Connect to link-local address requires an interface */
705 if (!sk->sk_bound_dev_if)
706 return -EINVAL;
707 }
708
709 if (tp->rx_opt.ts_recent_stamp &&
710 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
711 tp->rx_opt.ts_recent = 0;
712 tp->rx_opt.ts_recent_stamp = 0;
713 tp->write_seq = 0;
714 }
715
716 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
717 np->flow_label = fl.fl6_flowlabel;
718
719 /*
720 * TCP over IPv4
721 */
722
723 if (addr_type == IPV6_ADDR_MAPPED) {
724 u32 exthdrlen = tp->ext_header_len;
725 struct sockaddr_in sin;
726
727 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
728
729 if (__ipv6_only_sock(sk))
730 return -ENETUNREACH;
731
732 sin.sin_family = AF_INET;
733 sin.sin_port = usin->sin6_port;
734 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
735
736 tp->af_specific = &ipv6_mapped;
737 sk->sk_backlog_rcv = tcp_v4_do_rcv;
738
739 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
740
741 if (err) {
742 tp->ext_header_len = exthdrlen;
743 tp->af_specific = &ipv6_specific;
744 sk->sk_backlog_rcv = tcp_v6_do_rcv;
745 goto failure;
746 } else {
747 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
748 inet->saddr);
749 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
750 inet->rcv_saddr);
751 }
752
753 return err;
754 }
755
756 if (!ipv6_addr_any(&np->rcv_saddr))
757 saddr = &np->rcv_saddr;
758
759 fl.proto = IPPROTO_TCP;
760 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
761 ipv6_addr_copy(&fl.fl6_src,
762 (saddr ? saddr : &np->saddr));
763 fl.oif = sk->sk_bound_dev_if;
764 fl.fl_ip_dport = usin->sin6_port;
765 fl.fl_ip_sport = inet->sport;
766
767 if (np->opt && np->opt->srcrt) {
768 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
769 ipv6_addr_copy(&final, &fl.fl6_dst);
770 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
771 final_p = &final;
772 }
773
774 err = ip6_dst_lookup(sk, &dst, &fl);
775 if (err)
776 goto failure;
777 if (final_p)
778 ipv6_addr_copy(&fl.fl6_dst, final_p);
779
780 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
781 dst_release(dst);
782 goto failure;
783 }
784
785 if (saddr == NULL) {
786 saddr = &fl.fl6_src;
787 ipv6_addr_copy(&np->rcv_saddr, saddr);
788 }
789
790 /* set the source address */
791 ipv6_addr_copy(&np->saddr, saddr);
792 inet->rcv_saddr = LOOPBACK4_IPV6;
793
794 ip6_dst_store(sk, dst, NULL);
795 sk->sk_route_caps = dst->dev->features &
796 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
797
798 tp->ext_header_len = 0;
799 if (np->opt)
800 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
801
802 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
803
804 inet->dport = usin->sin6_port;
805
806 tcp_set_state(sk, TCP_SYN_SENT);
807 err = tcp_v6_hash_connect(sk);
808 if (err)
809 goto late_failure;
810
811 if (!tp->write_seq)
812 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
813 np->daddr.s6_addr32,
814 inet->sport,
815 inet->dport);
816
817 err = tcp_connect(sk);
818 if (err)
819 goto late_failure;
820
821 return 0;
822
823late_failure:
824 tcp_set_state(sk, TCP_CLOSE);
825 __sk_dst_reset(sk);
826failure:
827 inet->dport = 0;
828 sk->sk_route_caps = 0;
829 return err;
830}
831
832static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
833 int type, int code, int offset, __u32 info)
834{
835 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
836 struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
837 struct ipv6_pinfo *np;
838 struct sock *sk;
839 int err;
840 struct tcp_sock *tp;
841 __u32 seq;
842
843 sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
844
845 if (sk == NULL) {
846 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
847 return;
848 }
849
850 if (sk->sk_state == TCP_TIME_WAIT) {
8feaf0c0 851 inet_twsk_put((struct inet_timewait_sock *)sk);
1da177e4
LT
852 return;
853 }
854
855 bh_lock_sock(sk);
856 if (sock_owned_by_user(sk))
857 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
858
859 if (sk->sk_state == TCP_CLOSE)
860 goto out;
861
862 tp = tcp_sk(sk);
863 seq = ntohl(th->seq);
864 if (sk->sk_state != TCP_LISTEN &&
865 !between(seq, tp->snd_una, tp->snd_nxt)) {
866 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
867 goto out;
868 }
869
870 np = inet6_sk(sk);
871
872 if (type == ICMPV6_PKT_TOOBIG) {
873 struct dst_entry *dst = NULL;
874
875 if (sock_owned_by_user(sk))
876 goto out;
877 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
878 goto out;
879
880 /* icmp should have updated the destination cache entry */
881 dst = __sk_dst_check(sk, np->dst_cookie);
882
883 if (dst == NULL) {
884 struct inet_sock *inet = inet_sk(sk);
885 struct flowi fl;
886
887 /* BUGGG_FUTURE: Again, it is not clear how
888 to handle rthdr case. Ignore this complexity
889 for now.
890 */
891 memset(&fl, 0, sizeof(fl));
892 fl.proto = IPPROTO_TCP;
893 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
894 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
895 fl.oif = sk->sk_bound_dev_if;
896 fl.fl_ip_dport = inet->dport;
897 fl.fl_ip_sport = inet->sport;
898
899 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
900 sk->sk_err_soft = -err;
901 goto out;
902 }
903
904 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
905 sk->sk_err_soft = -err;
906 goto out;
907 }
908
909 } else
910 dst_hold(dst);
911
912 if (tp->pmtu_cookie > dst_mtu(dst)) {
913 tcp_sync_mss(sk, dst_mtu(dst));
914 tcp_simple_retransmit(sk);
915 } /* else let the usual retransmit timer handle it */
916 dst_release(dst);
917 goto out;
918 }
919
920 icmpv6_err_convert(type, code, &err);
921
60236fdd 922 /* Might be for an request_sock */
1da177e4 923 switch (sk->sk_state) {
60236fdd 924 struct request_sock *req, **prev;
1da177e4
LT
925 case TCP_LISTEN:
926 if (sock_owned_by_user(sk))
927 goto out;
928
463c84b9 929 req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr,
1da177e4
LT
930 &hdr->saddr, tcp_v6_iif(skb));
931 if (!req)
932 goto out;
933
934 /* ICMPs are not backlogged, hence we cannot get
935 * an established socket here.
936 */
937 BUG_TRAP(req->sk == NULL);
938
2e6599cb 939 if (seq != tcp_rsk(req)->snt_isn) {
1da177e4
LT
940 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
941 goto out;
942 }
943
463c84b9 944 inet_csk_reqsk_queue_drop(sk, req, prev);
1da177e4
LT
945 goto out;
946
947 case TCP_SYN_SENT:
948 case TCP_SYN_RECV: /* Cannot happen.
949 It can, it SYNs are crossed. --ANK */
950 if (!sock_owned_by_user(sk)) {
951 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
952 sk->sk_err = err;
953 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
954
955 tcp_done(sk);
956 } else
957 sk->sk_err_soft = err;
958 goto out;
959 }
960
961 if (!sock_owned_by_user(sk) && np->recverr) {
962 sk->sk_err = err;
963 sk->sk_error_report(sk);
964 } else
965 sk->sk_err_soft = err;
966
967out:
968 bh_unlock_sock(sk);
969 sock_put(sk);
970}
971
972
60236fdd 973static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
1da177e4
LT
974 struct dst_entry *dst)
975{
2e6599cb 976 struct tcp6_request_sock *treq = tcp6_rsk(req);
1da177e4
LT
977 struct ipv6_pinfo *np = inet6_sk(sk);
978 struct sk_buff * skb;
979 struct ipv6_txoptions *opt = NULL;
980 struct in6_addr * final_p = NULL, final;
981 struct flowi fl;
982 int err = -1;
983
984 memset(&fl, 0, sizeof(fl));
985 fl.proto = IPPROTO_TCP;
2e6599cb
ACM
986 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
987 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1da177e4 988 fl.fl6_flowlabel = 0;
2e6599cb
ACM
989 fl.oif = treq->iif;
990 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1da177e4
LT
991 fl.fl_ip_sport = inet_sk(sk)->sport;
992
993 if (dst == NULL) {
994 opt = np->opt;
995 if (opt == NULL &&
996 np->rxopt.bits.srcrt == 2 &&
2e6599cb
ACM
997 treq->pktopts) {
998 struct sk_buff *pktopts = treq->pktopts;
1da177e4
LT
999 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
1000 if (rxopt->srcrt)
1001 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
1002 }
1003
1004 if (opt && opt->srcrt) {
1005 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1006 ipv6_addr_copy(&final, &fl.fl6_dst);
1007 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1008 final_p = &final;
1009 }
1010
1011 err = ip6_dst_lookup(sk, &dst, &fl);
1012 if (err)
1013 goto done;
1014 if (final_p)
1015 ipv6_addr_copy(&fl.fl6_dst, final_p);
1016 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1017 goto done;
1018 }
1019
1020 skb = tcp_make_synack(sk, dst, req);
1021 if (skb) {
1022 struct tcphdr *th = skb->h.th;
1023
1024 th->check = tcp_v6_check(th, skb->len,
2e6599cb 1025 &treq->loc_addr, &treq->rmt_addr,
1da177e4
LT
1026 csum_partial((char *)th, skb->len, skb->csum));
1027
2e6599cb 1028 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1da177e4
LT
1029 err = ip6_xmit(sk, skb, &fl, opt, 0);
1030 if (err == NET_XMIT_CN)
1031 err = 0;
1032 }
1033
1034done:
1035 dst_release(dst);
1036 if (opt && opt != np->opt)
1037 sock_kfree_s(sk, opt, opt->tot_len);
1038 return err;
1039}
1040
60236fdd 1041static void tcp_v6_reqsk_destructor(struct request_sock *req)
1da177e4 1042{
2e6599cb
ACM
1043 if (tcp6_rsk(req)->pktopts)
1044 kfree_skb(tcp6_rsk(req)->pktopts);
1da177e4
LT
1045}
1046
60236fdd 1047static struct request_sock_ops tcp6_request_sock_ops = {
1da177e4 1048 .family = AF_INET6,
2e6599cb 1049 .obj_size = sizeof(struct tcp6_request_sock),
1da177e4 1050 .rtx_syn_ack = tcp_v6_send_synack,
60236fdd
ACM
1051 .send_ack = tcp_v6_reqsk_send_ack,
1052 .destructor = tcp_v6_reqsk_destructor,
1da177e4
LT
1053 .send_reset = tcp_v6_send_reset
1054};
1055
1056static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
1057{
1058 struct ipv6_pinfo *np = inet6_sk(sk);
1059 struct inet6_skb_parm *opt = IP6CB(skb);
1060
1061 if (np->rxopt.all) {
1062 if ((opt->hop && np->rxopt.bits.hopopts) ||
1063 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
1064 np->rxopt.bits.rxflow) ||
1065 (opt->srcrt && np->rxopt.bits.srcrt) ||
1066 ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
1067 return 1;
1068 }
1069 return 0;
1070}
1071
1072
1073static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
1074 struct sk_buff *skb)
1075{
1076 struct ipv6_pinfo *np = inet6_sk(sk);
1077
1078 if (skb->ip_summed == CHECKSUM_HW) {
1079 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
1080 skb->csum = offsetof(struct tcphdr, check);
1081 } else {
1082 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
1083 csum_partial((char *)th, th->doff<<2,
1084 skb->csum));
1085 }
1086}
1087
1088
1089static void tcp_v6_send_reset(struct sk_buff *skb)
1090{
1091 struct tcphdr *th = skb->h.th, *t1;
1092 struct sk_buff *buff;
1093 struct flowi fl;
1094
1095 if (th->rst)
1096 return;
1097
1098 if (!ipv6_unicast_destination(skb))
1099 return;
1100
1101 /*
1102 * We need to grab some memory, and put together an RST,
1103 * and then put it into the queue to be sent.
1104 */
1105
1106 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
1107 GFP_ATOMIC);
1108 if (buff == NULL)
1109 return;
1110
1111 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
1112
1113 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
1114
1115 /* Swap the send and the receive. */
1116 memset(t1, 0, sizeof(*t1));
1117 t1->dest = th->source;
1118 t1->source = th->dest;
1119 t1->doff = sizeof(*t1)/4;
1120 t1->rst = 1;
1121
1122 if(th->ack) {
1123 t1->seq = th->ack_seq;
1124 } else {
1125 t1->ack = 1;
1126 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1127 + skb->len - (th->doff<<2));
1128 }
1129
1130 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1131
1132 memset(&fl, 0, sizeof(fl));
1133 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1134 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1135
1136 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1137 sizeof(*t1), IPPROTO_TCP,
1138 buff->csum);
1139
1140 fl.proto = IPPROTO_TCP;
1141 fl.oif = tcp_v6_iif(skb);
1142 fl.fl_ip_dport = t1->dest;
1143 fl.fl_ip_sport = t1->source;
1144
1145 /* sk = NULL, but it is safe for now. RST socket required. */
1146 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1147
1148 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1149 dst_release(buff->dst);
1150 return;
1151 }
1152
1153 ip6_xmit(NULL, buff, &fl, NULL, 0);
1154 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1155 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
1156 return;
1157 }
1158
1159 kfree_skb(buff);
1160}
1161
1162static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1163{
1164 struct tcphdr *th = skb->h.th, *t1;
1165 struct sk_buff *buff;
1166 struct flowi fl;
1167 int tot_len = sizeof(struct tcphdr);
1168
1169 if (ts)
1170 tot_len += 3*4;
1171
1172 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1173 GFP_ATOMIC);
1174 if (buff == NULL)
1175 return;
1176
1177 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1178
1179 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1180
1181 /* Swap the send and the receive. */
1182 memset(t1, 0, sizeof(*t1));
1183 t1->dest = th->source;
1184 t1->source = th->dest;
1185 t1->doff = tot_len/4;
1186 t1->seq = htonl(seq);
1187 t1->ack_seq = htonl(ack);
1188 t1->ack = 1;
1189 t1->window = htons(win);
1190
1191 if (ts) {
1192 u32 *ptr = (u32*)(t1 + 1);
1193 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1194 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1195 *ptr++ = htonl(tcp_time_stamp);
1196 *ptr = htonl(ts);
1197 }
1198
1199 buff->csum = csum_partial((char *)t1, tot_len, 0);
1200
1201 memset(&fl, 0, sizeof(fl));
1202 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1203 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1204
1205 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1206 tot_len, IPPROTO_TCP,
1207 buff->csum);
1208
1209 fl.proto = IPPROTO_TCP;
1210 fl.oif = tcp_v6_iif(skb);
1211 fl.fl_ip_dport = t1->dest;
1212 fl.fl_ip_sport = t1->source;
1213
1214 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1215 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1216 dst_release(buff->dst);
1217 return;
1218 }
1219 ip6_xmit(NULL, buff, &fl, NULL, 0);
1220 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1221 return;
1222 }
1223
1224 kfree_skb(buff);
1225}
1226
1227static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1228{
8feaf0c0
ACM
1229 struct inet_timewait_sock *tw = inet_twsk(sk);
1230 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1da177e4 1231
8feaf0c0
ACM
1232 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1233 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1234 tcptw->tw_ts_recent);
1da177e4 1235
8feaf0c0 1236 inet_twsk_put(tw);
1da177e4
LT
1237}
1238
60236fdd 1239static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1da177e4 1240{
2e6599cb 1241 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
1da177e4
LT
1242}
1243
1244
1245static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1246{
60236fdd 1247 struct request_sock *req, **prev;
1da177e4 1248 struct tcphdr *th = skb->h.th;
1da177e4
LT
1249 struct sock *nsk;
1250
1251 /* Find possible connection requests. */
463c84b9 1252 req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr,
1da177e4
LT
1253 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1254 if (req)
1255 return tcp_check_req(sk, skb, req, prev);
1256
1257 nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1258 th->source,
1259 &skb->nh.ipv6h->daddr,
1260 ntohs(th->dest),
1261 tcp_v6_iif(skb));
1262
1263 if (nsk) {
1264 if (nsk->sk_state != TCP_TIME_WAIT) {
1265 bh_lock_sock(nsk);
1266 return nsk;
1267 }
8feaf0c0 1268 inet_twsk_put((struct inet_timewait_sock *)nsk);
1da177e4
LT
1269 return NULL;
1270 }
1271
1272#if 0 /*def CONFIG_SYN_COOKIES*/
1273 if (!th->rst && !th->syn && th->ack)
1274 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
1275#endif
1276 return sk;
1277}
1278
60236fdd 1279static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1da177e4 1280{
463c84b9
ACM
1281 struct inet_connection_sock *icsk = inet_csk(sk);
1282 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
1283 const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1da177e4 1284
463c84b9
ACM
1285 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
1286 inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
1da177e4
LT
1287}
1288
1289
1290/* FIXME: this is substantially similar to the ipv4 code.
1291 * Can some kind of merge be done? -- erics
1292 */
1293static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1294{
2e6599cb 1295 struct tcp6_request_sock *treq;
1da177e4
LT
1296 struct ipv6_pinfo *np = inet6_sk(sk);
1297 struct tcp_options_received tmp_opt;
1298 struct tcp_sock *tp = tcp_sk(sk);
60236fdd 1299 struct request_sock *req = NULL;
1da177e4
LT
1300 __u32 isn = TCP_SKB_CB(skb)->when;
1301
1302 if (skb->protocol == htons(ETH_P_IP))
1303 return tcp_v4_conn_request(sk, skb);
1304
1305 if (!ipv6_unicast_destination(skb))
1306 goto drop;
1307
1308 /*
1309 * There are no SYN attacks on IPv6, yet...
1310 */
463c84b9 1311 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1da177e4
LT
1312 if (net_ratelimit())
1313 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1314 goto drop;
1315 }
1316
463c84b9 1317 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1da177e4
LT
1318 goto drop;
1319
60236fdd 1320 req = reqsk_alloc(&tcp6_request_sock_ops);
1da177e4
LT
1321 if (req == NULL)
1322 goto drop;
1323
1324 tcp_clear_options(&tmp_opt);
1325 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1326 tmp_opt.user_mss = tp->rx_opt.user_mss;
1327
1328 tcp_parse_options(skb, &tmp_opt, 0);
1329
1330 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1331 tcp_openreq_init(req, &tmp_opt, skb);
1332
2e6599cb
ACM
1333 treq = tcp6_rsk(req);
1334 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
1335 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1da177e4 1336 TCP_ECN_create_request(req, skb->h.th);
2e6599cb 1337 treq->pktopts = NULL;
1da177e4
LT
1338 if (ipv6_opt_accepted(sk, skb) ||
1339 np->rxopt.bits.rxinfo ||
1340 np->rxopt.bits.rxhlim) {
1341 atomic_inc(&skb->users);
2e6599cb 1342 treq->pktopts = skb;
1da177e4 1343 }
2e6599cb 1344 treq->iif = sk->sk_bound_dev_if;
1da177e4
LT
1345
1346 /* So that link locals have meaning */
1347 if (!sk->sk_bound_dev_if &&
2e6599cb
ACM
1348 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1349 treq->iif = tcp_v6_iif(skb);
1da177e4
LT
1350
1351 if (isn == 0)
1352 isn = tcp_v6_init_sequence(sk,skb);
1353
2e6599cb 1354 tcp_rsk(req)->snt_isn = isn;
1da177e4
LT
1355
1356 if (tcp_v6_send_synack(sk, req, NULL))
1357 goto drop;
1358
1359 tcp_v6_synq_add(sk, req);
1360
1361 return 0;
1362
1363drop:
1364 if (req)
60236fdd 1365 reqsk_free(req);
1da177e4
LT
1366
1367 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1368 return 0; /* don't send reset */
1369}
1370
1371static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
60236fdd 1372 struct request_sock *req,
1da177e4
LT
1373 struct dst_entry *dst)
1374{
2e6599cb 1375 struct tcp6_request_sock *treq = tcp6_rsk(req);
1da177e4
LT
1376 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1377 struct tcp6_sock *newtcp6sk;
1378 struct inet_sock *newinet;
1379 struct tcp_sock *newtp;
1380 struct sock *newsk;
1381 struct ipv6_txoptions *opt;
1382
1383 if (skb->protocol == htons(ETH_P_IP)) {
1384 /*
1385 * v6 mapped
1386 */
1387
1388 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1389
1390 if (newsk == NULL)
1391 return NULL;
1392
1393 newtcp6sk = (struct tcp6_sock *)newsk;
1394 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1395
1396 newinet = inet_sk(newsk);
1397 newnp = inet6_sk(newsk);
1398 newtp = tcp_sk(newsk);
1399
1400 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1401
1402 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1403 newinet->daddr);
1404
1405 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1406 newinet->saddr);
1407
1408 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1409
1410 newtp->af_specific = &ipv6_mapped;
1411 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1412 newnp->pktoptions = NULL;
1413 newnp->opt = NULL;
1414 newnp->mcast_oif = tcp_v6_iif(skb);
1415 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1416
e6848976
ACM
1417 /*
1418 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1419 * here, tcp_create_openreq_child now does this for us, see the comment in
1420 * that function for the gory details. -acme
1da177e4 1421 */
1da177e4
LT
1422
1423 /* It is tricky place. Until this moment IPv4 tcp
1424 worked with IPv6 af_tcp.af_specific.
1425 Sync it now.
1426 */
1427 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1428
1429 return newsk;
1430 }
1431
1432 opt = np->opt;
1433
1434 if (sk_acceptq_is_full(sk))
1435 goto out_overflow;
1436
1437 if (np->rxopt.bits.srcrt == 2 &&
2e6599cb
ACM
1438 opt == NULL && treq->pktopts) {
1439 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1da177e4 1440 if (rxopt->srcrt)
2e6599cb 1441 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
1da177e4
LT
1442 }
1443
1444 if (dst == NULL) {
1445 struct in6_addr *final_p = NULL, final;
1446 struct flowi fl;
1447
1448 memset(&fl, 0, sizeof(fl));
1449 fl.proto = IPPROTO_TCP;
2e6599cb 1450 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1da177e4
LT
1451 if (opt && opt->srcrt) {
1452 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1453 ipv6_addr_copy(&final, &fl.fl6_dst);
1454 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1455 final_p = &final;
1456 }
2e6599cb 1457 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1da177e4 1458 fl.oif = sk->sk_bound_dev_if;
2e6599cb 1459 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1da177e4
LT
1460 fl.fl_ip_sport = inet_sk(sk)->sport;
1461
1462 if (ip6_dst_lookup(sk, &dst, &fl))
1463 goto out;
1464
1465 if (final_p)
1466 ipv6_addr_copy(&fl.fl6_dst, final_p);
1467
1468 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1469 goto out;
1470 }
1471
1472 newsk = tcp_create_openreq_child(sk, req, skb);
1473 if (newsk == NULL)
1474 goto out;
1475
e6848976
ACM
1476 /*
1477 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1478 * count here, tcp_create_openreq_child now does this for us, see the
1479 * comment in that function for the gory details. -acme
1480 */
1da177e4
LT
1481
1482 ip6_dst_store(newsk, dst, NULL);
1483 newsk->sk_route_caps = dst->dev->features &
1484 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1485
1486 newtcp6sk = (struct tcp6_sock *)newsk;
1487 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1488
1489 newtp = tcp_sk(newsk);
1490 newinet = inet_sk(newsk);
1491 newnp = inet6_sk(newsk);
1492
1493 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1494
2e6599cb
ACM
1495 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1496 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1497 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1498 newsk->sk_bound_dev_if = treq->iif;
1da177e4
LT
1499
1500 /* Now IPv6 options...
1501
1502 First: no IPv4 options.
1503 */
1504 newinet->opt = NULL;
1505
1506 /* Clone RX bits */
1507 newnp->rxopt.all = np->rxopt.all;
1508
1509 /* Clone pktoptions received with SYN */
1510 newnp->pktoptions = NULL;
2e6599cb
ACM
1511 if (treq->pktopts != NULL) {
1512 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1513 kfree_skb(treq->pktopts);
1514 treq->pktopts = NULL;
1da177e4
LT
1515 if (newnp->pktoptions)
1516 skb_set_owner_r(newnp->pktoptions, newsk);
1517 }
1518 newnp->opt = NULL;
1519 newnp->mcast_oif = tcp_v6_iif(skb);
1520 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1521
1522 /* Clone native IPv6 options from listening socket (if any)
1523
1524 Yes, keeping reference count would be much more clever,
1525 but we make one more one thing there: reattach optmem
1526 to newsk.
1527 */
1528 if (opt) {
1529 newnp->opt = ipv6_dup_options(newsk, opt);
1530 if (opt != np->opt)
1531 sock_kfree_s(sk, opt, opt->tot_len);
1532 }
1533
1534 newtp->ext_header_len = 0;
1535 if (newnp->opt)
1536 newtp->ext_header_len = newnp->opt->opt_nflen +
1537 newnp->opt->opt_flen;
1538
1539 tcp_sync_mss(newsk, dst_mtu(dst));
1540 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1541 tcp_initialize_rcv_mss(newsk);
1542
1543 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1544
1545 __tcp_v6_hash(newsk);
2d8c4ce5 1546 inet_inherit_port(&tcp_hashinfo, sk, newsk);
1da177e4
LT
1547
1548 return newsk;
1549
1550out_overflow:
1551 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1552out:
1553 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1554 if (opt && opt != np->opt)
1555 sock_kfree_s(sk, opt, opt->tot_len);
1556 dst_release(dst);
1557 return NULL;
1558}
1559
1560static int tcp_v6_checksum_init(struct sk_buff *skb)
1561{
1562 if (skb->ip_summed == CHECKSUM_HW) {
1563 skb->ip_summed = CHECKSUM_UNNECESSARY;
1564 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1565 &skb->nh.ipv6h->daddr,skb->csum))
1566 return 0;
64ce2073 1567 LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n");
1da177e4
LT
1568 }
1569 if (skb->len <= 76) {
1570 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1571 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1572 return -1;
1573 skb->ip_summed = CHECKSUM_UNNECESSARY;
1574 } else {
1575 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1576 &skb->nh.ipv6h->daddr,0);
1577 }
1578 return 0;
1579}
1580
1581/* The socket must have it's spinlock held when we get
1582 * here.
1583 *
1584 * We have a potential double-lock case here, so even when
1585 * doing backlog processing we use the BH locking scheme.
1586 * This is because we cannot sleep with the original spinlock
1587 * held.
1588 */
1589static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1590{
1591 struct ipv6_pinfo *np = inet6_sk(sk);
1592 struct tcp_sock *tp;
1593 struct sk_buff *opt_skb = NULL;
1594
1595 /* Imagine: socket is IPv6. IPv4 packet arrives,
1596 goes to IPv4 receive handler and backlogged.
1597 From backlog it always goes here. Kerboom...
1598 Fortunately, tcp_rcv_established and rcv_established
1599 handle them correctly, but it is not case with
1600 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1601 */
1602
1603 if (skb->protocol == htons(ETH_P_IP))
1604 return tcp_v4_do_rcv(sk, skb);
1605
1606 if (sk_filter(sk, skb, 0))
1607 goto discard;
1608
1609 /*
1610 * socket locking is here for SMP purposes as backlog rcv
1611 * is currently called with bh processing disabled.
1612 */
1613
1614 /* Do Stevens' IPV6_PKTOPTIONS.
1615
1616 Yes, guys, it is the only place in our code, where we
1617 may make it not affecting IPv4.
1618 The rest of code is protocol independent,
1619 and I do not like idea to uglify IPv4.
1620
1621 Actually, all the idea behind IPV6_PKTOPTIONS
1622 looks not very well thought. For now we latch
1623 options, received in the last packet, enqueued
1624 by tcp. Feel free to propose better solution.
1625 --ANK (980728)
1626 */
1627 if (np->rxopt.all)
1628 opt_skb = skb_clone(skb, GFP_ATOMIC);
1629
1630 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1631 TCP_CHECK_TIMER(sk);
1632 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1633 goto reset;
1634 TCP_CHECK_TIMER(sk);
1635 if (opt_skb)
1636 goto ipv6_pktoptions;
1637 return 0;
1638 }
1639
1640 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1641 goto csum_err;
1642
1643 if (sk->sk_state == TCP_LISTEN) {
1644 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1645 if (!nsk)
1646 goto discard;
1647
1648 /*
1649 * Queue it on the new socket if the new socket is active,
1650 * otherwise we just shortcircuit this and continue with
1651 * the new socket..
1652 */
1653 if(nsk != sk) {
1654 if (tcp_child_process(sk, nsk, skb))
1655 goto reset;
1656 if (opt_skb)
1657 __kfree_skb(opt_skb);
1658 return 0;
1659 }
1660 }
1661
1662 TCP_CHECK_TIMER(sk);
1663 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1664 goto reset;
1665 TCP_CHECK_TIMER(sk);
1666 if (opt_skb)
1667 goto ipv6_pktoptions;
1668 return 0;
1669
1670reset:
1671 tcp_v6_send_reset(skb);
1672discard:
1673 if (opt_skb)
1674 __kfree_skb(opt_skb);
1675 kfree_skb(skb);
1676 return 0;
1677csum_err:
1678 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1679 goto discard;
1680
1681
1682ipv6_pktoptions:
1683 /* Do you ask, what is it?
1684
1685 1. skb was enqueued by tcp.
1686 2. skb is added to tail of read queue, rather than out of order.
1687 3. socket is not in passive state.
1688 4. Finally, it really contains options, which user wants to receive.
1689 */
1690 tp = tcp_sk(sk);
1691 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1692 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1693 if (np->rxopt.bits.rxinfo)
1694 np->mcast_oif = tcp_v6_iif(opt_skb);
1695 if (np->rxopt.bits.rxhlim)
1696 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1697 if (ipv6_opt_accepted(sk, opt_skb)) {
1698 skb_set_owner_r(opt_skb, sk);
1699 opt_skb = xchg(&np->pktoptions, opt_skb);
1700 } else {
1701 __kfree_skb(opt_skb);
1702 opt_skb = xchg(&np->pktoptions, NULL);
1703 }
1704 }
1705
1706 if (opt_skb)
1707 kfree_skb(opt_skb);
1708 return 0;
1709}
1710
1711static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1712{
1713 struct sk_buff *skb = *pskb;
1714 struct tcphdr *th;
1715 struct sock *sk;
1716 int ret;
1717
1718 if (skb->pkt_type != PACKET_HOST)
1719 goto discard_it;
1720
1721 /*
1722 * Count it even if it's bad.
1723 */
1724 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1725
1726 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1727 goto discard_it;
1728
1729 th = skb->h.th;
1730
1731 if (th->doff < sizeof(struct tcphdr)/4)
1732 goto bad_packet;
1733 if (!pskb_may_pull(skb, th->doff*4))
1734 goto discard_it;
1735
1736 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1737 tcp_v6_checksum_init(skb) < 0))
1738 goto bad_packet;
1739
1740 th = skb->h.th;
1741 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1742 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1743 skb->len - th->doff*4);
1744 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1745 TCP_SKB_CB(skb)->when = 0;
1746 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1747 TCP_SKB_CB(skb)->sacked = 0;
1748
1749 sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1750 &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1751
1752 if (!sk)
1753 goto no_tcp_socket;
1754
1755process:
1756 if (sk->sk_state == TCP_TIME_WAIT)
1757 goto do_time_wait;
1758
1759 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1760 goto discard_and_relse;
1761
1762 if (sk_filter(sk, skb, 0))
1763 goto discard_and_relse;
1764
1765 skb->dev = NULL;
1766
1767 bh_lock_sock(sk);
1768 ret = 0;
1769 if (!sock_owned_by_user(sk)) {
1770 if (!tcp_prequeue(sk, skb))
1771 ret = tcp_v6_do_rcv(sk, skb);
1772 } else
1773 sk_add_backlog(sk, skb);
1774 bh_unlock_sock(sk);
1775
1776 sock_put(sk);
1777 return ret ? -1 : 0;
1778
1779no_tcp_socket:
1780 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1781 goto discard_it;
1782
1783 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1784bad_packet:
1785 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1786 } else {
1787 tcp_v6_send_reset(skb);
1788 }
1789
1790discard_it:
1791
1792 /*
1793 * Discard frame
1794 */
1795
1796 kfree_skb(skb);
1797 return 0;
1798
1799discard_and_relse:
1800 sock_put(sk);
1801 goto discard_it;
1802
1803do_time_wait:
1804 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
8feaf0c0 1805 inet_twsk_put((struct inet_timewait_sock *)sk);
1da177e4
LT
1806 goto discard_it;
1807 }
1808
1809 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1810 TCP_INC_STATS_BH(TCP_MIB_INERRS);
8feaf0c0 1811 inet_twsk_put((struct inet_timewait_sock *)sk);
1da177e4
LT
1812 goto discard_it;
1813 }
1814
8feaf0c0
ACM
1815 switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
1816 skb, th)) {
1da177e4
LT
1817 case TCP_TW_SYN:
1818 {
1819 struct sock *sk2;
1820
1821 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1822 if (sk2 != NULL) {
295ff7ed
ACM
1823 struct inet_timewait_sock *tw = inet_twsk(sk);
1824 inet_twsk_deschedule(tw, &tcp_death_row);
1825 inet_twsk_put(tw);
1da177e4
LT
1826 sk = sk2;
1827 goto process;
1828 }
1829 /* Fall through to ACK */
1830 }
1831 case TCP_TW_ACK:
1832 tcp_v6_timewait_ack(sk, skb);
1833 break;
1834 case TCP_TW_RST:
1835 goto no_tcp_socket;
1836 case TCP_TW_SUCCESS:;
1837 }
1838 goto discard_it;
1839}
1840
1841static int tcp_v6_rebuild_header(struct sock *sk)
1842{
1843 int err;
1844 struct dst_entry *dst;
1845 struct ipv6_pinfo *np = inet6_sk(sk);
1846
1847 dst = __sk_dst_check(sk, np->dst_cookie);
1848
1849 if (dst == NULL) {
1850 struct inet_sock *inet = inet_sk(sk);
1851 struct in6_addr *final_p = NULL, final;
1852 struct flowi fl;
1853
1854 memset(&fl, 0, sizeof(fl));
1855 fl.proto = IPPROTO_TCP;
1856 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1857 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1858 fl.fl6_flowlabel = np->flow_label;
1859 fl.oif = sk->sk_bound_dev_if;
1860 fl.fl_ip_dport = inet->dport;
1861 fl.fl_ip_sport = inet->sport;
1862
1863 if (np->opt && np->opt->srcrt) {
1864 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1865 ipv6_addr_copy(&final, &fl.fl6_dst);
1866 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1867 final_p = &final;
1868 }
1869
1870 err = ip6_dst_lookup(sk, &dst, &fl);
1871 if (err) {
1872 sk->sk_route_caps = 0;
1873 return err;
1874 }
1875 if (final_p)
1876 ipv6_addr_copy(&fl.fl6_dst, final_p);
1877
1878 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1879 sk->sk_err_soft = -err;
1880 dst_release(dst);
1881 return err;
1882 }
1883
1884 ip6_dst_store(sk, dst, NULL);
1885 sk->sk_route_caps = dst->dev->features &
1886 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1887 }
1888
1889 return 0;
1890}
1891
1892static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1893{
1894 struct sock *sk = skb->sk;
1895 struct inet_sock *inet = inet_sk(sk);
1896 struct ipv6_pinfo *np = inet6_sk(sk);
1897 struct flowi fl;
1898 struct dst_entry *dst;
1899 struct in6_addr *final_p = NULL, final;
1900
1901 memset(&fl, 0, sizeof(fl));
1902 fl.proto = IPPROTO_TCP;
1903 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1904 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1905 fl.fl6_flowlabel = np->flow_label;
1906 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1907 fl.oif = sk->sk_bound_dev_if;
1908 fl.fl_ip_sport = inet->sport;
1909 fl.fl_ip_dport = inet->dport;
1910
1911 if (np->opt && np->opt->srcrt) {
1912 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1913 ipv6_addr_copy(&final, &fl.fl6_dst);
1914 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1915 final_p = &final;
1916 }
1917
1918 dst = __sk_dst_check(sk, np->dst_cookie);
1919
1920 if (dst == NULL) {
1921 int err = ip6_dst_lookup(sk, &dst, &fl);
1922
1923 if (err) {
1924 sk->sk_err_soft = -err;
1925 return err;
1926 }
1927
1928 if (final_p)
1929 ipv6_addr_copy(&fl.fl6_dst, final_p);
1930
1931 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1932 sk->sk_route_caps = 0;
1933 dst_release(dst);
1934 return err;
1935 }
1936
1937 ip6_dst_store(sk, dst, NULL);
1938 sk->sk_route_caps = dst->dev->features &
1939 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1940 }
1941
1942 skb->dst = dst_clone(dst);
1943
1944 /* Restore final destination back after routing done */
1945 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1946
1947 return ip6_xmit(sk, skb, &fl, np->opt, 0);
1948}
1949
1950static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1951{
1952 struct ipv6_pinfo *np = inet6_sk(sk);
1953 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1954
1955 sin6->sin6_family = AF_INET6;
1956 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1957 sin6->sin6_port = inet_sk(sk)->dport;
1958 /* We do not store received flowlabel for TCP */
1959 sin6->sin6_flowinfo = 0;
1960 sin6->sin6_scope_id = 0;
1961 if (sk->sk_bound_dev_if &&
1962 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1963 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1964}
1965
/*
 * Placeholder for the remember_stamp hook: nothing is remembered for
 * IPv6 yet, so this always returns 0 and does not touch @sk.
 */
static int tcp_v6_remember_stamp(struct sock *sk)
{
	/* Alas, not yet... */
	const int remembered = 0;

	return remembered;
}
1971
1972static struct tcp_func ipv6_specific = {
1973 .queue_xmit = tcp_v6_xmit,
1974 .send_check = tcp_v6_send_check,
1975 .rebuild_header = tcp_v6_rebuild_header,
1976 .conn_request = tcp_v6_conn_request,
1977 .syn_recv_sock = tcp_v6_syn_recv_sock,
1978 .remember_stamp = tcp_v6_remember_stamp,
1979 .net_header_len = sizeof(struct ipv6hdr),
1980
1981 .setsockopt = ipv6_setsockopt,
1982 .getsockopt = ipv6_getsockopt,
1983 .addr2sockaddr = v6_addr2sockaddr,
1984 .sockaddr_len = sizeof(struct sockaddr_in6)
1985};
1986
1987/*
1988 * TCP over IPv4 via INET6 API
1989 */
1990
1991static struct tcp_func ipv6_mapped = {
1992 .queue_xmit = ip_queue_xmit,
1993 .send_check = tcp_v4_send_check,
32519f11 1994 .rebuild_header = inet_sk_rebuild_header,
1da177e4
LT
1995 .conn_request = tcp_v6_conn_request,
1996 .syn_recv_sock = tcp_v6_syn_recv_sock,
1997 .remember_stamp = tcp_v4_remember_stamp,
1998 .net_header_len = sizeof(struct iphdr),
1999
2000 .setsockopt = ipv6_setsockopt,
2001 .getsockopt = ipv6_getsockopt,
2002 .addr2sockaddr = v6_addr2sockaddr,
2003 .sockaddr_len = sizeof(struct sockaddr_in6)
2004};
2005
2006
2007
2008/* NOTE: A lot of things set to zero explicitly by call to
2009 * sk_alloc() so need not be done here.
2010 */
2011static int tcp_v6_init_sock(struct sock *sk)
2012{
6687e988 2013 struct inet_connection_sock *icsk = inet_csk(sk);
1da177e4
LT
2014 struct tcp_sock *tp = tcp_sk(sk);
2015
2016 skb_queue_head_init(&tp->out_of_order_queue);
2017 tcp_init_xmit_timers(sk);
2018 tcp_prequeue_init(tp);
2019
6687e988 2020 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1da177e4
LT
2021 tp->mdev = TCP_TIMEOUT_INIT;
2022
2023 /* So many TCP implementations out there (incorrectly) count the
2024 * initial SYN frame in their delayed-ACK and congestion control
2025 * algorithms that we must have the following bandaid to talk
2026 * efficiently to them. -DaveM
2027 */
2028 tp->snd_cwnd = 2;
2029
2030 /* See draft-stevens-tcpca-spec-01 for discussion of the
2031 * initialization of these values.
2032 */
2033 tp->snd_ssthresh = 0x7fffffff;
2034 tp->snd_cwnd_clamp = ~0;
c1b4a7e6 2035 tp->mss_cache = 536;
1da177e4
LT
2036
2037 tp->reordering = sysctl_tcp_reordering;
2038
2039 sk->sk_state = TCP_CLOSE;
2040
2041 tp->af_specific = &ipv6_specific;
6687e988 2042 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1da177e4
LT
2043 sk->sk_write_space = sk_stream_write_space;
2044 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2045
2046 sk->sk_sndbuf = sysctl_tcp_wmem[1];
2047 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
2048
2049 atomic_inc(&tcp_sockets_allocated);
2050
2051 return 0;
2052}
2053
/*
 * Tear down socket @sk: run the IPv4 TCP destructor first, then the
 * IPv6 one, whose result is returned.
 */
static int tcp_v6_destroy_sock(struct sock *sk)
{
	extern int tcp_v4_destroy_sock(struct sock *sk);
	int ret;

	tcp_v4_destroy_sock(sk);
	ret = inet6_destroy_sock(sk);

	return ret;
}
2061
2062/* Proc filesystem TCPv6 sock list dumping. */
2063static void get_openreq6(struct seq_file *seq,
60236fdd 2064 struct sock *sk, struct request_sock *req, int i, int uid)
1da177e4
LT
2065{
2066 struct in6_addr *dest, *src;
2067 int ttd = req->expires - jiffies;
2068
2069 if (ttd < 0)
2070 ttd = 0;
2071
2e6599cb
ACM
2072 src = &tcp6_rsk(req)->loc_addr;
2073 dest = &tcp6_rsk(req)->rmt_addr;
1da177e4
LT
2074 seq_printf(seq,
2075 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2076 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2077 i,
2078 src->s6_addr32[0], src->s6_addr32[1],
2079 src->s6_addr32[2], src->s6_addr32[3],
2080 ntohs(inet_sk(sk)->sport),
2081 dest->s6_addr32[0], dest->s6_addr32[1],
2082 dest->s6_addr32[2], dest->s6_addr32[3],
2e6599cb 2083 ntohs(inet_rsk(req)->rmt_port),
1da177e4
LT
2084 TCP_SYN_RECV,
2085 0,0, /* could print option size, but that is af dependent. */
2086 1, /* timers active (only the expire timer) */
2087 jiffies_to_clock_t(ttd),
2088 req->retrans,
2089 uid,
2090 0, /* non standard timer */
2091 0, /* open_requests have no inode */
2092 0, req);
2093}
2094
2095static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2096{
2097 struct in6_addr *dest, *src;
2098 __u16 destp, srcp;
2099 int timer_active;
2100 unsigned long timer_expires;
2101 struct inet_sock *inet = inet_sk(sp);
2102 struct tcp_sock *tp = tcp_sk(sp);
463c84b9 2103 const struct inet_connection_sock *icsk = inet_csk(sp);
1da177e4
LT
2104 struct ipv6_pinfo *np = inet6_sk(sp);
2105
2106 dest = &np->daddr;
2107 src = &np->rcv_saddr;
2108 destp = ntohs(inet->dport);
2109 srcp = ntohs(inet->sport);
463c84b9
ACM
2110
2111 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1da177e4 2112 timer_active = 1;
463c84b9
ACM
2113 timer_expires = icsk->icsk_timeout;
2114 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1da177e4 2115 timer_active = 4;
463c84b9 2116 timer_expires = icsk->icsk_timeout;
1da177e4
LT
2117 } else if (timer_pending(&sp->sk_timer)) {
2118 timer_active = 2;
2119 timer_expires = sp->sk_timer.expires;
2120 } else {
2121 timer_active = 0;
2122 timer_expires = jiffies;
2123 }
2124
2125 seq_printf(seq,
2126 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2127 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2128 i,
2129 src->s6_addr32[0], src->s6_addr32[1],
2130 src->s6_addr32[2], src->s6_addr32[3], srcp,
2131 dest->s6_addr32[0], dest->s6_addr32[1],
2132 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2133 sp->sk_state,
2134 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2135 timer_active,
2136 jiffies_to_clock_t(timer_expires - jiffies),
463c84b9 2137 icsk->icsk_retransmits,
1da177e4 2138 sock_i_uid(sp),
6687e988 2139 icsk->icsk_probes_out,
1da177e4
LT
2140 sock_i_ino(sp),
2141 atomic_read(&sp->sk_refcnt), sp,
463c84b9
ACM
2142 icsk->icsk_rto,
2143 icsk->icsk_ack.ato,
2144 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
1da177e4
LT
2145 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
2146 );
2147}
2148
2149static void get_timewait6_sock(struct seq_file *seq,
8feaf0c0 2150 struct inet_timewait_sock *tw, int i)
1da177e4
LT
2151{
2152 struct in6_addr *dest, *src;
2153 __u16 destp, srcp;
8feaf0c0 2154 struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
1da177e4
LT
2155 int ttd = tw->tw_ttd - jiffies;
2156
2157 if (ttd < 0)
2158 ttd = 0;
2159
8feaf0c0
ACM
2160 dest = &tcp6tw->tw_v6_daddr;
2161 src = &tcp6tw->tw_v6_rcv_saddr;
1da177e4
LT
2162 destp = ntohs(tw->tw_dport);
2163 srcp = ntohs(tw->tw_sport);
2164
2165 seq_printf(seq,
2166 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2167 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2168 i,
2169 src->s6_addr32[0], src->s6_addr32[1],
2170 src->s6_addr32[2], src->s6_addr32[3], srcp,
2171 dest->s6_addr32[0], dest->s6_addr32[1],
2172 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2173 tw->tw_substate, 0, 0,
2174 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2175 atomic_read(&tw->tw_refcnt), tw);
2176}
2177
2178#ifdef CONFIG_PROC_FS
2179static int tcp6_seq_show(struct seq_file *seq, void *v)
2180{
2181 struct tcp_iter_state *st;
2182
2183 if (v == SEQ_START_TOKEN) {
2184 seq_puts(seq,
2185 " sl "
2186 "local_address "
2187 "remote_address "
2188 "st tx_queue rx_queue tr tm->when retrnsmt"
2189 " uid timeout inode\n");
2190 goto out;
2191 }
2192 st = seq->private;
2193
2194 switch (st->state) {
2195 case TCP_SEQ_STATE_LISTENING:
2196 case TCP_SEQ_STATE_ESTABLISHED:
2197 get_tcp6_sock(seq, v, st->num);
2198 break;
2199 case TCP_SEQ_STATE_OPENREQ:
2200 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2201 break;
2202 case TCP_SEQ_STATE_TIME_WAIT:
2203 get_timewait6_sock(seq, v, st->num);
2204 break;
2205 }
2206out:
2207 return 0;
2208}
2209
2210static struct file_operations tcp6_seq_fops;
2211static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2212 .owner = THIS_MODULE,
2213 .name = "tcp6",
2214 .family = AF_INET6,
2215 .seq_show = tcp6_seq_show,
2216 .seq_fops = &tcp6_seq_fops,
2217};
2218
2219int __init tcp6_proc_init(void)
2220{
2221 return tcp_proc_register(&tcp6_seq_afinfo);
2222}
2223
2224void tcp6_proc_exit(void)
2225{
2226 tcp_proc_unregister(&tcp6_seq_afinfo);
2227}
2228#endif
2229
2230struct proto tcpv6_prot = {
2231 .name = "TCPv6",
2232 .owner = THIS_MODULE,
2233 .close = tcp_close,
2234 .connect = tcp_v6_connect,
2235 .disconnect = tcp_disconnect,
463c84b9 2236 .accept = inet_csk_accept,
1da177e4
LT
2237 .ioctl = tcp_ioctl,
2238 .init = tcp_v6_init_sock,
2239 .destroy = tcp_v6_destroy_sock,
2240 .shutdown = tcp_shutdown,
2241 .setsockopt = tcp_setsockopt,
2242 .getsockopt = tcp_getsockopt,
2243 .sendmsg = tcp_sendmsg,
2244 .recvmsg = tcp_recvmsg,
2245 .backlog_rcv = tcp_v6_do_rcv,
2246 .hash = tcp_v6_hash,
2247 .unhash = tcp_unhash,
2248 .get_port = tcp_v6_get_port,
2249 .enter_memory_pressure = tcp_enter_memory_pressure,
2250 .sockets_allocated = &tcp_sockets_allocated,
2251 .memory_allocated = &tcp_memory_allocated,
2252 .memory_pressure = &tcp_memory_pressure,
0a5578cf 2253 .orphan_count = &tcp_orphan_count,
1da177e4
LT
2254 .sysctl_mem = sysctl_tcp_mem,
2255 .sysctl_wmem = sysctl_tcp_wmem,
2256 .sysctl_rmem = sysctl_tcp_rmem,
2257 .max_header = MAX_TCP_HEADER,
2258 .obj_size = sizeof(struct tcp6_sock),
8feaf0c0 2259 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
60236fdd 2260 .rsk_prot = &tcp6_request_sock_ops,
1da177e4
LT
2261};
2262
2263static struct inet6_protocol tcpv6_protocol = {
2264 .handler = tcp_v6_rcv,
2265 .err_handler = tcp_v6_err,
2266 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2267};
2268
2269extern struct proto_ops inet6_stream_ops;
2270
2271static struct inet_protosw tcpv6_protosw = {
2272 .type = SOCK_STREAM,
2273 .protocol = IPPROTO_TCP,
2274 .prot = &tcpv6_prot,
2275 .ops = &inet6_stream_ops,
2276 .capability = -1,
2277 .no_check = 0,
2278 .flags = INET_PROTOSW_PERMANENT,
2279};
2280
2281void __init tcpv6_init(void)
2282{
2283 /* register inet6 protocol */
2284 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2285 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2286 inet6_register_protosw(&tcpv6_protosw);
2287}