smsc95xx: remove EEPROM loaded check
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / netfilter / ipvs / ip_vs_proto_tcp.c
CommitLineData
1da177e4
LT
1/*
2 * ip_vs_proto_tcp.c: TCP load balancing support for IPVS
3 *
1da177e4
LT
4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
5 * Julian Anastasov <ja@ssi.bg>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Changes:
13 *
14 */
15
9aada7ac
HE
16#define KMSG_COMPONENT "IPVS"
17#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
18
1da177e4
LT
19#include <linux/kernel.h>
20#include <linux/ip.h>
21#include <linux/tcp.h> /* for tcphdr */
22#include <net/ip.h>
23#include <net/tcp.h> /* for csum_tcpudp_magic */
63f2c046 24#include <net/ip6_checksum.h>
af1e1cf0 25#include <linux/netfilter.h>
1da177e4
LT
26#include <linux/netfilter_ipv4.h>
27
28#include <net/ip_vs.h>
29
30
31static struct ip_vs_conn *
51ef348b
JV
32tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
33 const struct ip_vs_iphdr *iph, unsigned int proto_off,
34 int inverse)
1da177e4 35{
014d730d 36 __be16 _ports[2], *pptr;
1da177e4
LT
37
38 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
39 if (pptr == NULL)
40 return NULL;
41
42 if (likely(!inverse)) {
28364a59
JV
43 return ip_vs_conn_in_get(af, iph->protocol,
44 &iph->saddr, pptr[0],
45 &iph->daddr, pptr[1]);
1da177e4 46 } else {
28364a59
JV
47 return ip_vs_conn_in_get(af, iph->protocol,
48 &iph->daddr, pptr[1],
49 &iph->saddr, pptr[0]);
1da177e4
LT
50 }
51}
52
53static struct ip_vs_conn *
51ef348b
JV
54tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
55 const struct ip_vs_iphdr *iph, unsigned int proto_off,
56 int inverse)
1da177e4 57{
014d730d 58 __be16 _ports[2], *pptr;
1da177e4
LT
59
60 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
61 if (pptr == NULL)
62 return NULL;
63
64 if (likely(!inverse)) {
28364a59
JV
65 return ip_vs_conn_out_get(af, iph->protocol,
66 &iph->saddr, pptr[0],
67 &iph->daddr, pptr[1]);
1da177e4 68 } else {
28364a59
JV
69 return ip_vs_conn_out_get(af, iph->protocol,
70 &iph->daddr, pptr[1],
71 &iph->saddr, pptr[0]);
1da177e4
LT
72 }
73}
74
75
76static int
51ef348b 77tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
1da177e4
LT
78 int *verdict, struct ip_vs_conn **cpp)
79{
80 struct ip_vs_service *svc;
81 struct tcphdr _tcph, *th;
3c2e0505 82 struct ip_vs_iphdr iph;
1da177e4 83
51ef348b 84 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
3c2e0505
JV
85
86 th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
1da177e4
LT
87 if (th == NULL) {
88 *verdict = NF_DROP;
89 return 0;
90 }
91
92 if (th->syn &&
51ef348b
JV
93 (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
94 th->dest))) {
1da177e4
LT
95 if (ip_vs_todrop()) {
96 /*
97 * It seems that we are very loaded.
98 * We have to drop this packet :(
99 */
100 ip_vs_service_put(svc);
101 *verdict = NF_DROP;
102 return 0;
103 }
104
105 /*
106 * Let the virtual server select a real server for the
107 * incoming connection, and create a connection entry.
108 */
109 *cpp = ip_vs_schedule(svc, skb);
110 if (!*cpp) {
111 *verdict = ip_vs_leave(svc, skb, pp);
112 return 0;
113 }
114 ip_vs_service_put(svc);
115 }
116 return 1;
117}
118
119
120static inline void
0bbdd42b
JV
121tcp_fast_csum_update(int af, struct tcphdr *tcph,
122 const union nf_inet_addr *oldip,
123 const union nf_inet_addr *newip,
014d730d 124 __be16 oldport, __be16 newport)
1da177e4 125{
0bbdd42b
JV
126#ifdef CONFIG_IP_VS_IPV6
127 if (af == AF_INET6)
128 tcph->check =
129 csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
130 ip_vs_check_diff2(oldport, newport,
131 ~csum_unfold(tcph->check))));
132 else
133#endif
1da177e4 134 tcph->check =
0bbdd42b 135 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
f9214b26
AV
136 ip_vs_check_diff2(oldport, newport,
137 ~csum_unfold(tcph->check))));
1da177e4
LT
138}
139
140
503e81f6
SH
141static inline void
142tcp_partial_csum_update(int af, struct tcphdr *tcph,
143 const union nf_inet_addr *oldip,
144 const union nf_inet_addr *newip,
145 __be16 oldlen, __be16 newlen)
146{
147#ifdef CONFIG_IP_VS_IPV6
148 if (af == AF_INET6)
149 tcph->check =
150 csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
151 ip_vs_check_diff2(oldlen, newlen,
152 ~csum_unfold(tcph->check))));
153 else
154#endif
155 tcph->check =
156 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
157 ip_vs_check_diff2(oldlen, newlen,
158 ~csum_unfold(tcph->check))));
159}
160
161
1da177e4 162static int
3db05fea 163tcp_snat_handler(struct sk_buff *skb,
1da177e4
LT
164 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
165{
166 struct tcphdr *tcph;
0bbdd42b 167 unsigned int tcphoff;
503e81f6 168 int oldlen;
0bbdd42b
JV
169
170#ifdef CONFIG_IP_VS_IPV6
171 if (cp->af == AF_INET6)
172 tcphoff = sizeof(struct ipv6hdr);
173 else
174#endif
175 tcphoff = ip_hdrlen(skb);
503e81f6 176 oldlen = skb->len - tcphoff;
1da177e4
LT
177
178 /* csum_check requires unshared skb */
3db05fea 179 if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
1da177e4
LT
180 return 0;
181
182 if (unlikely(cp->app != NULL)) {
183 /* Some checks before mangling */
0bbdd42b 184 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
1da177e4
LT
185 return 0;
186
187 /* Call application helper if needed */
3db05fea 188 if (!ip_vs_app_pkt_out(cp, skb))
1da177e4
LT
189 return 0;
190 }
191
0bbdd42b 192 tcph = (void *)skb_network_header(skb) + tcphoff;
1da177e4
LT
193 tcph->source = cp->vport;
194
195 /* Adjust TCP checksums */
503e81f6
SH
196 if (skb->ip_summed == CHECKSUM_PARTIAL) {
197 tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
ca62059b
HH
198 htons(oldlen),
199 htons(skb->len - tcphoff));
503e81f6 200 } else if (!cp->app) {
1da177e4 201 /* Only port and addr are changed, do fast csum update */
0bbdd42b 202 tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
1da177e4 203 cp->dport, cp->vport);
3db05fea
HX
204 if (skb->ip_summed == CHECKSUM_COMPLETE)
205 skb->ip_summed = CHECKSUM_NONE;
1da177e4
LT
206 } else {
207 /* full checksum calculation */
208 tcph->check = 0;
3db05fea 209 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
0bbdd42b
JV
210#ifdef CONFIG_IP_VS_IPV6
211 if (cp->af == AF_INET6)
212 tcph->check = csum_ipv6_magic(&cp->vaddr.in6,
213 &cp->caddr.in6,
214 skb->len - tcphoff,
215 cp->protocol, skb->csum);
216 else
217#endif
218 tcph->check = csum_tcpudp_magic(cp->vaddr.ip,
219 cp->caddr.ip,
220 skb->len - tcphoff,
221 cp->protocol,
222 skb->csum);
223
1da177e4
LT
224 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
225 pp->name, tcph->check,
226 (char*)&(tcph->check) - (char*)tcph);
227 }
228 return 1;
229}
230
231
232static int
3db05fea 233tcp_dnat_handler(struct sk_buff *skb,
1da177e4
LT
234 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
235{
236 struct tcphdr *tcph;
0bbdd42b 237 unsigned int tcphoff;
503e81f6 238 int oldlen;
0bbdd42b
JV
239
240#ifdef CONFIG_IP_VS_IPV6
241 if (cp->af == AF_INET6)
242 tcphoff = sizeof(struct ipv6hdr);
243 else
244#endif
245 tcphoff = ip_hdrlen(skb);
503e81f6 246 oldlen = skb->len - tcphoff;
1da177e4
LT
247
248 /* csum_check requires unshared skb */
3db05fea 249 if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
1da177e4
LT
250 return 0;
251
252 if (unlikely(cp->app != NULL)) {
253 /* Some checks before mangling */
0bbdd42b 254 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
1da177e4
LT
255 return 0;
256
257 /*
258 * Attempt ip_vs_app call.
259 * It will fix ip_vs_conn and iph ack_seq stuff
260 */
3db05fea 261 if (!ip_vs_app_pkt_in(cp, skb))
1da177e4
LT
262 return 0;
263 }
264
0bbdd42b 265 tcph = (void *)skb_network_header(skb) + tcphoff;
1da177e4
LT
266 tcph->dest = cp->dport;
267
268 /*
269 * Adjust TCP checksums
270 */
503e81f6
SH
271 if (skb->ip_summed == CHECKSUM_PARTIAL) {
272 tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
ca62059b
HH
273 htons(oldlen),
274 htons(skb->len - tcphoff));
503e81f6 275 } else if (!cp->app) {
1da177e4 276 /* Only port and addr are changed, do fast csum update */
0bbdd42b 277 tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
1da177e4 278 cp->vport, cp->dport);
3db05fea
HX
279 if (skb->ip_summed == CHECKSUM_COMPLETE)
280 skb->ip_summed = CHECKSUM_NONE;
1da177e4
LT
281 } else {
282 /* full checksum calculation */
283 tcph->check = 0;
3db05fea 284 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
0bbdd42b
JV
285#ifdef CONFIG_IP_VS_IPV6
286 if (cp->af == AF_INET6)
287 tcph->check = csum_ipv6_magic(&cp->caddr.in6,
288 &cp->daddr.in6,
289 skb->len - tcphoff,
290 cp->protocol, skb->csum);
291 else
292#endif
293 tcph->check = csum_tcpudp_magic(cp->caddr.ip,
294 cp->daddr.ip,
295 skb->len - tcphoff,
296 cp->protocol,
297 skb->csum);
3db05fea 298 skb->ip_summed = CHECKSUM_UNNECESSARY;
1da177e4
LT
299 }
300 return 1;
301}
302
303
304static int
51ef348b 305tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
1da177e4 306{
51ef348b
JV
307 unsigned int tcphoff;
308
309#ifdef CONFIG_IP_VS_IPV6
310 if (af == AF_INET6)
311 tcphoff = sizeof(struct ipv6hdr);
312 else
313#endif
314 tcphoff = ip_hdrlen(skb);
1da177e4
LT
315
316 switch (skb->ip_summed) {
317 case CHECKSUM_NONE:
318 skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
84fa7933 319 case CHECKSUM_COMPLETE:
51ef348b
JV
320#ifdef CONFIG_IP_VS_IPV6
321 if (af == AF_INET6) {
322 if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
323 &ipv6_hdr(skb)->daddr,
324 skb->len - tcphoff,
325 ipv6_hdr(skb)->nexthdr,
326 skb->csum)) {
327 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
328 "Failed checksum for");
329 return 0;
330 }
331 } else
332#endif
333 if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
334 ip_hdr(skb)->daddr,
335 skb->len - tcphoff,
336 ip_hdr(skb)->protocol,
337 skb->csum)) {
338 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
339 "Failed checksum for");
340 return 0;
341 }
1da177e4
LT
342 break;
343 default:
84fa7933 344 /* No need to checksum. */
1da177e4
LT
345 break;
346 }
347
348 return 1;
349}
350
351
352#define TCP_DIR_INPUT 0
353#define TCP_DIR_OUTPUT 4
354#define TCP_DIR_INPUT_ONLY 8
355
9b5b5cff 356static const int tcp_state_off[IP_VS_DIR_LAST] = {
1da177e4
LT
357 [IP_VS_DIR_INPUT] = TCP_DIR_INPUT,
358 [IP_VS_DIR_OUTPUT] = TCP_DIR_OUTPUT,
359 [IP_VS_DIR_INPUT_ONLY] = TCP_DIR_INPUT_ONLY,
360};
361
362/*
363 * Timeout table[state]
364 */
365static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
366 [IP_VS_TCP_S_NONE] = 2*HZ,
367 [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ,
368 [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ,
369 [IP_VS_TCP_S_SYN_RECV] = 1*60*HZ,
370 [IP_VS_TCP_S_FIN_WAIT] = 2*60*HZ,
371 [IP_VS_TCP_S_TIME_WAIT] = 2*60*HZ,
372 [IP_VS_TCP_S_CLOSE] = 10*HZ,
373 [IP_VS_TCP_S_CLOSE_WAIT] = 60*HZ,
374 [IP_VS_TCP_S_LAST_ACK] = 30*HZ,
375 [IP_VS_TCP_S_LISTEN] = 2*60*HZ,
376 [IP_VS_TCP_S_SYNACK] = 120*HZ,
377 [IP_VS_TCP_S_LAST] = 2*HZ,
378};
379
1da177e4
LT
380static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
381 [IP_VS_TCP_S_NONE] = "NONE",
382 [IP_VS_TCP_S_ESTABLISHED] = "ESTABLISHED",
383 [IP_VS_TCP_S_SYN_SENT] = "SYN_SENT",
384 [IP_VS_TCP_S_SYN_RECV] = "SYN_RECV",
385 [IP_VS_TCP_S_FIN_WAIT] = "FIN_WAIT",
386 [IP_VS_TCP_S_TIME_WAIT] = "TIME_WAIT",
387 [IP_VS_TCP_S_CLOSE] = "CLOSE",
388 [IP_VS_TCP_S_CLOSE_WAIT] = "CLOSE_WAIT",
389 [IP_VS_TCP_S_LAST_ACK] = "LAST_ACK",
390 [IP_VS_TCP_S_LISTEN] = "LISTEN",
391 [IP_VS_TCP_S_SYNACK] = "SYNACK",
392 [IP_VS_TCP_S_LAST] = "BUG!",
393};
394
395#define sNO IP_VS_TCP_S_NONE
396#define sES IP_VS_TCP_S_ESTABLISHED
397#define sSS IP_VS_TCP_S_SYN_SENT
398#define sSR IP_VS_TCP_S_SYN_RECV
399#define sFW IP_VS_TCP_S_FIN_WAIT
400#define sTW IP_VS_TCP_S_TIME_WAIT
401#define sCL IP_VS_TCP_S_CLOSE
402#define sCW IP_VS_TCP_S_CLOSE_WAIT
403#define sLA IP_VS_TCP_S_LAST_ACK
404#define sLI IP_VS_TCP_S_LISTEN
405#define sSA IP_VS_TCP_S_SYNACK
406
407struct tcp_states_t {
408 int next_state[IP_VS_TCP_S_LAST];
409};
410
411static const char * tcp_state_name(int state)
412{
413 if (state >= IP_VS_TCP_S_LAST)
414 return "ERR!";
415 return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
416}
417
418static struct tcp_states_t tcp_states [] = {
419/* INPUT */
420/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
421/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
422/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
423/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
424/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},
425
426/* OUTPUT */
427/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
428/*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
429/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
430/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
431/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
432
433/* INPUT-ONLY */
434/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
435/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
436/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
437/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
438/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
439};
440
441static struct tcp_states_t tcp_states_dos [] = {
442/* INPUT */
443/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
444/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
445/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
446/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
447/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
448
449/* OUTPUT */
450/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
451/*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
452/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
453/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
454/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},
455
456/* INPUT-ONLY */
457/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
458/*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
459/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
460/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
461/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
462};
463
464static struct tcp_states_t *tcp_state_table = tcp_states;
465
466
467static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
468{
469 int on = (flags & 1); /* secure_tcp */
470
471 /*
472 ** FIXME: change secure_tcp to independent sysctl var
473 ** or make it per-service or per-app because it is valid
474 ** for most if not for all of the applications. Something
475 ** like "capabilities" (flags) for each object.
476 */
477 tcp_state_table = (on? tcp_states_dos : tcp_states);
478}
479
480static int
481tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
482{
483 return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
484 tcp_state_name_table, sname, to);
485}
486
487static inline int tcp_state_idx(struct tcphdr *th)
488{
489 if (th->rst)
490 return 3;
491 if (th->syn)
492 return 0;
493 if (th->fin)
494 return 1;
495 if (th->ack)
496 return 2;
497 return -1;
498}
499
500static inline void
501set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
502 int direction, struct tcphdr *th)
503{
504 int state_idx;
505 int new_state = IP_VS_TCP_S_CLOSE;
506 int state_off = tcp_state_off[direction];
507
508 /*
509 * Update state offset to INPUT_ONLY if necessary
510 * or delete NO_OUTPUT flag if output packet detected
511 */
512 if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
513 if (state_off == TCP_DIR_OUTPUT)
514 cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
515 else
516 state_off = TCP_DIR_INPUT_ONLY;
517 }
518
519 if ((state_idx = tcp_state_idx(th)) < 0) {
520 IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", state_idx);
521 goto tcp_state_out;
522 }
523
524 new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];
525
526 tcp_state_out:
527 if (new_state != cp->state) {
528 struct ip_vs_dest *dest = cp->dest;
529
cfc78c5a
JV
530 IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
531 "%s:%d state: %s->%s conn->refcnt:%d\n",
532 pp->name,
533 ((state_off == TCP_DIR_OUTPUT) ?
534 "output " : "input "),
535 th->syn ? 'S' : '.',
536 th->fin ? 'F' : '.',
537 th->ack ? 'A' : '.',
538 th->rst ? 'R' : '.',
539 IP_VS_DBG_ADDR(cp->af, &cp->daddr),
540 ntohs(cp->dport),
541 IP_VS_DBG_ADDR(cp->af, &cp->caddr),
542 ntohs(cp->cport),
543 tcp_state_name(cp->state),
544 tcp_state_name(new_state),
545 atomic_read(&cp->refcnt));
546
1da177e4
LT
547 if (dest) {
548 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
549 (new_state != IP_VS_TCP_S_ESTABLISHED)) {
550 atomic_dec(&dest->activeconns);
551 atomic_inc(&dest->inactconns);
552 cp->flags |= IP_VS_CONN_F_INACTIVE;
553 } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
554 (new_state == IP_VS_TCP_S_ESTABLISHED)) {
555 atomic_inc(&dest->activeconns);
556 atomic_dec(&dest->inactconns);
557 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
558 }
559 }
560 }
561
562 cp->timeout = pp->timeout_table[cp->state = new_state];
563}
564
565
566/*
567 * Handle state transitions
568 */
569static int
570tcp_state_transition(struct ip_vs_conn *cp, int direction,
571 const struct sk_buff *skb,
572 struct ip_vs_protocol *pp)
573{
574 struct tcphdr _tcph, *th;
575
0bbdd42b
JV
576#ifdef CONFIG_IP_VS_IPV6
577 int ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
578#else
579 int ihl = ip_hdrlen(skb);
580#endif
581
582 th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph);
1da177e4
LT
583 if (th == NULL)
584 return 0;
585
586 spin_lock(&cp->lock);
587 set_tcp_state(pp, cp, direction, th);
588 spin_unlock(&cp->lock);
589
590 return 1;
591}
592
593
594/*
595 * Hash table for TCP application incarnations
596 */
597#define TCP_APP_TAB_BITS 4
598#define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
599#define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
600
601static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
602static DEFINE_SPINLOCK(tcp_app_lock);
603
75e7ce66 604static inline __u16 tcp_app_hashkey(__be16 port)
1da177e4 605{
75e7ce66
AV
606 return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
607 & TCP_APP_TAB_MASK;
1da177e4
LT
608}
609
610
611static int tcp_register_app(struct ip_vs_app *inc)
612{
613 struct ip_vs_app *i;
75e7ce66
AV
614 __u16 hash;
615 __be16 port = inc->port;
1da177e4
LT
616 int ret = 0;
617
618 hash = tcp_app_hashkey(port);
619
620 spin_lock_bh(&tcp_app_lock);
621 list_for_each_entry(i, &tcp_apps[hash], p_list) {
622 if (i->port == port) {
623 ret = -EEXIST;
624 goto out;
625 }
626 }
627 list_add(&inc->p_list, &tcp_apps[hash]);
628 atomic_inc(&ip_vs_protocol_tcp.appcnt);
629
630 out:
631 spin_unlock_bh(&tcp_app_lock);
632 return ret;
633}
634
635
636static void
637tcp_unregister_app(struct ip_vs_app *inc)
638{
639 spin_lock_bh(&tcp_app_lock);
640 atomic_dec(&ip_vs_protocol_tcp.appcnt);
641 list_del(&inc->p_list);
642 spin_unlock_bh(&tcp_app_lock);
643}
644
645
646static int
647tcp_app_conn_bind(struct ip_vs_conn *cp)
648{
649 int hash;
650 struct ip_vs_app *inc;
651 int result = 0;
652
653 /* Default binding: bind app only for NAT */
654 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
655 return 0;
656
657 /* Lookup application incarnations and bind the right one */
658 hash = tcp_app_hashkey(cp->vport);
659
660 spin_lock(&tcp_app_lock);
661 list_for_each_entry(inc, &tcp_apps[hash], p_list) {
662 if (inc->port == cp->vport) {
663 if (unlikely(!ip_vs_app_inc_get(inc)))
664 break;
665 spin_unlock(&tcp_app_lock);
666
cfc78c5a
JV
667 IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
668 "%s:%u to app %s on port %u\n",
669 __func__,
670 IP_VS_DBG_ADDR(cp->af, &cp->caddr),
671 ntohs(cp->cport),
672 IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
673 ntohs(cp->vport),
674 inc->name, ntohs(inc->port));
675
1da177e4
LT
676 cp->app = inc;
677 if (inc->init_conn)
678 result = inc->init_conn(inc, cp);
679 goto out;
680 }
681 }
682 spin_unlock(&tcp_app_lock);
683
684 out:
685 return result;
686}
687
688
689/*
690 * Set LISTEN timeout. (ip_vs_conn_put will setup timer)
691 */
692void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
693{
694 spin_lock(&cp->lock);
695 cp->state = IP_VS_TCP_S_LISTEN;
696 cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
697 spin_unlock(&cp->lock);
698}
699
700
ba602a81 701static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
1da177e4
LT
702{
703 IP_VS_INIT_HASH_TABLE(tcp_apps);
704 pp->timeout_table = tcp_timeouts;
705}
706
707
/* Protocol exit hook: nothing to release for TCP. */
static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
{
	/* intentionally empty */
}
711
712
713struct ip_vs_protocol ip_vs_protocol_tcp = {
714 .name = "TCP",
715 .protocol = IPPROTO_TCP,
2ad17def 716 .num_states = IP_VS_TCP_S_LAST,
1da177e4
LT
717 .dont_defrag = 0,
718 .appcnt = ATOMIC_INIT(0),
ba602a81
DM
719 .init = ip_vs_tcp_init,
720 .exit = ip_vs_tcp_exit,
1da177e4
LT
721 .register_app = tcp_register_app,
722 .unregister_app = tcp_unregister_app,
723 .conn_schedule = tcp_conn_schedule,
724 .conn_in_get = tcp_conn_in_get,
725 .conn_out_get = tcp_conn_out_get,
726 .snat_handler = tcp_snat_handler,
727 .dnat_handler = tcp_dnat_handler,
728 .csum_check = tcp_csum_check,
729 .state_name = tcp_state_name,
730 .state_transition = tcp_state_transition,
731 .app_conn_bind = tcp_app_conn_bind,
732 .debug_packet = ip_vs_tcpudp_debug_packet,
733 .timeout_change = tcp_timeout_change,
734 .set_state_timeout = tcp_set_state_timeout,
735};