Commit | Line | Data |
---|---|---|
f4bc17cd JA |
1 | /* |
2 | * ip_vs_nfct.c: Netfilter connection tracking support for IPVS | |
3 | * | |
4 | * Portions Copyright (C) 2001-2002 | |
5 | * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland. | |
6 | * | |
7 | * Portions Copyright (C) 2003-2010 | |
8 | * Julian Anastasov | |
9 | * | |
10 | * | |
11 | * This code is free software; you can redistribute it and/or modify | |
12 | * it under the terms of the GNU General Public License as published by | |
13 | * the Free Software Foundation; either version 2 of the License, or | |
14 | * (at your option) any later version. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, | |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | * GNU General Public License for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public License | |
22 | * along with this program; if not, write to the Free Software | |
23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
24 | * | |
25 | * | |
26 | * Authors: | |
27 | * Ben North <ben@redfrontdoor.org> | |
28 | * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels | |
29 | * Hannes Eder <heder@google.com> Extend NFCT support for FTP, ipvs match | |
30 | * | |
31 | * | |
32 | * Current status: | |
33 | * | |
34 | * - provide conntrack confirmation for new and related connections, so | |
35 | * that their proper conntrack state is visible in all hooks | |
36 | * - support for all forwarding methods, not only NAT | |
37 | * - FTP support (NAT), ability to support other NAT apps with expectations | |
38 | * - to correctly create expectations for related NAT connections the proper | |
39 | * NF conntrack support must be already installed, eg. ip_vs_ftp requires | |
40 | * nf_conntrack_ftp ... iptables_nat for the same ports (but no iptables | |
41 | * NAT rules are needed) | |
42 | * - alter reply for NAT when forwarding packet in original direction: | |
43 | * conntrack from client in NEW or RELATED (Passive FTP DATA) state or | |
44 | * when RELATED conntrack is created from real server (Active FTP DATA) | |
45 | * - if iptables_nat is not loaded the Passive FTP will not work (the | |
46 | * PASV response can not be NAT-ed) but Active FTP should work | |
47 | * | |
48 | */ | |
49 | ||
50 | #define KMSG_COMPONENT "IPVS" | |
51 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | |
52 | ||
53 | #include <linux/module.h> | |
54 | #include <linux/types.h> | |
55 | #include <linux/kernel.h> | |
56 | #include <linux/errno.h> | |
57 | #include <linux/compiler.h> | |
58 | #include <linux/vmalloc.h> | |
59 | #include <linux/skbuff.h> | |
60 | #include <net/ip.h> | |
61 | #include <linux/netfilter.h> | |
62 | #include <linux/netfilter_ipv4.h> | |
63 | #include <net/ip_vs.h> | |
64 | #include <net/netfilter/nf_conntrack_core.h> | |
65 | #include <net/netfilter/nf_conntrack_expect.h> | |
66 | #include <net/netfilter/nf_conntrack_helper.h> | |
67 | #include <net/netfilter/nf_conntrack_zones.h> | |
68 | ||
69 | ||
/*
 * Debug helpers: a printk format string plus the matching argument list.
 *
 * FMT_TUPLE/ARG_TUPLE print a conntrack tuple as
 *	src-ip:src-port->dst-ip:dst-port/protonum
 * Only the IPv4 member (u3.ip) of the address union is printed.
 */
#define FMT_TUPLE	"%pI4:%u->%pI4:%u/%u"
#define ARG_TUPLE(tuple)					\
	&(tuple)->src.u3.ip, ntohs((tuple)->src.u.all),		\
	&(tuple)->dst.u3.ip, ntohs((tuple)->dst.u.all),		\
	(tuple)->dst.protonum

/*
 * FMT_CONN/ARG_CONN print an IPVS connection as
 *	caddr:cport->vaddr:vport->daddr:dport/protocol:state
 * Again only the IPv4 member (.ip) of each address is printed.
 */
#define FMT_CONN	"%pI4:%u->%pI4:%u->%pI4:%u/%u:%u"
#define ARG_CONN(conn)						\
	&((conn)->caddr.ip), ntohs((conn)->cport),		\
	&((conn)->vaddr.ip), ntohs((conn)->vport),		\
	&((conn)->daddr.ip), ntohs((conn)->dport),		\
	(conn)->protocol, (conn)->state
80 | ||
81 | void | |
82 | ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) | |
83 | { | |
84 | enum ip_conntrack_info ctinfo; | |
05b4b065 | 85 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
f4bc17cd JA |
86 | struct nf_conntrack_tuple new_tuple; |
87 | ||
88 | if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) || | |
89 | nf_ct_is_dying(ct)) | |
90 | return; | |
91 | ||
92 | /* Never alter conntrack for non-NAT conns */ | |
93 | if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) | |
94 | return; | |
95 | ||
96 | /* Alter reply only in original direction */ | |
97 | if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) | |
98 | return; | |
99 | ||
100 | /* | |
101 | * The connection is not yet in the hashtable, so we update it. | |
102 | * CIP->VIP will remain the same, so leave the tuple in | |
103 | * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the | |
104 | * real-server we will see RIP->DIP. | |
105 | */ | |
106 | new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | |
107 | /* | |
108 | * This will also take care of UDP and other protocols. | |
109 | */ | |
110 | if (outin) { | |
111 | new_tuple.src.u3 = cp->daddr; | |
112 | if (new_tuple.dst.protonum != IPPROTO_ICMP && | |
113 | new_tuple.dst.protonum != IPPROTO_ICMPV6) | |
114 | new_tuple.src.u.tcp.port = cp->dport; | |
115 | } else { | |
116 | new_tuple.dst.u3 = cp->vaddr; | |
117 | if (new_tuple.dst.protonum != IPPROTO_ICMP && | |
118 | new_tuple.dst.protonum != IPPROTO_ICMPV6) | |
119 | new_tuple.dst.u.tcp.port = cp->vport; | |
120 | } | |
121 | IP_VS_DBG(7, "%s: Updating conntrack ct=%p, status=0x%lX, " | |
122 | "ctinfo=%d, old reply=" FMT_TUPLE | |
123 | ", new reply=" FMT_TUPLE ", cp=" FMT_CONN "\n", | |
124 | __func__, ct, ct->status, ctinfo, | |
125 | ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple), | |
126 | ARG_TUPLE(&new_tuple), ARG_CONN(cp)); | |
127 | nf_conntrack_alter_reply(ct, &new_tuple); | |
128 | } | |
129 | ||
/*
 * Confirm the conntrack entry attached to @skb.
 *
 * Thin wrapper: returns whatever nf_conntrack_confirm() returns.
 */
int ip_vs_confirm_conntrack(struct sk_buff *skb)
{
	int verdict = nf_conntrack_confirm(skb);

	return verdict;
}
134 | ||
135 | /* | |
136 | * Called from init_conntrack() as expectfn handler. | |
137 | */ | |
138 | static void ip_vs_nfct_expect_callback(struct nf_conn *ct, | |
139 | struct nf_conntrack_expect *exp) | |
140 | { | |
141 | struct nf_conntrack_tuple *orig, new_reply; | |
142 | struct ip_vs_conn *cp; | |
f11017ec | 143 | struct ip_vs_conn_param p; |
6e67e586 | 144 | struct net *net = nf_ct_net(ct); |
f4bc17cd JA |
145 | |
146 | if (exp->tuple.src.l3num != PF_INET) | |
147 | return; | |
148 | ||
149 | /* | |
150 | * We assume that no NF locks are held before this callback. | |
151 | * ip_vs_conn_out_get and ip_vs_conn_in_get should match their | |
152 | * expectations even if they use wildcard values, now we provide the | |
153 | * actual values from the newly created original conntrack direction. | |
154 | * The conntrack is confirmed when packet reaches IPVS hooks. | |
155 | */ | |
156 | ||
157 | /* RS->CLIENT */ | |
158 | orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; | |
6e67e586 | 159 | ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum, |
f11017ec SH |
160 | &orig->src.u3, orig->src.u.tcp.port, |
161 | &orig->dst.u3, orig->dst.u.tcp.port, &p); | |
162 | cp = ip_vs_conn_out_get(&p); | |
f4bc17cd JA |
163 | if (cp) { |
164 | /* Change reply CLIENT->RS to CLIENT->VS */ | |
165 | new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | |
166 | IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " | |
167 | FMT_TUPLE ", found inout cp=" FMT_CONN "\n", | |
168 | __func__, ct, ct->status, | |
169 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | |
170 | ARG_CONN(cp)); | |
171 | new_reply.dst.u3 = cp->vaddr; | |
172 | new_reply.dst.u.tcp.port = cp->vport; | |
173 | IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE | |
174 | ", inout cp=" FMT_CONN "\n", | |
175 | __func__, ct, | |
176 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | |
177 | ARG_CONN(cp)); | |
178 | goto alter; | |
179 | } | |
180 | ||
181 | /* CLIENT->VS */ | |
f11017ec | 182 | cp = ip_vs_conn_in_get(&p); |
f4bc17cd JA |
183 | if (cp) { |
184 | /* Change reply VS->CLIENT to RS->CLIENT */ | |
185 | new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | |
186 | IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " | |
187 | FMT_TUPLE ", found outin cp=" FMT_CONN "\n", | |
188 | __func__, ct, ct->status, | |
189 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | |
190 | ARG_CONN(cp)); | |
191 | new_reply.src.u3 = cp->daddr; | |
192 | new_reply.src.u.tcp.port = cp->dport; | |
193 | IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " | |
194 | FMT_TUPLE ", outin cp=" FMT_CONN "\n", | |
195 | __func__, ct, | |
196 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | |
197 | ARG_CONN(cp)); | |
198 | goto alter; | |
199 | } | |
200 | ||
201 | IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE | |
202 | " - unknown expect\n", | |
203 | __func__, ct, ct->status, ARG_TUPLE(orig)); | |
204 | return; | |
205 | ||
206 | alter: | |
207 | /* Never alter conntrack for non-NAT conns */ | |
208 | if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ) | |
209 | nf_conntrack_alter_reply(ct, &new_reply); | |
210 | ip_vs_conn_put(cp); | |
211 | return; | |
212 | } | |
213 | ||
214 | /* | |
215 | * Create NF conntrack expectation with wildcard (optional) source port. | |
216 | * Then the default callback function will alter the reply and will confirm | |
217 | * the conntrack entry when the first packet comes. | |
218 | * Use port 0 to expect connection from any port. | |
219 | */ | |
220 | void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct, | |
221 | struct ip_vs_conn *cp, u_int8_t proto, | |
222 | const __be16 port, int from_rs) | |
223 | { | |
224 | struct nf_conntrack_expect *exp; | |
225 | ||
226 | if (ct == NULL || nf_ct_is_untracked(ct)) | |
227 | return; | |
228 | ||
229 | exp = nf_ct_expect_alloc(ct); | |
230 | if (!exp) | |
231 | return; | |
232 | ||
233 | nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), | |
234 | from_rs ? &cp->daddr : &cp->caddr, | |
235 | from_rs ? &cp->caddr : &cp->vaddr, | |
236 | proto, port ? &port : NULL, | |
237 | from_rs ? &cp->cport : &cp->vport); | |
238 | ||
239 | exp->expectfn = ip_vs_nfct_expect_callback; | |
240 | ||
241 | IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n", | |
242 | __func__, ct, ARG_TUPLE(&exp->tuple)); | |
243 | nf_ct_expect_related(exp); | |
244 | nf_ct_expect_put(exp); | |
245 | } | |
246 | EXPORT_SYMBOL(ip_vs_nfct_expect_related); | |
247 | ||
248 | /* | |
249 | * Our connection was terminated, try to drop the conntrack immediately | |
250 | */ | |
251 | void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) | |
252 | { | |
253 | struct nf_conntrack_tuple_hash *h; | |
254 | struct nf_conn *ct; | |
255 | struct nf_conntrack_tuple tuple; | |
256 | ||
257 | if (!cp->cport) | |
258 | return; | |
259 | ||
260 | tuple = (struct nf_conntrack_tuple) { | |
261 | .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } }; | |
262 | tuple.src.u3 = cp->caddr; | |
263 | tuple.src.u.all = cp->cport; | |
264 | tuple.src.l3num = cp->af; | |
265 | tuple.dst.u3 = cp->vaddr; | |
266 | tuple.dst.u.all = cp->vport; | |
267 | ||
268 | IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE | |
269 | " for conn " FMT_CONN "\n", | |
270 | __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); | |
271 | ||
6e67e586 HS |
272 | h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE, |
273 | &tuple); | |
f4bc17cd JA |
274 | if (h) { |
275 | ct = nf_ct_tuplehash_to_ctrack(h); | |
276 | /* Show what happens instead of calling nf_ct_kill() */ | |
277 | if (del_timer(&ct->timeout)) { | |
278 | IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple=" | |
279 | FMT_TUPLE "\n", | |
280 | __func__, ct, ARG_TUPLE(&tuple)); | |
281 | if (ct->timeout.function) | |
282 | ct->timeout.function(ct->timeout.data); | |
283 | } else { | |
284 | IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple=" | |
285 | FMT_TUPLE "\n", | |
286 | __func__, ct, ARG_TUPLE(&tuple)); | |
287 | } | |
288 | nf_ct_put(ct); | |
289 | } else { | |
290 | IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n", | |
291 | __func__, ARG_TUPLE(&tuple)); | |
292 | } | |
293 | } | |
294 |