1 /* ip_nat_helper.c - generic support functions for NAT helpers
3 * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
4 * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
10 #include <linux/module.h>
11 #include <linux/kmod.h>
12 #include <linux/types.h>
13 #include <linux/timer.h>
14 #include <linux/skbuff.h>
15 #include <linux/tcp.h>
16 #include <linux/udp.h>
17 #include <net/checksum.h>
20 #include <linux/netfilter_ipv4.h>
21 #include <net/netfilter/nf_conntrack.h>
22 #include <net/netfilter/nf_conntrack_helper.h>
23 #include <net/netfilter/nf_conntrack_expect.h>
24 #include <net/netfilter/nf_nat.h>
25 #include <net/netfilter/nf_nat_protocol.h>
26 #include <net/netfilter/nf_nat_core.h>
27 #include <net/netfilter/nf_nat_helper.h>
/*
 * Debug helpers and the lock protecting sequence-offset bookkeeping.
 *
 * DEBUGP(format, args...) and the second DUMP_OFFSET(x) expand to nothing;
 * the first DUMP_OFFSET(x) prints offset_before, offset_after and
 * correction_pos of the object x points to via printk().
 *
 * NOTE(review): DUMP_OFFSET is defined twice below.  The two definitions
 * look like alternative branches of a preprocessor conditional whose
 * #if/#else/#endif lines are not visible in this listing - confirm against
 * the complete file.
 *
 * nf_nat_seqofs_lock is taken with spin_lock_bh()/spin_unlock_bh() in
 * adjust_tcp_sequence() around the update of a struct nf_nat_seq entry.
 */
31 #define DUMP_OFFSET(x) printk("offset_before=%d, offset_after=%d, correction_pos=%u\n", x->offset_before, x->offset_after, x->correction_pos);
33 #define DEBUGP(format, args...)
34 #define DUMP_OFFSET(x)
37 static DEFINE_SPINLOCK(nf_nat_seqofs_lock
);
/*
 * adjust_tcp_sequence - record a sequence-offset change for one direction
 * of a NATed connection.
 *
 * The NAT state is fetched with nfct_nat(ct), the direction is derived
 * from ctinfo with CTINFO2DIR(), and nat->info.seq[dir] is selected as
 * this_way.  With nf_nat_seqofs_lock held (spin_lock_bh), when
 * this_way->offset_before equals this_way->offset_after, or when
 * this_way->correction_pos is before() seq, the entry is updated:
 *
 *   correction_pos = seq
 *   offset_before  = old offset_after
 *   offset_after  += sizediff
 *
 * Otherwise the entry is left unchanged.  other_way is assigned but not
 * read in the lines visible here.
 *
 * NOTE(review): this listing is missing lines - the return type, the
 * declarations of ct, sizediff and dir, the closing braces and the end of
 * the "SYN adjust" comment are not visible.  Confirm against the complete
 * file before relying on details not shown.
 */
39 /* Setup TCP sequence correction given this change at this sequence */
41 adjust_tcp_sequence(u32 seq
,
44 enum ip_conntrack_info ctinfo
)
47 struct nf_nat_seq
*this_way
, *other_way
;
48 struct nf_conn_nat
*nat
= nfct_nat(ct
);
50 DEBUGP("nf_nat_resize_packet: old_size = %u, new_size = %u\n",
51 (*skb
)->len
, new_size
);
53 dir
= CTINFO2DIR(ctinfo
);
55 this_way
= &nat
->info
.seq
[dir
];
56 other_way
= &nat
->info
.seq
[!dir
];
58 DEBUGP("nf_nat_resize_packet: Seq_offset before: ");
59 DUMP_OFFSET(this_way
);
61 spin_lock_bh(&nf_nat_seqofs_lock
);
63 /* SYN adjust. If it's uninitialized, or this is after last
64 * correction, record it: we don't handle more than one
65 * adjustment in the window, but do deal with common case of a
67 if (this_way
->offset_before
== this_way
->offset_after
||
68 before(this_way
->correction_pos
, seq
)) {
69 this_way
->correction_pos
= seq
;
70 this_way
->offset_before
= this_way
->offset_after
;
71 this_way
->offset_after
+= sizediff
;
73 spin_unlock_bh(&nf_nat_seqofs_lock
);
75 DEBUGP("nf_nat_resize_packet: Seq_offset after: ");
76 DUMP_OFFSET(this_way
);
/*
 * mangle_contents - replace match_len bytes at match_offset in the payload
 * of a linear skb with rep_len bytes from rep_buffer.
 *
 * Steps visible below:
 *  - BUG_ON() if the skb is non-linear.
 *  - data = skb_network_header(skb) + dataoff, so match_offset is relative
 *    to dataoff bytes past the network header.
 *  - memmove() shifts everything from the end of the matched region up to
 *    skb->tail so that it starts right after the replacement; memmove is
 *    used because source and destination overlap.
 *  - memcpy() writes the replacement bytes.
 *  - If rep_len > match_len the skb is grown with skb_put(); the other
 *    visible branch shrinks it with __skb_trim().
 *  - The IP header tot_len is set from skb->len and the header checksum
 *    is recomputed with ip_send_check().
 *
 * The caller must guarantee enough tailroom before growing; nothing here
 * checks it.
 *
 * NOTE(review): the dataoff and rep_len parameters, the local declaration
 * of data, the "else" line and the braces are not visible in this
 * listing - confirm against the complete file.
 */
79 /* Frobs data inside this packet, which is linear. */
80 static void mangle_contents(struct sk_buff
*skb
,
82 unsigned int match_offset
,
83 unsigned int match_len
,
84 const char *rep_buffer
,
89 BUG_ON(skb_is_nonlinear(skb
));
90 data
= skb_network_header(skb
) + dataoff
;
92 /* move post-replacement */
93 memmove(data
+ match_offset
+ rep_len
,
94 data
+ match_offset
+ match_len
,
95 skb
->tail
- (data
+ match_offset
+ match_len
));
97 /* insert data from buffer */
98 memcpy(data
+ match_offset
, rep_buffer
, rep_len
);
100 /* update skb info */
101 if (rep_len
> match_len
) {
102 DEBUGP("nf_nat_mangle_packet: Extending packet by "
103 "%u from %u bytes\n", rep_len
- match_len
,
105 skb_put(skb
, rep_len
- match_len
);
107 DEBUGP("nf_nat_mangle_packet: Shrinking packet from "
108 "%u from %u bytes\n", match_len
- rep_len
,
110 __skb_trim(skb
, skb
->len
+ rep_len
- match_len
);
113 /* fix IP hdr checksum information */
114 ip_hdr(skb
)->tot_len
= htons(skb
->len
);
115 ip_send_check(ip_hdr(skb
));
/*
 * enlarge_skb - obtain a copy of *pskb with `extra` additional bytes of
 * tailroom.
 *
 * Visible behaviour:
 *  - The request is checked against the 65535-byte limit
 *    ((*pskb)->len + extra > 65535); the action taken on that branch is
 *    not visible here.
 *  - skb_copy_expand() is called with the current headroom, `extra` bytes
 *    of tailroom and GFP_ATOMIC.
 *  - skb_set_owner_w() attaches the original skb's socket to the copy.
 *
 * Callers in this file test the result with "!enlarge_skb(...)", i.e. a
 * zero return is treated as failure.
 *
 * NOTE(review): the allocation-failure check, the release of the old skb,
 * the update of *pskb and the return statements are not visible in this
 * listing - confirm against the complete file.
 */
118 /* Unusual, but possible case. */
119 static int enlarge_skb(struct sk_buff
**pskb
, unsigned int extra
)
121 struct sk_buff
*nskb
;
123 if ((*pskb
)->len
+ extra
> 65535)
126 nskb
= skb_copy_expand(*pskb
, skb_headroom(*pskb
), extra
, GFP_ATOMIC
);
130 /* Transfer socket to new skb. */
132 skb_set_owner_w(nskb
, (*pskb
)->sk
);
/*
 * nf_nat_mangle_tcp_packet - replace match_len payload bytes at
 * match_offset in a TCP packet with rep_len bytes from rep_buffer
 * (exported symbol).
 *
 * Sequence of operations visible below:
 *  1. skb_make_writable() over the whole packet.
 *  2. If the packet grows (rep_len > match_len) by more than the available
 *     tailroom, enlarge_skb() is called for the difference.
 *  3. SKB_LINEAR_ASSERT() on the resulting skb.
 *  4. tcph is located iph->ihl*4 bytes after iph; oldlen is the packet
 *     length minus the IP header before mangling.
 *  5. mangle_contents() rewrites the payload, with the data offset set to
 *     IP header length + TCP header length (tcph->doff*4).
 *  6. datalen is the packet length minus the IP header after mangling.
 *     When ip_summed != CHECKSUM_PARTIAL the TCP checksum is recomputed
 *     with tcp_v4_check()/csum_partial(); the other visible call,
 *     nf_proto_csum_replace2(), patches the checksum for the
 *     oldlen -> datalen length change.
 *  7. If the payload length changed, IPS_SEQ_ADJUST_BIT is set in
 *     ct->status, adjust_tcp_sequence() records the size difference at
 *     ntohl(tcph->seq), and nf_conntrack_tcp_update() is called for the
 *     packet's direction.
 *
 * NOTE(review): the return type, the ct parameter, local declarations,
 * the assignment of iph, the failure-path returns, the "else" line and the
 * terminator of the original comment below are not visible in this
 * listing - confirm against the complete file.
 */
138 /* Generic function for mangling variable-length address changes inside
139 * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
142 * Takes care about all the nasty sequence number changes, checksumming,
143 * skb enlargement, ...
147 nf_nat_mangle_tcp_packet(struct sk_buff
**pskb
,
149 enum ip_conntrack_info ctinfo
,
150 unsigned int match_offset
,
151 unsigned int match_len
,
152 const char *rep_buffer
,
153 unsigned int rep_len
)
159 if (!skb_make_writable(pskb
, (*pskb
)->len
))
162 if (rep_len
> match_len
&&
163 rep_len
- match_len
> skb_tailroom(*pskb
) &&
164 !enlarge_skb(pskb
, rep_len
- match_len
))
167 SKB_LINEAR_ASSERT(*pskb
);
170 tcph
= (void *)iph
+ iph
->ihl
*4;
172 oldlen
= (*pskb
)->len
- iph
->ihl
*4;
173 mangle_contents(*pskb
, iph
->ihl
*4 + tcph
->doff
*4,
174 match_offset
, match_len
, rep_buffer
, rep_len
);
176 datalen
= (*pskb
)->len
- iph
->ihl
*4;
177 if ((*pskb
)->ip_summed
!= CHECKSUM_PARTIAL
) {
179 tcph
->check
= tcp_v4_check(datalen
,
180 iph
->saddr
, iph
->daddr
,
181 csum_partial((char *)tcph
,
184 nf_proto_csum_replace2(&tcph
->check
, *pskb
,
185 htons(oldlen
), htons(datalen
), 1);
187 if (rep_len
!= match_len
) {
188 set_bit(IPS_SEQ_ADJUST_BIT
, &ct
->status
);
189 adjust_tcp_sequence(ntohl(tcph
->seq
),
190 (int)rep_len
- (int)match_len
,
192 /* Tell TCP window tracking about seq change */
193 nf_conntrack_tcp_update(*pskb
, ip_hdrlen(*pskb
),
194 ct
, CTINFO2DIR(ctinfo
));
198 EXPORT_SYMBOL(nf_nat_mangle_tcp_packet
);
/*
 * nf_nat_mangle_udp_packet - replace match_len payload bytes at
 * match_offset in a UDP packet with rep_len bytes from rep_buffer
 * (exported symbol).
 *
 * Sequence of operations visible below:
 *  1. Sanity check that the packet is at least IP header + UDP header +
 *     match_offset + match_len bytes long, since a helper may have been
 *     handed the wrong packet.
 *  2. skb_make_writable() over the whole packet.
 *  3. If the packet grows by more than the available tailroom,
 *     enlarge_skb() is called for the difference.
 *  4. udph is located iph->ihl*4 bytes after iph; oldlen is the packet
 *     length minus the IP header before mangling.
 *  5. mangle_contents() rewrites the payload, with the data offset set to
 *     IP header length + sizeof(struct udphdr).
 *  6. udph->len is set to the new length minus the IP header.
 *  7. Checksum handling: a test for "no checksum present and not
 *     CHECKSUM_PARTIAL" comes first (its body is not visible).  When
 *     ip_summed != CHECKSUM_PARTIAL the checksum is recomputed with
 *     csum_tcpudp_magic()/csum_partial(), and a store of CSUM_MANGLED_0
 *     follows (its guarding condition is not visible).  The remaining
 *     visible call, nf_proto_csum_replace2(), patches the checksum for the
 *     oldlen -> datalen change.
 *
 * NOTE(review): the return type, the ct parameter, local declarations,
 * the assignment of iph, several branch bodies/returns and the terminator
 * of the original comment below are not visible in this listing - confirm
 * against the complete file.
 */
200 /* Generic function for mangling variable-length address changes inside
201 * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
202 * command in the Amanda protocol)
204 * Takes care about all the nasty sequence number changes, checksumming,
205 * skb enlargement, ...
207 * XXX - This function could be merged with nf_nat_mangle_tcp_packet which
208 * should be fairly easy to do.
211 nf_nat_mangle_udp_packet(struct sk_buff
**pskb
,
213 enum ip_conntrack_info ctinfo
,
214 unsigned int match_offset
,
215 unsigned int match_len
,
216 const char *rep_buffer
,
217 unsigned int rep_len
)
223 /* UDP helpers might accidentally mangle the wrong packet */
225 if ((*pskb
)->len
< iph
->ihl
*4 + sizeof(*udph
) +
226 match_offset
+ match_len
)
229 if (!skb_make_writable(pskb
, (*pskb
)->len
))
232 if (rep_len
> match_len
&&
233 rep_len
- match_len
> skb_tailroom(*pskb
) &&
234 !enlarge_skb(pskb
, rep_len
- match_len
))
238 udph
= (void *)iph
+ iph
->ihl
*4;
240 oldlen
= (*pskb
)->len
- iph
->ihl
*4;
241 mangle_contents(*pskb
, iph
->ihl
*4 + sizeof(*udph
),
242 match_offset
, match_len
, rep_buffer
, rep_len
);
244 /* update the length of the UDP packet */
245 datalen
= (*pskb
)->len
- iph
->ihl
*4;
246 udph
->len
= htons(datalen
);
248 /* fix udp checksum if udp checksum was previously calculated */
249 if (!udph
->check
&& (*pskb
)->ip_summed
!= CHECKSUM_PARTIAL
)
252 if ((*pskb
)->ip_summed
!= CHECKSUM_PARTIAL
) {
254 udph
->check
= csum_tcpudp_magic(iph
->saddr
, iph
->daddr
,
255 datalen
, IPPROTO_UDP
,
256 csum_partial((char *)udph
,
259 udph
->check
= CSUM_MANGLED_0
;
261 nf_proto_csum_replace2(&udph
->check
, *pskb
,
262 htons(oldlen
), htons(datalen
), 1);
266 EXPORT_SYMBOL(nf_nat_mangle_udp_packet
);
/*
 * sack_adjust - rewrite every SACK block in one TCP SACK option.
 *
 * Walks skb->data from byte offset sackoff up to (not including) sackend
 * in steps of sizeof(struct tcp_sack_block_wire).  For each block, both
 * start_seq and end_seq are translated using natseq:
 *
 *   - if (value - natseq->offset_before) is after() natseq->correction_pos
 *     the new value is value - natseq->offset_after;
 *   - the other visible assignment uses value - natseq->offset_before.
 *
 * tcph->check is patched for each changed field with
 * nf_proto_csum_replace4() (last argument 0) BEFORE the field is
 * overwritten, because the old value is needed for the incremental update.
 *
 * All arithmetic is done in host byte order (ntohl) and stored back in
 * network byte order (htonl).
 *
 * NOTE(review): the return type, the tcph parameter, the "else" lines and
 * the braces are not visible in this listing - confirm against the
 * complete file.
 */
268 /* Adjust one found SACK option including checksum correction */
270 sack_adjust(struct sk_buff
*skb
,
272 unsigned int sackoff
,
273 unsigned int sackend
,
274 struct nf_nat_seq
*natseq
)
276 while (sackoff
< sackend
) {
277 struct tcp_sack_block_wire
*sack
;
278 __be32 new_start_seq
, new_end_seq
;
280 sack
= (void *)skb
->data
+ sackoff
;
281 if (after(ntohl(sack
->start_seq
) - natseq
->offset_before
,
282 natseq
->correction_pos
))
283 new_start_seq
= htonl(ntohl(sack
->start_seq
)
284 - natseq
->offset_after
);
286 new_start_seq
= htonl(ntohl(sack
->start_seq
)
287 - natseq
->offset_before
);
289 if (after(ntohl(sack
->end_seq
) - natseq
->offset_before
,
290 natseq
->correction_pos
))
291 new_end_seq
= htonl(ntohl(sack
->end_seq
)
292 - natseq
->offset_after
);
294 new_end_seq
= htonl(ntohl(sack
->end_seq
)
295 - natseq
->offset_before
);
297 DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
298 ntohl(sack
->start_seq
), new_start_seq
,
299 ntohl(sack
->end_seq
), new_end_seq
);
301 nf_proto_csum_replace4(&tcph
->check
, skb
,
302 sack
->start_seq
, new_start_seq
, 0);
303 nf_proto_csum_replace4(&tcph
->check
, skb
,
304 sack
->end_seq
, new_end_seq
, 0);
305 sack
->start_seq
= new_start_seq
;
306 sack
->end_seq
= new_end_seq
;
307 sackoff
+= sizeof(*sack
);
/*
 * nf_nat_sack_adjust - scan the TCP options of a packet and translate any
 * SACK option found.
 *
 * The option area spans [optoff, optend):
 *   optoff = IP header length + sizeof(struct tcphdr)
 *   optend = IP header length + tcph->doff * 4
 * The packet is made writable up to optend before anything is touched.
 *
 * While walking the options, an option whose length byte would be missing
 * (optoff + 1 == optend) or whose length runs past optend is treated as a
 * partial option.  A TCPOPT_SACK option is processed only if its length is
 * at least one block plus the 2-byte option header and the block area is
 * an exact multiple of TCPOLEN_SACK_PERBLOCK.  Such options are handed to
 * sack_adjust() starting at optoff+2, using the sequence state of the
 * OPPOSITE direction (nat->info.seq[!dir]).
 *
 * Callers in this file test the result with "!nf_nat_sack_adjust(...)",
 * i.e. a zero return is treated as failure.
 *
 * NOTE(review): the tcph and ct parameters, the handling of other option
 * kinds, the advance of optoff, the remaining sack_adjust() arguments and
 * the return statements are not visible in this listing - confirm against
 * the complete file.
 */
311 /* TCP SACK sequence number adjustment */
312 static inline unsigned int
313 nf_nat_sack_adjust(struct sk_buff
**pskb
,
316 enum ip_conntrack_info ctinfo
)
318 unsigned int dir
, optoff
, optend
;
319 struct nf_conn_nat
*nat
= nfct_nat(ct
);
321 optoff
= ip_hdrlen(*pskb
) + sizeof(struct tcphdr
);
322 optend
= ip_hdrlen(*pskb
) + tcph
->doff
* 4;
324 if (!skb_make_writable(pskb
, optend
))
327 dir
= CTINFO2DIR(ctinfo
);
329 while (optoff
< optend
) {
330 /* Usually: option, length. */
331 unsigned char *op
= (*pskb
)->data
+ optoff
;
340 /* no partial options */
341 if (optoff
+ 1 == optend
||
342 optoff
+ op
[1] > optend
||
345 if (op
[0] == TCPOPT_SACK
&&
346 op
[1] >= 2+TCPOLEN_SACK_PERBLOCK
&&
347 ((op
[1] - 2) % TCPOLEN_SACK_PERBLOCK
) == 0)
348 sack_adjust(*pskb
, tcph
, optoff
+2,
350 &nat
->info
.seq
[!dir
]);
/*
 * nf_nat_seq_adjust - apply the recorded sequence offsets to the TCP
 * header of a packet (exported symbol).
 *
 * this_way is the sequence state for the packet's own direction
 * (nat->info.seq[dir]); other_way is the state for the reverse direction.
 *
 *  - The packet is made writable up to the end of the fixed TCP header.
 *  - seq is translated FORWARD with this_way: if seq is after()
 *    this_way->correction_pos the new value is seq + offset_after; the
 *    other visible assignment uses seq + offset_before.
 *  - ack_seq is translated BACKWARD with other_way: if
 *    (ack_seq - offset_before) is after() other_way->correction_pos the
 *    new value is ack_seq - offset_after; the other visible assignment
 *    uses ack_seq - offset_before.
 *  - tcph->check is patched for both fields with nf_proto_csum_replace4()
 *    before the new values are stored.
 *  - nf_nat_sack_adjust() then fixes SACK options, and
 *    nf_conntrack_tcp_update() is called for this direction.
 *
 * NOTE(review): the return type, the ct parameter, the declarations of
 * tcph and dir, the "else" lines, the store to tcph->seq and the return
 * statements are not visible in this listing - confirm against the
 * complete file.
 */
357 /* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
359 nf_nat_seq_adjust(struct sk_buff
**pskb
,
361 enum ip_conntrack_info ctinfo
)
365 __be32 newseq
, newack
;
366 struct nf_conn_nat
*nat
= nfct_nat(ct
);
367 struct nf_nat_seq
*this_way
, *other_way
;
369 dir
= CTINFO2DIR(ctinfo
);
371 this_way
= &nat
->info
.seq
[dir
];
372 other_way
= &nat
->info
.seq
[!dir
];
374 if (!skb_make_writable(pskb
, ip_hdrlen(*pskb
) + sizeof(*tcph
)))
377 tcph
= (void *)(*pskb
)->data
+ ip_hdrlen(*pskb
);
378 if (after(ntohl(tcph
->seq
), this_way
->correction_pos
))
379 newseq
= htonl(ntohl(tcph
->seq
) + this_way
->offset_after
);
381 newseq
= htonl(ntohl(tcph
->seq
) + this_way
->offset_before
);
383 if (after(ntohl(tcph
->ack_seq
) - other_way
->offset_before
,
384 other_way
->correction_pos
))
385 newack
= htonl(ntohl(tcph
->ack_seq
) - other_way
->offset_after
);
387 newack
= htonl(ntohl(tcph
->ack_seq
) - other_way
->offset_before
);
389 nf_proto_csum_replace4(&tcph
->check
, *pskb
, tcph
->seq
, newseq
, 0);
390 nf_proto_csum_replace4(&tcph
->check
, *pskb
, tcph
->ack_seq
, newack
, 0);
392 DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
393 ntohl(tcph
->seq
), ntohl(newseq
), ntohl(tcph
->ack_seq
),
397 tcph
->ack_seq
= newack
;
399 if (!nf_nat_sack_adjust(pskb
, tcph
, ct
, ctinfo
))
402 nf_conntrack_tcp_update(*pskb
, ip_hdrlen(*pskb
), ct
, dir
);
406 EXPORT_SYMBOL(nf_nat_seq_adjust
);
/*
 * nf_nat_follow_master - configure NAT for an expected connection from the
 * tuples of its master connection (exported symbol).
 *
 * @ct:  the new (expected) connection; BUG_ON() fires if any bit of
 *       IPS_NAT_DONE_MASK is already set in ct->status.
 * @exp: the expectation that matched; exp->dir selects which master tuple
 *       is consulted and exp->saved_proto supplies the protocol part.
 *
 * Two nf_nat_setup_info() calls are made with the same local range:
 *  1. Flags IP_NAT_RANGE_MAP_IPS, with min_ip == max_ip set to the
 *     destination address of the master tuple in direction !exp->dir,
 *     installed at NF_IP_POST_ROUTING.
 *  2. Flags IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED, with
 *     min == max == exp->saved_proto and min_ip == max_ip set to the
 *     source address of that same master tuple, installed at
 *     NF_IP_PRE_ROUTING.
 *
 * range.min and range.max are not assigned before the first call; that
 * call does not set IP_NAT_RANGE_PROTO_SPECIFIED.
 *
 * NOTE(review): the brace lines of this function are not visible in this
 * listing.
 */
408 /* Setup NAT on this expected conntrack so it follows master. */
409 /* If we fail to get a free NAT slot, we'll get dropped on confirm */
410 void nf_nat_follow_master(struct nf_conn
*ct
,
411 struct nf_conntrack_expect
*exp
)
413 struct nf_nat_range range
;
415 /* This must be a fresh one. */
416 BUG_ON(ct
->status
& IPS_NAT_DONE_MASK
);
418 /* Change src to where master sends to */
419 range
.flags
= IP_NAT_RANGE_MAP_IPS
;
420 range
.min_ip
= range
.max_ip
421 = ct
->master
->tuplehash
[!exp
->dir
].tuple
.dst
.u3
.ip
;
422 /* hook doesn't matter, but it has to do source manip */
423 nf_nat_setup_info(ct
, &range
, NF_IP_POST_ROUTING
);
425 /* For DST manip, map port here to where it's expected. */
426 range
.flags
= (IP_NAT_RANGE_MAP_IPS
| IP_NAT_RANGE_PROTO_SPECIFIED
);
427 range
.min
= range
.max
= exp
->saved_proto
;
428 range
.min_ip
= range
.max_ip
429 = ct
->master
->tuplehash
[!exp
->dir
].tuple
.src
.u3
.ip
;
430 /* hook doesn't matter, but it has to do destination manip */
431 nf_nat_setup_info(ct
, &range
, NF_IP_PRE_ROUTING
);
433 EXPORT_SYMBOL(nf_nat_follow_master
);