core, nfqueue, openvswitch: Orphan frags in skb_zerocopy and handle errors
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / netfilter / xt_TCPMSS.c
CommitLineData
cdd289a2
PM
1/*
2 * This is a module which is used for setting the MSS option in TCP packets.
3 *
4 * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
f229f6ce 5 * Copyright (C) 2007 Patrick McHardy <kaber@trash.net>
cdd289a2
PM
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
8bee4bad 11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
cdd289a2
PM
12#include <linux/module.h>
13#include <linux/skbuff.h>
14#include <linux/ip.h>
5a0e3ad6 15#include <linux/gfp.h>
cdd289a2
PM
16#include <linux/ipv6.h>
17#include <linux/tcp.h>
37c08387
JE
18#include <net/dst.h>
19#include <net/flow.h>
cdd289a2 20#include <net/ipv6.h>
37c08387 21#include <net/route.h>
cdd289a2
PM
22#include <net/tcp.h>
23
24#include <linux/netfilter_ipv4/ip_tables.h>
25#include <linux/netfilter_ipv6/ip6_tables.h>
26#include <linux/netfilter/x_tables.h>
27#include <linux/netfilter/xt_tcpudp.h>
28#include <linux/netfilter/xt_TCPMSS.h>
29
30MODULE_LICENSE("GPL");
31MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
2ae15b64 32MODULE_DESCRIPTION("Xtables: TCP Maximum Segment Size (MSS) adjustment");
cdd289a2
PM
33MODULE_ALIAS("ipt_TCPMSS");
34MODULE_ALIAS("ip6t_TCPMSS");
35
36static inline unsigned int
37optlen(const u_int8_t *opt, unsigned int offset)
38{
39 /* Beware zero-length options: make finite progress */
40 if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0)
41 return 1;
42 else
43 return opt[offset+1];
44}
45
46static int
3db05fea 47tcpmss_mangle_packet(struct sk_buff *skb,
70d19f80 48 const struct xt_action_param *par,
37c08387 49 unsigned int in_mtu,
cdd289a2
PM
50 unsigned int tcphoff,
51 unsigned int minlen)
52{
70d19f80 53 const struct xt_tcpmss_info *info = par->targinfo;
cdd289a2
PM
54 struct tcphdr *tcph;
55 unsigned int tcplen, i;
56 __be16 oldval;
57 u16 newmss;
58 u8 *opt;
59
b396966c
PO
60 /* This is a fragment, no TCP header is available */
61 if (par->fragoff != 0)
62 return XT_CONTINUE;
63
3db05fea 64 if (!skb_make_writable(skb, skb->len))
cdd289a2
PM
65 return -1;
66
3db05fea
HX
67 tcplen = skb->len - tcphoff;
68 tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
cdd289a2 69
10a19939
SA
70 /* Header cannot be larger than the packet */
71 if (tcplen < tcph->doff*4)
cdd289a2 72 return -1;
cdd289a2
PM
73
74 if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
adf30907 75 if (dst_mtu(skb_dst(skb)) <= minlen) {
e87cc472
JP
76 net_err_ratelimited("unknown or invalid path-MTU (%u)\n",
77 dst_mtu(skb_dst(skb)));
cdd289a2
PM
78 return -1;
79 }
37c08387 80 if (in_mtu <= minlen) {
e87cc472
JP
81 net_err_ratelimited("unknown or invalid path-MTU (%u)\n",
82 in_mtu);
37c08387
JE
83 return -1;
84 }
adf30907 85 newmss = min(dst_mtu(skb_dst(skb)), in_mtu) - minlen;
cdd289a2
PM
86 } else
87 newmss = info->mss;
88
89 opt = (u_int8_t *)tcph;
90 for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) {
91 if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS &&
92 opt[i+1] == TCPOLEN_MSS) {
93 u_int16_t oldmss;
94
95 oldmss = (opt[i+2] << 8) | opt[i+3];
96
17008064
BL
97 /* Never increase MSS, even when setting it, as
98 * doing so results in problems for hosts that rely
99 * on MSS being set correctly.
100 */
101 if (oldmss <= newmss)
cdd289a2
PM
102 return 0;
103
104 opt[i+2] = (newmss & 0xff00) >> 8;
7c4e36bc 105 opt[i+3] = newmss & 0x00ff;
cdd289a2 106
be0ea7d5
PM
107 inet_proto_csum_replace2(&tcph->check, skb,
108 htons(oldmss), htons(newmss),
109 0);
cdd289a2
PM
110 return 0;
111 }
112 }
113
10a19939
SA
114 /* There is data after the header so the option can't be added
115 without moving it, and doing so may make the SYN packet
116 itself too large. Accept the packet unmodified instead. */
117 if (tcplen > tcph->doff*4)
118 return 0;
119
cdd289a2
PM
120 /*
121 * MSS Option not found ?! add it..
122 */
3db05fea
HX
123 if (skb_tailroom(skb) < TCPOLEN_MSS) {
124 if (pskb_expand_head(skb, 0,
125 TCPOLEN_MSS - skb_tailroom(skb),
2ca7b0ac 126 GFP_ATOMIC))
cdd289a2 127 return -1;
3db05fea 128 tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
cdd289a2
PM
129 }
130
3db05fea 131 skb_put(skb, TCPOLEN_MSS);
cdd289a2 132
70d19f80
PO
133 /*
134 * IPv4: RFC 1122 states "If an MSS option is not received at
135 * connection setup, TCP MUST assume a default send MSS of 536".
136 * IPv6: RFC 2460 states IPv6 has a minimum MTU of 1280 and a minimum
137 * length IPv6 header of 60, ergo the default MSS value is 1220
138 * Since no MSS was provided, we must use the default values
409b545a 139 */
70d19f80
PO
140 if (par->family == NFPROTO_IPV4)
141 newmss = min(newmss, (u16)536);
142 else
143 newmss = min(newmss, (u16)1220);
409b545a 144
cdd289a2
PM
145 opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
146 memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
147
be0ea7d5
PM
148 inet_proto_csum_replace2(&tcph->check, skb,
149 htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1);
cdd289a2
PM
150 opt[0] = TCPOPT_MSS;
151 opt[1] = TCPOLEN_MSS;
152 opt[2] = (newmss & 0xff00) >> 8;
7c4e36bc 153 opt[3] = newmss & 0x00ff;
cdd289a2 154
be0ea7d5 155 inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0);
cdd289a2
PM
156
157 oldval = ((__be16 *)tcph)[6];
158 tcph->doff += TCPOLEN_MSS/4;
be0ea7d5
PM
159 inet_proto_csum_replace2(&tcph->check, skb,
160 oldval, ((__be16 *)tcph)[6], 0);
cdd289a2
PM
161 return TCPOLEN_MSS;
162}
163
db1a75bd
JE
164static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
165 unsigned int family)
37c08387 166{
a1bbb0e6 167 struct flowi fl;
37c08387
JE
168 const struct nf_afinfo *ai;
169 struct rtable *rt = NULL;
170 u_int32_t mtu = ~0U;
171
a1bbb0e6
DM
172 if (family == PF_INET) {
173 struct flowi4 *fl4 = &fl.u.ip4;
174 memset(fl4, 0, sizeof(*fl4));
175 fl4->daddr = ip_hdr(skb)->saddr;
176 } else {
177 struct flowi6 *fl6 = &fl.u.ip6;
db1a75bd 178
a1bbb0e6 179 memset(fl6, 0, sizeof(*fl6));
4e3fd7a0 180 fl6->daddr = ipv6_hdr(skb)->saddr;
a1bbb0e6 181 }
37c08387 182 rcu_read_lock();
db1a75bd 183 ai = nf_get_afinfo(family);
37c08387 184 if (ai != NULL)
0fae2e77 185 ai->route(&init_net, (struct dst_entry **)&rt, &fl, false);
37c08387
JE
186 rcu_read_unlock();
187
188 if (rt != NULL) {
d8d1f30b
CG
189 mtu = dst_mtu(&rt->dst);
190 dst_release(&rt->dst);
37c08387
JE
191 }
192 return mtu;
193}
194
cdd289a2 195static unsigned int
4b560b44 196tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par)
cdd289a2 197{
3db05fea 198 struct iphdr *iph = ip_hdr(skb);
cdd289a2
PM
199 __be16 newlen;
200 int ret;
201
70d19f80 202 ret = tcpmss_mangle_packet(skb, par,
db1a75bd 203 tcpmss_reverse_mtu(skb, PF_INET),
37c08387 204 iph->ihl * 4,
cdd289a2
PM
205 sizeof(*iph) + sizeof(struct tcphdr));
206 if (ret < 0)
207 return NF_DROP;
208 if (ret > 0) {
3db05fea 209 iph = ip_hdr(skb);
cdd289a2 210 newlen = htons(ntohs(iph->tot_len) + ret);
be0ea7d5 211 csum_replace2(&iph->check, iph->tot_len, newlen);
cdd289a2
PM
212 iph->tot_len = newlen;
213 }
214 return XT_CONTINUE;
215}
216
c0cd1156 217#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
cdd289a2 218static unsigned int
4b560b44 219tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
cdd289a2 220{
3db05fea 221 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
cdd289a2 222 u8 nexthdr;
75f2811c 223 __be16 frag_off;
cdd289a2
PM
224 int tcphoff;
225 int ret;
226
227 nexthdr = ipv6h->nexthdr;
75f2811c 228 tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr, &frag_off);
9dc0564e 229 if (tcphoff < 0)
cdd289a2 230 return NF_DROP;
70d19f80 231 ret = tcpmss_mangle_packet(skb, par,
db1a75bd 232 tcpmss_reverse_mtu(skb, PF_INET6),
37c08387 233 tcphoff,
cdd289a2
PM
234 sizeof(*ipv6h) + sizeof(struct tcphdr));
235 if (ret < 0)
236 return NF_DROP;
237 if (ret > 0) {
3db05fea 238 ipv6h = ipv6_hdr(skb);
cdd289a2
PM
239 ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret);
240 }
241 return XT_CONTINUE;
242}
243#endif
244
cdd289a2 245/* Must specify -p tcp --syn */
e1931b78 246static inline bool find_syn_match(const struct xt_entry_match *m)
cdd289a2
PM
247{
248 const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data;
249
250 if (strcmp(m->u.kernel.match->name, "tcp") == 0 &&
a3433f35 251 tcpinfo->flg_cmp & TCPHDR_SYN &&
cdd289a2 252 !(tcpinfo->invflags & XT_TCP_INV_FLAGS))
e1931b78 253 return true;
cdd289a2 254
e1931b78 255 return false;
cdd289a2
PM
256}
257
135367b8 258static int tcpmss_tg4_check(const struct xt_tgchk_param *par)
cdd289a2 259{
af5d6dc2
JE
260 const struct xt_tcpmss_info *info = par->targinfo;
261 const struct ipt_entry *e = par->entryinfo;
dcea992a 262 const struct xt_entry_match *ematch;
cdd289a2
PM
263
264 if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
af5d6dc2 265 (par->hook_mask & ~((1 << NF_INET_FORWARD) |
6e23ae2a
PM
266 (1 << NF_INET_LOCAL_OUT) |
267 (1 << NF_INET_POST_ROUTING))) != 0) {
8bee4bad
JE
268 pr_info("path-MTU clamping only supported in "
269 "FORWARD, OUTPUT and POSTROUTING hooks\n");
d6b00a53 270 return -EINVAL;
cdd289a2 271 }
dcea992a
JE
272 xt_ematch_foreach(ematch, e)
273 if (find_syn_match(ematch))
d6b00a53 274 return 0;
8bee4bad 275 pr_info("Only works on TCP SYN packets\n");
d6b00a53 276 return -EINVAL;
cdd289a2
PM
277}
278
c0cd1156 279#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
135367b8 280static int tcpmss_tg6_check(const struct xt_tgchk_param *par)
cdd289a2 281{
af5d6dc2
JE
282 const struct xt_tcpmss_info *info = par->targinfo;
283 const struct ip6t_entry *e = par->entryinfo;
dcea992a 284 const struct xt_entry_match *ematch;
cdd289a2
PM
285
286 if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
af5d6dc2 287 (par->hook_mask & ~((1 << NF_INET_FORWARD) |
6e23ae2a
PM
288 (1 << NF_INET_LOCAL_OUT) |
289 (1 << NF_INET_POST_ROUTING))) != 0) {
8bee4bad
JE
290 pr_info("path-MTU clamping only supported in "
291 "FORWARD, OUTPUT and POSTROUTING hooks\n");
d6b00a53 292 return -EINVAL;
cdd289a2 293 }
dcea992a
JE
294 xt_ematch_foreach(ematch, e)
295 if (find_syn_match(ematch))
d6b00a53 296 return 0;
8bee4bad 297 pr_info("Only works on TCP SYN packets\n");
d6b00a53 298 return -EINVAL;
cdd289a2
PM
299}
300#endif
301
d3c5ee6d 302static struct xt_target tcpmss_tg_reg[] __read_mostly = {
cdd289a2 303 {
ee999d8b 304 .family = NFPROTO_IPV4,
cdd289a2 305 .name = "TCPMSS",
d3c5ee6d
JE
306 .checkentry = tcpmss_tg4_check,
307 .target = tcpmss_tg4,
cdd289a2
PM
308 .targetsize = sizeof(struct xt_tcpmss_info),
309 .proto = IPPROTO_TCP,
310 .me = THIS_MODULE,
311 },
c0cd1156 312#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
cdd289a2 313 {
ee999d8b 314 .family = NFPROTO_IPV6,
cdd289a2 315 .name = "TCPMSS",
d3c5ee6d
JE
316 .checkentry = tcpmss_tg6_check,
317 .target = tcpmss_tg6,
cdd289a2
PM
318 .targetsize = sizeof(struct xt_tcpmss_info),
319 .proto = IPPROTO_TCP,
320 .me = THIS_MODULE,
321 },
322#endif
323};
324
d3c5ee6d 325static int __init tcpmss_tg_init(void)
cdd289a2 326{
d3c5ee6d 327 return xt_register_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
cdd289a2
PM
328}
329
d3c5ee6d 330static void __exit tcpmss_tg_exit(void)
cdd289a2 331{
d3c5ee6d 332 xt_unregister_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
cdd289a2
PM
333}
334
d3c5ee6d
JE
335module_init(tcpmss_tg_init);
336module_exit(tcpmss_tg_exit);