/*
 *      IPv6 output functions
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on linux/net/ipv4/ip_output.c
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Changes:
 *      A.N.Kuznetsov   :       arithmetics in fragmentation.
 *                              extension headers are implemented.
 *                              route changes now work.
 *                              ip6_forward does not confuse sniffers.
 *                              etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *      Imran Patel     :       frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *                      :       add ip6_append_data and related functions
 *                              for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

int __ip6_local_out(struct sk_buff *skb)
{
        int len;

        len = skb->len - sizeof(struct ipv6hdr);
        if (len > IPV6_MAXPLEN)
                len = 0;
        ipv6_hdr(skb)->payload_len = htons(len);

        return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
                       skb_dst(skb)->dev, dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
        int err;

        err = __ip6_local_out(skb);
        if (likely(err == 1))
                err = dst_output(skb);

        return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);
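
/*
 * Note: nf_hook() returns 1 when the LOCAL_OUT hook accepts the packet
 * and leaves transmission to the caller, which is why ip6_local_out()
 * only calls dst_output() when __ip6_local_out() returned 1; any other
 * value means the hook dropped, stole or queued the skb.
 */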

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
        skb_reset_mac_header(newskb);
        __skb_pull(newskb, skb_network_offset(newskb));
        newskb->pkt_type = PACKET_LOOPBACK;
        newskb->ip_summed = CHECKSUM_UNNECESSARY;
        WARN_ON(!skb_dst(newskb));

        netif_rx_ni(newskb);
        return 0;
}

static int ip6_finish_output2(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
                    ((mroute6_socket(dev_net(dev), skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                           is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                                        newskb, NULL, newskb->dev,
                                        ip6_dev_loopback_xmit);

                        if (ipv6_hdr(skb)->hop_limit == 0) {
                                IP6_INC_STATS(dev_net(dev), idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
                                 skb->len);
        }

        if (dst->hh)
                return neigh_hh_output(dst->hh, skb);
        else if (dst->neighbour)
                return dst->neighbour->output(skb);

        IP6_INC_STATS_BH(dev_net(dst->dev),
                         ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}

static int ip6_finish_output(struct sk_buff *skb)
{
        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)))
                return ip6_fragment(skb, ip6_finish_output2);
        else
                return ip6_finish_output2(skb);
}

int ip6_output(struct sk_buff *skb)
{
        struct net_device *dev = skb_dst(skb)->dev;
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
        if (unlikely(idev->cnf.disable_ipv6)) {
                IP6_INC_STATS(dev_net(dev), idev,
                              IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }

        return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
                            ip6_finish_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
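
/*
 * Note: NF_HOOK_COND() above only traverses the POST_ROUTING hook when
 * the IP6SKB_REROUTED flag is clear; packets already rerouted (e.g. by
 * netfilter/xfrm) go straight to ip6_finish_output() so the hook is
 * not run twice.
 */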

/*
 *      xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
             struct ipv6_txoptions *opt)
{
        struct net *net = sock_net(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl->fl6_dst;
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr;
        u8 proto = fl->proto;
        int seg_len = skb->len;
        int hlimit = -1;
        int tclass = 0;
        u32 mtu;

        if (opt) {
                unsigned int head_room;

                /* First: exthdrs may take lots of space (~8K for now)
                   MAX_HEADER is not enough.
                 */
                head_room = opt->opt_nflen + opt->opt_flen;
                seg_len += head_room;
                head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

                if (skb_headroom(skb) < head_room) {
                        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                        if (skb2 == NULL) {
                                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return -ENOBUFS;
                        }
                        kfree_skb(skb);
                        skb = skb2;
                        skb_set_owner_w(skb, sk);
                }
                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);
                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /*
         *      Fill in the IPv6 header
         */
        if (np) {
                tclass = np->tclass;
                hlimit = np->hop_limit;
        }
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

        /* The first 32 bits pack version (6), traffic class and flow
         * label; fl6_flowlabel is already in network byte order, hence
         * it is OR'ed in after the htonl().
         */
        *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
        ipv6_addr_copy(&hdr->daddr, first_hop);

        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                 IPSTATS_MIB_OUT, skb->len);
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
                               dst->dev, dst_output);
        }

        if (net_ratelimit())
                printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
        skb->dev = dst->dev;
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);
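
/*
 * Usage sketch (illustrative, not part of this file): a connected
 * transport such as TCP fills a flowi from its socket state and hands
 * each packet to ip6_xmit(); roughly (np here is inet6_sk(sk)):
 *
 *      struct flowi fl;
 *
 *      memset(&fl, 0, sizeof(fl));
 *      fl.proto = IPPROTO_TCP;
 *      fl.oif = sk->sk_bound_dev_if;
 *      ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
 *      ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 *      err = ip6_xmit(sk, skb, &fl, np->opt);
 */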

/*
 *      To avoid extra problems ND packets are sent through this
 *      routine. It's code duplication but I really want to avoid
 *      extra checks since ipv6_build_header is used by TCP (which
 *      is performance critical for us)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
               const struct in6_addr *saddr, const struct in6_addr *daddr,
               int proto, int len)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct ipv6hdr *hdr;
        int totlen;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        totlen = len + sizeof(struct ipv6hdr);

        skb_reset_network_header(skb);
        skb_put(skb, sizeof(struct ipv6hdr));
        hdr = ipv6_hdr(skb);

        *(__be32*)hdr = htonl(0x60000000);

        hdr->payload_len = htons(len);
        hdr->nexthdr = proto;
        hdr->hop_limit = np->hop_limit;

        ipv6_addr_copy(&hdr->saddr, saddr);
        ipv6_addr_copy(&hdr->daddr, daddr);

        return 0;
}

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}
328
e21e0b5f
VN
329static int ip6_forward_proxy_check(struct sk_buff *skb)
330{
0660e03f 331 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f
VN
332 u8 nexthdr = hdr->nexthdr;
333 int offset;
334
335 if (ipv6_ext_hdr(nexthdr)) {
336 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
337 if (offset < 0)
338 return 0;
339 } else
340 offset = sizeof(struct ipv6hdr);
341
342 if (nexthdr == IPPROTO_ICMPV6) {
343 struct icmp6hdr *icmp6;
344
d56f90a7
ACM
345 if (!pskb_may_pull(skb, (skb_network_header(skb) +
346 offset + 1 - skb->data)))
e21e0b5f
VN
347 return 0;
348
d56f90a7 349 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
350
351 switch (icmp6->icmp6_type) {
352 case NDISC_ROUTER_SOLICITATION:
353 case NDISC_ROUTER_ADVERTISEMENT:
354 case NDISC_NEIGHBOUR_SOLICITATION:
355 case NDISC_NEIGHBOUR_ADVERTISEMENT:
356 case NDISC_REDIRECT:
357 /* For reaction involving unicast neighbor discovery
358 * message destined to the proxied address, pass it to
359 * input function.
360 */
361 return 1;
362 default:
363 break;
364 }
365 }
366
74553b09
VN
367 /*
368 * The proxying router can't forward traffic sent to a link-local
369 * address, so signal the sender and discard the packet. This
370 * behavior is clarified by the MIPv6 specification.
371 */
372 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
373 dst_link_failure(skb);
374 return -1;
375 }
376
e21e0b5f
VN
377 return 0;
378}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
        return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        u32 mtu;

        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        if (skb->pkt_type != PACKET_HOST)
                goto drop;

        skb_forward_csum(skb);

        /*
         * We DO NOT make any processing on
         * RA packets, pushing them to user level AS IS
         * without any WARRANTY that application will be able
         * to interpret them. The reason is that we
         * cannot make anything clever here.
         *
         * We are not end-node, so that if packet contains
         * AH/ESP, we cannot make anything.
         * Defragmentation also would be a mistake; RA packets
         * cannot be fragmented, because there is no warranty
         * that different fragments will go along one path. --ANK
         */
        if (opt->ra) {
                u8 *ptr = skb_network_header(skb) + opt->ra;
                if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
                        return 0;
        }

        /*
         * check and decrement ttl
         */
        if (hdr->hop_limit <= 1) {
                /* Force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                if (proxied > 0)
                        return ip6_input(skb);
                else if (proxied < 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(dst),
                                      IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb_dst(skb);

        /* IPv6 specs say nothing about it, but it is clear that we cannot
           send redirects to source routed frames.
           We don't send redirects to frames decapsulated from IPsec.
         */
        if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
            !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct rt6_info *rt;
                struct neighbour *n = dst->neighbour;

                /*
                 *      incoming and outgoing devices are the same
                 *      send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if ((rt->rt6i_flags & RTF_GATEWAY))
                        target = (struct in6_addr*)&n->primary_key;
                else
                        target = &hdr->daddr;

                /* Limit redirects both by destination (here)
                   and by source (inside ndisc_send_redirect)
                 */
                if (xrlim_allow(dst, 1*HZ))
                        ndisc_send_redirect(skb, n, target);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        mtu = dst_mtu(dst);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

        if (skb->len > mtu && !skb_is_gso(skb)) {
                /* Again, force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
                IP6_INC_STATS_BH(net,
                                 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Mangling hops number delayed to point after skb COW */

        hdr->hop_limit--;

        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
        to->nf_trace = from->nf_trace;
#endif
        skb_copy_secmark(to, from);
}

int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
        u16 offset = sizeof(struct ipv6hdr);
        struct ipv6_opt_hdr *exthdr =
                (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
        unsigned int packet_len = skb->tail - skb->network_header;
        int found_rhdr = 0;
        *nexthdr = &ipv6_hdr(skb)->nexthdr;

        while (offset + 1 <= packet_len) {

                switch (**nexthdr) {

                case NEXTHDR_HOP:
                        break;
                case NEXTHDR_ROUTING:
                        found_rhdr = 1;
                        break;
                case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
                        if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
                                break;
#endif
                        if (found_rhdr)
                                return offset;
                        break;
                default:
                        return offset;
                }

                offset += ipv6_optlen(exthdr);
                *nexthdr = &exthdr->nexthdr;
                exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
                                                 offset);
        }

        return offset;
}
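
/*
 * Example (illustrative): for a packet laid out as
 *
 *      IPv6 | Hop-by-Hop | Routing | TCP | payload
 *
 * ip6_find_1stfragopt() returns the offset just past the Routing
 * header and leaves *nexthdr pointing at its Next Header field: per
 * RFC 2460, the Hop-by-Hop and Routing headers form the unfragmentable
 * part that must be copied into every fragment, so the Fragment header
 * is inserted at the returned offset.
 */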

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
        struct ipv6hdr *tmp_hdr;
        struct frag_hdr *fh;
        unsigned int mtu, hlen, left, len;
        __be32 frag_id = 0;
        int ptr, offset = 0, err = 0;
        u8 *prevhdr, nexthdr = 0;
        struct net *net = dev_net(skb_dst(skb)->dev);

        hlen = ip6_find_1stfragopt(skb, &prevhdr);
        nexthdr = *prevhdr;

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb is not generated by a local socket.
         */
        if (!skb->local_df && skb->len > mtu) {
                skb->dev = skb_dst(skb)->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        mtu -= hlen + sizeof(struct frag_hdr);

        if (skb_has_frag_list(skb)) {
                int first_len = skb_pagelen(skb);
                struct sk_buff *frag2;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < hlen)
                                goto slow_path_clean;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path_clean;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                        }
                        skb->truesize -= frag->truesize;
                }

                err = 0;
                offset = 0;
                frag = skb_shinfo(skb)->frag_list;
                skb_frag_list_init(skb);
                /* BUILD HEADER */

                *prevhdr = NEXTHDR_FRAGMENT;
                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
                if (!tmp_hdr) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        return -ENOMEM;
                }

                __skb_pull(skb, hlen);
                fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
                __skb_push(skb, hlen);
                skb_reset_network_header(skb);
                memcpy(skb_network_header(skb), tmp_hdr, hlen);

                ipv6_select_ident(fh);
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->frag_off = htons(IP6_MF);
                frag_id = fh->identification;

                first_len = skb_pagelen(skb);
                skb->data_len = first_len - skb_headlen(skb);
                skb->len = first_len;
                ipv6_hdr(skb)->payload_len = htons(first_len -
                                                   sizeof(struct ipv6hdr));

                dst_hold(&rt->dst);

                for (;;) {
                        /* Prepare header of the next frame,
                         * before previous one went down. */
                        if (frag) {
                                frag->ip_summed = CHECKSUM_NONE;
                                skb_reset_transport_header(frag);
                                fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
                                __skb_push(frag, hlen);
                                skb_reset_network_header(frag);
                                memcpy(skb_network_header(frag), tmp_hdr,
                                       hlen);
                                offset += skb->len - hlen - sizeof(struct frag_hdr);
                                fh->nexthdr = nexthdr;
                                fh->reserved = 0;
                                fh->frag_off = htons(offset);
                                if (frag->next != NULL)
                                        fh->frag_off |= htons(IP6_MF);
                                fh->identification = frag_id;
                                ipv6_hdr(frag)->payload_len =
                                                htons(frag->len -
                                                      sizeof(struct ipv6hdr));
                                ip6_copy_metadata(frag, skb);
                        }

                        err = output(skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !frag)
                                break;

                        skb = frag;
                        frag = skb->next;
                        skb->next = NULL;
                }

                kfree(tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                      IPSTATS_MIB_FRAGOKS);
                        dst_release(&rt->dst);
                        return 0;
                }

                while (frag) {
                        skb = frag->next;
                        kfree_skb(frag);
                        frag = skb;
                }

                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                              IPSTATS_MIB_FRAGFAILS);
                dst_release(&rt->dst);
                return err;

slow_path_clean:
                skb_walk_frags(skb, frag2) {
                        if (frag2 == frag)
                                break;
                        frag2->sk = NULL;
                        frag2->destructor = NULL;
                        skb->truesize += frag2->truesize;
                }
        }

slow_path:
        left = skb->len - hlen;         /* Space per frame */
        ptr = hlen;                     /* Where to start from */

        /*
         *      Fragment the datagram.
         */

        *prevhdr = NEXTHDR_FRAGMENT;

        /*
         *      Keep copying data until we run out.
         */
        while (left > 0) {
                len = left;
                /* IF: it doesn't fit, use 'mtu' - the data space left */
                if (len > mtu)
                        len = mtu;
                /* IF: we are not sending up to and including the packet end
                   then align the next start on an eight byte boundary */
                if (len < left) {
                        len &= ~7;
                }
                /*
                 *      Allocate buffer.
                 */

                if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
                                      LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
                        NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_FRAGFAILS);
                        err = -ENOMEM;
                        goto fail;
                }

                /*
                 *      Set up data on packet
                 */

                ip6_copy_metadata(frag, skb);
                skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
                skb_reset_network_header(frag);
                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
                frag->transport_header = (frag->network_header + hlen +
                                          sizeof(struct frag_hdr));

                /*
                 *      Charge the memory for the fragment to any owner
                 *      it might possess
                 */
                if (skb->sk)
                        skb_set_owner_w(frag, skb->sk);

                /*
                 *      Copy the packet header into the new buffer.
                 */
                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

                /*
                 *      Build fragment header.
                 */
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                if (!frag_id) {
                        ipv6_select_ident(fh);
                        frag_id = fh->identification;
                } else
                        fh->identification = frag_id;

                /*
                 *      Copy a block of the IP datagram.
                 */
                if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
                        BUG();
                left -= len;

                fh->frag_off = htons(offset);
                if (left > 0)
                        fh->frag_off |= htons(IP6_MF);
                ipv6_hdr(frag)->payload_len = htons(frag->len -
                                                    sizeof(struct ipv6hdr));

                ptr += len;
                offset += len;

                /*
                 *      Put this fragment into the sending queue.
                 */
                err = output(frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        kfree_skb(skb);
        return err;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}
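
/*
 * Note on fh->frag_off: the Fragment header stores the offset in
 * 8-octet units in its upper 13 bits and uses the low 3 bits for
 * flags (IP6_MF). Since every non-final fragment is trimmed to a
 * multiple of 8 bytes, the byte offset computed above already has its
 * low three bits clear and can be stored directly, OR'ed with
 * htons(IP6_MF) while more fragments follow.
 */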

static inline int ip6_rt_check(struct rt6key *rt_key,
                               struct in6_addr *fl_addr,
                               struct in6_addr *addr_cache)
{
        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
                (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          struct flowi *fl)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt = (struct rt6_info *)dst;

        if (!dst)
                goto out;

        /* Yes, checking route validity in the unconnected
         * case is not very simple. Take into account
         * that we do not support routing by source, TOS,
         * and MSG_DONTROUTE           --ANK (980726)
         *
         * 1. ip6_rt_check(): If route was host route,
         *    check that cached destination is current.
         *    If it is network route, we still may
         *    check its validity using saved pointer
         *    to the last used address: daddr_cache.
         *    We do not want to save whole address now,
         *    (because main consumer of this service
         *    is tcp, which does not have this problem),
         *    so that the last trick works only on connected
         *    sockets.
         * 2. oif also should be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
            (fl->oif && fl->oif != dst->dev->ifindex)) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
                               struct dst_entry **dst, struct flowi *fl)
{
        int err;
        struct net *net = sock_net(sk);

        if (*dst == NULL)
                *dst = ip6_route_output(net, sk, fl);

        if ((err = (*dst)->error))
                goto out_err_release;

        if (ipv6_addr_any(&fl->fl6_src)) {
                err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
                                         &fl->fl6_dst,
                                         sk ? inet6_sk(sk)->srcprefs : 0,
                                         &fl->fl6_src);
                if (err)
                        goto out_err_release;
        }

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * Here if the dst entry we've looked up
         * has a neighbour entry that is in the INCOMPLETE
         * state and the src address from the flow is
         * marked as OPTIMISTIC, we release the found
         * dst entry and replace it instead with the
         * dst entry of the nexthop router
         */
        if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
                struct inet6_ifaddr *ifp;
                struct flowi fl_gw;
                int redirect;

                ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw, fl, sizeof(struct flowi));
                        memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw);
                        if ((err = (*dst)->error))
                                goto out_err_release;
                }
        }
#endif

        return 0;

out_err_release:
        if (err == -ENETUNREACH)
                IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        dst_release(*dst);
        *dst = NULL;
        return err;
}

/**
 *      ip6_dst_lookup - perform route lookup on flow
 *      @sk: socket which provides route info
 *      @dst: pointer to dst_entry * for result
 *      @fl: flow to lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *      ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *      @sk: socket which provides the dst cache and route info
 *      @dst: pointer to dst_entry * for result
 *      @fl: flow to lookup
 *
 *      This function performs a route lookup on the given flow with the
 *      possibility of using the cached route in the socket if it is valid.
 *      It will take the socket dst lock when operating on the dst cache.
 *      As a result, this function can only be used in process context.
 *
 *      It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
        *dst = NULL;
        if (sk) {
                *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
                *dst = ip6_sk_dst_check(sk, *dst, fl);
        }

        return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
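
/*
 * Usage sketch (illustrative): a datagram sender resolves the route
 * once per send, then builds packets against the returned dst:
 *
 *      struct dst_entry *dst;
 *      int err;
 *
 *      err = ip6_sk_dst_lookup(sk, &dst, &fl);
 *      if (err)
 *              return err;
 *      ... append data and transmit; the dst is either attached to the
 *      skb (taking a reference) or dropped with dst_release() ...
 */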

static inline int ip6_ufo_append_data(struct sock *sk,
                        int getfrag(void *from, char *to, int offset, int len,
                        int odd, struct sk_buff *skb),
                        void *from, int length, int hh_len, int fragheaderlen,
                        int transhdrlen, int mtu, unsigned int flags)

{
        struct sk_buff *skb;
        int err;

        /* There is support for UDP large send offload by network
         * device, so create one single skb packet containing complete
         * udp datagram
         */
        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
                skb = sock_alloc_send_skb(sk,
                        hh_len + fragheaderlen + transhdrlen + 20,
                        (flags & MSG_DONTWAIT), &err);
                if (skb == NULL)
                        return -ENOMEM;

                /* reserve space for Hardware header */
                skb_reserve(skb, hh_len);

                /* create space for UDP/IP header */
                skb_put(skb, fragheaderlen + transhdrlen);

                /* initialize network header pointer */
                skb_reset_network_header(skb);

                /* initialize protocol header pointer */
                skb->transport_header = skb->network_header + fragheaderlen;

                skb->ip_summed = CHECKSUM_PARTIAL;
                skb->csum = 0;
                sk->sk_sndmsg_off = 0;
        }

        err = skb_append_datato_frags(sk, skb, getfrag, from,
                                      (length - transhdrlen));
        if (!err) {
                struct frag_hdr fhdr;

                /* Specify the length of each IPv6 datagram fragment.
                 * It has to be a multiple of 8.
                 */
                skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
                                             sizeof(struct frag_hdr)) & ~7;
                skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
                ipv6_select_ident(&fhdr);
                skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
                __skb_queue_tail(&sk->sk_write_queue, skb);

                return 0;
        }
        /* There is not enough support to do UDP LSO,
         * so follow the normal path
         */
        kfree_skb(skb);

        return err;
}
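
/*
 * Note: gso_size above is rounded down to a multiple of 8 because the
 * device will cut the queued datagram into fragments whose offsets are
 * expressed in 8-octet units; every fragment but the last must carry a
 * multiple of 8 bytes of payload.
 */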

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
                                                gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
        int offset, int len, int odd, struct sk_buff *skb),
        void *from, int length, int transhdrlen,
        int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
        struct rt6_info *rt, unsigned int flags, int dontfrag)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct sk_buff *skb;
        unsigned int maxfraglen, fragheaderlen;
        int exthdrlen;
        int hh_len;
        int mtu;
        int copy;
        int err;
        int offset = 0;
        int csummode = CHECKSUM_NONE;

        if (flags & MSG_PROBE)
                return 0;
        if (skb_queue_empty(&sk->sk_write_queue)) {
                /*
                 * setup for corking
                 */
                if (opt) {
                        if (WARN_ON(np->cork.opt))
                                return -EINVAL;

                        np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
                        if (unlikely(np->cork.opt == NULL))
                                return -ENOBUFS;

                        np->cork.opt->tot_len = opt->tot_len;
                        np->cork.opt->opt_flen = opt->opt_flen;
                        np->cork.opt->opt_nflen = opt->opt_nflen;

                        np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
                                                            sk->sk_allocation);
                        if (opt->dst0opt && !np->cork.opt->dst0opt)
                                return -ENOBUFS;

                        np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
                                                            sk->sk_allocation);
                        if (opt->dst1opt && !np->cork.opt->dst1opt)
                                return -ENOBUFS;

                        np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
                                                           sk->sk_allocation);
                        if (opt->hopopt && !np->cork.opt->hopopt)
                                return -ENOBUFS;

                        np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
                                                            sk->sk_allocation);
                        if (opt->srcrt && !np->cork.opt->srcrt)
                                return -ENOBUFS;

                        /* need source address above miyazawa*/
                }
                dst_hold(&rt->dst);
                inet->cork.dst = &rt->dst;
                inet->cork.fl = *fl;
                np->cork.hop_limit = hlimit;
                np->cork.tclass = tclass;
                mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
                      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
                if (np->frag_size < mtu) {
                        if (np->frag_size)
                                mtu = np->frag_size;
                }
                inet->cork.fragsize = mtu;
                if (dst_allfrag(rt->dst.path))
                        inet->cork.flags |= IPCORK_ALLFRAG;
                inet->cork.length = 0;
                sk->sk_sndmsg_page = NULL;
                sk->sk_sndmsg_off = 0;
                exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
                            rt->rt6i_nfheader_len;
                length += exthdrlen;
                transhdrlen += exthdrlen;
        } else {
                rt = (struct rt6_info *)inet->cork.dst;
                fl = &inet->cork.fl;
                opt = np->cork.opt;
                transhdrlen = 0;
                exthdrlen = 0;
                mtu = inet->cork.fragsize;
        }

        hh_len = LL_RESERVED_SPACE(rt->dst.dev);

        fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
                        (opt ? opt->opt_nflen : 0);
        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

        if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
                if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
                        ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
                        return -EMSGSIZE;
                }
        }

        /*
         * Let's try using as much space as possible.
         * Use MTU if total length of the message fits into the MTU.
         * Otherwise, we need to reserve fragment header and
         * fragment alignment (= 8-15 octets, in total).
         *
         * Note that we may need to "move" the data from the tail
         * of the buffer to the new fragment when we split
         * the message.
         *
         * FIXME: It may be fragmented into multiple chunks
         *        at once if non-fragmentable extension headers
         *        are too large.
         * --yoshfuji
         */

        inet->cork.length += length;
        if (length > mtu) {
                int proto = sk->sk_protocol;
                if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)) {
                        ipv6_local_rxpmtu(sk, fl, mtu - exthdrlen);
                        return -EMSGSIZE;
                }

                if (proto == IPPROTO_UDP &&
                    (rt->dst.dev->features & NETIF_F_UFO)) {

                        err = ip6_ufo_append_data(sk, getfrag, from, length,
                                                  hh_len, fragheaderlen,
                                                  transhdrlen, mtu, flags);
                        if (err)
                                goto error;
                        return 0;
                }
        }

        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
                goto alloc_new_skb;

        while (length > 0) {
                /* Check if the remaining data fits into current packet. */
                copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
                if (copy < length)
                        copy = maxfraglen - skb->len;

                if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
                        struct sk_buff *skb_prev;
alloc_new_skb:
                        skb_prev = skb;

                        /* There's no room in the current skb */
                        if (skb_prev)
                                fraggap = skb_prev->len - maxfraglen;
                        else
                                fraggap = 0;

                        /*
                         * If remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;
                        if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen;

                        fraglen = datalen + fragheaderlen;
                        if ((flags & MSG_MORE) &&
                            !(rt->dst.dev->features & NETIF_F_SG))
                                alloclen = mtu;
                        else
                                alloclen = datalen + fragheaderlen;

                        /*
                         * The last fragment gets additional space at tail.
                         * Note: we overallocate on fragments with MSG_MORE
                         * because we have no idea if we're the last one.
                         */
                        if (datalen == length + fraggap)
                                alloclen += rt->dst.trailer_len;

                        /*
                         * We just reserve space for fragment header.
                         * Note: this may be overallocation if the message
                         * (without MSG_MORE) fits into the MTU.
                         */
                        alloclen += sizeof(struct frag_hdr);

                        if (transhdrlen) {
                                skb = sock_alloc_send_skb(sk,
                                                alloclen + hh_len,
                                                (flags & MSG_DONTWAIT), &err);
                        } else {
                                skb = NULL;
                                if (atomic_read(&sk->sk_wmem_alloc) <=
                                    2 * sk->sk_sndbuf)
                                        skb = sock_wmalloc(sk,
                                                           alloclen + hh_len, 1,
                                                           sk->sk_allocation);
                                if (unlikely(skb == NULL))
                                        err = -ENOBUFS;
                        }
                        if (skb == NULL)
                                goto error;
                        /*
                         * Fill in the control structures
                         */
                        skb->ip_summed = csummode;
                        skb->csum = 0;
                        /* reserve for fragmentation */
                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

                        /*
                         * Find where to start putting bytes
                         */
                        data = skb_put(skb, fraglen);
                        skb_set_network_header(skb, exthdrlen);
                        data += fragheaderlen;
                        skb->transport_header = (skb->network_header +
                                                 fragheaderlen);
                        if (fraggap) {
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
                                        data + transhdrlen, fraggap, 0);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }
                        copy = datalen - transhdrlen - fraggap;
                        if (copy < 0) {
                                err = -EINVAL;
                                kfree_skb(skb);
                                goto error;
                        } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
                        }

                        offset += copy;
                        length -= datalen - fraggap;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        csummode = CHECKSUM_NONE;

                        /*
                         * Put the packet on the pending queue
                         */
                        __skb_queue_tail(&sk->sk_write_queue, skb);
                        continue;
                }

                if (copy > length)
                        copy = length;

                if (!(rt->dst.dev->features & NETIF_F_SG)) {
                        unsigned int off;

                        off = skb->len;
                        if (getfrag(from, skb_put(skb, copy),
                                    offset, copy, off, skb) < 0) {
                                __skb_trim(skb, off);
                                err = -EFAULT;
                                goto error;
                        }
                } else {
                        int i = skb_shinfo(skb)->nr_frags;
                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
                        struct page *page = sk->sk_sndmsg_page;
                        int off = sk->sk_sndmsg_off;
                        unsigned int left;

                        if (page && (left = PAGE_SIZE - off) > 0) {
                                if (copy >= left)
                                        copy = left;
                                if (page != frag->page) {
                                        if (i == MAX_SKB_FRAGS) {
                                                err = -EMSGSIZE;
                                                goto error;
                                        }
                                        get_page(page);
                                        skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
                                        frag = &skb_shinfo(skb)->frags[i];
                                }
                        } else if (i < MAX_SKB_FRAGS) {
                                if (copy > PAGE_SIZE)
                                        copy = PAGE_SIZE;
                                page = alloc_pages(sk->sk_allocation, 0);
                                if (page == NULL) {
                                        err = -ENOMEM;
                                        goto error;
                                }
                                sk->sk_sndmsg_page = page;
                                sk->sk_sndmsg_off = 0;

                                skb_fill_page_desc(skb, i, page, 0, 0);
                                frag = &skb_shinfo(skb)->frags[i];
                        } else {
                                err = -EMSGSIZE;
                                goto error;
                        }
                        if (getfrag(from, page_address(frag->page) + frag->page_offset + frag->size, offset, copy, skb->len, skb) < 0) {
                                err = -EFAULT;
                                goto error;
                        }
                        sk->sk_sndmsg_off += copy;
                        frag->size += copy;
                        skb->len += copy;
                        skb->data_len += copy;
                        skb->truesize += copy;
                        atomic_add(copy, &sk->sk_wmem_alloc);
                }
                offset += copy;
                length -= copy;
        }
        return 0;
error:
        inet->cork.length -= length;
        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        return err;
}
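
/*
 * Usage sketch (illustrative, patterned on the datagram protocols):
 * append payload in one or more passes while the socket is corked,
 * then emit everything as one (possibly fragmented) datagram:
 *
 *      err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
 *                            sizeof(struct udphdr), hlimit, tclass,
 *                            opt, &fl, rt, msg->msg_flags, dontfrag);
 *      if (err)
 *              ip6_flush_pending_frames(sk);
 *      else if (!corked)       // corked: caller-tracked UDP_CORK state
 *              err = ip6_push_pending_frames(sk);
 */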

static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
        if (np->cork.opt) {
                kfree(np->cork.opt->dst0opt);
                kfree(np->cork.opt->dst1opt);
                kfree(np->cork.opt->hopopt);
                kfree(np->cork.opt->srcrt);
                kfree(np->cork.opt);
                np->cork.opt = NULL;
        }

        if (inet->cork.dst) {
                dst_release(inet->cork.dst);
                inet->cork.dst = NULL;
                inet->cork.flags &= ~IPCORK_ALLFRAG;
        }
        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

int ip6_push_pending_frames(struct sock *sk)
{
        struct sk_buff *skb, *tmp_skb;
        struct sk_buff **tail_skb;
        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct net *net = sock_net(sk);
        struct ipv6hdr *hdr;
        struct ipv6_txoptions *opt = np->cork.opt;
        struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
        struct flowi *fl = &inet->cork.fl;
        unsigned char proto = fl->proto;
        int err = 0;

        if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
                goto out;
        tail_skb = &(skb_shinfo(skb)->frag_list);

        /* move skb->data to ip header from ext header */
        if (skb->data < skb_network_header(skb))
                __skb_pull(skb, skb_network_offset(skb));
        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
                __skb_pull(tmp_skb, skb_network_header_len(skb));
                *tail_skb = tmp_skb;
                tail_skb = &(tmp_skb->next);
                skb->len += tmp_skb->len;
                skb->data_len += tmp_skb->len;
                skb->truesize += tmp_skb->truesize;
                tmp_skb->destructor = NULL;
                tmp_skb->sk = NULL;
        }

        /* Allow local fragmentation. */
        if (np->pmtudisc < IPV6_PMTUDISC_DO)
                skb->local_df = 1;

        ipv6_addr_copy(final_dst, &fl->fl6_dst);
        __skb_pull(skb, skb_network_header_len(skb));
        if (opt && opt->opt_flen)
                ipv6_push_frag_opts(skb, opt, &proto);
        if (opt && opt->opt_nflen)
                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        *(__be32*)hdr = fl->fl6_flowlabel |
                        htonl(0x60000000 | ((int)np->cork.tclass << 20));

        hdr->hop_limit = np->cork.hop_limit;
        hdr->nexthdr = proto;
        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
        ipv6_addr_copy(&hdr->daddr, final_dst);

        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;

        skb_dst_set(skb, dst_clone(&rt->dst));
        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
        if (proto == IPPROTO_ICMPV6) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
                ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
        }

        err = ip6_local_out(skb);
        if (err) {
                if (err > 0)
                        err = net_xmit_errno(err);
                if (err)
                        goto error;
        }

out:
        ip6_cork_release(inet, np);
        return err;
error:
        IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        goto out;
}

void ip6_flush_pending_frames(struct sock *sk)
{
        struct sk_buff *skb;

        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
                if (skb_dst(skb))
                        IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
        }

        ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}