net/ipv6/ip6_output.c
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

30
1da177e4 31#include <linux/errno.h>
ef76bc23 32#include <linux/kernel.h>
1da177e4
LT
33#include <linux/string.h>
34#include <linux/socket.h>
35#include <linux/net.h>
36#include <linux/netdevice.h>
37#include <linux/if_arp.h>
38#include <linux/in6.h>
39#include <linux/tcp.h>
40#include <linux/route.h>
b59f45d0 41#include <linux/module.h>
1da177e4
LT
42
43#include <linux/netfilter.h>
44#include <linux/netfilter_ipv6.h>
45
46#include <net/sock.h>
47#include <net/snmp.h>
48
49#include <net/ipv6.h>
50#include <net/ndisc.h>
51#include <net/protocol.h>
52#include <net/ip6_route.h>
53#include <net/addrconf.h>
54#include <net/rawv6.h>
55#include <net/icmp.h>
56#include <net/xfrm.h>
57#include <net/checksum.h>
58
59static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60
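/*
 * Choose the fragment identification for an outgoing packet.  A single
 * lock-protected global counter is shared by all flows; zero is skipped,
 * apparently so that callers (e.g. the slow path of ip6_fragment) can use
 * a zero frag_id to mean "not yet selected".
 */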
static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{
    static u32 ipv6_fragmentation_id = 1;
    static DEFINE_SPINLOCK(ip6_id_lock);

    spin_lock_bh(&ip6_id_lock);
    fhdr->identification = htonl(ipv6_fragmentation_id);
    if (++ipv6_fragmentation_id == 0)
        ipv6_fragmentation_id = 1;
    spin_unlock_bh(&ip6_id_lock);
}

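/*
 * Fill in the payload length of a locally generated packet and pass it
 * to the NF_INET_LOCAL_OUT netfilter hook.  __ip6_local_out() returns
 * the hook's verdict; ip6_local_out() additionally invokes dst_output()
 * when the hook accepts the packet (return value 1).
 */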
int __ip6_local_out(struct sk_buff *skb)
{
    int len;

    len = skb->len - sizeof(struct ipv6hdr);
    if (len > IPV6_MAXPLEN)
        len = 0;
    ipv6_hdr(skb)->payload_len = htons(len);

    return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
                   dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
    int err;

    err = __ip6_local_out(skb);
    if (likely(err == 1))
        err = dst_output(skb);

    return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

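/*
 * Final output step: hand the packet to the cached hardware header
 * routine or to the neighbour's output function.  Without a usable
 * neighbour entry the packet is counted as OUTNOROUTES and dropped.
 */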
static int ip6_output_finish(struct sk_buff *skb)
{
    struct dst_entry *dst = skb->dst;

    if (dst->hh)
        return neigh_hh_output(dst->hh, skb);
    else if (dst->neighbour)
        return dst->neighbour->output(skb);

    IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
    kfree_skb(skb);
    return -EINVAL;
}

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
    skb_reset_mac_header(newskb);
    __skb_pull(newskb, skb_network_offset(newskb));
    newskb->pkt_type = PACKET_LOOPBACK;
    newskb->ip_summed = CHECKSUM_UNNECESSARY;
    BUG_TRAP(newskb->dst);

    netif_rx(newskb);
    return 0;
}

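/*
 * Post-routing output.  For multicast destinations this also loops a
 * clone of the packet back to local listeners via ip6_dev_loopback_xmit()
 * unless the sending socket disabled mc_loop; a looped-back multicast
 * packet with a hop limit of zero is delivered locally only and the
 * original is discarded.
 */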
static int ip6_output2(struct sk_buff *skb)
{
    struct dst_entry *dst = skb->dst;
    struct net_device *dev = dst->dev;

    skb->protocol = htons(ETH_P_IPV6);
    skb->dev = dev;

    if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
        struct inet6_dev *idev = ip6_dst_idev(skb->dst);

        if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
            ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                &ipv6_hdr(skb)->saddr)) {
            struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

            /* Do not check for IFF_ALLMULTI; multicast routing
               is not supported in any case.
             */
            if (newskb)
                NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
                        NULL, newskb->dev,
                        ip6_dev_loopback_xmit);

            if (ipv6_hdr(skb)->hop_limit == 0) {
                IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
            }
        }

        IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
    }

    return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
                   ip6_output_finish);
}

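/*
 * MTU to use for fragmentation decisions: the interface MTU if the
 * sending socket probes path MTU (IPV6_PMTUDISC_PROBE), otherwise the
 * MTU cached in the destination entry.
 */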
static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
    struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

    return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
           skb->dst->dev->mtu : dst_mtu(skb->dst);
}

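/*
 * Standard dst_entry output method: fragment oversized non-GSO packets
 * (or anything on an "allfrag" route) and send everything else as-is.
 */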
int ip6_output(struct sk_buff *skb)
{
    if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
        dst_allfrag(skb->dst))
        return ip6_fragment(skb, ip6_output2);
    else
        return ip6_output2(skb);
}

/*
 *	xmit an sk_buff (used by TCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
             struct ipv6_txoptions *opt, int ipfragok)
{
    struct ipv6_pinfo *np = inet6_sk(sk);
    struct in6_addr *first_hop = &fl->fl6_dst;
    struct dst_entry *dst = skb->dst;
    struct ipv6hdr *hdr;
    u8 proto = fl->proto;
    int seg_len = skb->len;
    int hlimit, tclass;
    u32 mtu;

    if (opt) {
        unsigned int head_room;

        /* First: exthdrs may take lots of space (~8K for now)
           MAX_HEADER is not enough.
         */
        head_room = opt->opt_nflen + opt->opt_flen;
        seg_len += head_room;
        head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

        if (skb_headroom(skb) < head_room) {
            struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
            if (skb2 == NULL) {
                IP6_INC_STATS(ip6_dst_idev(skb->dst),
                              IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return -ENOBUFS;
            }
            kfree_skb(skb);
            skb = skb2;
            if (sk)
                skb_set_owner_w(skb, sk);
        }
        if (opt->opt_flen)
            ipv6_push_frag_opts(skb, opt, &proto);
        if (opt->opt_nflen)
            ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
    }

    skb_push(skb, sizeof(struct ipv6hdr));
    skb_reset_network_header(skb);
    hdr = ipv6_hdr(skb);

    /*
     *	Fill in the IPv6 header
     */

    hlimit = -1;
    if (np)
        hlimit = np->hop_limit;
    if (hlimit < 0)
        hlimit = dst_metric(dst, RTAX_HOPLIMIT);
    if (hlimit < 0)
        hlimit = ipv6_get_hoplimit(dst->dev);

    tclass = -1;
    if (np)
        tclass = np->tclass;
    if (tclass < 0)
        tclass = 0;

    *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

    hdr->payload_len = htons(seg_len);
    hdr->nexthdr = proto;
    hdr->hop_limit = hlimit;

    ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
    ipv6_addr_copy(&hdr->daddr, first_hop);

    skb->priority = sk->sk_priority;

    mtu = dst_mtu(dst);
    if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
        IP6_INC_STATS(ip6_dst_idev(skb->dst),
                      IPSTATS_MIB_OUTREQUESTS);
        return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
                       dst_output);
    }

    if (net_ratelimit())
        printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
    skb->dev = dst->dev;
    icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
    IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
    kfree_skb(skb);
    return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is for us performance critical)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
               struct in6_addr *saddr, struct in6_addr *daddr,
               int proto, int len)
{
    struct ipv6_pinfo *np = inet6_sk(sk);
    struct ipv6hdr *hdr;
    int totlen;

    skb->protocol = htons(ETH_P_IPV6);
    skb->dev = dev;

    totlen = len + sizeof(struct ipv6hdr);

    skb_reset_network_header(skb);
    skb_put(skb, sizeof(struct ipv6hdr));
    hdr = ipv6_hdr(skb);

    *(__be32 *)hdr = htonl(0x60000000);

    hdr->payload_len = htons(len);
    hdr->nexthdr = proto;
    hdr->hop_limit = np->hop_limit;

    ipv6_addr_copy(&hdr->saddr, saddr);
    ipv6_addr_copy(&hdr->daddr, daddr);

    return 0;
}

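/*
 * Deliver a packet carrying a Router Alert option to every raw socket
 * registered for this alert value on ip6_ra_chain.  Returns 1 if at
 * least one socket consumed the skb, 0 if the caller still owns it.
 */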
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
    struct ip6_ra_chain *ra;
    struct sock *last = NULL;

    read_lock(&ip6_ra_lock);
    for (ra = ip6_ra_chain; ra; ra = ra->next) {
        struct sock *sk = ra->sk;
        if (sk && ra->sel == sel &&
            (!sk->sk_bound_dev_if ||
             sk->sk_bound_dev_if == skb->dev->ifindex)) {
            if (last) {
                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                if (skb2)
                    rawv6_rcv(last, skb2);
            }
            last = sk;
        }
    }

    if (last) {
        rawv6_rcv(last, skb);
        read_unlock(&ip6_ra_lock);
        return 1;
    }
    read_unlock(&ip6_ra_lock);
    return 0;
}

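/*
 * Decide what to do with a packet destined to an address we proxy for:
 *  1 - unicast neighbour discovery message; hand it to local input,
 *  0 - forward it as usual,
 * -1 - link-local destination; signal link failure and drop.
 */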
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
    struct ipv6hdr *hdr = ipv6_hdr(skb);
    u8 nexthdr = hdr->nexthdr;
    int offset;

    if (ipv6_ext_hdr(nexthdr)) {
        offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
        if (offset < 0)
            return 0;
    } else
        offset = sizeof(struct ipv6hdr);

    if (nexthdr == IPPROTO_ICMPV6) {
        struct icmp6hdr *icmp6;

        if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                 offset + 1 - skb->data)))
            return 0;

        icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

        switch (icmp6->icmp6_type) {
        case NDISC_ROUTER_SOLICITATION:
        case NDISC_ROUTER_ADVERTISEMENT:
        case NDISC_NEIGHBOUR_SOLICITATION:
        case NDISC_NEIGHBOUR_ADVERTISEMENT:
        case NDISC_REDIRECT:
            /* For reaction involving unicast neighbor discovery
             * message destined to the proxied address, pass it to
             * input function.
             */
            return 1;
        default:
            break;
        }
    }

    /*
     * The proxying router can't forward traffic sent to a link-local
     * address, so signal the sender and discard the packet. This
     * behavior is clarified by the MIPv6 specification.
     */
    if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
        dst_link_failure(skb);
        return -1;
    }

    return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
    return dst_output(skb);
}

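/*
 * Forwarding path: police the packet (xfrm policy, Router Alert
 * delivery, hop limit, NDP proxying), possibly emit a redirect or an
 * ICMPV6_PKT_TOOBIG error, then decrement the hop limit and hand the
 * packet to the NF_INET_FORWARD hook.
 */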
int ip6_forward(struct sk_buff *skb)
{
    struct dst_entry *dst = skb->dst;
    struct ipv6hdr *hdr = ipv6_hdr(skb);
    struct inet6_skb_parm *opt = IP6CB(skb);

    if (ipv6_devconf.forwarding == 0)
        goto error;

    if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
        IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
        goto drop;
    }

    skb_forward_csum(skb);

    /*
     *	We DO NOT make any processing on
     *	RA packets, pushing them to user level AS IS
     *	without any WARRANTY that application will be able
     *	to interpret them. The reason is that we
     *	cannot make anything clever here.
     *
     *	We are not end-node, so that if packet contains
     *	AH/ESP, we cannot make anything.
     *	Defragmentation also would be mistake, RA packets
     *	cannot be fragmented, because there is no warranty
     *	that different fragments will go along one path. --ANK
     */
    if (opt->ra) {
        u8 *ptr = skb_network_header(skb) + opt->ra;
        if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
            return 0;
    }

    /*
     *	check and decrement ttl
     */
    if (hdr->hop_limit <= 1) {
        /* Force OUTPUT device used as source address */
        skb->dev = dst->dev;
        icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
                    0, skb->dev);
        IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

        kfree_skb(skb);
        return -ETIMEDOUT;
    }

    /* XXX: idev->cnf.proxy_ndp? */
    if (ipv6_devconf.proxy_ndp &&
        pneigh_lookup(&nd_tbl, &init_net, &hdr->daddr, skb->dev, 0)) {
        int proxied = ip6_forward_proxy_check(skb);
        if (proxied > 0)
            return ip6_input(skb);
        else if (proxied < 0) {
            IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
            goto drop;
        }
    }

    if (!xfrm6_route_forward(skb)) {
        IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
        goto drop;
    }
    dst = skb->dst;

    /* IPv6 specs say nothing about it, but it is clear that we cannot
       send redirects to source routed frames.
       We don't send redirects to frames decapsulated from IPsec.
     */
    if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
        !skb->sp) {
        struct in6_addr *target = NULL;
        struct rt6_info *rt;
        struct neighbour *n = dst->neighbour;

        /*
         *	incoming and outgoing devices are the same
         *	send a redirect.
         */

        rt = (struct rt6_info *) dst;
        if ((rt->rt6i_flags & RTF_GATEWAY))
            target = (struct in6_addr *)&n->primary_key;
        else
            target = &hdr->daddr;

        /* Limit redirects both by destination (here)
           and by source (inside ndisc_send_redirect)
         */
        if (xrlim_allow(dst, 1*HZ))
            ndisc_send_redirect(skb, n, target);
    } else {
        int addrtype = ipv6_addr_type(&hdr->saddr);

        /* This check is security critical. */
        if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
            goto error;
        if (addrtype & IPV6_ADDR_LINKLOCAL) {
            icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                        ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
            goto error;
        }
    }

    if (skb->len > dst_mtu(dst)) {
        /* Again, force OUTPUT device used as source address */
        skb->dev = dst->dev;
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
        IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
        IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
    }

    if (skb_cow(skb, dst->dev->hard_header_len)) {
        IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
        goto drop;
    }

    hdr = ipv6_hdr(skb);

    /* Mangling hops number delayed to point after skb COW */

    hdr->hop_limit--;

    IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
    return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
                   ip6_forward_finish);

error:
    IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
    kfree_skb(skb);
    return -EINVAL;
}

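/*
 * Propagate per-packet metadata (type, priority, dst, marks, netfilter
 * and security state) from the original skb to a freshly allocated
 * fragment.
 */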
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
    to->pkt_type = from->pkt_type;
    to->priority = from->priority;
    to->protocol = from->protocol;
    dst_release(to->dst);
    to->dst = dst_clone(from->dst);
    to->dev = from->dev;
    to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
    to->tc_index = from->tc_index;
#endif
    nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
    to->nf_trace = from->nf_trace;
#endif
    skb_copy_secmark(to, from);
}

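/*
 * Find where a Fragment header must be inserted: walk the extension
 * headers that have to precede fragmentation (hop-by-hop, routing,
 * certain destination options) and return the offset just past them.
 * *nexthdr is left pointing at the nexthdr byte to be rewritten.
 */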
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
    u16 offset = sizeof(struct ipv6hdr);
    struct ipv6_opt_hdr *exthdr =
                (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
    unsigned int packet_len = skb->tail - skb->network_header;
    int found_rhdr = 0;
    *nexthdr = &ipv6_hdr(skb)->nexthdr;

    while (offset + 1 <= packet_len) {

        switch (**nexthdr) {

        case NEXTHDR_HOP:
            break;
        case NEXTHDR_ROUTING:
            found_rhdr = 1;
            break;
        case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
            if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
                break;
#endif
            if (found_rhdr)
                return offset;
            break;
        default:
            return offset;
        }

        offset += ipv6_optlen(exthdr);
        *nexthdr = &exthdr->nexthdr;
        exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
                                         offset);
    }

    return offset;
}
EXPORT_SYMBOL_GPL(ip6_find_1stfragopt);

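/*
 * Fragment an oversized packet.  The fast path reuses the skbs already
 * hanging off frag_list when their geometry fits the MTU; otherwise the
 * slow path allocates a fresh skb per fragment and copies the data.
 */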
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
    struct net_device *dev;
    struct sk_buff *frag;
    struct rt6_info *rt = (struct rt6_info *)skb->dst;
    struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
    struct ipv6hdr *tmp_hdr;
    struct frag_hdr *fh;
    unsigned int mtu, hlen, left, len;
    __be32 frag_id = 0;
    int ptr, offset = 0, err = 0;
    u8 *prevhdr, nexthdr = 0;

    dev = rt->u.dst.dev;
    hlen = ip6_find_1stfragopt(skb, &prevhdr);
    nexthdr = *prevhdr;

    mtu = ip6_skb_dst_mtu(skb);

    /* We must not fragment if the socket is set to force MTU discovery
     * or if the skb is not generated by a local socket.  (This last
     * check should be redundant, but it's free.)
     */
    if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
        skb->dev = skb->dst->dev;
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
        IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
    }

    if (np && np->frag_size < mtu) {
        if (np->frag_size)
            mtu = np->frag_size;
    }
    mtu -= hlen + sizeof(struct frag_hdr);

    if (skb_shinfo(skb)->frag_list) {
        int first_len = skb_pagelen(skb);
        int truesizes = 0;

        if (first_len - hlen > mtu ||
            ((first_len - hlen) & 7) ||
            skb_cloned(skb))
            goto slow_path;

        for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
            /* Correct geometry. */
            if (frag->len > mtu ||
                ((frag->len & 7) && frag->next) ||
                skb_headroom(frag) < hlen)
                goto slow_path;

            /* Partially cloned skb? */
            if (skb_shared(frag))
                goto slow_path;

            BUG_ON(frag->sk);
            if (skb->sk) {
                sock_hold(skb->sk);
                frag->sk = skb->sk;
                frag->destructor = sock_wfree;
                truesizes += frag->truesize;
            }
        }

        err = 0;
        offset = 0;
        frag = skb_shinfo(skb)->frag_list;
        skb_shinfo(skb)->frag_list = NULL;
        /* BUILD HEADER */

        *prevhdr = NEXTHDR_FRAGMENT;
        tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
        if (!tmp_hdr) {
            IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
            return -ENOMEM;
        }

        __skb_pull(skb, hlen);
        fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
        __skb_push(skb, hlen);
        skb_reset_network_header(skb);
        memcpy(skb_network_header(skb), tmp_hdr, hlen);

        ipv6_select_ident(skb, fh);
        fh->nexthdr = nexthdr;
        fh->reserved = 0;
        fh->frag_off = htons(IP6_MF);
        frag_id = fh->identification;

        first_len = skb_pagelen(skb);
        skb->data_len = first_len - skb_headlen(skb);
        skb->truesize -= truesizes;
        skb->len = first_len;
        ipv6_hdr(skb)->payload_len = htons(first_len -
                                           sizeof(struct ipv6hdr));

        dst_hold(&rt->u.dst);

        for (;;) {
            /* Prepare header of the next frame,
             * before previous one went down. */
            if (frag) {
                frag->ip_summed = CHECKSUM_NONE;
                skb_reset_transport_header(frag);
                fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
                __skb_push(frag, hlen);
                skb_reset_network_header(frag);
                memcpy(skb_network_header(frag), tmp_hdr,
                       hlen);
                offset += skb->len - hlen - sizeof(struct frag_hdr);
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->frag_off = htons(offset);
                if (frag->next != NULL)
                    fh->frag_off |= htons(IP6_MF);
                fh->identification = frag_id;
                ipv6_hdr(frag)->payload_len =
                        htons(frag->len -
                              sizeof(struct ipv6hdr));
                ip6_copy_metadata(frag, skb);
            }

            err = output(skb);
            if (!err)
                IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);

            if (err || !frag)
                break;

            skb = frag;
            frag = skb->next;
            skb->next = NULL;
        }

        kfree(tmp_hdr);

        if (err == 0) {
            IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
            dst_release(&rt->u.dst);
            return 0;
        }

        while (frag) {
            skb = frag->next;
            kfree_skb(frag);
            frag = skb;
        }

        IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
        dst_release(&rt->u.dst);
        return err;
    }

slow_path:
    left = skb->len - hlen;	/* Space per frame */
    ptr = hlen;			/* Where to start from */

    /*
     *	Fragment the datagram.
     */

    *prevhdr = NEXTHDR_FRAGMENT;

    /*
     *	Keep copying data until we run out.
     */
    while (left > 0) {
        len = left;
        /* IF: it doesn't fit, use 'mtu' - the data space left */
        if (len > mtu)
            len = mtu;
        /* IF: we are not sending up to and including the packet end
           then align the next start on an eight byte boundary */
        if (len < left) {
            len &= ~7;
        }
        /*
         *	Allocate buffer.
         */

        if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
                              LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
            NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
            IP6_INC_STATS(ip6_dst_idev(skb->dst),
                          IPSTATS_MIB_FRAGFAILS);
            err = -ENOMEM;
            goto fail;
        }

        /*
         *	Set up data on packet
         */

        ip6_copy_metadata(frag, skb);
        skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
        skb_put(frag, len + hlen + sizeof(struct frag_hdr));
        skb_reset_network_header(frag);
        fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
        frag->transport_header = (frag->network_header + hlen +
                                  sizeof(struct frag_hdr));

        /*
         *	Charge the memory for the fragment to any owner
         *	it might possess
         */
        if (skb->sk)
            skb_set_owner_w(frag, skb->sk);

        /*
         *	Copy the packet header into the new buffer.
         */
        skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

        /*
         *	Build fragment header.
         */
        fh->nexthdr = nexthdr;
        fh->reserved = 0;
        if (!frag_id) {
            ipv6_select_ident(skb, fh);
            frag_id = fh->identification;
        } else
            fh->identification = frag_id;

        /*
         *	Copy a block of the IP datagram.
         */
        if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
            BUG();
        left -= len;

        fh->frag_off = htons(offset);
        if (left > 0)
            fh->frag_off |= htons(IP6_MF);
        ipv6_hdr(frag)->payload_len = htons(frag->len -
                                            sizeof(struct ipv6hdr));

        ptr += len;
        offset += len;

        /*
         *	Put this fragment into the sending queue.
         */
        err = output(frag);
        if (err)
            goto fail;

        IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
    }
    IP6_INC_STATS(ip6_dst_idev(skb->dst),
                  IPSTATS_MIB_FRAGOKS);
    kfree_skb(skb);
    return err;

fail:
    IP6_INC_STATS(ip6_dst_idev(skb->dst),
                  IPSTATS_MIB_FRAGFAILS);
    kfree_skb(skb);
    return err;
}

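/*
 * Helper for ip6_sk_dst_check(): a cached route is considered stale
 * unless it is a host route to the flow's address, or the flow's
 * address matches the last used address cached in the socket.
 */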
static inline int ip6_rt_check(struct rt6key *rt_key,
                               struct in6_addr *fl_addr,
                               struct in6_addr *addr_cache)
{
    return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
            (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          struct flowi *fl)
{
    struct ipv6_pinfo *np = inet6_sk(sk);
    struct rt6_info *rt = (struct rt6_info *)dst;

    if (!dst)
        goto out;

    /* Yes, checking route validity in the not connected
     * case is not very simple. Take into account
     * that we do not support routing by source, TOS,
     * and MSG_DONTROUTE		--ANK (980726)
     *
     * 1. ip6_rt_check(): If route was host route,
     *    check that cached destination is current.
     *    If it is network route, we still may
     *    check its validity using saved pointer
     *    to the last used address: daddr_cache.
     *    We do not want to save whole address now,
     *    (because main consumer of this service
     *    is tcp, which does not have this problem),
     *    so that the last trick works only on connected
     *    sockets.
     * 2. oif also should be the same.
     */
    if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
        ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
        (fl->oif && fl->oif != dst->dev->ifindex)) {
        dst_release(dst);
        dst = NULL;
    }

out:
    return dst;
}

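/*
 * Common tail of the dst lookups: resolve the route if none was cached,
 * pick a source address when the flow has none, and (with optimistic
 * DAD) fall back to the default router's dst entry while our source
 * address is still optimistic and the neighbour is unresolved.
 */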
static int ip6_dst_lookup_tail(struct sock *sk,
                               struct dst_entry **dst, struct flowi *fl)
{
    int err;

    if (*dst == NULL)
        *dst = ip6_route_output(sk, fl);

    if ((err = (*dst)->error))
        goto out_err_release;

    if (ipv6_addr_any(&fl->fl6_src)) {
        err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
        if (err)
            goto out_err_release;
    }

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
    /*
     * Here if the dst entry we've looked up
     * has a neighbour entry that is in the INCOMPLETE
     * state and the src address from the flow is
     * marked as OPTIMISTIC, we release the found
     * dst entry and replace it instead with the
     * dst entry of the nexthop router
     */
    if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
        struct inet6_ifaddr *ifp;
        struct flowi fl_gw;
        int redirect;

        ifp = ipv6_get_ifaddr(&init_net, &fl->fl6_src,
                              (*dst)->dev, 1);

        redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
        if (ifp)
            in6_ifa_put(ifp);

        if (redirect) {
            /*
             * We need to get the dst entry for the
             * default router instead
             */
            dst_release(*dst);
            memcpy(&fl_gw, fl, sizeof(struct flowi));
            memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
            *dst = ip6_route_output(sk, &fl_gw);
            if ((err = (*dst)->error))
                goto out_err_release;
        }
    }
#endif

    return 0;

out_err_release:
    if (err == -ENETUNREACH)
        IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
    dst_release(*dst);
    *dst = NULL;
    return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
    *dst = NULL;
    return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
    *dst = NULL;
    if (sk) {
        *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
        *dst = ip6_sk_dst_check(sk, *dst, fl);
    }

    return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);

static inline int ip6_ufo_append_data(struct sock *sk,
            int getfrag(void *from, char *to, int offset, int len,
                        int odd, struct sk_buff *skb),
            void *from, int length, int hh_len, int fragheaderlen,
            int transhdrlen, int mtu, unsigned int flags)

{
    struct sk_buff *skb;
    int err;

    /* There is support for UDP large send offload by network
     * device, so create one single skb packet containing complete
     * udp datagram
     */
    if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
        skb = sock_alloc_send_skb(sk,
            hh_len + fragheaderlen + transhdrlen + 20,
            (flags & MSG_DONTWAIT), &err);
        if (skb == NULL)
            return -ENOMEM;

        /* reserve space for Hardware header */
        skb_reserve(skb, hh_len);

        /* create space for UDP/IP header */
        skb_put(skb, fragheaderlen + transhdrlen);

        /* initialize network header pointer */
        skb_reset_network_header(skb);

        /* initialize protocol header pointer */
        skb->transport_header = skb->network_header + fragheaderlen;

        skb->ip_summed = CHECKSUM_PARTIAL;
        skb->csum = 0;
        sk->sk_sndmsg_off = 0;
    }

    err = skb_append_datato_frags(sk, skb, getfrag, from,
                                  (length - transhdrlen));
    if (!err) {
        struct frag_hdr fhdr;

        /* specify the length of each IP datagram fragment */
        skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
                                    sizeof(struct frag_hdr);
        skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
        ipv6_select_ident(skb, &fhdr);
        skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
        __skb_queue_tail(&sk->sk_write_queue, skb);

        return 0;
    }
    /* There is not enough support to do UDP LSO,
     * so follow normal path
     */
    kfree_skb(skb);

    return err;
}

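/*
 * Append data to the socket's pending (corked) queue, growing the last
 * skb or allocating new fragment-sized skbs as needed.  The first call
 * sets up the cork state (options, route, MTU); the queued packet is
 * finally emitted by ip6_push_pending_frames().  Oversized UDP payloads
 * are diverted to ip6_ufo_append_data() when the device supports UFO.
 */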
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
    int offset, int len, int odd, struct sk_buff *skb),
    void *from, int length, int transhdrlen,
    int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
    struct rt6_info *rt, unsigned int flags)
{
    struct inet_sock *inet = inet_sk(sk);
    struct ipv6_pinfo *np = inet6_sk(sk);
    struct sk_buff *skb;
    unsigned int maxfraglen, fragheaderlen;
    int exthdrlen;
    int hh_len;
    int mtu;
    int copy;
    int err;
    int offset = 0;
    int csummode = CHECKSUM_NONE;

    if (flags & MSG_PROBE)
        return 0;
    if (skb_queue_empty(&sk->sk_write_queue)) {
        /*
         * setup for corking
         */
        if (opt) {
            if (np->cork.opt == NULL) {
                np->cork.opt = kmalloc(opt->tot_len,
                                       sk->sk_allocation);
                if (unlikely(np->cork.opt == NULL))
                    return -ENOBUFS;
            } else if (np->cork.opt->tot_len < opt->tot_len) {
                printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
                return -EINVAL;
            }
            memcpy(np->cork.opt, opt, opt->tot_len);
            inet->cork.flags |= IPCORK_OPT;
            /* need source address above miyazawa*/
        }
        dst_hold(&rt->u.dst);
        np->cork.rt = rt;
        inet->cork.fl = *fl;
        np->cork.hop_limit = hlimit;
        np->cork.tclass = tclass;
        mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
              rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
        if (np->frag_size < mtu) {
            if (np->frag_size)
                mtu = np->frag_size;
        }
        inet->cork.fragsize = mtu;
        if (dst_allfrag(rt->u.dst.path))
            inet->cork.flags |= IPCORK_ALLFRAG;
        inet->cork.length = 0;
        sk->sk_sndmsg_page = NULL;
        sk->sk_sndmsg_off = 0;
        exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
                    rt->rt6i_nfheader_len;
        length += exthdrlen;
        transhdrlen += exthdrlen;
    } else {
        rt = np->cork.rt;
        fl = &inet->cork.fl;
        if (inet->cork.flags & IPCORK_OPT)
            opt = np->cork.opt;
        transhdrlen = 0;
        exthdrlen = 0;
        mtu = inet->cork.fragsize;
    }

    hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

    fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
                    (opt ? opt->opt_nflen : 0);
    maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

    if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
        if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
            ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
            return -EMSGSIZE;
        }
    }

    /*
     * Let's try using as much space as possible.
     * Use MTU if total length of the message fits into the MTU.
     * Otherwise, we need to reserve fragment header and
     * fragment alignment (= 8-15 octets, in total).
     *
     * Note that we may need to "move" the data from the tail
     * of the buffer to the new fragment when we split
     * the message.
     *
     * FIXME: It may be fragmented into multiple chunks
     *        at once if non-fragmentable extension headers
     *        are too large.
     * --yoshfuji
     */

    inet->cork.length += length;
    if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
        (rt->u.dst.dev->features & NETIF_F_UFO)) {

        err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
                                  fragheaderlen, transhdrlen, mtu,
                                  flags);
        if (err)
            goto error;
        return 0;
    }

    if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
        goto alloc_new_skb;

    while (length > 0) {
        /* Check if the remaining data fits into current packet. */
        copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
        if (copy < length)
            copy = maxfraglen - skb->len;

        if (copy <= 0) {
            char *data;
            unsigned int datalen;
            unsigned int fraglen;
            unsigned int fraggap;
            unsigned int alloclen;
            struct sk_buff *skb_prev;
alloc_new_skb:
            skb_prev = skb;

            /* There's no room in the current skb */
            if (skb_prev)
                fraggap = skb_prev->len - maxfraglen;
            else
                fraggap = 0;

            /*
             * If remaining data exceeds the mtu,
             * we know we need more fragment(s).
             */
            datalen = length + fraggap;
            if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                datalen = maxfraglen - fragheaderlen;

            fraglen = datalen + fragheaderlen;
            if ((flags & MSG_MORE) &&
                !(rt->u.dst.dev->features & NETIF_F_SG))
                alloclen = mtu;
            else
                alloclen = datalen + fragheaderlen;

            /*
             * The last fragment gets additional space at tail.
             * Note: we overallocate on fragments with MSG_MORE
             * because we have no idea if we're the last one.
             */
            if (datalen == length + fraggap)
                alloclen += rt->u.dst.trailer_len;

            /*
             * We just reserve space for fragment header.
             * Note: this may be overallocation if the message
             * (without MSG_MORE) fits into the MTU.
             */
            alloclen += sizeof(struct frag_hdr);

            if (transhdrlen) {
                skb = sock_alloc_send_skb(sk,
                        alloclen + hh_len,
                        (flags & MSG_DONTWAIT), &err);
            } else {
                skb = NULL;
                if (atomic_read(&sk->sk_wmem_alloc) <=
                    2 * sk->sk_sndbuf)
                    skb = sock_wmalloc(sk,
                                       alloclen + hh_len, 1,
                                       sk->sk_allocation);
                if (unlikely(skb == NULL))
                    err = -ENOBUFS;
            }
            if (skb == NULL)
                goto error;
            /*
             * Fill in the control structures
             */
            skb->ip_summed = csummode;
            skb->csum = 0;
            /* reserve for fragmentation */
            skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

            /*
             * Find where to start putting bytes
             */
            data = skb_put(skb, fraglen);
            skb_set_network_header(skb, exthdrlen);
            data += fragheaderlen;
            skb->transport_header = (skb->network_header +
                                     fragheaderlen);
            if (fraggap) {
                skb->csum = skb_copy_and_csum_bits(
                    skb_prev, maxfraglen,
                    data + transhdrlen, fraggap, 0);
                skb_prev->csum = csum_sub(skb_prev->csum,
                                          skb->csum);
                data += fraggap;
                pskb_trim_unique(skb_prev, maxfraglen);
            }
            copy = datalen - transhdrlen - fraggap;
            if (copy < 0) {
                err = -EINVAL;
                kfree_skb(skb);
                goto error;
            } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
                err = -EFAULT;
                kfree_skb(skb);
                goto error;
            }

            offset += copy;
            length -= datalen - fraggap;
            transhdrlen = 0;
            exthdrlen = 0;
            csummode = CHECKSUM_NONE;

            /*
             * Put the packet on the pending queue
             */
            __skb_queue_tail(&sk->sk_write_queue, skb);
            continue;
        }

        if (copy > length)
            copy = length;

        if (!(rt->u.dst.dev->features & NETIF_F_SG)) {
            unsigned int off;

            off = skb->len;
            if (getfrag(from, skb_put(skb, copy),
                        offset, copy, off, skb) < 0) {
                __skb_trim(skb, off);
                err = -EFAULT;
                goto error;
            }
        } else {
            int i = skb_shinfo(skb)->nr_frags;
            skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
            struct page *page = sk->sk_sndmsg_page;
            int off = sk->sk_sndmsg_off;
            unsigned int left;

            if (page && (left = PAGE_SIZE - off) > 0) {
                if (copy >= left)
                    copy = left;
                if (page != frag->page) {
                    if (i == MAX_SKB_FRAGS) {
                        err = -EMSGSIZE;
                        goto error;
                    }
                    get_page(page);
                    skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
                    frag = &skb_shinfo(skb)->frags[i];
                }
            } else if (i < MAX_SKB_FRAGS) {
                if (copy > PAGE_SIZE)
                    copy = PAGE_SIZE;
                page = alloc_pages(sk->sk_allocation, 0);
                if (page == NULL) {
                    err = -ENOMEM;
                    goto error;
                }
                sk->sk_sndmsg_page = page;
                sk->sk_sndmsg_off = 0;

                skb_fill_page_desc(skb, i, page, 0, 0);
                frag = &skb_shinfo(skb)->frags[i];
            } else {
                err = -EMSGSIZE;
                goto error;
            }
            if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
                err = -EFAULT;
                goto error;
            }
            sk->sk_sndmsg_off += copy;
            frag->size += copy;
            skb->len += copy;
            skb->data_len += copy;
            skb->truesize += copy;
            atomic_add(copy, &sk->sk_wmem_alloc);
        }
        offset += copy;
        length -= copy;
    }
    return 0;
error:
    inet->cork.length -= length;
    IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
    return err;
}

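/*
 * Drop the cork state set up by ip6_append_data(): free the copied
 * options, release the route, and clear the cached flow.
 */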
static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
    inet->cork.flags &= ~IPCORK_OPT;
    kfree(np->cork.opt);
    np->cork.opt = NULL;
    if (np->cork.rt) {
        dst_release(&np->cork.rt->u.dst);
        np->cork.rt = NULL;
        inet->cork.flags &= ~IPCORK_ALLFRAG;
    }
    memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

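/*
 * Collapse the socket's pending queue into a single packet (trailing
 * skbs become the frag_list), prepend the extension headers and IPv6
 * header, and transmit the result via ip6_local_out().
 */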
int ip6_push_pending_frames(struct sock *sk)
{
    struct sk_buff *skb, *tmp_skb;
    struct sk_buff **tail_skb;
    struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
    struct inet_sock *inet = inet_sk(sk);
    struct ipv6_pinfo *np = inet6_sk(sk);
    struct ipv6hdr *hdr;
    struct ipv6_txoptions *opt = np->cork.opt;
    struct rt6_info *rt = np->cork.rt;
    struct flowi *fl = &inet->cork.fl;
    unsigned char proto = fl->proto;
    int err = 0;

    if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
        goto out;
    tail_skb = &(skb_shinfo(skb)->frag_list);

    /* move skb->data to ip header from ext header */
    if (skb->data < skb_network_header(skb))
        __skb_pull(skb, skb_network_offset(skb));
    while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
        __skb_pull(tmp_skb, skb_network_header_len(skb));
        *tail_skb = tmp_skb;
        tail_skb = &(tmp_skb->next);
        skb->len += tmp_skb->len;
        skb->data_len += tmp_skb->len;
        skb->truesize += tmp_skb->truesize;
        __sock_put(tmp_skb->sk);
        tmp_skb->destructor = NULL;
        tmp_skb->sk = NULL;
    }

    ipv6_addr_copy(final_dst, &fl->fl6_dst);
    __skb_pull(skb, skb_network_header_len(skb));
    if (opt && opt->opt_flen)
        ipv6_push_frag_opts(skb, opt, &proto);
    if (opt && opt->opt_nflen)
        ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

    skb_push(skb, sizeof(struct ipv6hdr));
    skb_reset_network_header(skb);
    hdr = ipv6_hdr(skb);

    *(__be32 *)hdr = fl->fl6_flowlabel |
                     htonl(0x60000000 | ((int)np->cork.tclass << 20));

    hdr->hop_limit = np->cork.hop_limit;
    hdr->nexthdr = proto;
    ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
    ipv6_addr_copy(&hdr->daddr, final_dst);

    skb->priority = sk->sk_priority;

    skb->dst = dst_clone(&rt->u.dst);
    IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
    if (proto == IPPROTO_ICMPV6) {
        struct inet6_dev *idev = ip6_dst_idev(skb->dst);

        ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
        ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
    }

    err = ip6_local_out(skb);
    if (err) {
        if (err > 0)
            err = np->recverr ? net_xmit_errno(err) : 0;
        if (err)
            goto error;
    }

out:
    ip6_cork_release(inet, np);
    return err;
error:
    goto out;
}

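/*
 * Error-path counterpart of ip6_push_pending_frames(): discard every
 * pending skb and release the cork state.
 */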
void ip6_flush_pending_frames(struct sock *sk)
{
    struct sk_buff *skb;

    while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
        if (skb->dst)
            IP6_INC_STATS(ip6_dst_idev(skb->dst),
                          IPSTATS_MIB_OUTDISCARDS);
        kfree_skb(skb);
    }

    ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}